{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8536793580331228, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017073587160662456, "grad_norm": 0.984874427318573, "learning_rate": 8e-05, "loss": 1.8277, "step": 1 }, { "epoch": 0.0003414717432132491, "grad_norm": 2.0652096271514893, "learning_rate": 8e-05, "loss": 1.9121, "step": 2 }, { "epoch": 0.0005122076148198737, "grad_norm": 1.028632640838623, "learning_rate": 8e-05, "loss": 1.767, "step": 3 }, { "epoch": 0.0006829434864264982, "grad_norm": 1.1161367893218994, "learning_rate": 8e-05, "loss": 1.9291, "step": 4 }, { "epoch": 0.0008536793580331228, "grad_norm": 0.7931103706359863, "learning_rate": 8e-05, "loss": 1.9122, "step": 5 }, { "epoch": 0.0010244152296397474, "grad_norm": 0.7266281247138977, "learning_rate": 8e-05, "loss": 1.9258, "step": 6 }, { "epoch": 0.001195151101246372, "grad_norm": 0.6565880179405212, "learning_rate": 8e-05, "loss": 1.8015, "step": 7 }, { "epoch": 0.0013658869728529965, "grad_norm": 0.7315236330032349, "learning_rate": 8e-05, "loss": 1.9426, "step": 8 }, { "epoch": 0.001536622844459621, "grad_norm": 0.6117490530014038, "learning_rate": 8e-05, "loss": 1.9086, "step": 9 }, { "epoch": 0.0017073587160662455, "grad_norm": 0.6281381845474243, "learning_rate": 8e-05, "loss": 1.9584, "step": 10 }, { "epoch": 0.00187809458767287, "grad_norm": 0.7017361521720886, "learning_rate": 8e-05, "loss": 1.8707, "step": 11 }, { "epoch": 0.002048830459279495, "grad_norm": 0.6382277011871338, "learning_rate": 8e-05, "loss": 1.8707, "step": 12 }, { "epoch": 0.0022195663308861194, "grad_norm": 0.5695717930793762, "learning_rate": 8e-05, "loss": 1.8673, "step": 13 }, { "epoch": 0.002390302202492744, "grad_norm": 0.534807026386261, "learning_rate": 8e-05, "loss": 1.6003, "step": 14 }, { "epoch": 0.0025610380740993684, "grad_norm": 0.5130515098571777, "learning_rate": 8e-05, "loss": 1.8369, "step": 15 }, { "epoch": 0.002731773945705993, "grad_norm": 0.5873178839683533, "learning_rate": 8e-05, "loss": 1.8921, "step": 16 }, { "epoch": 0.0029025098173126175, "grad_norm": 0.5487902760505676, "learning_rate": 8e-05, "loss": 1.8066, "step": 17 }, { "epoch": 0.003073245688919242, "grad_norm": 0.5085095167160034, "learning_rate": 8e-05, "loss": 1.8704, "step": 18 }, { "epoch": 0.0032439815605258666, "grad_norm": 0.5867734551429749, "learning_rate": 8e-05, "loss": 1.668, "step": 19 }, { "epoch": 0.003414717432132491, "grad_norm": 0.5446407794952393, "learning_rate": 8e-05, "loss": 1.8952, "step": 20 }, { "epoch": 0.0035854533037391156, "grad_norm": 0.694047212600708, "learning_rate": 8e-05, "loss": 2.0447, "step": 21 }, { "epoch": 0.00375618917534574, "grad_norm": 0.5158835649490356, "learning_rate": 8e-05, "loss": 1.6716, "step": 22 }, { "epoch": 0.003926925046952365, "grad_norm": 0.5184605717658997, "learning_rate": 8e-05, "loss": 1.6682, "step": 23 }, { "epoch": 0.00409766091855899, "grad_norm": 0.5641247034072876, "learning_rate": 8e-05, "loss": 1.8282, "step": 24 }, { "epoch": 0.004268396790165614, "grad_norm": 0.496746301651001, "learning_rate": 8e-05, "loss": 1.601, "step": 25 }, { "epoch": 0.004439132661772239, "grad_norm": 0.4922087788581848, "learning_rate": 8e-05, "loss": 1.736, "step": 26 }, { "epoch": 0.004609868533378863, "grad_norm": 0.5168851613998413, "learning_rate": 8e-05, "loss": 1.7045, "step": 27 }, { "epoch": 0.004780604404985488, "grad_norm": 0.5458253026008606, "learning_rate": 8e-05, "loss": 1.7259, "step": 28 }, { "epoch": 0.004951340276592112, "grad_norm": 0.505854606628418, "learning_rate": 8e-05, "loss": 1.7582, "step": 29 }, { "epoch": 0.005122076148198737, "grad_norm": 0.540149986743927, "learning_rate": 8e-05, "loss": 1.6637, "step": 30 }, { "epoch": 0.005292812019805361, "grad_norm": 0.5268791317939758, "learning_rate": 8e-05, "loss": 1.5975, "step": 31 }, { "epoch": 0.005463547891411986, "grad_norm": 0.7249188423156738, "learning_rate": 8e-05, "loss": 2.055, "step": 32 }, { "epoch": 0.0056342837630186104, "grad_norm": 0.5146047472953796, "learning_rate": 8e-05, "loss": 1.853, "step": 33 }, { "epoch": 0.005805019634625235, "grad_norm": 0.4897962510585785, "learning_rate": 8e-05, "loss": 1.8206, "step": 34 }, { "epoch": 0.0059757555062318595, "grad_norm": 0.4945373237133026, "learning_rate": 8e-05, "loss": 1.7842, "step": 35 }, { "epoch": 0.006146491377838484, "grad_norm": 0.5072891712188721, "learning_rate": 8e-05, "loss": 1.8861, "step": 36 }, { "epoch": 0.006317227249445109, "grad_norm": 0.5136602520942688, "learning_rate": 8e-05, "loss": 1.8133, "step": 37 }, { "epoch": 0.006487963121051733, "grad_norm": 0.5170279145240784, "learning_rate": 8e-05, "loss": 1.7771, "step": 38 }, { "epoch": 0.006658698992658358, "grad_norm": 0.45667675137519836, "learning_rate": 8e-05, "loss": 1.6141, "step": 39 }, { "epoch": 0.006829434864264982, "grad_norm": 0.6465232372283936, "learning_rate": 8e-05, "loss": 1.7269, "step": 40 }, { "epoch": 0.007000170735871607, "grad_norm": 0.5361795425415039, "learning_rate": 8e-05, "loss": 1.7551, "step": 41 }, { "epoch": 0.007170906607478231, "grad_norm": 0.5216143131256104, "learning_rate": 8e-05, "loss": 1.8762, "step": 42 }, { "epoch": 0.007341642479084856, "grad_norm": 0.6230976581573486, "learning_rate": 8e-05, "loss": 1.7158, "step": 43 }, { "epoch": 0.00751237835069148, "grad_norm": 0.5228106379508972, "learning_rate": 8e-05, "loss": 1.6002, "step": 44 }, { "epoch": 0.007683114222298105, "grad_norm": 0.5083218216896057, "learning_rate": 8e-05, "loss": 1.8297, "step": 45 }, { "epoch": 0.00785385009390473, "grad_norm": 0.49918678402900696, "learning_rate": 8e-05, "loss": 1.8617, "step": 46 }, { "epoch": 0.008024585965511354, "grad_norm": 0.49985387921333313, "learning_rate": 8e-05, "loss": 1.7108, "step": 47 }, { "epoch": 0.00819532183711798, "grad_norm": 0.47472235560417175, "learning_rate": 8e-05, "loss": 1.8579, "step": 48 }, { "epoch": 0.008366057708724603, "grad_norm": 0.6900753378868103, "learning_rate": 8e-05, "loss": 1.9838, "step": 49 }, { "epoch": 0.008536793580331228, "grad_norm": 0.5124324560165405, "learning_rate": 8e-05, "loss": 1.958, "step": 50 }, { "epoch": 0.008707529451937852, "grad_norm": 0.5039240717887878, "learning_rate": 8e-05, "loss": 2.0222, "step": 51 }, { "epoch": 0.008878265323544477, "grad_norm": 0.5178497433662415, "learning_rate": 8e-05, "loss": 1.8601, "step": 52 }, { "epoch": 0.009049001195151101, "grad_norm": 0.5125068426132202, "learning_rate": 8e-05, "loss": 1.8827, "step": 53 }, { "epoch": 0.009219737066757727, "grad_norm": 0.49825766682624817, "learning_rate": 8e-05, "loss": 1.9982, "step": 54 }, { "epoch": 0.00939047293836435, "grad_norm": 0.5239601731300354, "learning_rate": 8e-05, "loss": 1.6746, "step": 55 }, { "epoch": 0.009561208809970976, "grad_norm": 0.729762077331543, "learning_rate": 8e-05, "loss": 1.7115, "step": 56 }, { "epoch": 0.0097319446815776, "grad_norm": 0.4677223563194275, "learning_rate": 8e-05, "loss": 1.7751, "step": 57 }, { "epoch": 0.009902680553184225, "grad_norm": 0.4785904586315155, "learning_rate": 8e-05, "loss": 1.7072, "step": 58 }, { "epoch": 0.010073416424790848, "grad_norm": 0.5048305988311768, "learning_rate": 8e-05, "loss": 1.904, "step": 59 }, { "epoch": 0.010244152296397474, "grad_norm": 0.5823861360549927, "learning_rate": 8e-05, "loss": 1.7538, "step": 60 }, { "epoch": 0.010414888168004097, "grad_norm": 0.6769489049911499, "learning_rate": 8e-05, "loss": 1.8961, "step": 61 }, { "epoch": 0.010585624039610723, "grad_norm": 0.4751957654953003, "learning_rate": 8e-05, "loss": 1.711, "step": 62 }, { "epoch": 0.010756359911217346, "grad_norm": 0.5029926896095276, "learning_rate": 8e-05, "loss": 1.5653, "step": 63 }, { "epoch": 0.010927095782823972, "grad_norm": 0.5223274230957031, "learning_rate": 8e-05, "loss": 1.7379, "step": 64 }, { "epoch": 0.011097831654430595, "grad_norm": 0.5371073484420776, "learning_rate": 8e-05, "loss": 2.042, "step": 65 }, { "epoch": 0.011268567526037221, "grad_norm": 0.5074995160102844, "learning_rate": 8e-05, "loss": 1.8584, "step": 66 }, { "epoch": 0.011439303397643845, "grad_norm": 0.4722536504268646, "learning_rate": 8e-05, "loss": 1.7056, "step": 67 }, { "epoch": 0.01161003926925047, "grad_norm": 0.4983955919742584, "learning_rate": 8e-05, "loss": 1.7112, "step": 68 }, { "epoch": 0.011780775140857094, "grad_norm": 0.7550249695777893, "learning_rate": 8e-05, "loss": 1.7211, "step": 69 }, { "epoch": 0.011951511012463719, "grad_norm": 0.48023349046707153, "learning_rate": 8e-05, "loss": 1.8257, "step": 70 }, { "epoch": 0.012122246884070343, "grad_norm": 0.5799466967582703, "learning_rate": 8e-05, "loss": 2.1966, "step": 71 }, { "epoch": 0.012292982755676968, "grad_norm": 0.4948328137397766, "learning_rate": 8e-05, "loss": 1.6065, "step": 72 }, { "epoch": 0.012463718627283592, "grad_norm": 0.5164682269096375, "learning_rate": 8e-05, "loss": 1.9643, "step": 73 }, { "epoch": 0.012634454498890217, "grad_norm": 0.5415278673171997, "learning_rate": 8e-05, "loss": 1.8468, "step": 74 }, { "epoch": 0.01280519037049684, "grad_norm": 0.45988425612449646, "learning_rate": 8e-05, "loss": 1.5733, "step": 75 }, { "epoch": 0.012975926242103466, "grad_norm": 0.5178214311599731, "learning_rate": 8e-05, "loss": 1.7655, "step": 76 }, { "epoch": 0.01314666211371009, "grad_norm": 0.5084595084190369, "learning_rate": 8e-05, "loss": 1.9474, "step": 77 }, { "epoch": 0.013317397985316715, "grad_norm": 0.4979074001312256, "learning_rate": 8e-05, "loss": 1.8161, "step": 78 }, { "epoch": 0.013488133856923339, "grad_norm": 0.4887935221195221, "learning_rate": 8e-05, "loss": 1.8132, "step": 79 }, { "epoch": 0.013658869728529964, "grad_norm": 0.4693731665611267, "learning_rate": 8e-05, "loss": 1.7616, "step": 80 }, { "epoch": 0.013829605600136588, "grad_norm": 0.4747352600097656, "learning_rate": 8e-05, "loss": 1.6617, "step": 81 }, { "epoch": 0.014000341471743213, "grad_norm": 0.470813125371933, "learning_rate": 8e-05, "loss": 1.6779, "step": 82 }, { "epoch": 0.014171077343349837, "grad_norm": 0.5038223266601562, "learning_rate": 8e-05, "loss": 1.9474, "step": 83 }, { "epoch": 0.014341813214956462, "grad_norm": 0.47798240184783936, "learning_rate": 8e-05, "loss": 1.7868, "step": 84 }, { "epoch": 0.014512549086563086, "grad_norm": 0.5056321024894714, "learning_rate": 8e-05, "loss": 1.8397, "step": 85 }, { "epoch": 0.014683284958169712, "grad_norm": 0.5007801651954651, "learning_rate": 8e-05, "loss": 1.8855, "step": 86 }, { "epoch": 0.014854020829776335, "grad_norm": 0.48175960779190063, "learning_rate": 8e-05, "loss": 1.7876, "step": 87 }, { "epoch": 0.01502475670138296, "grad_norm": 0.4425554871559143, "learning_rate": 8e-05, "loss": 1.6163, "step": 88 }, { "epoch": 0.015195492572989584, "grad_norm": 0.4779775142669678, "learning_rate": 8e-05, "loss": 1.7002, "step": 89 }, { "epoch": 0.01536622844459621, "grad_norm": 0.49986591935157776, "learning_rate": 8e-05, "loss": 1.9185, "step": 90 }, { "epoch": 0.015536964316202835, "grad_norm": 0.5019515156745911, "learning_rate": 8e-05, "loss": 1.8635, "step": 91 }, { "epoch": 0.01570770018780946, "grad_norm": 0.5004885792732239, "learning_rate": 8e-05, "loss": 1.879, "step": 92 }, { "epoch": 0.015878436059416082, "grad_norm": 0.4270426034927368, "learning_rate": 8e-05, "loss": 1.6564, "step": 93 }, { "epoch": 0.016049171931022708, "grad_norm": 0.44766777753829956, "learning_rate": 8e-05, "loss": 1.6246, "step": 94 }, { "epoch": 0.016219907802629333, "grad_norm": 0.47327834367752075, "learning_rate": 8e-05, "loss": 1.8714, "step": 95 }, { "epoch": 0.01639064367423596, "grad_norm": 0.4366976022720337, "learning_rate": 8e-05, "loss": 1.6792, "step": 96 }, { "epoch": 0.01656137954584258, "grad_norm": 0.468168169260025, "learning_rate": 8e-05, "loss": 1.8567, "step": 97 }, { "epoch": 0.016732115417449206, "grad_norm": 0.5180723071098328, "learning_rate": 8e-05, "loss": 1.8537, "step": 98 }, { "epoch": 0.01690285128905583, "grad_norm": 0.48789721727371216, "learning_rate": 8e-05, "loss": 1.6924, "step": 99 }, { "epoch": 0.017073587160662457, "grad_norm": 0.4872114360332489, "learning_rate": 8e-05, "loss": 1.8131, "step": 100 }, { "epoch": 0.01724432303226908, "grad_norm": 0.4449755847454071, "learning_rate": 8e-05, "loss": 1.7533, "step": 101 }, { "epoch": 0.017415058903875704, "grad_norm": 0.4629347026348114, "learning_rate": 8e-05, "loss": 1.6774, "step": 102 }, { "epoch": 0.01758579477548233, "grad_norm": 0.4555564224720001, "learning_rate": 8e-05, "loss": 1.5945, "step": 103 }, { "epoch": 0.017756530647088955, "grad_norm": 0.4659121632575989, "learning_rate": 8e-05, "loss": 1.668, "step": 104 }, { "epoch": 0.017927266518695577, "grad_norm": 0.5513396859169006, "learning_rate": 8e-05, "loss": 1.9161, "step": 105 }, { "epoch": 0.018098002390302202, "grad_norm": 0.5207008719444275, "learning_rate": 8e-05, "loss": 2.0593, "step": 106 }, { "epoch": 0.018268738261908828, "grad_norm": 0.4416663348674774, "learning_rate": 8e-05, "loss": 1.6738, "step": 107 }, { "epoch": 0.018439474133515453, "grad_norm": 0.4454891085624695, "learning_rate": 8e-05, "loss": 1.676, "step": 108 }, { "epoch": 0.018610210005122075, "grad_norm": 0.42976289987564087, "learning_rate": 8e-05, "loss": 1.7261, "step": 109 }, { "epoch": 0.0187809458767287, "grad_norm": 0.44468623399734497, "learning_rate": 8e-05, "loss": 1.5669, "step": 110 }, { "epoch": 0.018951681748335326, "grad_norm": 0.5443071126937866, "learning_rate": 8e-05, "loss": 2.1408, "step": 111 }, { "epoch": 0.01912241761994195, "grad_norm": 0.46535879373550415, "learning_rate": 8e-05, "loss": 1.8935, "step": 112 }, { "epoch": 0.019293153491548573, "grad_norm": 0.437745064496994, "learning_rate": 8e-05, "loss": 1.7261, "step": 113 }, { "epoch": 0.0194638893631552, "grad_norm": 0.535077691078186, "learning_rate": 8e-05, "loss": 2.1214, "step": 114 }, { "epoch": 0.019634625234761824, "grad_norm": 0.474835604429245, "learning_rate": 8e-05, "loss": 1.7765, "step": 115 }, { "epoch": 0.01980536110636845, "grad_norm": 0.5182405114173889, "learning_rate": 8e-05, "loss": 1.9712, "step": 116 }, { "epoch": 0.01997609697797507, "grad_norm": 0.5160141587257385, "learning_rate": 8e-05, "loss": 1.9458, "step": 117 }, { "epoch": 0.020146832849581697, "grad_norm": 0.5019220113754272, "learning_rate": 8e-05, "loss": 1.9563, "step": 118 }, { "epoch": 0.020317568721188322, "grad_norm": 0.47957783937454224, "learning_rate": 8e-05, "loss": 1.8285, "step": 119 }, { "epoch": 0.020488304592794947, "grad_norm": 0.521813690662384, "learning_rate": 8e-05, "loss": 1.8026, "step": 120 }, { "epoch": 0.02065904046440157, "grad_norm": 0.45566773414611816, "learning_rate": 8e-05, "loss": 1.8159, "step": 121 }, { "epoch": 0.020829776336008195, "grad_norm": 0.5315849781036377, "learning_rate": 8e-05, "loss": 1.9981, "step": 122 }, { "epoch": 0.02100051220761482, "grad_norm": 0.4863191843032837, "learning_rate": 8e-05, "loss": 1.9599, "step": 123 }, { "epoch": 0.021171248079221446, "grad_norm": 0.4831017851829529, "learning_rate": 8e-05, "loss": 1.8061, "step": 124 }, { "epoch": 0.021341983950828067, "grad_norm": 0.46996408700942993, "learning_rate": 8e-05, "loss": 1.6708, "step": 125 }, { "epoch": 0.021512719822434693, "grad_norm": 0.5161317586898804, "learning_rate": 8e-05, "loss": 1.8598, "step": 126 }, { "epoch": 0.021683455694041318, "grad_norm": 0.4701539874076843, "learning_rate": 8e-05, "loss": 1.8802, "step": 127 }, { "epoch": 0.021854191565647944, "grad_norm": 0.4764927625656128, "learning_rate": 8e-05, "loss": 1.7799, "step": 128 }, { "epoch": 0.022024927437254566, "grad_norm": 0.5038724541664124, "learning_rate": 8e-05, "loss": 1.9791, "step": 129 }, { "epoch": 0.02219566330886119, "grad_norm": 0.5046201348304749, "learning_rate": 8e-05, "loss": 1.8121, "step": 130 }, { "epoch": 0.022366399180467816, "grad_norm": 0.4328746199607849, "learning_rate": 8e-05, "loss": 1.5747, "step": 131 }, { "epoch": 0.022537135052074442, "grad_norm": 0.4719444215297699, "learning_rate": 8e-05, "loss": 1.7002, "step": 132 }, { "epoch": 0.022707870923681064, "grad_norm": 0.5325294733047485, "learning_rate": 8e-05, "loss": 1.9465, "step": 133 }, { "epoch": 0.02287860679528769, "grad_norm": 0.452109158039093, "learning_rate": 8e-05, "loss": 1.7815, "step": 134 }, { "epoch": 0.023049342666894315, "grad_norm": 0.49889788031578064, "learning_rate": 8e-05, "loss": 1.9652, "step": 135 }, { "epoch": 0.02322007853850094, "grad_norm": 0.4857355058193207, "learning_rate": 8e-05, "loss": 1.7977, "step": 136 }, { "epoch": 0.023390814410107565, "grad_norm": 0.477777898311615, "learning_rate": 8e-05, "loss": 1.8145, "step": 137 }, { "epoch": 0.023561550281714187, "grad_norm": 0.4663862884044647, "learning_rate": 8e-05, "loss": 1.8135, "step": 138 }, { "epoch": 0.023732286153320813, "grad_norm": 0.4581298232078552, "learning_rate": 8e-05, "loss": 1.726, "step": 139 }, { "epoch": 0.023903022024927438, "grad_norm": 0.4487064778804779, "learning_rate": 8e-05, "loss": 1.6945, "step": 140 }, { "epoch": 0.024073757896534063, "grad_norm": 0.5439662337303162, "learning_rate": 8e-05, "loss": 2.1614, "step": 141 }, { "epoch": 0.024244493768140685, "grad_norm": 0.4821493625640869, "learning_rate": 8e-05, "loss": 1.8083, "step": 142 }, { "epoch": 0.02441522963974731, "grad_norm": 0.48455899953842163, "learning_rate": 8e-05, "loss": 1.6992, "step": 143 }, { "epoch": 0.024585965511353936, "grad_norm": 0.4671146273612976, "learning_rate": 8e-05, "loss": 1.7558, "step": 144 }, { "epoch": 0.02475670138296056, "grad_norm": 0.44540953636169434, "learning_rate": 8e-05, "loss": 1.5405, "step": 145 }, { "epoch": 0.024927437254567184, "grad_norm": 0.4616137444972992, "learning_rate": 8e-05, "loss": 1.7832, "step": 146 }, { "epoch": 0.02509817312617381, "grad_norm": 0.4649691581726074, "learning_rate": 8e-05, "loss": 1.6698, "step": 147 }, { "epoch": 0.025268908997780434, "grad_norm": 0.46647632122039795, "learning_rate": 8e-05, "loss": 1.7887, "step": 148 }, { "epoch": 0.02543964486938706, "grad_norm": 0.4451594054698944, "learning_rate": 8e-05, "loss": 1.6876, "step": 149 }, { "epoch": 0.02561038074099368, "grad_norm": 0.4948977530002594, "learning_rate": 8e-05, "loss": 1.715, "step": 150 }, { "epoch": 0.025781116612600307, "grad_norm": 0.4807678759098053, "learning_rate": 8e-05, "loss": 1.806, "step": 151 }, { "epoch": 0.025951852484206932, "grad_norm": 0.4554213881492615, "learning_rate": 8e-05, "loss": 1.6728, "step": 152 }, { "epoch": 0.026122588355813558, "grad_norm": 0.5148962140083313, "learning_rate": 8e-05, "loss": 1.8054, "step": 153 }, { "epoch": 0.02629332422742018, "grad_norm": 0.47027069330215454, "learning_rate": 8e-05, "loss": 1.7846, "step": 154 }, { "epoch": 0.026464060099026805, "grad_norm": 0.44736677408218384, "learning_rate": 8e-05, "loss": 1.8433, "step": 155 }, { "epoch": 0.02663479597063343, "grad_norm": 0.45064836740493774, "learning_rate": 8e-05, "loss": 1.6977, "step": 156 }, { "epoch": 0.026805531842240056, "grad_norm": 0.48699885606765747, "learning_rate": 8e-05, "loss": 1.9063, "step": 157 }, { "epoch": 0.026976267713846678, "grad_norm": 0.514915406703949, "learning_rate": 8e-05, "loss": 1.7772, "step": 158 }, { "epoch": 0.027147003585453303, "grad_norm": 0.4905100464820862, "learning_rate": 8e-05, "loss": 1.8606, "step": 159 }, { "epoch": 0.02731773945705993, "grad_norm": 0.5056862235069275, "learning_rate": 8e-05, "loss": 1.771, "step": 160 }, { "epoch": 0.027488475328666554, "grad_norm": 0.48683127760887146, "learning_rate": 8e-05, "loss": 1.9431, "step": 161 }, { "epoch": 0.027659211200273176, "grad_norm": 0.6406216621398926, "learning_rate": 8e-05, "loss": 1.5536, "step": 162 }, { "epoch": 0.0278299470718798, "grad_norm": 0.4341851472854614, "learning_rate": 8e-05, "loss": 1.6819, "step": 163 }, { "epoch": 0.028000682943486427, "grad_norm": 0.4686916470527649, "learning_rate": 8e-05, "loss": 1.6813, "step": 164 }, { "epoch": 0.028171418815093052, "grad_norm": 0.498260498046875, "learning_rate": 8e-05, "loss": 1.908, "step": 165 }, { "epoch": 0.028342154686699674, "grad_norm": 0.46910420060157776, "learning_rate": 8e-05, "loss": 1.7351, "step": 166 }, { "epoch": 0.0285128905583063, "grad_norm": 0.44555917382240295, "learning_rate": 8e-05, "loss": 1.5912, "step": 167 }, { "epoch": 0.028683626429912925, "grad_norm": 0.466680109500885, "learning_rate": 8e-05, "loss": 1.7629, "step": 168 }, { "epoch": 0.02885436230151955, "grad_norm": 0.45286041498184204, "learning_rate": 8e-05, "loss": 1.6905, "step": 169 }, { "epoch": 0.029025098173126172, "grad_norm": 0.43861860036849976, "learning_rate": 8e-05, "loss": 1.7118, "step": 170 }, { "epoch": 0.029195834044732798, "grad_norm": 0.5066196918487549, "learning_rate": 8e-05, "loss": 1.8945, "step": 171 }, { "epoch": 0.029366569916339423, "grad_norm": 0.47320351004600525, "learning_rate": 8e-05, "loss": 1.729, "step": 172 }, { "epoch": 0.02953730578794605, "grad_norm": 0.467319518327713, "learning_rate": 8e-05, "loss": 1.534, "step": 173 }, { "epoch": 0.02970804165955267, "grad_norm": 0.5008344054222107, "learning_rate": 8e-05, "loss": 1.8554, "step": 174 }, { "epoch": 0.029878777531159296, "grad_norm": 0.48655131459236145, "learning_rate": 8e-05, "loss": 1.6972, "step": 175 }, { "epoch": 0.03004951340276592, "grad_norm": 0.4580397307872772, "learning_rate": 8e-05, "loss": 1.7439, "step": 176 }, { "epoch": 0.030220249274372547, "grad_norm": 0.466196745634079, "learning_rate": 8e-05, "loss": 1.7825, "step": 177 }, { "epoch": 0.03039098514597917, "grad_norm": 0.4853515028953552, "learning_rate": 8e-05, "loss": 1.7015, "step": 178 }, { "epoch": 0.030561721017585794, "grad_norm": 0.43904945254325867, "learning_rate": 8e-05, "loss": 1.7683, "step": 179 }, { "epoch": 0.03073245688919242, "grad_norm": 0.44885826110839844, "learning_rate": 8e-05, "loss": 1.6404, "step": 180 }, { "epoch": 0.030903192760799045, "grad_norm": 0.46436604857444763, "learning_rate": 8e-05, "loss": 1.8739, "step": 181 }, { "epoch": 0.03107392863240567, "grad_norm": 0.4560924470424652, "learning_rate": 8e-05, "loss": 1.8836, "step": 182 }, { "epoch": 0.031244664504012292, "grad_norm": 0.44748809933662415, "learning_rate": 8e-05, "loss": 1.5983, "step": 183 }, { "epoch": 0.03141540037561892, "grad_norm": 0.620189905166626, "learning_rate": 8e-05, "loss": 1.984, "step": 184 }, { "epoch": 0.03158613624722554, "grad_norm": 0.5209994316101074, "learning_rate": 8e-05, "loss": 1.919, "step": 185 }, { "epoch": 0.031756872118832165, "grad_norm": 0.4916991889476776, "learning_rate": 8e-05, "loss": 1.7995, "step": 186 }, { "epoch": 0.03192760799043879, "grad_norm": 0.4779764711856842, "learning_rate": 8e-05, "loss": 1.8828, "step": 187 }, { "epoch": 0.032098343862045416, "grad_norm": 0.48913559317588806, "learning_rate": 8e-05, "loss": 1.8311, "step": 188 }, { "epoch": 0.03226907973365204, "grad_norm": 0.47557204961776733, "learning_rate": 8e-05, "loss": 1.7484, "step": 189 }, { "epoch": 0.032439815605258666, "grad_norm": 0.4526785910129547, "learning_rate": 8e-05, "loss": 1.7496, "step": 190 }, { "epoch": 0.03261055147686529, "grad_norm": 0.48831480741500854, "learning_rate": 8e-05, "loss": 1.7127, "step": 191 }, { "epoch": 0.03278128734847192, "grad_norm": 0.4658520221710205, "learning_rate": 8e-05, "loss": 1.6899, "step": 192 }, { "epoch": 0.032952023220078536, "grad_norm": 0.44966885447502136, "learning_rate": 8e-05, "loss": 1.8593, "step": 193 }, { "epoch": 0.03312275909168516, "grad_norm": 0.47032150626182556, "learning_rate": 8e-05, "loss": 1.6809, "step": 194 }, { "epoch": 0.033293494963291786, "grad_norm": 0.48067763447761536, "learning_rate": 8e-05, "loss": 1.8996, "step": 195 }, { "epoch": 0.03346423083489841, "grad_norm": 0.5202171206474304, "learning_rate": 8e-05, "loss": 1.9487, "step": 196 }, { "epoch": 0.03363496670650504, "grad_norm": 0.48600322008132935, "learning_rate": 8e-05, "loss": 1.8785, "step": 197 }, { "epoch": 0.03380570257811166, "grad_norm": 0.4688952565193176, "learning_rate": 8e-05, "loss": 1.7631, "step": 198 }, { "epoch": 0.03397643844971829, "grad_norm": 0.49965277314186096, "learning_rate": 8e-05, "loss": 1.9622, "step": 199 }, { "epoch": 0.034147174321324913, "grad_norm": 0.45191776752471924, "learning_rate": 8e-05, "loss": 1.7636, "step": 200 }, { "epoch": 0.03431791019293153, "grad_norm": 0.4768770635128021, "learning_rate": 8e-05, "loss": 1.6697, "step": 201 }, { "epoch": 0.03448864606453816, "grad_norm": 0.5686438679695129, "learning_rate": 8e-05, "loss": 1.9649, "step": 202 }, { "epoch": 0.03465938193614478, "grad_norm": 0.49678438901901245, "learning_rate": 8e-05, "loss": 1.7675, "step": 203 }, { "epoch": 0.03483011780775141, "grad_norm": 0.4629178047180176, "learning_rate": 8e-05, "loss": 1.8558, "step": 204 }, { "epoch": 0.035000853679358034, "grad_norm": 0.48516717553138733, "learning_rate": 8e-05, "loss": 1.7826, "step": 205 }, { "epoch": 0.03517158955096466, "grad_norm": 0.46405932307243347, "learning_rate": 8e-05, "loss": 1.6946, "step": 206 }, { "epoch": 0.035342325422571284, "grad_norm": 0.43091821670532227, "learning_rate": 8e-05, "loss": 1.6642, "step": 207 }, { "epoch": 0.03551306129417791, "grad_norm": 0.4535653591156006, "learning_rate": 8e-05, "loss": 1.6694, "step": 208 }, { "epoch": 0.03568379716578453, "grad_norm": 0.48535406589508057, "learning_rate": 8e-05, "loss": 1.747, "step": 209 }, { "epoch": 0.035854533037391154, "grad_norm": 0.49231231212615967, "learning_rate": 8e-05, "loss": 1.8342, "step": 210 }, { "epoch": 0.03602526890899778, "grad_norm": 0.49159592390060425, "learning_rate": 8e-05, "loss": 1.8224, "step": 211 }, { "epoch": 0.036196004780604404, "grad_norm": 0.45674747228622437, "learning_rate": 8e-05, "loss": 1.708, "step": 212 }, { "epoch": 0.03636674065221103, "grad_norm": 0.4460693299770355, "learning_rate": 8e-05, "loss": 1.5657, "step": 213 }, { "epoch": 0.036537476523817655, "grad_norm": 0.4646352231502533, "learning_rate": 8e-05, "loss": 1.6927, "step": 214 }, { "epoch": 0.03670821239542428, "grad_norm": 0.44909512996673584, "learning_rate": 8e-05, "loss": 1.8448, "step": 215 }, { "epoch": 0.036878948267030906, "grad_norm": 0.46101996302604675, "learning_rate": 8e-05, "loss": 1.7, "step": 216 }, { "epoch": 0.037049684138637524, "grad_norm": 0.4664847254753113, "learning_rate": 8e-05, "loss": 1.8789, "step": 217 }, { "epoch": 0.03722042001024415, "grad_norm": 0.45331287384033203, "learning_rate": 8e-05, "loss": 1.8378, "step": 218 }, { "epoch": 0.037391155881850775, "grad_norm": 0.4488231837749481, "learning_rate": 8e-05, "loss": 1.8507, "step": 219 }, { "epoch": 0.0375618917534574, "grad_norm": 0.5267904996871948, "learning_rate": 8e-05, "loss": 1.7705, "step": 220 }, { "epoch": 0.037732627625064026, "grad_norm": 0.4690248370170593, "learning_rate": 8e-05, "loss": 1.8887, "step": 221 }, { "epoch": 0.03790336349667065, "grad_norm": 0.4350374937057495, "learning_rate": 8e-05, "loss": 1.6411, "step": 222 }, { "epoch": 0.03807409936827728, "grad_norm": 0.4836752712726593, "learning_rate": 8e-05, "loss": 1.7248, "step": 223 }, { "epoch": 0.0382448352398839, "grad_norm": 0.43415477871894836, "learning_rate": 8e-05, "loss": 1.7, "step": 224 }, { "epoch": 0.03841557111149052, "grad_norm": 0.44146519899368286, "learning_rate": 8e-05, "loss": 1.6718, "step": 225 }, { "epoch": 0.038586306983097146, "grad_norm": 0.46476247906684875, "learning_rate": 8e-05, "loss": 1.7192, "step": 226 }, { "epoch": 0.03875704285470377, "grad_norm": 0.44922181963920593, "learning_rate": 8e-05, "loss": 1.8292, "step": 227 }, { "epoch": 0.0389277787263104, "grad_norm": 0.45197880268096924, "learning_rate": 8e-05, "loss": 1.7858, "step": 228 }, { "epoch": 0.03909851459791702, "grad_norm": 0.4571561813354492, "learning_rate": 8e-05, "loss": 1.7147, "step": 229 }, { "epoch": 0.03926925046952365, "grad_norm": 0.4801291823387146, "learning_rate": 8e-05, "loss": 1.8887, "step": 230 }, { "epoch": 0.03943998634113027, "grad_norm": 0.45216163992881775, "learning_rate": 8e-05, "loss": 1.798, "step": 231 }, { "epoch": 0.0396107222127369, "grad_norm": 0.4773813486099243, "learning_rate": 8e-05, "loss": 1.8492, "step": 232 }, { "epoch": 0.039781458084343524, "grad_norm": 0.47434568405151367, "learning_rate": 8e-05, "loss": 1.8507, "step": 233 }, { "epoch": 0.03995219395595014, "grad_norm": 0.46107932925224304, "learning_rate": 8e-05, "loss": 1.5429, "step": 234 }, { "epoch": 0.04012292982755677, "grad_norm": 0.4758448004722595, "learning_rate": 8e-05, "loss": 1.8179, "step": 235 }, { "epoch": 0.04029366569916339, "grad_norm": 0.4545665681362152, "learning_rate": 8e-05, "loss": 1.8894, "step": 236 }, { "epoch": 0.04046440157077002, "grad_norm": 0.49284684658050537, "learning_rate": 8e-05, "loss": 1.818, "step": 237 }, { "epoch": 0.040635137442376644, "grad_norm": 0.46859461069107056, "learning_rate": 8e-05, "loss": 1.8711, "step": 238 }, { "epoch": 0.04080587331398327, "grad_norm": 0.49554839730262756, "learning_rate": 8e-05, "loss": 1.7381, "step": 239 }, { "epoch": 0.040976609185589895, "grad_norm": 0.45285263657569885, "learning_rate": 8e-05, "loss": 1.7059, "step": 240 }, { "epoch": 0.04114734505719652, "grad_norm": 0.43953168392181396, "learning_rate": 8e-05, "loss": 1.6655, "step": 241 }, { "epoch": 0.04131808092880314, "grad_norm": 0.4597913920879364, "learning_rate": 8e-05, "loss": 1.8798, "step": 242 }, { "epoch": 0.041488816800409764, "grad_norm": 0.43842262029647827, "learning_rate": 8e-05, "loss": 1.653, "step": 243 }, { "epoch": 0.04165955267201639, "grad_norm": 0.45754551887512207, "learning_rate": 8e-05, "loss": 1.7651, "step": 244 }, { "epoch": 0.041830288543623015, "grad_norm": 0.4215712249279022, "learning_rate": 8e-05, "loss": 1.5512, "step": 245 }, { "epoch": 0.04200102441522964, "grad_norm": 0.4614805281162262, "learning_rate": 8e-05, "loss": 1.8881, "step": 246 }, { "epoch": 0.042171760286836266, "grad_norm": 0.4637735188007355, "learning_rate": 8e-05, "loss": 1.7543, "step": 247 }, { "epoch": 0.04234249615844289, "grad_norm": 0.4294458031654358, "learning_rate": 8e-05, "loss": 1.592, "step": 248 }, { "epoch": 0.042513232030049516, "grad_norm": 0.4395756125450134, "learning_rate": 8e-05, "loss": 1.6858, "step": 249 }, { "epoch": 0.042683967901656135, "grad_norm": 0.47757187485694885, "learning_rate": 8e-05, "loss": 1.6994, "step": 250 }, { "epoch": 0.04285470377326276, "grad_norm": 0.4519607722759247, "learning_rate": 8e-05, "loss": 1.6995, "step": 251 }, { "epoch": 0.043025439644869386, "grad_norm": 0.5024729371070862, "learning_rate": 8e-05, "loss": 1.6618, "step": 252 }, { "epoch": 0.04319617551647601, "grad_norm": 0.4685952365398407, "learning_rate": 8e-05, "loss": 1.6292, "step": 253 }, { "epoch": 0.043366911388082637, "grad_norm": 0.4622642993927002, "learning_rate": 8e-05, "loss": 1.622, "step": 254 }, { "epoch": 0.04353764725968926, "grad_norm": 0.47050657868385315, "learning_rate": 8e-05, "loss": 1.7102, "step": 255 }, { "epoch": 0.04370838313129589, "grad_norm": 0.5027202367782593, "learning_rate": 8e-05, "loss": 1.8971, "step": 256 }, { "epoch": 0.04387911900290251, "grad_norm": 0.5044411420822144, "learning_rate": 8e-05, "loss": 1.9525, "step": 257 }, { "epoch": 0.04404985487450913, "grad_norm": 0.45148590207099915, "learning_rate": 8e-05, "loss": 1.8109, "step": 258 }, { "epoch": 0.04422059074611576, "grad_norm": 0.45295730233192444, "learning_rate": 8e-05, "loss": 1.8778, "step": 259 }, { "epoch": 0.04439132661772238, "grad_norm": 0.4823761582374573, "learning_rate": 8e-05, "loss": 1.9015, "step": 260 }, { "epoch": 0.04456206248932901, "grad_norm": 0.46531838178634644, "learning_rate": 8e-05, "loss": 1.7642, "step": 261 }, { "epoch": 0.04473279836093563, "grad_norm": 0.45283615589141846, "learning_rate": 8e-05, "loss": 1.6608, "step": 262 }, { "epoch": 0.04490353423254226, "grad_norm": 0.530863344669342, "learning_rate": 8e-05, "loss": 1.838, "step": 263 }, { "epoch": 0.045074270104148884, "grad_norm": 0.44861987233161926, "learning_rate": 8e-05, "loss": 1.7243, "step": 264 }, { "epoch": 0.04524500597575551, "grad_norm": 0.41836628317832947, "learning_rate": 8e-05, "loss": 1.6154, "step": 265 }, { "epoch": 0.04541574184736213, "grad_norm": 0.4883762001991272, "learning_rate": 8e-05, "loss": 1.8517, "step": 266 }, { "epoch": 0.04558647771896875, "grad_norm": 0.49337509274482727, "learning_rate": 8e-05, "loss": 1.8186, "step": 267 }, { "epoch": 0.04575721359057538, "grad_norm": 0.4525955021381378, "learning_rate": 8e-05, "loss": 1.7768, "step": 268 }, { "epoch": 0.045927949462182004, "grad_norm": 0.511612057685852, "learning_rate": 8e-05, "loss": 1.9362, "step": 269 }, { "epoch": 0.04609868533378863, "grad_norm": 0.45026537775993347, "learning_rate": 8e-05, "loss": 1.5865, "step": 270 }, { "epoch": 0.046269421205395254, "grad_norm": 0.47091561555862427, "learning_rate": 8e-05, "loss": 1.7356, "step": 271 }, { "epoch": 0.04644015707700188, "grad_norm": 0.45327994227409363, "learning_rate": 8e-05, "loss": 1.8868, "step": 272 }, { "epoch": 0.046610892948608505, "grad_norm": 0.44300612807273865, "learning_rate": 8e-05, "loss": 1.5948, "step": 273 }, { "epoch": 0.04678162882021513, "grad_norm": 0.44797173142433167, "learning_rate": 8e-05, "loss": 1.6494, "step": 274 }, { "epoch": 0.04695236469182175, "grad_norm": 0.4429998993873596, "learning_rate": 8e-05, "loss": 1.6959, "step": 275 }, { "epoch": 0.047123100563428374, "grad_norm": 0.4709814190864563, "learning_rate": 8e-05, "loss": 1.7153, "step": 276 }, { "epoch": 0.047293836435035, "grad_norm": 0.4272076189517975, "learning_rate": 8e-05, "loss": 1.6117, "step": 277 }, { "epoch": 0.047464572306641625, "grad_norm": 0.4669269621372223, "learning_rate": 8e-05, "loss": 1.7105, "step": 278 }, { "epoch": 0.04763530817824825, "grad_norm": 0.42532601952552795, "learning_rate": 8e-05, "loss": 1.605, "step": 279 }, { "epoch": 0.047806044049854876, "grad_norm": 0.43166500329971313, "learning_rate": 8e-05, "loss": 1.5592, "step": 280 }, { "epoch": 0.0479767799214615, "grad_norm": 0.4634902775287628, "learning_rate": 8e-05, "loss": 1.8179, "step": 281 }, { "epoch": 0.04814751579306813, "grad_norm": 0.45385411381721497, "learning_rate": 8e-05, "loss": 1.7391, "step": 282 }, { "epoch": 0.048318251664674745, "grad_norm": 0.5067617893218994, "learning_rate": 8e-05, "loss": 1.9352, "step": 283 }, { "epoch": 0.04848898753628137, "grad_norm": 0.47003212571144104, "learning_rate": 8e-05, "loss": 1.6699, "step": 284 }, { "epoch": 0.048659723407887996, "grad_norm": 0.45958903431892395, "learning_rate": 8e-05, "loss": 1.6003, "step": 285 }, { "epoch": 0.04883045927949462, "grad_norm": 0.4840228855609894, "learning_rate": 8e-05, "loss": 1.7194, "step": 286 }, { "epoch": 0.04900119515110125, "grad_norm": 0.45845186710357666, "learning_rate": 8e-05, "loss": 1.7219, "step": 287 }, { "epoch": 0.04917193102270787, "grad_norm": 0.4372282028198242, "learning_rate": 8e-05, "loss": 1.7143, "step": 288 }, { "epoch": 0.0493426668943145, "grad_norm": 0.4662127196788788, "learning_rate": 8e-05, "loss": 1.8869, "step": 289 }, { "epoch": 0.04951340276592112, "grad_norm": 0.45432761311531067, "learning_rate": 8e-05, "loss": 1.7757, "step": 290 }, { "epoch": 0.04968413863752774, "grad_norm": 0.4318701922893524, "learning_rate": 8e-05, "loss": 1.7467, "step": 291 }, { "epoch": 0.04985487450913437, "grad_norm": 0.4370262920856476, "learning_rate": 8e-05, "loss": 1.6376, "step": 292 }, { "epoch": 0.05002561038074099, "grad_norm": 0.5111391544342041, "learning_rate": 8e-05, "loss": 1.5274, "step": 293 }, { "epoch": 0.05019634625234762, "grad_norm": 0.47158581018447876, "learning_rate": 8e-05, "loss": 1.8561, "step": 294 }, { "epoch": 0.05036708212395424, "grad_norm": 0.468481183052063, "learning_rate": 8e-05, "loss": 1.6827, "step": 295 }, { "epoch": 0.05053781799556087, "grad_norm": 0.5639234185218811, "learning_rate": 8e-05, "loss": 1.9021, "step": 296 }, { "epoch": 0.050708553867167494, "grad_norm": 0.44318512082099915, "learning_rate": 8e-05, "loss": 1.8214, "step": 297 }, { "epoch": 0.05087928973877412, "grad_norm": 0.4497351348400116, "learning_rate": 8e-05, "loss": 1.7217, "step": 298 }, { "epoch": 0.05105002561038074, "grad_norm": 0.48738235235214233, "learning_rate": 8e-05, "loss": 1.8478, "step": 299 }, { "epoch": 0.05122076148198736, "grad_norm": 0.4800626039505005, "learning_rate": 8e-05, "loss": 2.0665, "step": 300 }, { "epoch": 0.05139149735359399, "grad_norm": 0.44250568747520447, "learning_rate": 8e-05, "loss": 1.5638, "step": 301 }, { "epoch": 0.051562233225200614, "grad_norm": 0.45217466354370117, "learning_rate": 8e-05, "loss": 1.6942, "step": 302 }, { "epoch": 0.05173296909680724, "grad_norm": 0.4680691063404083, "learning_rate": 8e-05, "loss": 1.7582, "step": 303 }, { "epoch": 0.051903704968413865, "grad_norm": 0.44583261013031006, "learning_rate": 8e-05, "loss": 1.6133, "step": 304 }, { "epoch": 0.05207444084002049, "grad_norm": 0.4323332905769348, "learning_rate": 8e-05, "loss": 1.6147, "step": 305 }, { "epoch": 0.052245176711627116, "grad_norm": 0.4536374509334564, "learning_rate": 8e-05, "loss": 1.7984, "step": 306 }, { "epoch": 0.052415912583233734, "grad_norm": 0.4635334014892578, "learning_rate": 8e-05, "loss": 1.7765, "step": 307 }, { "epoch": 0.05258664845484036, "grad_norm": 0.44797372817993164, "learning_rate": 8e-05, "loss": 1.7392, "step": 308 }, { "epoch": 0.052757384326446985, "grad_norm": 0.45309171080589294, "learning_rate": 8e-05, "loss": 1.8006, "step": 309 }, { "epoch": 0.05292812019805361, "grad_norm": 0.4662359952926636, "learning_rate": 8e-05, "loss": 1.6211, "step": 310 }, { "epoch": 0.053098856069660236, "grad_norm": 0.4998169243335724, "learning_rate": 8e-05, "loss": 1.9916, "step": 311 }, { "epoch": 0.05326959194126686, "grad_norm": 0.46077585220336914, "learning_rate": 8e-05, "loss": 1.7786, "step": 312 }, { "epoch": 0.05344032781287349, "grad_norm": 0.4551352262496948, "learning_rate": 8e-05, "loss": 1.7928, "step": 313 }, { "epoch": 0.05361106368448011, "grad_norm": 0.44146591424942017, "learning_rate": 8e-05, "loss": 1.7862, "step": 314 }, { "epoch": 0.05378179955608673, "grad_norm": 0.45397743582725525, "learning_rate": 8e-05, "loss": 1.7417, "step": 315 }, { "epoch": 0.053952535427693356, "grad_norm": 0.42834755778312683, "learning_rate": 8e-05, "loss": 1.674, "step": 316 }, { "epoch": 0.05412327129929998, "grad_norm": 0.4548366069793701, "learning_rate": 8e-05, "loss": 1.5892, "step": 317 }, { "epoch": 0.05429400717090661, "grad_norm": 0.46951961517333984, "learning_rate": 8e-05, "loss": 1.7515, "step": 318 }, { "epoch": 0.05446474304251323, "grad_norm": 0.45113319158554077, "learning_rate": 8e-05, "loss": 1.7442, "step": 319 }, { "epoch": 0.05463547891411986, "grad_norm": 0.45917055010795593, "learning_rate": 8e-05, "loss": 1.8248, "step": 320 }, { "epoch": 0.05480621478572648, "grad_norm": 0.4591272473335266, "learning_rate": 8e-05, "loss": 1.7446, "step": 321 }, { "epoch": 0.05497695065733311, "grad_norm": 0.44743719696998596, "learning_rate": 8e-05, "loss": 1.7549, "step": 322 }, { "epoch": 0.055147686528939734, "grad_norm": 0.4749975800514221, "learning_rate": 8e-05, "loss": 1.7946, "step": 323 }, { "epoch": 0.05531842240054635, "grad_norm": 0.48187464475631714, "learning_rate": 8e-05, "loss": 1.6908, "step": 324 }, { "epoch": 0.05548915827215298, "grad_norm": 0.4583243429660797, "learning_rate": 8e-05, "loss": 1.6936, "step": 325 }, { "epoch": 0.0556598941437596, "grad_norm": 0.4757544696331024, "learning_rate": 8e-05, "loss": 1.6752, "step": 326 }, { "epoch": 0.05583063001536623, "grad_norm": 0.4643852710723877, "learning_rate": 8e-05, "loss": 1.7349, "step": 327 }, { "epoch": 0.056001365886972854, "grad_norm": 0.4408283233642578, "learning_rate": 8e-05, "loss": 1.684, "step": 328 }, { "epoch": 0.05617210175857948, "grad_norm": 0.43199023604393005, "learning_rate": 8e-05, "loss": 1.6579, "step": 329 }, { "epoch": 0.056342837630186104, "grad_norm": 0.4451024532318115, "learning_rate": 8e-05, "loss": 1.8135, "step": 330 }, { "epoch": 0.05651357350179273, "grad_norm": 0.4784998893737793, "learning_rate": 8e-05, "loss": 1.5415, "step": 331 }, { "epoch": 0.05668430937339935, "grad_norm": 0.47663596272468567, "learning_rate": 8e-05, "loss": 1.9652, "step": 332 }, { "epoch": 0.056855045245005974, "grad_norm": 0.4444281756877899, "learning_rate": 8e-05, "loss": 1.6277, "step": 333 }, { "epoch": 0.0570257811166126, "grad_norm": 0.4673044979572296, "learning_rate": 8e-05, "loss": 1.7916, "step": 334 }, { "epoch": 0.057196516988219225, "grad_norm": 0.47287073731422424, "learning_rate": 8e-05, "loss": 1.6551, "step": 335 }, { "epoch": 0.05736725285982585, "grad_norm": 0.5028661489486694, "learning_rate": 8e-05, "loss": 2.1295, "step": 336 }, { "epoch": 0.057537988731432475, "grad_norm": 0.4553907513618469, "learning_rate": 8e-05, "loss": 1.8308, "step": 337 }, { "epoch": 0.0577087246030391, "grad_norm": 0.44941118359565735, "learning_rate": 8e-05, "loss": 1.7528, "step": 338 }, { "epoch": 0.057879460474645726, "grad_norm": 0.4200385808944702, "learning_rate": 8e-05, "loss": 1.4971, "step": 339 }, { "epoch": 0.058050196346252345, "grad_norm": 0.4895465075969696, "learning_rate": 8e-05, "loss": 2.0161, "step": 340 }, { "epoch": 0.05822093221785897, "grad_norm": 0.4285028874874115, "learning_rate": 8e-05, "loss": 1.6971, "step": 341 }, { "epoch": 0.058391668089465595, "grad_norm": 0.438037633895874, "learning_rate": 8e-05, "loss": 1.7795, "step": 342 }, { "epoch": 0.05856240396107222, "grad_norm": 0.43967974185943604, "learning_rate": 8e-05, "loss": 1.6362, "step": 343 }, { "epoch": 0.058733139832678846, "grad_norm": 0.4597330391407013, "learning_rate": 8e-05, "loss": 1.6612, "step": 344 }, { "epoch": 0.05890387570428547, "grad_norm": 0.440722793340683, "learning_rate": 8e-05, "loss": 1.6723, "step": 345 }, { "epoch": 0.0590746115758921, "grad_norm": 0.42665061354637146, "learning_rate": 8e-05, "loss": 1.6805, "step": 346 }, { "epoch": 0.05924534744749872, "grad_norm": 0.4327438473701477, "learning_rate": 8e-05, "loss": 1.7206, "step": 347 }, { "epoch": 0.05941608331910534, "grad_norm": 0.4366966187953949, "learning_rate": 8e-05, "loss": 1.6597, "step": 348 }, { "epoch": 0.059586819190711966, "grad_norm": 0.4585934281349182, "learning_rate": 8e-05, "loss": 1.6945, "step": 349 }, { "epoch": 0.05975755506231859, "grad_norm": 0.43125641345977783, "learning_rate": 8e-05, "loss": 1.5579, "step": 350 }, { "epoch": 0.05992829093392522, "grad_norm": 0.4577394425868988, "learning_rate": 8e-05, "loss": 1.8154, "step": 351 }, { "epoch": 0.06009902680553184, "grad_norm": 0.4473010301589966, "learning_rate": 8e-05, "loss": 1.7076, "step": 352 }, { "epoch": 0.06026976267713847, "grad_norm": 0.45458072423934937, "learning_rate": 8e-05, "loss": 1.7724, "step": 353 }, { "epoch": 0.06044049854874509, "grad_norm": 0.45326393842697144, "learning_rate": 8e-05, "loss": 1.7893, "step": 354 }, { "epoch": 0.06061123442035172, "grad_norm": 0.4802393615245819, "learning_rate": 8e-05, "loss": 1.6923, "step": 355 }, { "epoch": 0.06078197029195834, "grad_norm": 0.4548514485359192, "learning_rate": 8e-05, "loss": 1.8114, "step": 356 }, { "epoch": 0.06095270616356496, "grad_norm": 0.4607689380645752, "learning_rate": 8e-05, "loss": 1.7027, "step": 357 }, { "epoch": 0.06112344203517159, "grad_norm": 0.4263896346092224, "learning_rate": 8e-05, "loss": 1.7307, "step": 358 }, { "epoch": 0.06129417790677821, "grad_norm": 0.4447373151779175, "learning_rate": 8e-05, "loss": 1.7404, "step": 359 }, { "epoch": 0.06146491377838484, "grad_norm": 0.4487105906009674, "learning_rate": 8e-05, "loss": 1.7612, "step": 360 }, { "epoch": 0.061635649649991464, "grad_norm": 0.48120978474617004, "learning_rate": 8e-05, "loss": 1.7302, "step": 361 }, { "epoch": 0.06180638552159809, "grad_norm": 0.4423348903656006, "learning_rate": 8e-05, "loss": 1.6517, "step": 362 }, { "epoch": 0.061977121393204715, "grad_norm": 0.45144858956336975, "learning_rate": 8e-05, "loss": 1.4722, "step": 363 }, { "epoch": 0.06214785726481134, "grad_norm": 0.4208192825317383, "learning_rate": 8e-05, "loss": 1.6992, "step": 364 }, { "epoch": 0.06231859313641796, "grad_norm": 0.4466952085494995, "learning_rate": 8e-05, "loss": 1.7062, "step": 365 }, { "epoch": 0.062489329008024584, "grad_norm": 0.4173239469528198, "learning_rate": 8e-05, "loss": 1.5963, "step": 366 }, { "epoch": 0.06266006487963122, "grad_norm": 0.5200835466384888, "learning_rate": 8e-05, "loss": 1.8906, "step": 367 }, { "epoch": 0.06283080075123784, "grad_norm": 0.4415004551410675, "learning_rate": 8e-05, "loss": 1.77, "step": 368 }, { "epoch": 0.06300153662284445, "grad_norm": 0.44726482033729553, "learning_rate": 8e-05, "loss": 1.7258, "step": 369 }, { "epoch": 0.06317227249445108, "grad_norm": 0.4300463795661926, "learning_rate": 8e-05, "loss": 1.6046, "step": 370 }, { "epoch": 0.0633430083660577, "grad_norm": 0.48227977752685547, "learning_rate": 8e-05, "loss": 1.9753, "step": 371 }, { "epoch": 0.06351374423766433, "grad_norm": 0.461542546749115, "learning_rate": 8e-05, "loss": 1.7961, "step": 372 }, { "epoch": 0.06368448010927096, "grad_norm": 0.45884478092193604, "learning_rate": 8e-05, "loss": 1.8011, "step": 373 }, { "epoch": 0.06385521598087758, "grad_norm": 0.43118366599082947, "learning_rate": 8e-05, "loss": 1.55, "step": 374 }, { "epoch": 0.0640259518524842, "grad_norm": 0.4534015655517578, "learning_rate": 8e-05, "loss": 1.715, "step": 375 }, { "epoch": 0.06419668772409083, "grad_norm": 0.5195707082748413, "learning_rate": 8e-05, "loss": 2.0183, "step": 376 }, { "epoch": 0.06436742359569746, "grad_norm": 0.47695934772491455, "learning_rate": 8e-05, "loss": 1.8103, "step": 377 }, { "epoch": 0.06453815946730408, "grad_norm": 0.4389845132827759, "learning_rate": 8e-05, "loss": 1.8121, "step": 378 }, { "epoch": 0.06470889533891071, "grad_norm": 0.49127382040023804, "learning_rate": 8e-05, "loss": 1.7404, "step": 379 }, { "epoch": 0.06487963121051733, "grad_norm": 0.44547274708747864, "learning_rate": 8e-05, "loss": 1.7357, "step": 380 }, { "epoch": 0.06505036708212396, "grad_norm": 0.46656739711761475, "learning_rate": 8e-05, "loss": 1.6505, "step": 381 }, { "epoch": 0.06522110295373058, "grad_norm": 0.45662495493888855, "learning_rate": 8e-05, "loss": 1.6749, "step": 382 }, { "epoch": 0.06539183882533721, "grad_norm": 0.4885867238044739, "learning_rate": 8e-05, "loss": 1.9968, "step": 383 }, { "epoch": 0.06556257469694383, "grad_norm": 0.45888903737068176, "learning_rate": 8e-05, "loss": 1.6184, "step": 384 }, { "epoch": 0.06573331056855045, "grad_norm": 0.4600870907306671, "learning_rate": 8e-05, "loss": 1.7855, "step": 385 }, { "epoch": 0.06590404644015707, "grad_norm": 0.4732264578342438, "learning_rate": 8e-05, "loss": 1.8604, "step": 386 }, { "epoch": 0.0660747823117637, "grad_norm": 0.4527263641357422, "learning_rate": 8e-05, "loss": 1.7713, "step": 387 }, { "epoch": 0.06624551818337032, "grad_norm": 0.4780803918838501, "learning_rate": 8e-05, "loss": 1.7881, "step": 388 }, { "epoch": 0.06641625405497695, "grad_norm": 0.47249266505241394, "learning_rate": 8e-05, "loss": 1.7562, "step": 389 }, { "epoch": 0.06658698992658357, "grad_norm": 0.46080508828163147, "learning_rate": 8e-05, "loss": 1.6967, "step": 390 }, { "epoch": 0.0667577257981902, "grad_norm": 0.4382990896701813, "learning_rate": 8e-05, "loss": 1.5681, "step": 391 }, { "epoch": 0.06692846166979682, "grad_norm": 0.4496609568595886, "learning_rate": 8e-05, "loss": 1.5905, "step": 392 }, { "epoch": 0.06709919754140345, "grad_norm": 0.4758816361427307, "learning_rate": 8e-05, "loss": 1.8353, "step": 393 }, { "epoch": 0.06726993341301007, "grad_norm": 0.4331206977367401, "learning_rate": 8e-05, "loss": 1.7234, "step": 394 }, { "epoch": 0.0674406692846167, "grad_norm": 0.4421883821487427, "learning_rate": 8e-05, "loss": 1.6436, "step": 395 }, { "epoch": 0.06761140515622333, "grad_norm": 0.4685376286506653, "learning_rate": 8e-05, "loss": 1.6926, "step": 396 }, { "epoch": 0.06778214102782995, "grad_norm": 0.5034798979759216, "learning_rate": 8e-05, "loss": 1.9692, "step": 397 }, { "epoch": 0.06795287689943658, "grad_norm": 0.5046457052230835, "learning_rate": 8e-05, "loss": 1.7421, "step": 398 }, { "epoch": 0.0681236127710432, "grad_norm": 0.4418204128742218, "learning_rate": 8e-05, "loss": 1.6563, "step": 399 }, { "epoch": 0.06829434864264983, "grad_norm": 0.4781089425086975, "learning_rate": 8e-05, "loss": 1.5976, "step": 400 }, { "epoch": 0.06846508451425644, "grad_norm": 0.45380887389183044, "learning_rate": 8e-05, "loss": 1.7178, "step": 401 }, { "epoch": 0.06863582038586306, "grad_norm": 0.39809149503707886, "learning_rate": 8e-05, "loss": 1.3318, "step": 402 }, { "epoch": 0.06880655625746969, "grad_norm": 0.4849470257759094, "learning_rate": 8e-05, "loss": 1.771, "step": 403 }, { "epoch": 0.06897729212907631, "grad_norm": 0.4671993553638458, "learning_rate": 8e-05, "loss": 1.8336, "step": 404 }, { "epoch": 0.06914802800068294, "grad_norm": 0.4589877426624298, "learning_rate": 8e-05, "loss": 1.8499, "step": 405 }, { "epoch": 0.06931876387228957, "grad_norm": 0.4764503836631775, "learning_rate": 8e-05, "loss": 1.7538, "step": 406 }, { "epoch": 0.06948949974389619, "grad_norm": 0.4299392104148865, "learning_rate": 8e-05, "loss": 1.6808, "step": 407 }, { "epoch": 0.06966023561550282, "grad_norm": 0.4542272686958313, "learning_rate": 8e-05, "loss": 1.7686, "step": 408 }, { "epoch": 0.06983097148710944, "grad_norm": 0.45011600852012634, "learning_rate": 8e-05, "loss": 1.6436, "step": 409 }, { "epoch": 0.07000170735871607, "grad_norm": 0.4886605441570282, "learning_rate": 8e-05, "loss": 1.7211, "step": 410 }, { "epoch": 0.07017244323032269, "grad_norm": 0.4820287823677063, "learning_rate": 8e-05, "loss": 1.8781, "step": 411 }, { "epoch": 0.07034317910192932, "grad_norm": 0.4262748658657074, "learning_rate": 8e-05, "loss": 1.7766, "step": 412 }, { "epoch": 0.07051391497353594, "grad_norm": 0.4382219910621643, "learning_rate": 8e-05, "loss": 1.6263, "step": 413 }, { "epoch": 0.07068465084514257, "grad_norm": 0.43600529432296753, "learning_rate": 8e-05, "loss": 1.6455, "step": 414 }, { "epoch": 0.0708553867167492, "grad_norm": 0.46301496028900146, "learning_rate": 8e-05, "loss": 1.7316, "step": 415 }, { "epoch": 0.07102612258835582, "grad_norm": 0.4454903304576874, "learning_rate": 8e-05, "loss": 1.5182, "step": 416 }, { "epoch": 0.07119685845996244, "grad_norm": 0.47093480825424194, "learning_rate": 8e-05, "loss": 1.8128, "step": 417 }, { "epoch": 0.07136759433156906, "grad_norm": 0.4356008768081665, "learning_rate": 8e-05, "loss": 1.5658, "step": 418 }, { "epoch": 0.07153833020317568, "grad_norm": 0.4679908752441406, "learning_rate": 8e-05, "loss": 1.7117, "step": 419 }, { "epoch": 0.07170906607478231, "grad_norm": 0.46682414412498474, "learning_rate": 8e-05, "loss": 1.6709, "step": 420 }, { "epoch": 0.07187980194638893, "grad_norm": 0.43645405769348145, "learning_rate": 8e-05, "loss": 1.6303, "step": 421 }, { "epoch": 0.07205053781799556, "grad_norm": 0.43628308176994324, "learning_rate": 8e-05, "loss": 1.672, "step": 422 }, { "epoch": 0.07222127368960218, "grad_norm": 0.47812700271606445, "learning_rate": 8e-05, "loss": 1.8666, "step": 423 }, { "epoch": 0.07239200956120881, "grad_norm": 0.4867512583732605, "learning_rate": 8e-05, "loss": 1.8484, "step": 424 }, { "epoch": 0.07256274543281543, "grad_norm": 0.4567367434501648, "learning_rate": 8e-05, "loss": 1.8509, "step": 425 }, { "epoch": 0.07273348130442206, "grad_norm": 0.4396974444389343, "learning_rate": 8e-05, "loss": 1.775, "step": 426 }, { "epoch": 0.07290421717602869, "grad_norm": 0.46215373277664185, "learning_rate": 8e-05, "loss": 1.8738, "step": 427 }, { "epoch": 0.07307495304763531, "grad_norm": 0.41929328441619873, "learning_rate": 8e-05, "loss": 1.6481, "step": 428 }, { "epoch": 0.07324568891924194, "grad_norm": 0.4522722661495209, "learning_rate": 8e-05, "loss": 1.8071, "step": 429 }, { "epoch": 0.07341642479084856, "grad_norm": 0.45338302850723267, "learning_rate": 8e-05, "loss": 1.6476, "step": 430 }, { "epoch": 0.07358716066245519, "grad_norm": 0.43669575452804565, "learning_rate": 8e-05, "loss": 1.871, "step": 431 }, { "epoch": 0.07375789653406181, "grad_norm": 0.4563378393650055, "learning_rate": 8e-05, "loss": 1.8027, "step": 432 }, { "epoch": 0.07392863240566844, "grad_norm": 0.4655032157897949, "learning_rate": 8e-05, "loss": 1.8163, "step": 433 }, { "epoch": 0.07409936827727505, "grad_norm": 0.43200939893722534, "learning_rate": 8e-05, "loss": 1.7241, "step": 434 }, { "epoch": 0.07427010414888167, "grad_norm": 0.4542429447174072, "learning_rate": 8e-05, "loss": 1.7016, "step": 435 }, { "epoch": 0.0744408400204883, "grad_norm": 0.5035648941993713, "learning_rate": 8e-05, "loss": 1.7132, "step": 436 }, { "epoch": 0.07461157589209493, "grad_norm": 0.4733804762363434, "learning_rate": 8e-05, "loss": 1.77, "step": 437 }, { "epoch": 0.07478231176370155, "grad_norm": 0.4195294976234436, "learning_rate": 8e-05, "loss": 1.6425, "step": 438 }, { "epoch": 0.07495304763530818, "grad_norm": 0.4512338936328888, "learning_rate": 8e-05, "loss": 1.863, "step": 439 }, { "epoch": 0.0751237835069148, "grad_norm": 0.45781660079956055, "learning_rate": 8e-05, "loss": 1.8941, "step": 440 }, { "epoch": 0.07529451937852143, "grad_norm": 0.4970022141933441, "learning_rate": 8e-05, "loss": 2.0695, "step": 441 }, { "epoch": 0.07546525525012805, "grad_norm": 1.3636471033096313, "learning_rate": 8e-05, "loss": 1.8188, "step": 442 }, { "epoch": 0.07563599112173468, "grad_norm": 0.44788074493408203, "learning_rate": 8e-05, "loss": 1.8064, "step": 443 }, { "epoch": 0.0758067269933413, "grad_norm": 0.45218735933303833, "learning_rate": 8e-05, "loss": 1.785, "step": 444 }, { "epoch": 0.07597746286494793, "grad_norm": 0.4361308217048645, "learning_rate": 8e-05, "loss": 1.775, "step": 445 }, { "epoch": 0.07614819873655455, "grad_norm": 0.4713241159915924, "learning_rate": 8e-05, "loss": 1.7303, "step": 446 }, { "epoch": 0.07631893460816118, "grad_norm": 0.433146595954895, "learning_rate": 8e-05, "loss": 1.6639, "step": 447 }, { "epoch": 0.0764896704797678, "grad_norm": 0.4535893499851227, "learning_rate": 8e-05, "loss": 1.7304, "step": 448 }, { "epoch": 0.07666040635137443, "grad_norm": 0.44266921281814575, "learning_rate": 8e-05, "loss": 1.6789, "step": 449 }, { "epoch": 0.07683114222298104, "grad_norm": 0.44785189628601074, "learning_rate": 8e-05, "loss": 1.5713, "step": 450 }, { "epoch": 0.07700187809458767, "grad_norm": 0.48177850246429443, "learning_rate": 8e-05, "loss": 1.8665, "step": 451 }, { "epoch": 0.07717261396619429, "grad_norm": 0.4776946008205414, "learning_rate": 8e-05, "loss": 1.9615, "step": 452 }, { "epoch": 0.07734334983780092, "grad_norm": 0.47359952330589294, "learning_rate": 8e-05, "loss": 1.6659, "step": 453 }, { "epoch": 0.07751408570940754, "grad_norm": 0.4892372786998749, "learning_rate": 8e-05, "loss": 1.7504, "step": 454 }, { "epoch": 0.07768482158101417, "grad_norm": 0.4634447693824768, "learning_rate": 8e-05, "loss": 1.7796, "step": 455 }, { "epoch": 0.0778555574526208, "grad_norm": 0.4917285442352295, "learning_rate": 8e-05, "loss": 2.017, "step": 456 }, { "epoch": 0.07802629332422742, "grad_norm": 0.4346023499965668, "learning_rate": 8e-05, "loss": 1.8166, "step": 457 }, { "epoch": 0.07819702919583404, "grad_norm": 0.4557175934314728, "learning_rate": 8e-05, "loss": 1.7543, "step": 458 }, { "epoch": 0.07836776506744067, "grad_norm": 0.4638034403324127, "learning_rate": 8e-05, "loss": 1.7004, "step": 459 }, { "epoch": 0.0785385009390473, "grad_norm": 0.4526844918727875, "learning_rate": 8e-05, "loss": 1.8151, "step": 460 }, { "epoch": 0.07870923681065392, "grad_norm": 0.48515912890434265, "learning_rate": 8e-05, "loss": 1.859, "step": 461 }, { "epoch": 0.07887997268226055, "grad_norm": 0.4684448838233948, "learning_rate": 8e-05, "loss": 1.7639, "step": 462 }, { "epoch": 0.07905070855386717, "grad_norm": 0.45355117321014404, "learning_rate": 8e-05, "loss": 1.7327, "step": 463 }, { "epoch": 0.0792214444254738, "grad_norm": 0.4538944959640503, "learning_rate": 8e-05, "loss": 1.8747, "step": 464 }, { "epoch": 0.07939218029708042, "grad_norm": 0.4373980760574341, "learning_rate": 8e-05, "loss": 1.7347, "step": 465 }, { "epoch": 0.07956291616868705, "grad_norm": 0.4347183108329773, "learning_rate": 8e-05, "loss": 1.7155, "step": 466 }, { "epoch": 0.07973365204029366, "grad_norm": 0.4352721869945526, "learning_rate": 8e-05, "loss": 1.6738, "step": 467 }, { "epoch": 0.07990438791190028, "grad_norm": 0.4679059684276581, "learning_rate": 8e-05, "loss": 1.7801, "step": 468 }, { "epoch": 0.08007512378350691, "grad_norm": 0.44871142506599426, "learning_rate": 8e-05, "loss": 1.8352, "step": 469 }, { "epoch": 0.08024585965511354, "grad_norm": 0.43751218914985657, "learning_rate": 8e-05, "loss": 1.6343, "step": 470 }, { "epoch": 0.08041659552672016, "grad_norm": 0.497718870639801, "learning_rate": 8e-05, "loss": 1.9275, "step": 471 }, { "epoch": 0.08058733139832679, "grad_norm": 0.4839646518230438, "learning_rate": 8e-05, "loss": 1.8563, "step": 472 }, { "epoch": 0.08075806726993341, "grad_norm": 0.48524045944213867, "learning_rate": 8e-05, "loss": 1.9694, "step": 473 }, { "epoch": 0.08092880314154004, "grad_norm": 0.46757763624191284, "learning_rate": 8e-05, "loss": 1.8001, "step": 474 }, { "epoch": 0.08109953901314666, "grad_norm": 0.452698290348053, "learning_rate": 8e-05, "loss": 1.6062, "step": 475 }, { "epoch": 0.08127027488475329, "grad_norm": 0.44944509863853455, "learning_rate": 8e-05, "loss": 1.717, "step": 476 }, { "epoch": 0.08144101075635991, "grad_norm": 0.49399104714393616, "learning_rate": 8e-05, "loss": 1.7999, "step": 477 }, { "epoch": 0.08161174662796654, "grad_norm": 0.45935678482055664, "learning_rate": 8e-05, "loss": 1.7492, "step": 478 }, { "epoch": 0.08178248249957316, "grad_norm": 0.4686856269836426, "learning_rate": 8e-05, "loss": 1.8037, "step": 479 }, { "epoch": 0.08195321837117979, "grad_norm": 0.44057902693748474, "learning_rate": 8e-05, "loss": 1.7107, "step": 480 }, { "epoch": 0.08212395424278641, "grad_norm": 0.5741490721702576, "learning_rate": 8e-05, "loss": 2.056, "step": 481 }, { "epoch": 0.08229469011439304, "grad_norm": 0.45151954889297485, "learning_rate": 8e-05, "loss": 1.7693, "step": 482 }, { "epoch": 0.08246542598599965, "grad_norm": 0.43373385071754456, "learning_rate": 8e-05, "loss": 1.7635, "step": 483 }, { "epoch": 0.08263616185760628, "grad_norm": 0.4513067305088043, "learning_rate": 8e-05, "loss": 1.8526, "step": 484 }, { "epoch": 0.0828068977292129, "grad_norm": 0.452556312084198, "learning_rate": 8e-05, "loss": 1.6777, "step": 485 }, { "epoch": 0.08297763360081953, "grad_norm": 0.4408188462257385, "learning_rate": 8e-05, "loss": 1.7337, "step": 486 }, { "epoch": 0.08314836947242615, "grad_norm": 0.46815404295921326, "learning_rate": 8e-05, "loss": 1.679, "step": 487 }, { "epoch": 0.08331910534403278, "grad_norm": 0.42265579104423523, "learning_rate": 8e-05, "loss": 1.6152, "step": 488 }, { "epoch": 0.0834898412156394, "grad_norm": 0.4501568078994751, "learning_rate": 8e-05, "loss": 1.6304, "step": 489 }, { "epoch": 0.08366057708724603, "grad_norm": 0.4559720456600189, "learning_rate": 8e-05, "loss": 1.9611, "step": 490 }, { "epoch": 0.08383131295885266, "grad_norm": 0.4689841866493225, "learning_rate": 8e-05, "loss": 1.627, "step": 491 }, { "epoch": 0.08400204883045928, "grad_norm": 0.45273908972740173, "learning_rate": 8e-05, "loss": 1.6316, "step": 492 }, { "epoch": 0.0841727847020659, "grad_norm": 0.46849045157432556, "learning_rate": 8e-05, "loss": 1.8211, "step": 493 }, { "epoch": 0.08434352057367253, "grad_norm": 0.4524925649166107, "learning_rate": 8e-05, "loss": 1.655, "step": 494 }, { "epoch": 0.08451425644527916, "grad_norm": 0.4565533399581909, "learning_rate": 8e-05, "loss": 1.6424, "step": 495 }, { "epoch": 0.08468499231688578, "grad_norm": 0.47277405858039856, "learning_rate": 8e-05, "loss": 1.716, "step": 496 }, { "epoch": 0.08485572818849241, "grad_norm": 0.4398384690284729, "learning_rate": 8e-05, "loss": 1.6767, "step": 497 }, { "epoch": 0.08502646406009903, "grad_norm": 0.47322794795036316, "learning_rate": 8e-05, "loss": 1.7859, "step": 498 }, { "epoch": 0.08519719993170564, "grad_norm": 0.44220906496047974, "learning_rate": 8e-05, "loss": 1.7397, "step": 499 }, { "epoch": 0.08536793580331227, "grad_norm": 0.49476730823516846, "learning_rate": 8e-05, "loss": 2.1168, "step": 500 }, { "epoch": 0.0855386716749189, "grad_norm": 0.4926798939704895, "learning_rate": 8e-05, "loss": 1.7514, "step": 501 }, { "epoch": 0.08570940754652552, "grad_norm": 0.44655290246009827, "learning_rate": 8e-05, "loss": 1.6426, "step": 502 }, { "epoch": 0.08588014341813215, "grad_norm": 0.443061888217926, "learning_rate": 8e-05, "loss": 1.6298, "step": 503 }, { "epoch": 0.08605087928973877, "grad_norm": 0.505246639251709, "learning_rate": 8e-05, "loss": 1.754, "step": 504 }, { "epoch": 0.0862216151613454, "grad_norm": 0.4359424412250519, "learning_rate": 8e-05, "loss": 1.6859, "step": 505 }, { "epoch": 0.08639235103295202, "grad_norm": 0.4787672162055969, "learning_rate": 8e-05, "loss": 1.7728, "step": 506 }, { "epoch": 0.08656308690455865, "grad_norm": 0.4699687957763672, "learning_rate": 8e-05, "loss": 1.8528, "step": 507 }, { "epoch": 0.08673382277616527, "grad_norm": 0.4508266746997833, "learning_rate": 8e-05, "loss": 1.7123, "step": 508 }, { "epoch": 0.0869045586477719, "grad_norm": 0.4421223998069763, "learning_rate": 8e-05, "loss": 1.8058, "step": 509 }, { "epoch": 0.08707529451937852, "grad_norm": 0.4739244878292084, "learning_rate": 8e-05, "loss": 1.7668, "step": 510 }, { "epoch": 0.08724603039098515, "grad_norm": 0.4537983536720276, "learning_rate": 8e-05, "loss": 1.922, "step": 511 }, { "epoch": 0.08741676626259177, "grad_norm": 0.4538816511631012, "learning_rate": 8e-05, "loss": 1.7276, "step": 512 }, { "epoch": 0.0875875021341984, "grad_norm": 0.47644931077957153, "learning_rate": 8e-05, "loss": 1.8074, "step": 513 }, { "epoch": 0.08775823800580503, "grad_norm": 0.4771662950515747, "learning_rate": 8e-05, "loss": 1.7157, "step": 514 }, { "epoch": 0.08792897387741165, "grad_norm": 0.5580923557281494, "learning_rate": 8e-05, "loss": 2.0066, "step": 515 }, { "epoch": 0.08809970974901826, "grad_norm": 0.42866119742393494, "learning_rate": 8e-05, "loss": 1.687, "step": 516 }, { "epoch": 0.08827044562062489, "grad_norm": 0.47917434573173523, "learning_rate": 8e-05, "loss": 1.7713, "step": 517 }, { "epoch": 0.08844118149223151, "grad_norm": 0.4511472284793854, "learning_rate": 8e-05, "loss": 1.6845, "step": 518 }, { "epoch": 0.08861191736383814, "grad_norm": 0.4516735076904297, "learning_rate": 8e-05, "loss": 1.7635, "step": 519 }, { "epoch": 0.08878265323544476, "grad_norm": 0.5081110000610352, "learning_rate": 8e-05, "loss": 2.0312, "step": 520 }, { "epoch": 0.08895338910705139, "grad_norm": 0.4327872693538666, "learning_rate": 8e-05, "loss": 1.7017, "step": 521 }, { "epoch": 0.08912412497865801, "grad_norm": 0.4318854510784149, "learning_rate": 8e-05, "loss": 1.5972, "step": 522 }, { "epoch": 0.08929486085026464, "grad_norm": 0.4589429199695587, "learning_rate": 8e-05, "loss": 1.7477, "step": 523 }, { "epoch": 0.08946559672187127, "grad_norm": 0.456632524728775, "learning_rate": 8e-05, "loss": 1.7497, "step": 524 }, { "epoch": 0.08963633259347789, "grad_norm": 0.4493088126182556, "learning_rate": 8e-05, "loss": 1.9426, "step": 525 }, { "epoch": 0.08980706846508452, "grad_norm": 0.4481428861618042, "learning_rate": 8e-05, "loss": 1.7852, "step": 526 }, { "epoch": 0.08997780433669114, "grad_norm": 0.4372050166130066, "learning_rate": 8e-05, "loss": 1.7301, "step": 527 }, { "epoch": 0.09014854020829777, "grad_norm": 0.4670218825340271, "learning_rate": 8e-05, "loss": 1.7174, "step": 528 }, { "epoch": 0.09031927607990439, "grad_norm": 0.47615349292755127, "learning_rate": 8e-05, "loss": 1.8318, "step": 529 }, { "epoch": 0.09049001195151102, "grad_norm": 0.4535890817642212, "learning_rate": 8e-05, "loss": 1.6476, "step": 530 }, { "epoch": 0.09066074782311764, "grad_norm": 0.4682008624076843, "learning_rate": 8e-05, "loss": 1.7185, "step": 531 }, { "epoch": 0.09083148369472425, "grad_norm": 0.454171359539032, "learning_rate": 8e-05, "loss": 1.7101, "step": 532 }, { "epoch": 0.09100221956633088, "grad_norm": 0.46746888756752014, "learning_rate": 8e-05, "loss": 1.8714, "step": 533 }, { "epoch": 0.0911729554379375, "grad_norm": 0.43854156136512756, "learning_rate": 8e-05, "loss": 1.6758, "step": 534 }, { "epoch": 0.09134369130954413, "grad_norm": 0.4682394564151764, "learning_rate": 8e-05, "loss": 1.8041, "step": 535 }, { "epoch": 0.09151442718115076, "grad_norm": 0.4405398666858673, "learning_rate": 8e-05, "loss": 1.7055, "step": 536 }, { "epoch": 0.09168516305275738, "grad_norm": 0.4625595211982727, "learning_rate": 8e-05, "loss": 1.7536, "step": 537 }, { "epoch": 0.09185589892436401, "grad_norm": 0.43273600935935974, "learning_rate": 8e-05, "loss": 1.57, "step": 538 }, { "epoch": 0.09202663479597063, "grad_norm": 0.4675979018211365, "learning_rate": 8e-05, "loss": 1.8568, "step": 539 }, { "epoch": 0.09219737066757726, "grad_norm": 0.47791150212287903, "learning_rate": 8e-05, "loss": 1.7777, "step": 540 }, { "epoch": 0.09236810653918388, "grad_norm": 0.45072266459465027, "learning_rate": 8e-05, "loss": 1.8455, "step": 541 }, { "epoch": 0.09253884241079051, "grad_norm": 0.4230378270149231, "learning_rate": 8e-05, "loss": 1.6826, "step": 542 }, { "epoch": 0.09270957828239713, "grad_norm": 0.4235169291496277, "learning_rate": 8e-05, "loss": 1.6013, "step": 543 }, { "epoch": 0.09288031415400376, "grad_norm": 0.419633150100708, "learning_rate": 8e-05, "loss": 1.6618, "step": 544 }, { "epoch": 0.09305105002561039, "grad_norm": 0.4744793176651001, "learning_rate": 8e-05, "loss": 1.721, "step": 545 }, { "epoch": 0.09322178589721701, "grad_norm": 0.4658953547477722, "learning_rate": 8e-05, "loss": 1.5469, "step": 546 }, { "epoch": 0.09339252176882364, "grad_norm": 0.4550154507160187, "learning_rate": 8e-05, "loss": 1.8184, "step": 547 }, { "epoch": 0.09356325764043026, "grad_norm": 0.4836101531982422, "learning_rate": 8e-05, "loss": 1.7943, "step": 548 }, { "epoch": 0.09373399351203687, "grad_norm": 0.4335220754146576, "learning_rate": 8e-05, "loss": 1.657, "step": 549 }, { "epoch": 0.0939047293836435, "grad_norm": 0.46048465371131897, "learning_rate": 8e-05, "loss": 1.768, "step": 550 }, { "epoch": 0.09407546525525012, "grad_norm": 0.46026456356048584, "learning_rate": 8e-05, "loss": 1.7956, "step": 551 }, { "epoch": 0.09424620112685675, "grad_norm": 0.45427268743515015, "learning_rate": 8e-05, "loss": 1.6666, "step": 552 }, { "epoch": 0.09441693699846337, "grad_norm": 0.47262343764305115, "learning_rate": 8e-05, "loss": 1.819, "step": 553 }, { "epoch": 0.09458767287007, "grad_norm": 0.4228203594684601, "learning_rate": 8e-05, "loss": 1.6716, "step": 554 }, { "epoch": 0.09475840874167663, "grad_norm": 0.47598788142204285, "learning_rate": 8e-05, "loss": 1.9474, "step": 555 }, { "epoch": 0.09492914461328325, "grad_norm": 0.45395761728286743, "learning_rate": 8e-05, "loss": 1.7532, "step": 556 }, { "epoch": 0.09509988048488988, "grad_norm": 0.4840429723262787, "learning_rate": 8e-05, "loss": 1.7062, "step": 557 }, { "epoch": 0.0952706163564965, "grad_norm": 0.4267044961452484, "learning_rate": 8e-05, "loss": 1.7272, "step": 558 }, { "epoch": 0.09544135222810313, "grad_norm": 0.43867483735084534, "learning_rate": 8e-05, "loss": 1.659, "step": 559 }, { "epoch": 0.09561208809970975, "grad_norm": 0.47539767622947693, "learning_rate": 8e-05, "loss": 1.6665, "step": 560 }, { "epoch": 0.09578282397131638, "grad_norm": 0.4639579951763153, "learning_rate": 8e-05, "loss": 1.798, "step": 561 }, { "epoch": 0.095953559842923, "grad_norm": 0.467826783657074, "learning_rate": 8e-05, "loss": 1.7679, "step": 562 }, { "epoch": 0.09612429571452963, "grad_norm": 0.4371953308582306, "learning_rate": 8e-05, "loss": 1.7176, "step": 563 }, { "epoch": 0.09629503158613625, "grad_norm": 0.4308265745639801, "learning_rate": 8e-05, "loss": 1.6925, "step": 564 }, { "epoch": 0.09646576745774287, "grad_norm": 0.4573603570461273, "learning_rate": 8e-05, "loss": 1.5271, "step": 565 }, { "epoch": 0.09663650332934949, "grad_norm": 0.4514971077442169, "learning_rate": 8e-05, "loss": 1.8231, "step": 566 }, { "epoch": 0.09680723920095612, "grad_norm": 0.46066415309906006, "learning_rate": 8e-05, "loss": 1.8187, "step": 567 }, { "epoch": 0.09697797507256274, "grad_norm": 0.4498605728149414, "learning_rate": 8e-05, "loss": 1.7704, "step": 568 }, { "epoch": 0.09714871094416937, "grad_norm": 0.46262457966804504, "learning_rate": 8e-05, "loss": 1.7422, "step": 569 }, { "epoch": 0.09731944681577599, "grad_norm": 0.4483664035797119, "learning_rate": 8e-05, "loss": 1.576, "step": 570 }, { "epoch": 0.09749018268738262, "grad_norm": 0.4700812101364136, "learning_rate": 8e-05, "loss": 1.6564, "step": 571 }, { "epoch": 0.09766091855898924, "grad_norm": 0.4646429121494293, "learning_rate": 8e-05, "loss": 1.8822, "step": 572 }, { "epoch": 0.09783165443059587, "grad_norm": 0.4631807506084442, "learning_rate": 8e-05, "loss": 1.8145, "step": 573 }, { "epoch": 0.0980023903022025, "grad_norm": 0.4503132998943329, "learning_rate": 8e-05, "loss": 1.6443, "step": 574 }, { "epoch": 0.09817312617380912, "grad_norm": 0.44691574573516846, "learning_rate": 8e-05, "loss": 1.7861, "step": 575 }, { "epoch": 0.09834386204541574, "grad_norm": 0.4431731402873993, "learning_rate": 8e-05, "loss": 1.6318, "step": 576 }, { "epoch": 0.09851459791702237, "grad_norm": 0.4683443605899811, "learning_rate": 8e-05, "loss": 1.6578, "step": 577 }, { "epoch": 0.098685333788629, "grad_norm": 0.4744807779788971, "learning_rate": 8e-05, "loss": 1.8965, "step": 578 }, { "epoch": 0.09885606966023562, "grad_norm": 0.4347449243068695, "learning_rate": 8e-05, "loss": 1.6487, "step": 579 }, { "epoch": 0.09902680553184225, "grad_norm": 0.4655593931674957, "learning_rate": 8e-05, "loss": 1.7928, "step": 580 }, { "epoch": 0.09919754140344886, "grad_norm": 0.48974379897117615, "learning_rate": 8e-05, "loss": 1.7551, "step": 581 }, { "epoch": 0.09936827727505548, "grad_norm": 0.5250823497772217, "learning_rate": 8e-05, "loss": 1.644, "step": 582 }, { "epoch": 0.09953901314666211, "grad_norm": 0.460784912109375, "learning_rate": 8e-05, "loss": 1.8006, "step": 583 }, { "epoch": 0.09970974901826873, "grad_norm": 0.4422128200531006, "learning_rate": 8e-05, "loss": 1.5922, "step": 584 }, { "epoch": 0.09988048488987536, "grad_norm": 0.42034417390823364, "learning_rate": 8e-05, "loss": 1.3963, "step": 585 }, { "epoch": 0.10005122076148198, "grad_norm": 0.4356089234352112, "learning_rate": 8e-05, "loss": 1.696, "step": 586 }, { "epoch": 0.10022195663308861, "grad_norm": 0.46241411566734314, "learning_rate": 8e-05, "loss": 1.9386, "step": 587 }, { "epoch": 0.10039269250469524, "grad_norm": 0.44461897015571594, "learning_rate": 8e-05, "loss": 1.6952, "step": 588 }, { "epoch": 0.10056342837630186, "grad_norm": 0.5163662433624268, "learning_rate": 8e-05, "loss": 1.8651, "step": 589 }, { "epoch": 0.10073416424790849, "grad_norm": 0.4413129985332489, "learning_rate": 8e-05, "loss": 1.6057, "step": 590 }, { "epoch": 0.10090490011951511, "grad_norm": 0.4022691547870636, "learning_rate": 8e-05, "loss": 1.3791, "step": 591 }, { "epoch": 0.10107563599112174, "grad_norm": 0.4609820246696472, "learning_rate": 8e-05, "loss": 1.806, "step": 592 }, { "epoch": 0.10124637186272836, "grad_norm": 0.4812966287136078, "learning_rate": 8e-05, "loss": 1.855, "step": 593 }, { "epoch": 0.10141710773433499, "grad_norm": 0.43217626214027405, "learning_rate": 8e-05, "loss": 1.6627, "step": 594 }, { "epoch": 0.10158784360594161, "grad_norm": 0.45578041672706604, "learning_rate": 8e-05, "loss": 1.688, "step": 595 }, { "epoch": 0.10175857947754824, "grad_norm": 0.49193260073661804, "learning_rate": 8e-05, "loss": 1.9027, "step": 596 }, { "epoch": 0.10192931534915486, "grad_norm": 0.4556460380554199, "learning_rate": 8e-05, "loss": 1.6926, "step": 597 }, { "epoch": 0.10210005122076148, "grad_norm": 0.5108190774917603, "learning_rate": 8e-05, "loss": 1.8094, "step": 598 }, { "epoch": 0.1022707870923681, "grad_norm": 0.47432681918144226, "learning_rate": 8e-05, "loss": 1.6968, "step": 599 }, { "epoch": 0.10244152296397473, "grad_norm": 0.44054144620895386, "learning_rate": 8e-05, "loss": 1.5765, "step": 600 }, { "epoch": 0.10261225883558135, "grad_norm": 0.4654546082019806, "learning_rate": 8e-05, "loss": 1.5889, "step": 601 }, { "epoch": 0.10278299470718798, "grad_norm": 0.45769956707954407, "learning_rate": 8e-05, "loss": 1.6742, "step": 602 }, { "epoch": 0.1029537305787946, "grad_norm": 0.4528130292892456, "learning_rate": 8e-05, "loss": 1.691, "step": 603 }, { "epoch": 0.10312446645040123, "grad_norm": 0.4466986656188965, "learning_rate": 8e-05, "loss": 1.8546, "step": 604 }, { "epoch": 0.10329520232200785, "grad_norm": 0.4626997411251068, "learning_rate": 8e-05, "loss": 1.7186, "step": 605 }, { "epoch": 0.10346593819361448, "grad_norm": 0.42103081941604614, "learning_rate": 8e-05, "loss": 1.626, "step": 606 }, { "epoch": 0.1036366740652211, "grad_norm": 0.44925978779792786, "learning_rate": 8e-05, "loss": 1.7455, "step": 607 }, { "epoch": 0.10380740993682773, "grad_norm": 0.44690948724746704, "learning_rate": 8e-05, "loss": 1.7523, "step": 608 }, { "epoch": 0.10397814580843436, "grad_norm": 0.49134138226509094, "learning_rate": 8e-05, "loss": 1.9201, "step": 609 }, { "epoch": 0.10414888168004098, "grad_norm": 0.4706639051437378, "learning_rate": 8e-05, "loss": 1.9584, "step": 610 }, { "epoch": 0.1043196175516476, "grad_norm": 0.45697128772735596, "learning_rate": 8e-05, "loss": 1.5957, "step": 611 }, { "epoch": 0.10449035342325423, "grad_norm": 0.45105481147766113, "learning_rate": 8e-05, "loss": 1.8948, "step": 612 }, { "epoch": 0.10466108929486086, "grad_norm": 0.43617600202560425, "learning_rate": 8e-05, "loss": 1.7277, "step": 613 }, { "epoch": 0.10483182516646747, "grad_norm": 0.4444302022457123, "learning_rate": 8e-05, "loss": 1.8378, "step": 614 }, { "epoch": 0.1050025610380741, "grad_norm": 0.47409623861312866, "learning_rate": 8e-05, "loss": 1.7285, "step": 615 }, { "epoch": 0.10517329690968072, "grad_norm": 0.4572072923183441, "learning_rate": 8e-05, "loss": 1.7275, "step": 616 }, { "epoch": 0.10534403278128734, "grad_norm": 0.4535636603832245, "learning_rate": 8e-05, "loss": 1.749, "step": 617 }, { "epoch": 0.10551476865289397, "grad_norm": 0.4499237537384033, "learning_rate": 8e-05, "loss": 1.6587, "step": 618 }, { "epoch": 0.1056855045245006, "grad_norm": 0.4401490390300751, "learning_rate": 8e-05, "loss": 1.6387, "step": 619 }, { "epoch": 0.10585624039610722, "grad_norm": 0.4633047580718994, "learning_rate": 8e-05, "loss": 1.7253, "step": 620 }, { "epoch": 0.10602697626771385, "grad_norm": 0.4575956165790558, "learning_rate": 8e-05, "loss": 1.7346, "step": 621 }, { "epoch": 0.10619771213932047, "grad_norm": 0.4430898427963257, "learning_rate": 8e-05, "loss": 1.6842, "step": 622 }, { "epoch": 0.1063684480109271, "grad_norm": 0.4621991813182831, "learning_rate": 8e-05, "loss": 1.8109, "step": 623 }, { "epoch": 0.10653918388253372, "grad_norm": 0.4531979262828827, "learning_rate": 8e-05, "loss": 1.8235, "step": 624 }, { "epoch": 0.10670991975414035, "grad_norm": 0.4504040777683258, "learning_rate": 8e-05, "loss": 1.7648, "step": 625 }, { "epoch": 0.10688065562574697, "grad_norm": 0.4988024830818176, "learning_rate": 8e-05, "loss": 1.6613, "step": 626 }, { "epoch": 0.1070513914973536, "grad_norm": 0.47779184579849243, "learning_rate": 8e-05, "loss": 1.4636, "step": 627 }, { "epoch": 0.10722212736896022, "grad_norm": 0.41520535945892334, "learning_rate": 8e-05, "loss": 1.7009, "step": 628 }, { "epoch": 0.10739286324056685, "grad_norm": 0.42950010299682617, "learning_rate": 8e-05, "loss": 1.6765, "step": 629 }, { "epoch": 0.10756359911217346, "grad_norm": 0.4623848795890808, "learning_rate": 8e-05, "loss": 1.7747, "step": 630 }, { "epoch": 0.10773433498378009, "grad_norm": 0.4596050977706909, "learning_rate": 8e-05, "loss": 1.7489, "step": 631 }, { "epoch": 0.10790507085538671, "grad_norm": 0.4692271053791046, "learning_rate": 8e-05, "loss": 1.9362, "step": 632 }, { "epoch": 0.10807580672699334, "grad_norm": 0.4494847357273102, "learning_rate": 8e-05, "loss": 1.9526, "step": 633 }, { "epoch": 0.10824654259859996, "grad_norm": 0.4422329366207123, "learning_rate": 8e-05, "loss": 1.6441, "step": 634 }, { "epoch": 0.10841727847020659, "grad_norm": 0.4731101393699646, "learning_rate": 8e-05, "loss": 1.8125, "step": 635 }, { "epoch": 0.10858801434181321, "grad_norm": 0.5107797980308533, "learning_rate": 8e-05, "loss": 1.932, "step": 636 }, { "epoch": 0.10875875021341984, "grad_norm": 0.41998204588890076, "learning_rate": 8e-05, "loss": 1.6753, "step": 637 }, { "epoch": 0.10892948608502646, "grad_norm": 0.49071767926216125, "learning_rate": 8e-05, "loss": 1.8941, "step": 638 }, { "epoch": 0.10910022195663309, "grad_norm": 0.45737767219543457, "learning_rate": 8e-05, "loss": 1.7296, "step": 639 }, { "epoch": 0.10927095782823971, "grad_norm": 0.4347904324531555, "learning_rate": 8e-05, "loss": 1.6456, "step": 640 }, { "epoch": 0.10944169369984634, "grad_norm": 0.4274546205997467, "learning_rate": 8e-05, "loss": 1.6954, "step": 641 }, { "epoch": 0.10961242957145297, "grad_norm": 0.4607022702693939, "learning_rate": 8e-05, "loss": 1.8117, "step": 642 }, { "epoch": 0.10978316544305959, "grad_norm": 0.4252704679965973, "learning_rate": 8e-05, "loss": 1.7942, "step": 643 }, { "epoch": 0.10995390131466622, "grad_norm": 0.42857566475868225, "learning_rate": 8e-05, "loss": 1.7001, "step": 644 }, { "epoch": 0.11012463718627284, "grad_norm": 0.4648403227329254, "learning_rate": 8e-05, "loss": 1.8373, "step": 645 }, { "epoch": 0.11029537305787947, "grad_norm": 0.4613080620765686, "learning_rate": 8e-05, "loss": 1.8227, "step": 646 }, { "epoch": 0.11046610892948608, "grad_norm": 0.45803970098495483, "learning_rate": 8e-05, "loss": 1.653, "step": 647 }, { "epoch": 0.1106368448010927, "grad_norm": 0.48311176896095276, "learning_rate": 8e-05, "loss": 1.8065, "step": 648 }, { "epoch": 0.11080758067269933, "grad_norm": 0.4579721987247467, "learning_rate": 8e-05, "loss": 1.8029, "step": 649 }, { "epoch": 0.11097831654430595, "grad_norm": 0.44906431436538696, "learning_rate": 8e-05, "loss": 1.7895, "step": 650 }, { "epoch": 0.11114905241591258, "grad_norm": 0.5034238696098328, "learning_rate": 8e-05, "loss": 2.1371, "step": 651 }, { "epoch": 0.1113197882875192, "grad_norm": 0.46965041756629944, "learning_rate": 8e-05, "loss": 1.6443, "step": 652 }, { "epoch": 0.11149052415912583, "grad_norm": 0.4968666434288025, "learning_rate": 8e-05, "loss": 1.8076, "step": 653 }, { "epoch": 0.11166126003073246, "grad_norm": 0.4611842930316925, "learning_rate": 8e-05, "loss": 1.8213, "step": 654 }, { "epoch": 0.11183199590233908, "grad_norm": 0.4806687533855438, "learning_rate": 8e-05, "loss": 1.7961, "step": 655 }, { "epoch": 0.11200273177394571, "grad_norm": 0.4553872048854828, "learning_rate": 8e-05, "loss": 1.7125, "step": 656 }, { "epoch": 0.11217346764555233, "grad_norm": 0.4542956054210663, "learning_rate": 8e-05, "loss": 1.7038, "step": 657 }, { "epoch": 0.11234420351715896, "grad_norm": 0.4549236595630646, "learning_rate": 8e-05, "loss": 1.807, "step": 658 }, { "epoch": 0.11251493938876558, "grad_norm": 0.4630296528339386, "learning_rate": 8e-05, "loss": 1.7167, "step": 659 }, { "epoch": 0.11268567526037221, "grad_norm": 0.43665194511413574, "learning_rate": 8e-05, "loss": 1.7668, "step": 660 }, { "epoch": 0.11285641113197883, "grad_norm": 0.41082096099853516, "learning_rate": 8e-05, "loss": 1.5403, "step": 661 }, { "epoch": 0.11302714700358546, "grad_norm": 0.5184293389320374, "learning_rate": 8e-05, "loss": 2.0096, "step": 662 }, { "epoch": 0.11319788287519207, "grad_norm": 0.44052761793136597, "learning_rate": 8e-05, "loss": 1.7035, "step": 663 }, { "epoch": 0.1133686187467987, "grad_norm": 0.43192625045776367, "learning_rate": 8e-05, "loss": 1.5948, "step": 664 }, { "epoch": 0.11353935461840532, "grad_norm": 0.4479033350944519, "learning_rate": 8e-05, "loss": 1.7519, "step": 665 }, { "epoch": 0.11371009049001195, "grad_norm": 0.4710054099559784, "learning_rate": 8e-05, "loss": 1.7771, "step": 666 }, { "epoch": 0.11388082636161857, "grad_norm": 0.45758363604545593, "learning_rate": 8e-05, "loss": 1.8202, "step": 667 }, { "epoch": 0.1140515622332252, "grad_norm": 0.4625811278820038, "learning_rate": 8e-05, "loss": 1.7806, "step": 668 }, { "epoch": 0.11422229810483182, "grad_norm": 0.47318458557128906, "learning_rate": 8e-05, "loss": 1.6869, "step": 669 }, { "epoch": 0.11439303397643845, "grad_norm": 0.4205014109611511, "learning_rate": 8e-05, "loss": 1.6046, "step": 670 }, { "epoch": 0.11456376984804507, "grad_norm": 0.43786418437957764, "learning_rate": 8e-05, "loss": 1.6601, "step": 671 }, { "epoch": 0.1147345057196517, "grad_norm": 0.45929813385009766, "learning_rate": 8e-05, "loss": 1.697, "step": 672 }, { "epoch": 0.11490524159125833, "grad_norm": 0.46327605843544006, "learning_rate": 8e-05, "loss": 1.783, "step": 673 }, { "epoch": 0.11507597746286495, "grad_norm": 0.4517524838447571, "learning_rate": 8e-05, "loss": 1.8437, "step": 674 }, { "epoch": 0.11524671333447158, "grad_norm": 0.5116130709648132, "learning_rate": 8e-05, "loss": 1.7407, "step": 675 }, { "epoch": 0.1154174492060782, "grad_norm": 0.45141446590423584, "learning_rate": 8e-05, "loss": 1.9253, "step": 676 }, { "epoch": 0.11558818507768483, "grad_norm": 0.4183353781700134, "learning_rate": 8e-05, "loss": 1.4685, "step": 677 }, { "epoch": 0.11575892094929145, "grad_norm": 0.46799150109291077, "learning_rate": 8e-05, "loss": 1.8373, "step": 678 }, { "epoch": 0.11592965682089806, "grad_norm": 0.46076229214668274, "learning_rate": 8e-05, "loss": 1.7809, "step": 679 }, { "epoch": 0.11610039269250469, "grad_norm": 0.45284348726272583, "learning_rate": 8e-05, "loss": 1.7778, "step": 680 }, { "epoch": 0.11627112856411131, "grad_norm": 0.4534223973751068, "learning_rate": 8e-05, "loss": 1.7546, "step": 681 }, { "epoch": 0.11644186443571794, "grad_norm": 0.4663488268852234, "learning_rate": 8e-05, "loss": 1.7946, "step": 682 }, { "epoch": 0.11661260030732457, "grad_norm": 0.43188172578811646, "learning_rate": 8e-05, "loss": 1.584, "step": 683 }, { "epoch": 0.11678333617893119, "grad_norm": 0.48066475987434387, "learning_rate": 8e-05, "loss": 2.004, "step": 684 }, { "epoch": 0.11695407205053782, "grad_norm": 0.42926478385925293, "learning_rate": 8e-05, "loss": 1.8668, "step": 685 }, { "epoch": 0.11712480792214444, "grad_norm": 0.47831371426582336, "learning_rate": 8e-05, "loss": 1.8653, "step": 686 }, { "epoch": 0.11729554379375107, "grad_norm": 0.4724123477935791, "learning_rate": 8e-05, "loss": 1.8415, "step": 687 }, { "epoch": 0.11746627966535769, "grad_norm": 0.4412477910518646, "learning_rate": 8e-05, "loss": 1.6227, "step": 688 }, { "epoch": 0.11763701553696432, "grad_norm": 0.4299234449863434, "learning_rate": 8e-05, "loss": 1.6024, "step": 689 }, { "epoch": 0.11780775140857094, "grad_norm": 0.46895337104797363, "learning_rate": 8e-05, "loss": 1.7657, "step": 690 }, { "epoch": 0.11797848728017757, "grad_norm": 0.4527714252471924, "learning_rate": 8e-05, "loss": 1.8379, "step": 691 }, { "epoch": 0.1181492231517842, "grad_norm": 0.4483269453048706, "learning_rate": 8e-05, "loss": 1.7835, "step": 692 }, { "epoch": 0.11831995902339082, "grad_norm": 0.44055140018463135, "learning_rate": 8e-05, "loss": 1.7366, "step": 693 }, { "epoch": 0.11849069489499744, "grad_norm": 0.43905794620513916, "learning_rate": 8e-05, "loss": 1.5783, "step": 694 }, { "epoch": 0.11866143076660407, "grad_norm": 0.42942720651626587, "learning_rate": 8e-05, "loss": 1.5943, "step": 695 }, { "epoch": 0.11883216663821068, "grad_norm": 0.4737808108329773, "learning_rate": 8e-05, "loss": 1.9365, "step": 696 }, { "epoch": 0.11900290250981731, "grad_norm": 0.49118173122406006, "learning_rate": 8e-05, "loss": 1.8551, "step": 697 }, { "epoch": 0.11917363838142393, "grad_norm": 0.440589040517807, "learning_rate": 8e-05, "loss": 1.7457, "step": 698 }, { "epoch": 0.11934437425303056, "grad_norm": 0.45333337783813477, "learning_rate": 8e-05, "loss": 1.752, "step": 699 }, { "epoch": 0.11951511012463718, "grad_norm": 0.4507703185081482, "learning_rate": 8e-05, "loss": 1.7007, "step": 700 }, { "epoch": 0.11968584599624381, "grad_norm": 0.46208956837654114, "learning_rate": 8e-05, "loss": 1.8836, "step": 701 }, { "epoch": 0.11985658186785043, "grad_norm": 0.4427480399608612, "learning_rate": 8e-05, "loss": 1.8018, "step": 702 }, { "epoch": 0.12002731773945706, "grad_norm": 0.4181579053401947, "learning_rate": 8e-05, "loss": 1.4455, "step": 703 }, { "epoch": 0.12019805361106368, "grad_norm": 0.4948076009750366, "learning_rate": 8e-05, "loss": 1.9131, "step": 704 }, { "epoch": 0.12036878948267031, "grad_norm": 0.4996519386768341, "learning_rate": 8e-05, "loss": 1.8033, "step": 705 }, { "epoch": 0.12053952535427694, "grad_norm": 0.4705049395561218, "learning_rate": 8e-05, "loss": 1.7919, "step": 706 }, { "epoch": 0.12071026122588356, "grad_norm": 0.43460628390312195, "learning_rate": 8e-05, "loss": 1.6713, "step": 707 }, { "epoch": 0.12088099709749019, "grad_norm": 0.4295788109302521, "learning_rate": 8e-05, "loss": 1.5999, "step": 708 }, { "epoch": 0.12105173296909681, "grad_norm": 0.4206583797931671, "learning_rate": 8e-05, "loss": 1.5837, "step": 709 }, { "epoch": 0.12122246884070344, "grad_norm": 0.45066237449645996, "learning_rate": 8e-05, "loss": 1.6128, "step": 710 }, { "epoch": 0.12139320471231006, "grad_norm": 0.4853072166442871, "learning_rate": 8e-05, "loss": 1.6511, "step": 711 }, { "epoch": 0.12156394058391667, "grad_norm": 0.4762587547302246, "learning_rate": 8e-05, "loss": 1.8025, "step": 712 }, { "epoch": 0.1217346764555233, "grad_norm": 0.4861604869365692, "learning_rate": 8e-05, "loss": 1.7914, "step": 713 }, { "epoch": 0.12190541232712993, "grad_norm": 0.46593841910362244, "learning_rate": 8e-05, "loss": 1.7724, "step": 714 }, { "epoch": 0.12207614819873655, "grad_norm": 0.45298662781715393, "learning_rate": 8e-05, "loss": 1.8394, "step": 715 }, { "epoch": 0.12224688407034318, "grad_norm": 0.4642561078071594, "learning_rate": 8e-05, "loss": 1.7901, "step": 716 }, { "epoch": 0.1224176199419498, "grad_norm": 0.48154598474502563, "learning_rate": 8e-05, "loss": 1.8153, "step": 717 }, { "epoch": 0.12258835581355643, "grad_norm": 0.4419565796852112, "learning_rate": 8e-05, "loss": 1.6671, "step": 718 }, { "epoch": 0.12275909168516305, "grad_norm": 0.4488522708415985, "learning_rate": 8e-05, "loss": 1.6419, "step": 719 }, { "epoch": 0.12292982755676968, "grad_norm": 0.4802161455154419, "learning_rate": 8e-05, "loss": 1.8951, "step": 720 }, { "epoch": 0.1231005634283763, "grad_norm": 0.4488983750343323, "learning_rate": 8e-05, "loss": 1.691, "step": 721 }, { "epoch": 0.12327129929998293, "grad_norm": 0.45231470465660095, "learning_rate": 8e-05, "loss": 1.711, "step": 722 }, { "epoch": 0.12344203517158955, "grad_norm": 0.46398910880088806, "learning_rate": 8e-05, "loss": 1.6938, "step": 723 }, { "epoch": 0.12361277104319618, "grad_norm": 0.5046457052230835, "learning_rate": 8e-05, "loss": 1.8375, "step": 724 }, { "epoch": 0.1237835069148028, "grad_norm": 0.4170818626880646, "learning_rate": 8e-05, "loss": 1.5977, "step": 725 }, { "epoch": 0.12395424278640943, "grad_norm": 0.489488422870636, "learning_rate": 8e-05, "loss": 1.7843, "step": 726 }, { "epoch": 0.12412497865801606, "grad_norm": 0.4414406716823578, "learning_rate": 8e-05, "loss": 1.7091, "step": 727 }, { "epoch": 0.12429571452962268, "grad_norm": 0.4572165012359619, "learning_rate": 8e-05, "loss": 1.7778, "step": 728 }, { "epoch": 0.12446645040122929, "grad_norm": 0.4478634297847748, "learning_rate": 8e-05, "loss": 1.5751, "step": 729 }, { "epoch": 0.12463718627283592, "grad_norm": 0.43520867824554443, "learning_rate": 8e-05, "loss": 1.7244, "step": 730 }, { "epoch": 0.12480792214444254, "grad_norm": 0.44835159182548523, "learning_rate": 8e-05, "loss": 1.8589, "step": 731 }, { "epoch": 0.12497865801604917, "grad_norm": 0.462549090385437, "learning_rate": 8e-05, "loss": 1.8471, "step": 732 }, { "epoch": 0.1251493938876558, "grad_norm": 0.4568072557449341, "learning_rate": 8e-05, "loss": 1.7208, "step": 733 }, { "epoch": 0.12532012975926243, "grad_norm": 0.440589040517807, "learning_rate": 8e-05, "loss": 1.6701, "step": 734 }, { "epoch": 0.12549086563086906, "grad_norm": 0.5091049671173096, "learning_rate": 8e-05, "loss": 1.8024, "step": 735 }, { "epoch": 0.12566160150247568, "grad_norm": 0.46883079409599304, "learning_rate": 8e-05, "loss": 1.7363, "step": 736 }, { "epoch": 0.12583233737408228, "grad_norm": 0.4392944276332855, "learning_rate": 8e-05, "loss": 1.4726, "step": 737 }, { "epoch": 0.1260030732456889, "grad_norm": 0.5293715596199036, "learning_rate": 8e-05, "loss": 1.8461, "step": 738 }, { "epoch": 0.12617380911729553, "grad_norm": 0.5029656291007996, "learning_rate": 8e-05, "loss": 1.7261, "step": 739 }, { "epoch": 0.12634454498890216, "grad_norm": 0.4565611779689789, "learning_rate": 8e-05, "loss": 1.785, "step": 740 }, { "epoch": 0.12651528086050878, "grad_norm": 0.4678799510002136, "learning_rate": 8e-05, "loss": 1.7625, "step": 741 }, { "epoch": 0.1266860167321154, "grad_norm": 0.46782320737838745, "learning_rate": 8e-05, "loss": 1.6747, "step": 742 }, { "epoch": 0.12685675260372203, "grad_norm": 0.4874090850353241, "learning_rate": 8e-05, "loss": 2.071, "step": 743 }, { "epoch": 0.12702748847532866, "grad_norm": 0.4611451029777527, "learning_rate": 8e-05, "loss": 1.8059, "step": 744 }, { "epoch": 0.12719822434693528, "grad_norm": 0.4433640241622925, "learning_rate": 8e-05, "loss": 1.7184, "step": 745 }, { "epoch": 0.1273689602185419, "grad_norm": 0.4551624357700348, "learning_rate": 8e-05, "loss": 1.7061, "step": 746 }, { "epoch": 0.12753969609014854, "grad_norm": 0.4425301253795624, "learning_rate": 8e-05, "loss": 1.7397, "step": 747 }, { "epoch": 0.12771043196175516, "grad_norm": 0.4515827000141144, "learning_rate": 8e-05, "loss": 1.684, "step": 748 }, { "epoch": 0.1278811678333618, "grad_norm": 0.4614942967891693, "learning_rate": 8e-05, "loss": 1.7764, "step": 749 }, { "epoch": 0.1280519037049684, "grad_norm": 0.4654172360897064, "learning_rate": 8e-05, "loss": 1.8881, "step": 750 }, { "epoch": 0.12822263957657504, "grad_norm": 0.43020397424697876, "learning_rate": 8e-05, "loss": 1.7392, "step": 751 }, { "epoch": 0.12839337544818166, "grad_norm": 0.49065810441970825, "learning_rate": 8e-05, "loss": 1.7596, "step": 752 }, { "epoch": 0.1285641113197883, "grad_norm": 0.46682143211364746, "learning_rate": 8e-05, "loss": 1.5719, "step": 753 }, { "epoch": 0.1287348471913949, "grad_norm": 0.46513259410858154, "learning_rate": 8e-05, "loss": 1.8479, "step": 754 }, { "epoch": 0.12890558306300154, "grad_norm": 0.4601372182369232, "learning_rate": 8e-05, "loss": 1.6814, "step": 755 }, { "epoch": 0.12907631893460816, "grad_norm": 0.4606035351753235, "learning_rate": 8e-05, "loss": 1.7747, "step": 756 }, { "epoch": 0.1292470548062148, "grad_norm": 0.4624948501586914, "learning_rate": 8e-05, "loss": 1.8209, "step": 757 }, { "epoch": 0.12941779067782141, "grad_norm": 0.4294663965702057, "learning_rate": 8e-05, "loss": 1.7003, "step": 758 }, { "epoch": 0.12958852654942804, "grad_norm": 0.43792036175727844, "learning_rate": 8e-05, "loss": 1.5788, "step": 759 }, { "epoch": 0.12975926242103467, "grad_norm": 0.46307700872421265, "learning_rate": 8e-05, "loss": 1.8652, "step": 760 }, { "epoch": 0.1299299982926413, "grad_norm": 0.5023964047431946, "learning_rate": 8e-05, "loss": 1.8685, "step": 761 }, { "epoch": 0.13010073416424792, "grad_norm": 0.4980110228061676, "learning_rate": 8e-05, "loss": 1.668, "step": 762 }, { "epoch": 0.13027147003585454, "grad_norm": 0.46938052773475647, "learning_rate": 8e-05, "loss": 1.7545, "step": 763 }, { "epoch": 0.13044220590746117, "grad_norm": 0.4426388144493103, "learning_rate": 8e-05, "loss": 1.6282, "step": 764 }, { "epoch": 0.1306129417790678, "grad_norm": 0.44980546832084656, "learning_rate": 8e-05, "loss": 1.6001, "step": 765 }, { "epoch": 0.13078367765067442, "grad_norm": 0.44437727332115173, "learning_rate": 8e-05, "loss": 1.7756, "step": 766 }, { "epoch": 0.13095441352228104, "grad_norm": 0.3639990985393524, "learning_rate": 8e-05, "loss": 1.2268, "step": 767 }, { "epoch": 0.13112514939388767, "grad_norm": 0.4977494478225708, "learning_rate": 8e-05, "loss": 1.9255, "step": 768 }, { "epoch": 0.13129588526549427, "grad_norm": 0.4842672049999237, "learning_rate": 8e-05, "loss": 1.9171, "step": 769 }, { "epoch": 0.1314666211371009, "grad_norm": 0.4735569357872009, "learning_rate": 8e-05, "loss": 1.7661, "step": 770 }, { "epoch": 0.13163735700870752, "grad_norm": 0.44106805324554443, "learning_rate": 8e-05, "loss": 1.5751, "step": 771 }, { "epoch": 0.13180809288031414, "grad_norm": 0.43814659118652344, "learning_rate": 8e-05, "loss": 1.6007, "step": 772 }, { "epoch": 0.13197882875192077, "grad_norm": 0.447917103767395, "learning_rate": 8e-05, "loss": 1.6355, "step": 773 }, { "epoch": 0.1321495646235274, "grad_norm": 0.45688551664352417, "learning_rate": 8e-05, "loss": 1.7335, "step": 774 }, { "epoch": 0.13232030049513402, "grad_norm": 0.46020933985710144, "learning_rate": 8e-05, "loss": 1.7177, "step": 775 }, { "epoch": 0.13249103636674064, "grad_norm": 0.46775364875793457, "learning_rate": 8e-05, "loss": 1.9321, "step": 776 }, { "epoch": 0.13266177223834727, "grad_norm": 0.46344587206840515, "learning_rate": 8e-05, "loss": 1.6888, "step": 777 }, { "epoch": 0.1328325081099539, "grad_norm": 0.43287304043769836, "learning_rate": 8e-05, "loss": 1.6825, "step": 778 }, { "epoch": 0.13300324398156052, "grad_norm": 0.5130947232246399, "learning_rate": 8e-05, "loss": 1.8677, "step": 779 }, { "epoch": 0.13317397985316715, "grad_norm": 0.5070828795433044, "learning_rate": 8e-05, "loss": 1.9203, "step": 780 }, { "epoch": 0.13334471572477377, "grad_norm": 0.44037488102912903, "learning_rate": 8e-05, "loss": 1.429, "step": 781 }, { "epoch": 0.1335154515963804, "grad_norm": 0.48649105429649353, "learning_rate": 8e-05, "loss": 1.5909, "step": 782 }, { "epoch": 0.13368618746798702, "grad_norm": 0.48643219470977783, "learning_rate": 8e-05, "loss": 1.7672, "step": 783 }, { "epoch": 0.13385692333959365, "grad_norm": 0.45604002475738525, "learning_rate": 8e-05, "loss": 1.8167, "step": 784 }, { "epoch": 0.13402765921120027, "grad_norm": 0.49483051896095276, "learning_rate": 8e-05, "loss": 1.8803, "step": 785 }, { "epoch": 0.1341983950828069, "grad_norm": 0.45025599002838135, "learning_rate": 8e-05, "loss": 1.6696, "step": 786 }, { "epoch": 0.13436913095441352, "grad_norm": 0.47571203112602234, "learning_rate": 8e-05, "loss": 1.7774, "step": 787 }, { "epoch": 0.13453986682602015, "grad_norm": 0.4508216977119446, "learning_rate": 8e-05, "loss": 1.7196, "step": 788 }, { "epoch": 0.13471060269762677, "grad_norm": 0.48760542273521423, "learning_rate": 8e-05, "loss": 1.9556, "step": 789 }, { "epoch": 0.1348813385692334, "grad_norm": 0.45738285779953003, "learning_rate": 8e-05, "loss": 1.8341, "step": 790 }, { "epoch": 0.13505207444084003, "grad_norm": 0.4732484817504883, "learning_rate": 8e-05, "loss": 1.6476, "step": 791 }, { "epoch": 0.13522281031244665, "grad_norm": 0.47944650053977966, "learning_rate": 8e-05, "loss": 1.798, "step": 792 }, { "epoch": 0.13539354618405328, "grad_norm": 0.46067649126052856, "learning_rate": 8e-05, "loss": 1.7237, "step": 793 }, { "epoch": 0.1355642820556599, "grad_norm": 0.4302305579185486, "learning_rate": 8e-05, "loss": 1.6587, "step": 794 }, { "epoch": 0.13573501792726653, "grad_norm": 0.4734790623188019, "learning_rate": 8e-05, "loss": 1.6236, "step": 795 }, { "epoch": 0.13590575379887315, "grad_norm": 0.4454515874385834, "learning_rate": 8e-05, "loss": 1.7774, "step": 796 }, { "epoch": 0.13607648967047978, "grad_norm": 0.4776926636695862, "learning_rate": 8e-05, "loss": 1.7723, "step": 797 }, { "epoch": 0.1362472255420864, "grad_norm": 0.44919687509536743, "learning_rate": 8e-05, "loss": 1.5833, "step": 798 }, { "epoch": 0.13641796141369303, "grad_norm": 0.4118146002292633, "learning_rate": 8e-05, "loss": 1.4508, "step": 799 }, { "epoch": 0.13658869728529965, "grad_norm": 0.4509067237377167, "learning_rate": 8e-05, "loss": 1.6301, "step": 800 }, { "epoch": 0.13675943315690628, "grad_norm": 0.45580169558525085, "learning_rate": 8e-05, "loss": 1.6993, "step": 801 }, { "epoch": 0.13693016902851288, "grad_norm": 0.4634571373462677, "learning_rate": 8e-05, "loss": 1.7896, "step": 802 }, { "epoch": 0.1371009049001195, "grad_norm": 0.4389374554157257, "learning_rate": 8e-05, "loss": 1.7628, "step": 803 }, { "epoch": 0.13727164077172613, "grad_norm": 0.457655131816864, "learning_rate": 8e-05, "loss": 1.6843, "step": 804 }, { "epoch": 0.13744237664333275, "grad_norm": 0.46848607063293457, "learning_rate": 8e-05, "loss": 1.8588, "step": 805 }, { "epoch": 0.13761311251493938, "grad_norm": 0.4423011839389801, "learning_rate": 8e-05, "loss": 1.7532, "step": 806 }, { "epoch": 0.137783848386546, "grad_norm": 0.5099880695343018, "learning_rate": 8e-05, "loss": 1.7551, "step": 807 }, { "epoch": 0.13795458425815263, "grad_norm": 0.43246111273765564, "learning_rate": 8e-05, "loss": 1.6755, "step": 808 }, { "epoch": 0.13812532012975925, "grad_norm": 0.4447002708911896, "learning_rate": 8e-05, "loss": 1.8366, "step": 809 }, { "epoch": 0.13829605600136588, "grad_norm": 0.4976888597011566, "learning_rate": 8e-05, "loss": 1.7314, "step": 810 }, { "epoch": 0.1384667918729725, "grad_norm": 0.47202518582344055, "learning_rate": 8e-05, "loss": 1.887, "step": 811 }, { "epoch": 0.13863752774457913, "grad_norm": 0.4160774350166321, "learning_rate": 8e-05, "loss": 1.6566, "step": 812 }, { "epoch": 0.13880826361618576, "grad_norm": 0.4188424348831177, "learning_rate": 8e-05, "loss": 1.5336, "step": 813 }, { "epoch": 0.13897899948779238, "grad_norm": 0.4317874610424042, "learning_rate": 8e-05, "loss": 1.5341, "step": 814 }, { "epoch": 0.139149735359399, "grad_norm": 0.4792192876338959, "learning_rate": 8e-05, "loss": 1.8342, "step": 815 }, { "epoch": 0.13932047123100563, "grad_norm": 0.45523181557655334, "learning_rate": 8e-05, "loss": 1.6706, "step": 816 }, { "epoch": 0.13949120710261226, "grad_norm": 0.4896884560585022, "learning_rate": 8e-05, "loss": 1.8664, "step": 817 }, { "epoch": 0.13966194297421888, "grad_norm": 0.46416381001472473, "learning_rate": 8e-05, "loss": 1.565, "step": 818 }, { "epoch": 0.1398326788458255, "grad_norm": 0.437192440032959, "learning_rate": 8e-05, "loss": 1.5209, "step": 819 }, { "epoch": 0.14000341471743213, "grad_norm": 0.4709034264087677, "learning_rate": 8e-05, "loss": 1.7452, "step": 820 }, { "epoch": 0.14017415058903876, "grad_norm": 0.4790910482406616, "learning_rate": 8e-05, "loss": 1.6617, "step": 821 }, { "epoch": 0.14034488646064538, "grad_norm": 0.476779967546463, "learning_rate": 8e-05, "loss": 1.8665, "step": 822 }, { "epoch": 0.140515622332252, "grad_norm": 0.4493958652019501, "learning_rate": 8e-05, "loss": 1.7417, "step": 823 }, { "epoch": 0.14068635820385864, "grad_norm": 0.42380499839782715, "learning_rate": 8e-05, "loss": 1.5622, "step": 824 }, { "epoch": 0.14085709407546526, "grad_norm": 0.4712841510772705, "learning_rate": 8e-05, "loss": 1.8585, "step": 825 }, { "epoch": 0.1410278299470719, "grad_norm": 0.44263139367103577, "learning_rate": 8e-05, "loss": 1.7475, "step": 826 }, { "epoch": 0.1411985658186785, "grad_norm": 0.44473254680633545, "learning_rate": 8e-05, "loss": 1.5388, "step": 827 }, { "epoch": 0.14136930169028514, "grad_norm": 0.46545714139938354, "learning_rate": 8e-05, "loss": 1.7351, "step": 828 }, { "epoch": 0.14154003756189176, "grad_norm": 0.45534849166870117, "learning_rate": 8e-05, "loss": 1.7077, "step": 829 }, { "epoch": 0.1417107734334984, "grad_norm": 0.4647299349308014, "learning_rate": 8e-05, "loss": 1.8562, "step": 830 }, { "epoch": 0.141881509305105, "grad_norm": 0.5992990732192993, "learning_rate": 8e-05, "loss": 1.6875, "step": 831 }, { "epoch": 0.14205224517671164, "grad_norm": 0.44566163420677185, "learning_rate": 8e-05, "loss": 1.6925, "step": 832 }, { "epoch": 0.14222298104831826, "grad_norm": 0.44251781702041626, "learning_rate": 8e-05, "loss": 1.6074, "step": 833 }, { "epoch": 0.1423937169199249, "grad_norm": 0.5093763470649719, "learning_rate": 8e-05, "loss": 2.1073, "step": 834 }, { "epoch": 0.1425644527915315, "grad_norm": 0.4334981143474579, "learning_rate": 8e-05, "loss": 1.5851, "step": 835 }, { "epoch": 0.1427351886631381, "grad_norm": 0.46526435017585754, "learning_rate": 8e-05, "loss": 1.8342, "step": 836 }, { "epoch": 0.14290592453474474, "grad_norm": 0.4793323576450348, "learning_rate": 8e-05, "loss": 1.7297, "step": 837 }, { "epoch": 0.14307666040635136, "grad_norm": 0.4384273588657379, "learning_rate": 8e-05, "loss": 1.5265, "step": 838 }, { "epoch": 0.143247396277958, "grad_norm": 0.44309985637664795, "learning_rate": 8e-05, "loss": 1.7817, "step": 839 }, { "epoch": 0.14341813214956461, "grad_norm": 0.46759599447250366, "learning_rate": 8e-05, "loss": 1.8823, "step": 840 }, { "epoch": 0.14358886802117124, "grad_norm": 0.46439656615257263, "learning_rate": 8e-05, "loss": 1.6066, "step": 841 }, { "epoch": 0.14375960389277787, "grad_norm": 0.5119403600692749, "learning_rate": 8e-05, "loss": 1.6563, "step": 842 }, { "epoch": 0.1439303397643845, "grad_norm": 0.47866395115852356, "learning_rate": 8e-05, "loss": 1.7653, "step": 843 }, { "epoch": 0.14410107563599112, "grad_norm": 0.4779225289821625, "learning_rate": 8e-05, "loss": 1.8513, "step": 844 }, { "epoch": 0.14427181150759774, "grad_norm": 0.5001221299171448, "learning_rate": 8e-05, "loss": 1.8093, "step": 845 }, { "epoch": 0.14444254737920437, "grad_norm": 0.45129403471946716, "learning_rate": 8e-05, "loss": 1.6838, "step": 846 }, { "epoch": 0.144613283250811, "grad_norm": 0.4599730968475342, "learning_rate": 8e-05, "loss": 1.7005, "step": 847 }, { "epoch": 0.14478401912241762, "grad_norm": 0.4542209506034851, "learning_rate": 8e-05, "loss": 1.6212, "step": 848 }, { "epoch": 0.14495475499402424, "grad_norm": 0.4574262499809265, "learning_rate": 8e-05, "loss": 1.8342, "step": 849 }, { "epoch": 0.14512549086563087, "grad_norm": 0.4735952615737915, "learning_rate": 8e-05, "loss": 1.7634, "step": 850 }, { "epoch": 0.1452962267372375, "grad_norm": 0.4777659773826599, "learning_rate": 8e-05, "loss": 1.7742, "step": 851 }, { "epoch": 0.14546696260884412, "grad_norm": 0.442022442817688, "learning_rate": 8e-05, "loss": 1.6991, "step": 852 }, { "epoch": 0.14563769848045074, "grad_norm": 0.4782080352306366, "learning_rate": 8e-05, "loss": 1.6223, "step": 853 }, { "epoch": 0.14580843435205737, "grad_norm": 0.44213730096817017, "learning_rate": 8e-05, "loss": 1.6171, "step": 854 }, { "epoch": 0.145979170223664, "grad_norm": 0.4410727024078369, "learning_rate": 8e-05, "loss": 1.763, "step": 855 }, { "epoch": 0.14614990609527062, "grad_norm": 0.46674033999443054, "learning_rate": 8e-05, "loss": 1.7263, "step": 856 }, { "epoch": 0.14632064196687725, "grad_norm": 0.444429874420166, "learning_rate": 8e-05, "loss": 1.6684, "step": 857 }, { "epoch": 0.14649137783848387, "grad_norm": 0.47188645601272583, "learning_rate": 8e-05, "loss": 1.7968, "step": 858 }, { "epoch": 0.1466621137100905, "grad_norm": 0.4298073947429657, "learning_rate": 8e-05, "loss": 1.7244, "step": 859 }, { "epoch": 0.14683284958169712, "grad_norm": 0.46763670444488525, "learning_rate": 8e-05, "loss": 1.826, "step": 860 }, { "epoch": 0.14700358545330375, "grad_norm": 0.4502532184123993, "learning_rate": 8e-05, "loss": 1.8316, "step": 861 }, { "epoch": 0.14717432132491037, "grad_norm": 0.43032166361808777, "learning_rate": 8e-05, "loss": 1.543, "step": 862 }, { "epoch": 0.147345057196517, "grad_norm": 0.4187735617160797, "learning_rate": 8e-05, "loss": 1.5166, "step": 863 }, { "epoch": 0.14751579306812362, "grad_norm": 0.47125595808029175, "learning_rate": 8e-05, "loss": 1.8887, "step": 864 }, { "epoch": 0.14768652893973025, "grad_norm": 0.4507071375846863, "learning_rate": 8e-05, "loss": 1.7923, "step": 865 }, { "epoch": 0.14785726481133687, "grad_norm": 0.41040170192718506, "learning_rate": 8e-05, "loss": 1.5142, "step": 866 }, { "epoch": 0.1480280006829435, "grad_norm": 0.4499456286430359, "learning_rate": 8e-05, "loss": 1.7334, "step": 867 }, { "epoch": 0.1481987365545501, "grad_norm": 0.43977320194244385, "learning_rate": 8e-05, "loss": 1.7618, "step": 868 }, { "epoch": 0.14836947242615672, "grad_norm": 0.48080629110336304, "learning_rate": 8e-05, "loss": 1.8074, "step": 869 }, { "epoch": 0.14854020829776335, "grad_norm": 0.4431345760822296, "learning_rate": 8e-05, "loss": 1.7526, "step": 870 }, { "epoch": 0.14871094416936997, "grad_norm": 0.4777228534221649, "learning_rate": 8e-05, "loss": 1.913, "step": 871 }, { "epoch": 0.1488816800409766, "grad_norm": 0.45234858989715576, "learning_rate": 8e-05, "loss": 1.745, "step": 872 }, { "epoch": 0.14905241591258322, "grad_norm": 0.4001792073249817, "learning_rate": 8e-05, "loss": 1.4834, "step": 873 }, { "epoch": 0.14922315178418985, "grad_norm": 0.4313156008720398, "learning_rate": 8e-05, "loss": 1.7532, "step": 874 }, { "epoch": 0.14939388765579648, "grad_norm": 0.4390343129634857, "learning_rate": 8e-05, "loss": 1.5756, "step": 875 }, { "epoch": 0.1495646235274031, "grad_norm": 0.42864561080932617, "learning_rate": 8e-05, "loss": 1.7082, "step": 876 }, { "epoch": 0.14973535939900973, "grad_norm": 0.4564012289047241, "learning_rate": 8e-05, "loss": 1.8397, "step": 877 }, { "epoch": 0.14990609527061635, "grad_norm": 0.45202770829200745, "learning_rate": 8e-05, "loss": 1.7222, "step": 878 }, { "epoch": 0.15007683114222298, "grad_norm": 0.4422377347946167, "learning_rate": 8e-05, "loss": 1.7158, "step": 879 }, { "epoch": 0.1502475670138296, "grad_norm": 0.46004971861839294, "learning_rate": 8e-05, "loss": 1.7071, "step": 880 }, { "epoch": 0.15041830288543623, "grad_norm": 0.4652254283428192, "learning_rate": 8e-05, "loss": 1.7556, "step": 881 }, { "epoch": 0.15058903875704285, "grad_norm": 0.4704051911830902, "learning_rate": 8e-05, "loss": 1.8655, "step": 882 }, { "epoch": 0.15075977462864948, "grad_norm": 0.45921197533607483, "learning_rate": 8e-05, "loss": 1.8108, "step": 883 }, { "epoch": 0.1509305105002561, "grad_norm": 0.4580092430114746, "learning_rate": 8e-05, "loss": 1.5515, "step": 884 }, { "epoch": 0.15110124637186273, "grad_norm": 0.4841068387031555, "learning_rate": 8e-05, "loss": 1.7932, "step": 885 }, { "epoch": 0.15127198224346936, "grad_norm": 0.5240221619606018, "learning_rate": 8e-05, "loss": 2.0104, "step": 886 }, { "epoch": 0.15144271811507598, "grad_norm": 0.4731379747390747, "learning_rate": 8e-05, "loss": 1.7871, "step": 887 }, { "epoch": 0.1516134539866826, "grad_norm": 0.4867551624774933, "learning_rate": 8e-05, "loss": 1.9188, "step": 888 }, { "epoch": 0.15178418985828923, "grad_norm": 0.4949553310871124, "learning_rate": 8e-05, "loss": 1.8262, "step": 889 }, { "epoch": 0.15195492572989586, "grad_norm": 0.47736093401908875, "learning_rate": 8e-05, "loss": 1.7135, "step": 890 }, { "epoch": 0.15212566160150248, "grad_norm": 0.47036248445510864, "learning_rate": 8e-05, "loss": 1.7321, "step": 891 }, { "epoch": 0.1522963974731091, "grad_norm": 0.4975561201572418, "learning_rate": 8e-05, "loss": 1.8607, "step": 892 }, { "epoch": 0.15246713334471573, "grad_norm": 0.46005916595458984, "learning_rate": 8e-05, "loss": 1.651, "step": 893 }, { "epoch": 0.15263786921632236, "grad_norm": 0.5789629220962524, "learning_rate": 8e-05, "loss": 2.3182, "step": 894 }, { "epoch": 0.15280860508792898, "grad_norm": 0.44269612431526184, "learning_rate": 8e-05, "loss": 1.8125, "step": 895 }, { "epoch": 0.1529793409595356, "grad_norm": 0.4424428939819336, "learning_rate": 8e-05, "loss": 1.6497, "step": 896 }, { "epoch": 0.15315007683114223, "grad_norm": 0.4549713730812073, "learning_rate": 8e-05, "loss": 1.6374, "step": 897 }, { "epoch": 0.15332081270274886, "grad_norm": 0.43351778388023376, "learning_rate": 8e-05, "loss": 1.7791, "step": 898 }, { "epoch": 0.15349154857435549, "grad_norm": 0.4457946717739105, "learning_rate": 8e-05, "loss": 1.7002, "step": 899 }, { "epoch": 0.15366228444596208, "grad_norm": 0.4548703730106354, "learning_rate": 8e-05, "loss": 1.7565, "step": 900 }, { "epoch": 0.1538330203175687, "grad_norm": 0.43296125531196594, "learning_rate": 8e-05, "loss": 1.6716, "step": 901 }, { "epoch": 0.15400375618917533, "grad_norm": 0.48732322454452515, "learning_rate": 8e-05, "loss": 1.634, "step": 902 }, { "epoch": 0.15417449206078196, "grad_norm": 0.4476817846298218, "learning_rate": 8e-05, "loss": 1.7467, "step": 903 }, { "epoch": 0.15434522793238858, "grad_norm": 0.4242105782032013, "learning_rate": 8e-05, "loss": 1.8079, "step": 904 }, { "epoch": 0.1545159638039952, "grad_norm": 0.4637218713760376, "learning_rate": 8e-05, "loss": 1.8168, "step": 905 }, { "epoch": 0.15468669967560184, "grad_norm": 0.46189799904823303, "learning_rate": 8e-05, "loss": 1.8028, "step": 906 }, { "epoch": 0.15485743554720846, "grad_norm": 0.4534951448440552, "learning_rate": 8e-05, "loss": 1.7801, "step": 907 }, { "epoch": 0.15502817141881509, "grad_norm": 0.4416378140449524, "learning_rate": 8e-05, "loss": 1.741, "step": 908 }, { "epoch": 0.1551989072904217, "grad_norm": 0.4290127456188202, "learning_rate": 8e-05, "loss": 1.5, "step": 909 }, { "epoch": 0.15536964316202834, "grad_norm": 0.4656996726989746, "learning_rate": 8e-05, "loss": 1.7958, "step": 910 }, { "epoch": 0.15554037903363496, "grad_norm": 0.42185264825820923, "learning_rate": 8e-05, "loss": 1.7545, "step": 911 }, { "epoch": 0.1557111149052416, "grad_norm": 0.482660710811615, "learning_rate": 8e-05, "loss": 1.8375, "step": 912 }, { "epoch": 0.1558818507768482, "grad_norm": 0.4520467519760132, "learning_rate": 8e-05, "loss": 1.7618, "step": 913 }, { "epoch": 0.15605258664845484, "grad_norm": 0.4486923813819885, "learning_rate": 8e-05, "loss": 1.6836, "step": 914 }, { "epoch": 0.15622332252006146, "grad_norm": 0.42867907881736755, "learning_rate": 8e-05, "loss": 1.4377, "step": 915 }, { "epoch": 0.1563940583916681, "grad_norm": 0.4996095597743988, "learning_rate": 8e-05, "loss": 1.9077, "step": 916 }, { "epoch": 0.15656479426327471, "grad_norm": 0.4224725365638733, "learning_rate": 8e-05, "loss": 1.5624, "step": 917 }, { "epoch": 0.15673553013488134, "grad_norm": 0.48907244205474854, "learning_rate": 8e-05, "loss": 1.885, "step": 918 }, { "epoch": 0.15690626600648797, "grad_norm": 0.4736793339252472, "learning_rate": 8e-05, "loss": 1.6678, "step": 919 }, { "epoch": 0.1570770018780946, "grad_norm": 0.49974825978279114, "learning_rate": 8e-05, "loss": 1.8463, "step": 920 }, { "epoch": 0.15724773774970122, "grad_norm": 0.4680217206478119, "learning_rate": 8e-05, "loss": 1.8094, "step": 921 }, { "epoch": 0.15741847362130784, "grad_norm": 0.44850462675094604, "learning_rate": 8e-05, "loss": 1.7779, "step": 922 }, { "epoch": 0.15758920949291447, "grad_norm": 0.44388166069984436, "learning_rate": 8e-05, "loss": 1.7458, "step": 923 }, { "epoch": 0.1577599453645211, "grad_norm": 0.440399706363678, "learning_rate": 8e-05, "loss": 1.6066, "step": 924 }, { "epoch": 0.15793068123612772, "grad_norm": 0.4747973680496216, "learning_rate": 8e-05, "loss": 1.7784, "step": 925 }, { "epoch": 0.15810141710773434, "grad_norm": 0.48111847043037415, "learning_rate": 8e-05, "loss": 1.7296, "step": 926 }, { "epoch": 0.15827215297934097, "grad_norm": 0.5099611282348633, "learning_rate": 8e-05, "loss": 1.8543, "step": 927 }, { "epoch": 0.1584428888509476, "grad_norm": 0.4476122558116913, "learning_rate": 8e-05, "loss": 1.7904, "step": 928 }, { "epoch": 0.15861362472255422, "grad_norm": 0.4596826732158661, "learning_rate": 8e-05, "loss": 1.6997, "step": 929 }, { "epoch": 0.15878436059416084, "grad_norm": 0.40916356444358826, "learning_rate": 8e-05, "loss": 1.1632, "step": 930 }, { "epoch": 0.15895509646576747, "grad_norm": 0.4408723711967468, "learning_rate": 8e-05, "loss": 1.3944, "step": 931 }, { "epoch": 0.1591258323373741, "grad_norm": 0.4683617055416107, "learning_rate": 8e-05, "loss": 1.7791, "step": 932 }, { "epoch": 0.1592965682089807, "grad_norm": 0.4599247872829437, "learning_rate": 8e-05, "loss": 1.6327, "step": 933 }, { "epoch": 0.15946730408058732, "grad_norm": 0.4597547650337219, "learning_rate": 8e-05, "loss": 1.7535, "step": 934 }, { "epoch": 0.15963803995219394, "grad_norm": 0.49582549929618835, "learning_rate": 8e-05, "loss": 1.8181, "step": 935 }, { "epoch": 0.15980877582380057, "grad_norm": 0.4718499779701233, "learning_rate": 8e-05, "loss": 1.6524, "step": 936 }, { "epoch": 0.1599795116954072, "grad_norm": 0.48830336332321167, "learning_rate": 8e-05, "loss": 1.4083, "step": 937 }, { "epoch": 0.16015024756701382, "grad_norm": 0.4214014410972595, "learning_rate": 8e-05, "loss": 1.6164, "step": 938 }, { "epoch": 0.16032098343862045, "grad_norm": 0.4418695867061615, "learning_rate": 8e-05, "loss": 1.8252, "step": 939 }, { "epoch": 0.16049171931022707, "grad_norm": 0.4365369975566864, "learning_rate": 8e-05, "loss": 1.65, "step": 940 }, { "epoch": 0.1606624551818337, "grad_norm": 0.4107338488101959, "learning_rate": 8e-05, "loss": 1.6453, "step": 941 }, { "epoch": 0.16083319105344032, "grad_norm": 0.4892653822898865, "learning_rate": 8e-05, "loss": 1.7343, "step": 942 }, { "epoch": 0.16100392692504695, "grad_norm": 0.42189350724220276, "learning_rate": 8e-05, "loss": 1.6399, "step": 943 }, { "epoch": 0.16117466279665357, "grad_norm": 0.5064341425895691, "learning_rate": 8e-05, "loss": 1.8055, "step": 944 }, { "epoch": 0.1613453986682602, "grad_norm": 0.4870012104511261, "learning_rate": 8e-05, "loss": 1.8931, "step": 945 }, { "epoch": 0.16151613453986682, "grad_norm": 0.4463498890399933, "learning_rate": 8e-05, "loss": 1.6329, "step": 946 }, { "epoch": 0.16168687041147345, "grad_norm": 0.46688738465309143, "learning_rate": 8e-05, "loss": 1.6967, "step": 947 }, { "epoch": 0.16185760628308007, "grad_norm": 0.45009276270866394, "learning_rate": 8e-05, "loss": 1.7885, "step": 948 }, { "epoch": 0.1620283421546867, "grad_norm": 0.4627588987350464, "learning_rate": 8e-05, "loss": 1.7504, "step": 949 }, { "epoch": 0.16219907802629333, "grad_norm": 0.43667006492614746, "learning_rate": 8e-05, "loss": 1.5528, "step": 950 }, { "epoch": 0.16236981389789995, "grad_norm": 0.45931845903396606, "learning_rate": 8e-05, "loss": 1.848, "step": 951 }, { "epoch": 0.16254054976950658, "grad_norm": 0.45936113595962524, "learning_rate": 8e-05, "loss": 1.6739, "step": 952 }, { "epoch": 0.1627112856411132, "grad_norm": 0.45506417751312256, "learning_rate": 8e-05, "loss": 1.7654, "step": 953 }, { "epoch": 0.16288202151271983, "grad_norm": 0.4198547899723053, "learning_rate": 8e-05, "loss": 1.6317, "step": 954 }, { "epoch": 0.16305275738432645, "grad_norm": 0.48663806915283203, "learning_rate": 8e-05, "loss": 1.8217, "step": 955 }, { "epoch": 0.16322349325593308, "grad_norm": 0.4418083429336548, "learning_rate": 8e-05, "loss": 1.6834, "step": 956 }, { "epoch": 0.1633942291275397, "grad_norm": 0.4463340640068054, "learning_rate": 8e-05, "loss": 1.757, "step": 957 }, { "epoch": 0.16356496499914633, "grad_norm": 0.5065399408340454, "learning_rate": 8e-05, "loss": 1.74, "step": 958 }, { "epoch": 0.16373570087075295, "grad_norm": 0.47413942217826843, "learning_rate": 8e-05, "loss": 1.6509, "step": 959 }, { "epoch": 0.16390643674235958, "grad_norm": 0.43942737579345703, "learning_rate": 8e-05, "loss": 1.6182, "step": 960 }, { "epoch": 0.1640771726139662, "grad_norm": 0.5295132398605347, "learning_rate": 8e-05, "loss": 1.7914, "step": 961 }, { "epoch": 0.16424790848557283, "grad_norm": 0.47411367297172546, "learning_rate": 8e-05, "loss": 1.761, "step": 962 }, { "epoch": 0.16441864435717946, "grad_norm": 0.44282323122024536, "learning_rate": 8e-05, "loss": 1.7497, "step": 963 }, { "epoch": 0.16458938022878608, "grad_norm": 0.4392602741718292, "learning_rate": 8e-05, "loss": 1.6728, "step": 964 }, { "epoch": 0.1647601161003927, "grad_norm": 0.5028290152549744, "learning_rate": 8e-05, "loss": 1.5662, "step": 965 }, { "epoch": 0.1649308519719993, "grad_norm": 0.46347877383232117, "learning_rate": 8e-05, "loss": 1.7446, "step": 966 }, { "epoch": 0.16510158784360593, "grad_norm": 0.4617728590965271, "learning_rate": 8e-05, "loss": 1.6931, "step": 967 }, { "epoch": 0.16527232371521255, "grad_norm": 0.4795340597629547, "learning_rate": 8e-05, "loss": 1.6989, "step": 968 }, { "epoch": 0.16544305958681918, "grad_norm": 0.4332267642021179, "learning_rate": 8e-05, "loss": 1.6643, "step": 969 }, { "epoch": 0.1656137954584258, "grad_norm": 0.4573584198951721, "learning_rate": 8e-05, "loss": 1.7706, "step": 970 }, { "epoch": 0.16578453133003243, "grad_norm": 0.4728851616382599, "learning_rate": 8e-05, "loss": 1.8028, "step": 971 }, { "epoch": 0.16595526720163906, "grad_norm": 0.4544992744922638, "learning_rate": 8e-05, "loss": 1.6123, "step": 972 }, { "epoch": 0.16612600307324568, "grad_norm": 0.43813061714172363, "learning_rate": 8e-05, "loss": 1.6912, "step": 973 }, { "epoch": 0.1662967389448523, "grad_norm": 0.45849549770355225, "learning_rate": 8e-05, "loss": 1.676, "step": 974 }, { "epoch": 0.16646747481645893, "grad_norm": 0.44400838017463684, "learning_rate": 8e-05, "loss": 1.7751, "step": 975 }, { "epoch": 0.16663821068806556, "grad_norm": 0.46767133474349976, "learning_rate": 8e-05, "loss": 1.75, "step": 976 }, { "epoch": 0.16680894655967218, "grad_norm": 0.4777848422527313, "learning_rate": 8e-05, "loss": 1.7411, "step": 977 }, { "epoch": 0.1669796824312788, "grad_norm": 0.5072817802429199, "learning_rate": 8e-05, "loss": 1.846, "step": 978 }, { "epoch": 0.16715041830288543, "grad_norm": 0.4392523765563965, "learning_rate": 8e-05, "loss": 1.6877, "step": 979 }, { "epoch": 0.16732115417449206, "grad_norm": 0.5045742392539978, "learning_rate": 8e-05, "loss": 1.7645, "step": 980 }, { "epoch": 0.16749189004609868, "grad_norm": 0.4328124523162842, "learning_rate": 8e-05, "loss": 1.6402, "step": 981 }, { "epoch": 0.1676626259177053, "grad_norm": 0.45213451981544495, "learning_rate": 8e-05, "loss": 1.6861, "step": 982 }, { "epoch": 0.16783336178931194, "grad_norm": 0.46014317870140076, "learning_rate": 8e-05, "loss": 1.7852, "step": 983 }, { "epoch": 0.16800409766091856, "grad_norm": 0.4393269419670105, "learning_rate": 8e-05, "loss": 1.767, "step": 984 }, { "epoch": 0.1681748335325252, "grad_norm": 0.4853580892086029, "learning_rate": 8e-05, "loss": 1.887, "step": 985 }, { "epoch": 0.1683455694041318, "grad_norm": 0.42527902126312256, "learning_rate": 8e-05, "loss": 1.5816, "step": 986 }, { "epoch": 0.16851630527573844, "grad_norm": 0.49486809968948364, "learning_rate": 8e-05, "loss": 1.8343, "step": 987 }, { "epoch": 0.16868704114734506, "grad_norm": 0.43438562750816345, "learning_rate": 8e-05, "loss": 1.6463, "step": 988 }, { "epoch": 0.1688577770189517, "grad_norm": 0.45922425389289856, "learning_rate": 8e-05, "loss": 1.733, "step": 989 }, { "epoch": 0.1690285128905583, "grad_norm": 0.4510825276374817, "learning_rate": 8e-05, "loss": 1.8215, "step": 990 }, { "epoch": 0.16919924876216494, "grad_norm": 0.46495023369789124, "learning_rate": 8e-05, "loss": 1.8454, "step": 991 }, { "epoch": 0.16936998463377156, "grad_norm": 0.43503230810165405, "learning_rate": 8e-05, "loss": 1.6851, "step": 992 }, { "epoch": 0.1695407205053782, "grad_norm": 0.5133619904518127, "learning_rate": 8e-05, "loss": 1.9136, "step": 993 }, { "epoch": 0.16971145637698482, "grad_norm": 0.503833532333374, "learning_rate": 8e-05, "loss": 2.0377, "step": 994 }, { "epoch": 0.16988219224859144, "grad_norm": 0.47862696647644043, "learning_rate": 8e-05, "loss": 1.7523, "step": 995 }, { "epoch": 0.17005292812019807, "grad_norm": 0.42127662897109985, "learning_rate": 8e-05, "loss": 1.591, "step": 996 }, { "epoch": 0.1702236639918047, "grad_norm": 0.5066868662834167, "learning_rate": 8e-05, "loss": 1.9878, "step": 997 }, { "epoch": 0.1703943998634113, "grad_norm": 0.44399401545524597, "learning_rate": 8e-05, "loss": 1.7619, "step": 998 }, { "epoch": 0.17056513573501791, "grad_norm": 0.43579667806625366, "learning_rate": 8e-05, "loss": 1.563, "step": 999 }, { "epoch": 0.17073587160662454, "grad_norm": 0.532248854637146, "learning_rate": 8e-05, "loss": 1.816, "step": 1000 }, { "epoch": 0.17090660747823117, "grad_norm": 0.5078158974647522, "learning_rate": 8e-05, "loss": 1.9058, "step": 1001 }, { "epoch": 0.1710773433498378, "grad_norm": 0.4724283218383789, "learning_rate": 8e-05, "loss": 1.8018, "step": 1002 }, { "epoch": 0.17124807922144442, "grad_norm": 0.45574259757995605, "learning_rate": 8e-05, "loss": 1.7154, "step": 1003 }, { "epoch": 0.17141881509305104, "grad_norm": 0.4803370237350464, "learning_rate": 8e-05, "loss": 1.7321, "step": 1004 }, { "epoch": 0.17158955096465767, "grad_norm": 0.45709317922592163, "learning_rate": 8e-05, "loss": 1.8215, "step": 1005 }, { "epoch": 0.1717602868362643, "grad_norm": 0.4232978820800781, "learning_rate": 8e-05, "loss": 1.6117, "step": 1006 }, { "epoch": 0.17193102270787092, "grad_norm": 0.4643423557281494, "learning_rate": 8e-05, "loss": 1.9484, "step": 1007 }, { "epoch": 0.17210175857947754, "grad_norm": 0.47203510999679565, "learning_rate": 8e-05, "loss": 1.7588, "step": 1008 }, { "epoch": 0.17227249445108417, "grad_norm": 0.4465533494949341, "learning_rate": 8e-05, "loss": 1.6715, "step": 1009 }, { "epoch": 0.1724432303226908, "grad_norm": 0.4807758331298828, "learning_rate": 8e-05, "loss": 1.866, "step": 1010 }, { "epoch": 0.17261396619429742, "grad_norm": 0.48214465379714966, "learning_rate": 8e-05, "loss": 1.8079, "step": 1011 }, { "epoch": 0.17278470206590404, "grad_norm": 0.47284552454948425, "learning_rate": 8e-05, "loss": 1.8024, "step": 1012 }, { "epoch": 0.17295543793751067, "grad_norm": 0.46473824977874756, "learning_rate": 8e-05, "loss": 1.7925, "step": 1013 }, { "epoch": 0.1731261738091173, "grad_norm": 0.4360958933830261, "learning_rate": 8e-05, "loss": 1.5723, "step": 1014 }, { "epoch": 0.17329690968072392, "grad_norm": 0.41851183772087097, "learning_rate": 8e-05, "loss": 1.5148, "step": 1015 }, { "epoch": 0.17346764555233055, "grad_norm": 0.4503248631954193, "learning_rate": 8e-05, "loss": 1.7628, "step": 1016 }, { "epoch": 0.17363838142393717, "grad_norm": 0.4699077606201172, "learning_rate": 8e-05, "loss": 1.7543, "step": 1017 }, { "epoch": 0.1738091172955438, "grad_norm": 0.45411884784698486, "learning_rate": 8e-05, "loss": 1.6664, "step": 1018 }, { "epoch": 0.17397985316715042, "grad_norm": 0.5098505616188049, "learning_rate": 8e-05, "loss": 1.7895, "step": 1019 }, { "epoch": 0.17415058903875705, "grad_norm": 0.5387850403785706, "learning_rate": 8e-05, "loss": 1.631, "step": 1020 }, { "epoch": 0.17432132491036367, "grad_norm": 0.5046359300613403, "learning_rate": 8e-05, "loss": 1.6103, "step": 1021 }, { "epoch": 0.1744920607819703, "grad_norm": 0.4415619373321533, "learning_rate": 8e-05, "loss": 1.5586, "step": 1022 }, { "epoch": 0.17466279665357692, "grad_norm": 0.44800180196762085, "learning_rate": 8e-05, "loss": 1.7131, "step": 1023 }, { "epoch": 0.17483353252518355, "grad_norm": 0.43496978282928467, "learning_rate": 8e-05, "loss": 1.841, "step": 1024 }, { "epoch": 0.17500426839679017, "grad_norm": 0.4938103258609772, "learning_rate": 8e-05, "loss": 1.8381, "step": 1025 }, { "epoch": 0.1751750042683968, "grad_norm": 0.4292472004890442, "learning_rate": 8e-05, "loss": 1.7481, "step": 1026 }, { "epoch": 0.17534574014000343, "grad_norm": 0.45373764634132385, "learning_rate": 8e-05, "loss": 1.7192, "step": 1027 }, { "epoch": 0.17551647601161005, "grad_norm": 0.4945794343948364, "learning_rate": 8e-05, "loss": 1.6574, "step": 1028 }, { "epoch": 0.17568721188321668, "grad_norm": 0.49623531103134155, "learning_rate": 8e-05, "loss": 1.9921, "step": 1029 }, { "epoch": 0.1758579477548233, "grad_norm": 0.49382439255714417, "learning_rate": 8e-05, "loss": 1.9286, "step": 1030 }, { "epoch": 0.1760286836264299, "grad_norm": 0.46702128648757935, "learning_rate": 8e-05, "loss": 1.6556, "step": 1031 }, { "epoch": 0.17619941949803652, "grad_norm": 0.45427173376083374, "learning_rate": 8e-05, "loss": 1.7418, "step": 1032 }, { "epoch": 0.17637015536964315, "grad_norm": 0.43947988748550415, "learning_rate": 8e-05, "loss": 1.7848, "step": 1033 }, { "epoch": 0.17654089124124978, "grad_norm": 0.4577616751194, "learning_rate": 8e-05, "loss": 1.6368, "step": 1034 }, { "epoch": 0.1767116271128564, "grad_norm": 0.4653101861476898, "learning_rate": 8e-05, "loss": 1.6575, "step": 1035 }, { "epoch": 0.17688236298446303, "grad_norm": 0.4276367723941803, "learning_rate": 8e-05, "loss": 1.6769, "step": 1036 }, { "epoch": 0.17705309885606965, "grad_norm": 0.4359693229198456, "learning_rate": 8e-05, "loss": 1.6637, "step": 1037 }, { "epoch": 0.17722383472767628, "grad_norm": 0.4848443865776062, "learning_rate": 8e-05, "loss": 1.842, "step": 1038 }, { "epoch": 0.1773945705992829, "grad_norm": 0.43890661001205444, "learning_rate": 8e-05, "loss": 1.6444, "step": 1039 }, { "epoch": 0.17756530647088953, "grad_norm": 0.44414210319519043, "learning_rate": 8e-05, "loss": 1.6591, "step": 1040 }, { "epoch": 0.17773604234249615, "grad_norm": 0.4731042683124542, "learning_rate": 8e-05, "loss": 1.7406, "step": 1041 }, { "epoch": 0.17790677821410278, "grad_norm": 0.4594701826572418, "learning_rate": 8e-05, "loss": 1.6866, "step": 1042 }, { "epoch": 0.1780775140857094, "grad_norm": 0.47460561990737915, "learning_rate": 8e-05, "loss": 1.7571, "step": 1043 }, { "epoch": 0.17824824995731603, "grad_norm": 0.45115897059440613, "learning_rate": 8e-05, "loss": 1.9127, "step": 1044 }, { "epoch": 0.17841898582892265, "grad_norm": 0.48657599091529846, "learning_rate": 8e-05, "loss": 1.9315, "step": 1045 }, { "epoch": 0.17858972170052928, "grad_norm": 0.47435951232910156, "learning_rate": 8e-05, "loss": 1.6578, "step": 1046 }, { "epoch": 0.1787604575721359, "grad_norm": 0.4496399462223053, "learning_rate": 8e-05, "loss": 1.759, "step": 1047 }, { "epoch": 0.17893119344374253, "grad_norm": 0.4427736699581146, "learning_rate": 8e-05, "loss": 1.6359, "step": 1048 }, { "epoch": 0.17910192931534916, "grad_norm": 0.4507405459880829, "learning_rate": 8e-05, "loss": 1.751, "step": 1049 }, { "epoch": 0.17927266518695578, "grad_norm": 0.48055294156074524, "learning_rate": 8e-05, "loss": 1.812, "step": 1050 }, { "epoch": 0.1794434010585624, "grad_norm": 0.4263698160648346, "learning_rate": 8e-05, "loss": 1.5626, "step": 1051 }, { "epoch": 0.17961413693016903, "grad_norm": 0.4836342930793762, "learning_rate": 8e-05, "loss": 1.9223, "step": 1052 }, { "epoch": 0.17978487280177566, "grad_norm": 0.4582754969596863, "learning_rate": 8e-05, "loss": 1.7061, "step": 1053 }, { "epoch": 0.17995560867338228, "grad_norm": 0.449609637260437, "learning_rate": 8e-05, "loss": 1.7928, "step": 1054 }, { "epoch": 0.1801263445449889, "grad_norm": 0.4469185471534729, "learning_rate": 8e-05, "loss": 1.6439, "step": 1055 }, { "epoch": 0.18029708041659553, "grad_norm": 0.4544421434402466, "learning_rate": 8e-05, "loss": 1.6669, "step": 1056 }, { "epoch": 0.18046781628820216, "grad_norm": 0.5294197201728821, "learning_rate": 8e-05, "loss": 1.7091, "step": 1057 }, { "epoch": 0.18063855215980879, "grad_norm": 0.46540775895118713, "learning_rate": 8e-05, "loss": 1.6917, "step": 1058 }, { "epoch": 0.1808092880314154, "grad_norm": 0.4514217674732208, "learning_rate": 8e-05, "loss": 1.6494, "step": 1059 }, { "epoch": 0.18098002390302204, "grad_norm": 0.4687007665634155, "learning_rate": 8e-05, "loss": 1.7742, "step": 1060 }, { "epoch": 0.18115075977462866, "grad_norm": 0.4393805265426636, "learning_rate": 8e-05, "loss": 1.648, "step": 1061 }, { "epoch": 0.1813214956462353, "grad_norm": 0.43615397810935974, "learning_rate": 8e-05, "loss": 1.7457, "step": 1062 }, { "epoch": 0.1814922315178419, "grad_norm": 0.4556162655353546, "learning_rate": 8e-05, "loss": 1.7551, "step": 1063 }, { "epoch": 0.1816629673894485, "grad_norm": 0.45511311292648315, "learning_rate": 8e-05, "loss": 1.6679, "step": 1064 }, { "epoch": 0.18183370326105514, "grad_norm": 0.45560920238494873, "learning_rate": 8e-05, "loss": 1.7061, "step": 1065 }, { "epoch": 0.18200443913266176, "grad_norm": 0.5237537622451782, "learning_rate": 8e-05, "loss": 1.4111, "step": 1066 }, { "epoch": 0.18217517500426839, "grad_norm": 0.4467342495918274, "learning_rate": 8e-05, "loss": 1.6774, "step": 1067 }, { "epoch": 0.182345910875875, "grad_norm": 0.49515724182128906, "learning_rate": 8e-05, "loss": 1.8955, "step": 1068 }, { "epoch": 0.18251664674748164, "grad_norm": 0.4710926413536072, "learning_rate": 8e-05, "loss": 1.5882, "step": 1069 }, { "epoch": 0.18268738261908826, "grad_norm": 0.4479275047779083, "learning_rate": 8e-05, "loss": 1.6575, "step": 1070 }, { "epoch": 0.1828581184906949, "grad_norm": 0.46218621730804443, "learning_rate": 8e-05, "loss": 1.7534, "step": 1071 }, { "epoch": 0.1830288543623015, "grad_norm": 0.44543373584747314, "learning_rate": 8e-05, "loss": 1.7158, "step": 1072 }, { "epoch": 0.18319959023390814, "grad_norm": 0.4432619512081146, "learning_rate": 8e-05, "loss": 1.7639, "step": 1073 }, { "epoch": 0.18337032610551476, "grad_norm": 0.43835633993148804, "learning_rate": 8e-05, "loss": 1.6025, "step": 1074 }, { "epoch": 0.1835410619771214, "grad_norm": 0.4706260561943054, "learning_rate": 8e-05, "loss": 1.8843, "step": 1075 }, { "epoch": 0.18371179784872801, "grad_norm": 0.4652147889137268, "learning_rate": 8e-05, "loss": 2.0128, "step": 1076 }, { "epoch": 0.18388253372033464, "grad_norm": 0.44117578864097595, "learning_rate": 8e-05, "loss": 1.7227, "step": 1077 }, { "epoch": 0.18405326959194127, "grad_norm": 0.47990626096725464, "learning_rate": 8e-05, "loss": 1.8329, "step": 1078 }, { "epoch": 0.1842240054635479, "grad_norm": 0.443166047334671, "learning_rate": 8e-05, "loss": 1.5167, "step": 1079 }, { "epoch": 0.18439474133515452, "grad_norm": 0.44209548830986023, "learning_rate": 8e-05, "loss": 1.7815, "step": 1080 }, { "epoch": 0.18456547720676114, "grad_norm": 0.474128395318985, "learning_rate": 8e-05, "loss": 1.641, "step": 1081 }, { "epoch": 0.18473621307836777, "grad_norm": 0.468071311712265, "learning_rate": 8e-05, "loss": 1.6156, "step": 1082 }, { "epoch": 0.1849069489499744, "grad_norm": 0.4560562074184418, "learning_rate": 8e-05, "loss": 1.8714, "step": 1083 }, { "epoch": 0.18507768482158102, "grad_norm": 0.4986371397972107, "learning_rate": 8e-05, "loss": 1.6923, "step": 1084 }, { "epoch": 0.18524842069318764, "grad_norm": 0.4684634506702423, "learning_rate": 8e-05, "loss": 1.7554, "step": 1085 }, { "epoch": 0.18541915656479427, "grad_norm": 0.46162205934524536, "learning_rate": 8e-05, "loss": 1.6698, "step": 1086 }, { "epoch": 0.1855898924364009, "grad_norm": 0.45396214723587036, "learning_rate": 8e-05, "loss": 1.6692, "step": 1087 }, { "epoch": 0.18576062830800752, "grad_norm": 0.4804074466228485, "learning_rate": 8e-05, "loss": 1.8635, "step": 1088 }, { "epoch": 0.18593136417961414, "grad_norm": 0.46000856161117554, "learning_rate": 8e-05, "loss": 1.7093, "step": 1089 }, { "epoch": 0.18610210005122077, "grad_norm": 0.44920581579208374, "learning_rate": 8e-05, "loss": 1.7764, "step": 1090 }, { "epoch": 0.1862728359228274, "grad_norm": 0.42190060019493103, "learning_rate": 8e-05, "loss": 1.6876, "step": 1091 }, { "epoch": 0.18644357179443402, "grad_norm": 0.458646297454834, "learning_rate": 8e-05, "loss": 1.6443, "step": 1092 }, { "epoch": 0.18661430766604065, "grad_norm": 0.43586185574531555, "learning_rate": 8e-05, "loss": 1.6814, "step": 1093 }, { "epoch": 0.18678504353764727, "grad_norm": 0.4752790927886963, "learning_rate": 8e-05, "loss": 1.777, "step": 1094 }, { "epoch": 0.1869557794092539, "grad_norm": 0.45709434151649475, "learning_rate": 8e-05, "loss": 1.6721, "step": 1095 }, { "epoch": 0.18712651528086052, "grad_norm": 0.44119489192962646, "learning_rate": 8e-05, "loss": 1.6624, "step": 1096 }, { "epoch": 0.18729725115246712, "grad_norm": 0.46544814109802246, "learning_rate": 8e-05, "loss": 1.7547, "step": 1097 }, { "epoch": 0.18746798702407375, "grad_norm": 0.44342443346977234, "learning_rate": 8e-05, "loss": 1.7242, "step": 1098 }, { "epoch": 0.18763872289568037, "grad_norm": 0.46511033177375793, "learning_rate": 8e-05, "loss": 1.7359, "step": 1099 }, { "epoch": 0.187809458767287, "grad_norm": 0.44651880860328674, "learning_rate": 8e-05, "loss": 1.7946, "step": 1100 }, { "epoch": 0.18798019463889362, "grad_norm": 0.4556266963481903, "learning_rate": 8e-05, "loss": 1.6696, "step": 1101 }, { "epoch": 0.18815093051050025, "grad_norm": 0.4809948205947876, "learning_rate": 8e-05, "loss": 1.8138, "step": 1102 }, { "epoch": 0.18832166638210687, "grad_norm": 0.46809881925582886, "learning_rate": 8e-05, "loss": 1.7625, "step": 1103 }, { "epoch": 0.1884924022537135, "grad_norm": 0.46366849541664124, "learning_rate": 8e-05, "loss": 1.7373, "step": 1104 }, { "epoch": 0.18866313812532012, "grad_norm": 0.4578344523906708, "learning_rate": 8e-05, "loss": 1.7685, "step": 1105 }, { "epoch": 0.18883387399692675, "grad_norm": 0.46258580684661865, "learning_rate": 8e-05, "loss": 1.7904, "step": 1106 }, { "epoch": 0.18900460986853337, "grad_norm": 0.4645377993583679, "learning_rate": 8e-05, "loss": 1.6681, "step": 1107 }, { "epoch": 0.18917534574014, "grad_norm": 0.4556599259376526, "learning_rate": 8e-05, "loss": 1.6522, "step": 1108 }, { "epoch": 0.18934608161174662, "grad_norm": 0.4515933394432068, "learning_rate": 8e-05, "loss": 1.6143, "step": 1109 }, { "epoch": 0.18951681748335325, "grad_norm": 0.46394261717796326, "learning_rate": 8e-05, "loss": 1.7769, "step": 1110 }, { "epoch": 0.18968755335495988, "grad_norm": 0.4626063108444214, "learning_rate": 8e-05, "loss": 1.7716, "step": 1111 }, { "epoch": 0.1898582892265665, "grad_norm": 0.46789345145225525, "learning_rate": 8e-05, "loss": 1.7483, "step": 1112 }, { "epoch": 0.19002902509817313, "grad_norm": 0.45301464200019836, "learning_rate": 8e-05, "loss": 1.6619, "step": 1113 }, { "epoch": 0.19019976096977975, "grad_norm": 0.500743567943573, "learning_rate": 8e-05, "loss": 1.887, "step": 1114 }, { "epoch": 0.19037049684138638, "grad_norm": 0.4926111698150635, "learning_rate": 8e-05, "loss": 1.7849, "step": 1115 }, { "epoch": 0.190541232712993, "grad_norm": 0.47279468178749084, "learning_rate": 8e-05, "loss": 1.8008, "step": 1116 }, { "epoch": 0.19071196858459963, "grad_norm": 0.48324665427207947, "learning_rate": 8e-05, "loss": 1.8662, "step": 1117 }, { "epoch": 0.19088270445620625, "grad_norm": 0.44984227418899536, "learning_rate": 8e-05, "loss": 1.7715, "step": 1118 }, { "epoch": 0.19105344032781288, "grad_norm": 0.49667850136756897, "learning_rate": 8e-05, "loss": 1.6724, "step": 1119 }, { "epoch": 0.1912241761994195, "grad_norm": 0.4426158368587494, "learning_rate": 8e-05, "loss": 1.8599, "step": 1120 }, { "epoch": 0.19139491207102613, "grad_norm": 0.4647299647331238, "learning_rate": 8e-05, "loss": 1.6816, "step": 1121 }, { "epoch": 0.19156564794263276, "grad_norm": 0.43249520659446716, "learning_rate": 8e-05, "loss": 1.5649, "step": 1122 }, { "epoch": 0.19173638381423938, "grad_norm": 0.48708033561706543, "learning_rate": 8e-05, "loss": 1.7914, "step": 1123 }, { "epoch": 0.191907119685846, "grad_norm": 0.4632466435432434, "learning_rate": 8e-05, "loss": 1.7308, "step": 1124 }, { "epoch": 0.19207785555745263, "grad_norm": 0.4326145052909851, "learning_rate": 8e-05, "loss": 1.5315, "step": 1125 }, { "epoch": 0.19224859142905926, "grad_norm": 0.4425449073314667, "learning_rate": 8e-05, "loss": 1.7716, "step": 1126 }, { "epoch": 0.19241932730066588, "grad_norm": 0.49101895093917847, "learning_rate": 8e-05, "loss": 1.8017, "step": 1127 }, { "epoch": 0.1925900631722725, "grad_norm": 0.4677045941352844, "learning_rate": 8e-05, "loss": 1.7527, "step": 1128 }, { "epoch": 0.1927607990438791, "grad_norm": 0.4237181842327118, "learning_rate": 8e-05, "loss": 1.5627, "step": 1129 }, { "epoch": 0.19293153491548573, "grad_norm": 0.47838330268859863, "learning_rate": 8e-05, "loss": 1.8195, "step": 1130 }, { "epoch": 0.19310227078709236, "grad_norm": 0.4392055869102478, "learning_rate": 8e-05, "loss": 1.7548, "step": 1131 }, { "epoch": 0.19327300665869898, "grad_norm": 0.451045423746109, "learning_rate": 8e-05, "loss": 1.7182, "step": 1132 }, { "epoch": 0.1934437425303056, "grad_norm": 0.4661296010017395, "learning_rate": 8e-05, "loss": 1.6377, "step": 1133 }, { "epoch": 0.19361447840191223, "grad_norm": 0.4483095109462738, "learning_rate": 8e-05, "loss": 1.6724, "step": 1134 }, { "epoch": 0.19378521427351886, "grad_norm": 0.4587257206439972, "learning_rate": 8e-05, "loss": 1.7619, "step": 1135 }, { "epoch": 0.19395595014512548, "grad_norm": 0.4477125406265259, "learning_rate": 8e-05, "loss": 1.6985, "step": 1136 }, { "epoch": 0.1941266860167321, "grad_norm": 0.44415321946144104, "learning_rate": 8e-05, "loss": 1.7296, "step": 1137 }, { "epoch": 0.19429742188833873, "grad_norm": 0.43663549423217773, "learning_rate": 8e-05, "loss": 1.6785, "step": 1138 }, { "epoch": 0.19446815775994536, "grad_norm": 0.4633432924747467, "learning_rate": 8e-05, "loss": 1.8197, "step": 1139 }, { "epoch": 0.19463889363155198, "grad_norm": 0.41987344622612, "learning_rate": 8e-05, "loss": 1.6739, "step": 1140 }, { "epoch": 0.1948096295031586, "grad_norm": 0.4260905385017395, "learning_rate": 8e-05, "loss": 1.7659, "step": 1141 }, { "epoch": 0.19498036537476524, "grad_norm": 0.42726409435272217, "learning_rate": 8e-05, "loss": 1.673, "step": 1142 }, { "epoch": 0.19515110124637186, "grad_norm": 0.4656676650047302, "learning_rate": 8e-05, "loss": 1.7736, "step": 1143 }, { "epoch": 0.1953218371179785, "grad_norm": 0.4755629599094391, "learning_rate": 8e-05, "loss": 1.9236, "step": 1144 }, { "epoch": 0.1954925729895851, "grad_norm": 0.5026397705078125, "learning_rate": 8e-05, "loss": 1.5839, "step": 1145 }, { "epoch": 0.19566330886119174, "grad_norm": 0.4525088965892792, "learning_rate": 8e-05, "loss": 1.7126, "step": 1146 }, { "epoch": 0.19583404473279836, "grad_norm": 0.46368730068206787, "learning_rate": 8e-05, "loss": 1.8331, "step": 1147 }, { "epoch": 0.196004780604405, "grad_norm": 0.484377384185791, "learning_rate": 8e-05, "loss": 1.9276, "step": 1148 }, { "epoch": 0.1961755164760116, "grad_norm": 0.4357842803001404, "learning_rate": 8e-05, "loss": 1.72, "step": 1149 }, { "epoch": 0.19634625234761824, "grad_norm": 0.46637284755706787, "learning_rate": 8e-05, "loss": 1.7765, "step": 1150 }, { "epoch": 0.19651698821922486, "grad_norm": 0.4706471264362335, "learning_rate": 8e-05, "loss": 1.7927, "step": 1151 }, { "epoch": 0.1966877240908315, "grad_norm": 0.43806809186935425, "learning_rate": 8e-05, "loss": 1.7625, "step": 1152 }, { "epoch": 0.19685845996243811, "grad_norm": 0.44194719195365906, "learning_rate": 8e-05, "loss": 1.5416, "step": 1153 }, { "epoch": 0.19702919583404474, "grad_norm": 0.4378540813922882, "learning_rate": 8e-05, "loss": 1.5989, "step": 1154 }, { "epoch": 0.19719993170565137, "grad_norm": 0.5052894949913025, "learning_rate": 8e-05, "loss": 1.7244, "step": 1155 }, { "epoch": 0.197370667577258, "grad_norm": 0.4605990946292877, "learning_rate": 8e-05, "loss": 1.7632, "step": 1156 }, { "epoch": 0.19754140344886462, "grad_norm": 0.4951755106449127, "learning_rate": 8e-05, "loss": 1.7215, "step": 1157 }, { "epoch": 0.19771213932047124, "grad_norm": 0.43976566195487976, "learning_rate": 8e-05, "loss": 1.6087, "step": 1158 }, { "epoch": 0.19788287519207787, "grad_norm": 0.4946649670600891, "learning_rate": 8e-05, "loss": 1.662, "step": 1159 }, { "epoch": 0.1980536110636845, "grad_norm": 0.46480393409729004, "learning_rate": 8e-05, "loss": 1.812, "step": 1160 }, { "epoch": 0.19822434693529112, "grad_norm": 0.44436681270599365, "learning_rate": 8e-05, "loss": 1.7312, "step": 1161 }, { "epoch": 0.19839508280689772, "grad_norm": 0.4976743757724762, "learning_rate": 8e-05, "loss": 1.8011, "step": 1162 }, { "epoch": 0.19856581867850434, "grad_norm": 0.44412049651145935, "learning_rate": 8e-05, "loss": 1.7336, "step": 1163 }, { "epoch": 0.19873655455011097, "grad_norm": 0.4284884035587311, "learning_rate": 8e-05, "loss": 1.5503, "step": 1164 }, { "epoch": 0.1989072904217176, "grad_norm": 0.45491132140159607, "learning_rate": 8e-05, "loss": 1.7632, "step": 1165 }, { "epoch": 0.19907802629332422, "grad_norm": 0.4668032228946686, "learning_rate": 8e-05, "loss": 1.716, "step": 1166 }, { "epoch": 0.19924876216493084, "grad_norm": 0.49975791573524475, "learning_rate": 8e-05, "loss": 1.9838, "step": 1167 }, { "epoch": 0.19941949803653747, "grad_norm": 0.46148058772087097, "learning_rate": 8e-05, "loss": 1.7429, "step": 1168 }, { "epoch": 0.1995902339081441, "grad_norm": 0.4436276853084564, "learning_rate": 8e-05, "loss": 1.6812, "step": 1169 }, { "epoch": 0.19976096977975072, "grad_norm": 0.4899176359176636, "learning_rate": 8e-05, "loss": 1.8024, "step": 1170 }, { "epoch": 0.19993170565135734, "grad_norm": 0.4359695017337799, "learning_rate": 8e-05, "loss": 1.6426, "step": 1171 }, { "epoch": 0.20010244152296397, "grad_norm": 0.47540879249572754, "learning_rate": 8e-05, "loss": 1.7418, "step": 1172 }, { "epoch": 0.2002731773945706, "grad_norm": 0.4546247720718384, "learning_rate": 8e-05, "loss": 1.5606, "step": 1173 }, { "epoch": 0.20044391326617722, "grad_norm": 0.4863145351409912, "learning_rate": 8e-05, "loss": 1.8528, "step": 1174 }, { "epoch": 0.20061464913778385, "grad_norm": 0.4326512813568115, "learning_rate": 8e-05, "loss": 1.6791, "step": 1175 }, { "epoch": 0.20078538500939047, "grad_norm": 0.45540767908096313, "learning_rate": 8e-05, "loss": 1.7571, "step": 1176 }, { "epoch": 0.2009561208809971, "grad_norm": 0.4506058096885681, "learning_rate": 8e-05, "loss": 1.5792, "step": 1177 }, { "epoch": 0.20112685675260372, "grad_norm": 0.48098328709602356, "learning_rate": 8e-05, "loss": 1.7369, "step": 1178 }, { "epoch": 0.20129759262421035, "grad_norm": 0.4551424980163574, "learning_rate": 8e-05, "loss": 1.801, "step": 1179 }, { "epoch": 0.20146832849581697, "grad_norm": 0.48190632462501526, "learning_rate": 8e-05, "loss": 1.8232, "step": 1180 }, { "epoch": 0.2016390643674236, "grad_norm": 0.4498426914215088, "learning_rate": 8e-05, "loss": 1.6193, "step": 1181 }, { "epoch": 0.20180980023903022, "grad_norm": 0.44888192415237427, "learning_rate": 8e-05, "loss": 1.6956, "step": 1182 }, { "epoch": 0.20198053611063685, "grad_norm": 0.4456915259361267, "learning_rate": 8e-05, "loss": 1.7437, "step": 1183 }, { "epoch": 0.20215127198224347, "grad_norm": 0.4647716283798218, "learning_rate": 8e-05, "loss": 1.8013, "step": 1184 }, { "epoch": 0.2023220078538501, "grad_norm": 0.4697854816913605, "learning_rate": 8e-05, "loss": 1.6931, "step": 1185 }, { "epoch": 0.20249274372545673, "grad_norm": 0.4533938467502594, "learning_rate": 8e-05, "loss": 1.6994, "step": 1186 }, { "epoch": 0.20266347959706335, "grad_norm": 0.43884700536727905, "learning_rate": 8e-05, "loss": 1.5955, "step": 1187 }, { "epoch": 0.20283421546866998, "grad_norm": 0.48193734884262085, "learning_rate": 8e-05, "loss": 1.7449, "step": 1188 }, { "epoch": 0.2030049513402766, "grad_norm": 0.449602872133255, "learning_rate": 8e-05, "loss": 1.6807, "step": 1189 }, { "epoch": 0.20317568721188323, "grad_norm": 0.4552322328090668, "learning_rate": 8e-05, "loss": 1.6488, "step": 1190 }, { "epoch": 0.20334642308348985, "grad_norm": 0.4644480049610138, "learning_rate": 8e-05, "loss": 1.6943, "step": 1191 }, { "epoch": 0.20351715895509648, "grad_norm": 0.5012860298156738, "learning_rate": 8e-05, "loss": 1.8125, "step": 1192 }, { "epoch": 0.2036878948267031, "grad_norm": 0.4629169702529907, "learning_rate": 8e-05, "loss": 1.7148, "step": 1193 }, { "epoch": 0.20385863069830973, "grad_norm": 0.4603479504585266, "learning_rate": 8e-05, "loss": 1.8412, "step": 1194 }, { "epoch": 0.20402936656991633, "grad_norm": 0.4752821922302246, "learning_rate": 8e-05, "loss": 1.79, "step": 1195 }, { "epoch": 0.20420010244152295, "grad_norm": 0.436958372592926, "learning_rate": 8e-05, "loss": 1.6667, "step": 1196 }, { "epoch": 0.20437083831312958, "grad_norm": 0.4716370701789856, "learning_rate": 8e-05, "loss": 1.6877, "step": 1197 }, { "epoch": 0.2045415741847362, "grad_norm": 0.45331135392189026, "learning_rate": 8e-05, "loss": 1.7676, "step": 1198 }, { "epoch": 0.20471231005634283, "grad_norm": 0.43221592903137207, "learning_rate": 8e-05, "loss": 1.6437, "step": 1199 }, { "epoch": 0.20488304592794945, "grad_norm": 0.4506204426288605, "learning_rate": 8e-05, "loss": 1.7557, "step": 1200 }, { "epoch": 0.20505378179955608, "grad_norm": 0.41508719325065613, "learning_rate": 8e-05, "loss": 1.467, "step": 1201 }, { "epoch": 0.2052245176711627, "grad_norm": 0.4890176057815552, "learning_rate": 8e-05, "loss": 1.8275, "step": 1202 }, { "epoch": 0.20539525354276933, "grad_norm": 0.4685975909233093, "learning_rate": 8e-05, "loss": 1.8365, "step": 1203 }, { "epoch": 0.20556598941437595, "grad_norm": 0.48265013098716736, "learning_rate": 8e-05, "loss": 1.6994, "step": 1204 }, { "epoch": 0.20573672528598258, "grad_norm": 0.49598610401153564, "learning_rate": 8e-05, "loss": 1.9404, "step": 1205 }, { "epoch": 0.2059074611575892, "grad_norm": 0.4298776090145111, "learning_rate": 8e-05, "loss": 1.5614, "step": 1206 }, { "epoch": 0.20607819702919583, "grad_norm": 0.4406728744506836, "learning_rate": 8e-05, "loss": 1.7634, "step": 1207 }, { "epoch": 0.20624893290080246, "grad_norm": 0.4401385188102722, "learning_rate": 8e-05, "loss": 1.7327, "step": 1208 }, { "epoch": 0.20641966877240908, "grad_norm": 0.47997555136680603, "learning_rate": 8e-05, "loss": 1.8148, "step": 1209 }, { "epoch": 0.2065904046440157, "grad_norm": 0.45727548003196716, "learning_rate": 8e-05, "loss": 1.7473, "step": 1210 }, { "epoch": 0.20676114051562233, "grad_norm": 0.4818153977394104, "learning_rate": 8e-05, "loss": 1.7842, "step": 1211 }, { "epoch": 0.20693187638722896, "grad_norm": 0.46312814950942993, "learning_rate": 8e-05, "loss": 1.8786, "step": 1212 }, { "epoch": 0.20710261225883558, "grad_norm": 0.4742242693901062, "learning_rate": 8e-05, "loss": 1.4937, "step": 1213 }, { "epoch": 0.2072733481304422, "grad_norm": 0.4626515805721283, "learning_rate": 8e-05, "loss": 1.8319, "step": 1214 }, { "epoch": 0.20744408400204883, "grad_norm": 0.44475483894348145, "learning_rate": 8e-05, "loss": 1.5801, "step": 1215 }, { "epoch": 0.20761481987365546, "grad_norm": 0.44571182131767273, "learning_rate": 8e-05, "loss": 1.7259, "step": 1216 }, { "epoch": 0.20778555574526208, "grad_norm": 0.4366648495197296, "learning_rate": 8e-05, "loss": 1.7242, "step": 1217 }, { "epoch": 0.2079562916168687, "grad_norm": 0.42941808700561523, "learning_rate": 8e-05, "loss": 1.5014, "step": 1218 }, { "epoch": 0.20812702748847534, "grad_norm": 0.45547324419021606, "learning_rate": 8e-05, "loss": 1.7641, "step": 1219 }, { "epoch": 0.20829776336008196, "grad_norm": 0.4442674219608307, "learning_rate": 8e-05, "loss": 1.6906, "step": 1220 }, { "epoch": 0.2084684992316886, "grad_norm": 0.47224631905555725, "learning_rate": 8e-05, "loss": 1.775, "step": 1221 }, { "epoch": 0.2086392351032952, "grad_norm": 0.518185555934906, "learning_rate": 8e-05, "loss": 1.974, "step": 1222 }, { "epoch": 0.20880997097490184, "grad_norm": 0.4541724920272827, "learning_rate": 8e-05, "loss": 1.7172, "step": 1223 }, { "epoch": 0.20898070684650846, "grad_norm": 0.4706520736217499, "learning_rate": 8e-05, "loss": 1.7848, "step": 1224 }, { "epoch": 0.2091514427181151, "grad_norm": 0.5513375401496887, "learning_rate": 8e-05, "loss": 2.0103, "step": 1225 }, { "epoch": 0.2093221785897217, "grad_norm": 0.4760344922542572, "learning_rate": 8e-05, "loss": 1.7246, "step": 1226 }, { "epoch": 0.20949291446132834, "grad_norm": 0.4746195673942566, "learning_rate": 8e-05, "loss": 1.7534, "step": 1227 }, { "epoch": 0.20966365033293494, "grad_norm": 0.4465510845184326, "learning_rate": 8e-05, "loss": 1.5673, "step": 1228 }, { "epoch": 0.20983438620454156, "grad_norm": 0.42981117963790894, "learning_rate": 8e-05, "loss": 1.5978, "step": 1229 }, { "epoch": 0.2100051220761482, "grad_norm": 0.4791514277458191, "learning_rate": 8e-05, "loss": 1.6407, "step": 1230 }, { "epoch": 0.2101758579477548, "grad_norm": 0.48004817962646484, "learning_rate": 8e-05, "loss": 1.8104, "step": 1231 }, { "epoch": 0.21034659381936144, "grad_norm": 0.5307628512382507, "learning_rate": 8e-05, "loss": 1.7486, "step": 1232 }, { "epoch": 0.21051732969096806, "grad_norm": 0.4629287123680115, "learning_rate": 8e-05, "loss": 1.6692, "step": 1233 }, { "epoch": 0.2106880655625747, "grad_norm": 0.49045684933662415, "learning_rate": 8e-05, "loss": 1.8164, "step": 1234 }, { "epoch": 0.21085880143418131, "grad_norm": 0.45618751645088196, "learning_rate": 8e-05, "loss": 1.6687, "step": 1235 }, { "epoch": 0.21102953730578794, "grad_norm": 0.47296079993247986, "learning_rate": 8e-05, "loss": 1.6783, "step": 1236 }, { "epoch": 0.21120027317739457, "grad_norm": 0.47785621881484985, "learning_rate": 8e-05, "loss": 1.6173, "step": 1237 }, { "epoch": 0.2113710090490012, "grad_norm": 0.4615747332572937, "learning_rate": 8e-05, "loss": 1.635, "step": 1238 }, { "epoch": 0.21154174492060782, "grad_norm": 0.4998646080493927, "learning_rate": 8e-05, "loss": 1.9339, "step": 1239 }, { "epoch": 0.21171248079221444, "grad_norm": 0.4688693881034851, "learning_rate": 8e-05, "loss": 1.8194, "step": 1240 }, { "epoch": 0.21188321666382107, "grad_norm": 0.46734383702278137, "learning_rate": 8e-05, "loss": 1.6811, "step": 1241 }, { "epoch": 0.2120539525354277, "grad_norm": 0.4667396545410156, "learning_rate": 8e-05, "loss": 1.6779, "step": 1242 }, { "epoch": 0.21222468840703432, "grad_norm": 0.4549136459827423, "learning_rate": 8e-05, "loss": 1.6986, "step": 1243 }, { "epoch": 0.21239542427864094, "grad_norm": 0.4750044345855713, "learning_rate": 8e-05, "loss": 1.658, "step": 1244 }, { "epoch": 0.21256616015024757, "grad_norm": 0.4956780672073364, "learning_rate": 8e-05, "loss": 1.8522, "step": 1245 }, { "epoch": 0.2127368960218542, "grad_norm": 0.5402913689613342, "learning_rate": 8e-05, "loss": 1.7496, "step": 1246 }, { "epoch": 0.21290763189346082, "grad_norm": 0.5109158158302307, "learning_rate": 8e-05, "loss": 1.7758, "step": 1247 }, { "epoch": 0.21307836776506744, "grad_norm": 0.45374202728271484, "learning_rate": 8e-05, "loss": 1.7364, "step": 1248 }, { "epoch": 0.21324910363667407, "grad_norm": 0.4553177058696747, "learning_rate": 8e-05, "loss": 1.8026, "step": 1249 }, { "epoch": 0.2134198395082807, "grad_norm": 0.46601730585098267, "learning_rate": 8e-05, "loss": 1.7463, "step": 1250 }, { "epoch": 0.21359057537988732, "grad_norm": 0.4790455400943756, "learning_rate": 8e-05, "loss": 1.8677, "step": 1251 }, { "epoch": 0.21376131125149395, "grad_norm": 0.4880172908306122, "learning_rate": 8e-05, "loss": 1.679, "step": 1252 }, { "epoch": 0.21393204712310057, "grad_norm": 0.48118099570274353, "learning_rate": 8e-05, "loss": 1.6726, "step": 1253 }, { "epoch": 0.2141027829947072, "grad_norm": 0.4828796088695526, "learning_rate": 8e-05, "loss": 1.7475, "step": 1254 }, { "epoch": 0.21427351886631382, "grad_norm": 0.481399804353714, "learning_rate": 8e-05, "loss": 1.8553, "step": 1255 }, { "epoch": 0.21444425473792045, "grad_norm": 0.4738909602165222, "learning_rate": 8e-05, "loss": 1.8321, "step": 1256 }, { "epoch": 0.21461499060952707, "grad_norm": 0.4651585817337036, "learning_rate": 8e-05, "loss": 1.9063, "step": 1257 }, { "epoch": 0.2147857264811337, "grad_norm": 0.45188671350479126, "learning_rate": 8e-05, "loss": 1.6402, "step": 1258 }, { "epoch": 0.21495646235274032, "grad_norm": 0.5382876992225647, "learning_rate": 8e-05, "loss": 1.7369, "step": 1259 }, { "epoch": 0.21512719822434692, "grad_norm": 0.49552440643310547, "learning_rate": 8e-05, "loss": 1.7886, "step": 1260 }, { "epoch": 0.21529793409595355, "grad_norm": 0.46183323860168457, "learning_rate": 8e-05, "loss": 1.7079, "step": 1261 }, { "epoch": 0.21546866996756017, "grad_norm": 0.44668012857437134, "learning_rate": 8e-05, "loss": 1.7169, "step": 1262 }, { "epoch": 0.2156394058391668, "grad_norm": 0.4882241189479828, "learning_rate": 8e-05, "loss": 1.6169, "step": 1263 }, { "epoch": 0.21581014171077342, "grad_norm": 0.46589526534080505, "learning_rate": 8e-05, "loss": 1.6205, "step": 1264 }, { "epoch": 0.21598087758238005, "grad_norm": 0.4217557907104492, "learning_rate": 8e-05, "loss": 1.6152, "step": 1265 }, { "epoch": 0.21615161345398667, "grad_norm": 0.4564601182937622, "learning_rate": 8e-05, "loss": 1.7261, "step": 1266 }, { "epoch": 0.2163223493255933, "grad_norm": 0.45971664786338806, "learning_rate": 8e-05, "loss": 1.7571, "step": 1267 }, { "epoch": 0.21649308519719992, "grad_norm": 0.45896562933921814, "learning_rate": 8e-05, "loss": 1.6708, "step": 1268 }, { "epoch": 0.21666382106880655, "grad_norm": 0.5285836458206177, "learning_rate": 8e-05, "loss": 1.9199, "step": 1269 }, { "epoch": 0.21683455694041318, "grad_norm": 0.4346429407596588, "learning_rate": 8e-05, "loss": 1.7117, "step": 1270 }, { "epoch": 0.2170052928120198, "grad_norm": 0.5012820363044739, "learning_rate": 8e-05, "loss": 1.6397, "step": 1271 }, { "epoch": 0.21717602868362643, "grad_norm": 0.45300665497779846, "learning_rate": 8e-05, "loss": 1.8264, "step": 1272 }, { "epoch": 0.21734676455523305, "grad_norm": 0.45214587450027466, "learning_rate": 8e-05, "loss": 1.8155, "step": 1273 }, { "epoch": 0.21751750042683968, "grad_norm": 0.43397557735443115, "learning_rate": 8e-05, "loss": 1.7514, "step": 1274 }, { "epoch": 0.2176882362984463, "grad_norm": 0.5043384432792664, "learning_rate": 8e-05, "loss": 1.8099, "step": 1275 }, { "epoch": 0.21785897217005293, "grad_norm": 0.43792521953582764, "learning_rate": 8e-05, "loss": 1.7109, "step": 1276 }, { "epoch": 0.21802970804165955, "grad_norm": 0.44246089458465576, "learning_rate": 8e-05, "loss": 1.7309, "step": 1277 }, { "epoch": 0.21820044391326618, "grad_norm": 0.43166178464889526, "learning_rate": 8e-05, "loss": 1.7093, "step": 1278 }, { "epoch": 0.2183711797848728, "grad_norm": 0.49238908290863037, "learning_rate": 8e-05, "loss": 1.8614, "step": 1279 }, { "epoch": 0.21854191565647943, "grad_norm": 0.42383745312690735, "learning_rate": 8e-05, "loss": 1.4426, "step": 1280 }, { "epoch": 0.21871265152808606, "grad_norm": 0.43670374155044556, "learning_rate": 8e-05, "loss": 1.5812, "step": 1281 }, { "epoch": 0.21888338739969268, "grad_norm": 0.5049770474433899, "learning_rate": 8e-05, "loss": 1.8393, "step": 1282 }, { "epoch": 0.2190541232712993, "grad_norm": 0.46073976159095764, "learning_rate": 8e-05, "loss": 1.874, "step": 1283 }, { "epoch": 0.21922485914290593, "grad_norm": 0.516298770904541, "learning_rate": 8e-05, "loss": 2.0576, "step": 1284 }, { "epoch": 0.21939559501451256, "grad_norm": 0.4511831998825073, "learning_rate": 8e-05, "loss": 1.7202, "step": 1285 }, { "epoch": 0.21956633088611918, "grad_norm": 0.44207966327667236, "learning_rate": 8e-05, "loss": 1.6679, "step": 1286 }, { "epoch": 0.2197370667577258, "grad_norm": 0.43935972452163696, "learning_rate": 8e-05, "loss": 1.6082, "step": 1287 }, { "epoch": 0.21990780262933243, "grad_norm": 0.47828415036201477, "learning_rate": 8e-05, "loss": 1.7331, "step": 1288 }, { "epoch": 0.22007853850093906, "grad_norm": 0.47706228494644165, "learning_rate": 8e-05, "loss": 1.7802, "step": 1289 }, { "epoch": 0.22024927437254568, "grad_norm": 0.45268696546554565, "learning_rate": 8e-05, "loss": 1.7665, "step": 1290 }, { "epoch": 0.2204200102441523, "grad_norm": 0.4739084839820862, "learning_rate": 8e-05, "loss": 1.7296, "step": 1291 }, { "epoch": 0.22059074611575893, "grad_norm": 0.46438947319984436, "learning_rate": 8e-05, "loss": 1.6333, "step": 1292 }, { "epoch": 0.22076148198736553, "grad_norm": 0.4735943377017975, "learning_rate": 8e-05, "loss": 1.804, "step": 1293 }, { "epoch": 0.22093221785897216, "grad_norm": 0.4531584680080414, "learning_rate": 8e-05, "loss": 1.6562, "step": 1294 }, { "epoch": 0.22110295373057878, "grad_norm": 0.44463178515434265, "learning_rate": 8e-05, "loss": 1.5793, "step": 1295 }, { "epoch": 0.2212736896021854, "grad_norm": 0.45195138454437256, "learning_rate": 8e-05, "loss": 1.649, "step": 1296 }, { "epoch": 0.22144442547379203, "grad_norm": 0.5042051672935486, "learning_rate": 8e-05, "loss": 1.9797, "step": 1297 }, { "epoch": 0.22161516134539866, "grad_norm": 0.3890305757522583, "learning_rate": 8e-05, "loss": 1.2905, "step": 1298 }, { "epoch": 0.22178589721700528, "grad_norm": 0.44472476840019226, "learning_rate": 8e-05, "loss": 1.5916, "step": 1299 }, { "epoch": 0.2219566330886119, "grad_norm": 0.45445388555526733, "learning_rate": 8e-05, "loss": 1.5035, "step": 1300 }, { "epoch": 0.22212736896021854, "grad_norm": 0.4521560072898865, "learning_rate": 8e-05, "loss": 1.664, "step": 1301 }, { "epoch": 0.22229810483182516, "grad_norm": 0.4621025621891022, "learning_rate": 8e-05, "loss": 1.7847, "step": 1302 }, { "epoch": 0.22246884070343179, "grad_norm": 0.4439074695110321, "learning_rate": 8e-05, "loss": 1.7852, "step": 1303 }, { "epoch": 0.2226395765750384, "grad_norm": 0.44455909729003906, "learning_rate": 8e-05, "loss": 1.7229, "step": 1304 }, { "epoch": 0.22281031244664504, "grad_norm": 0.4280337393283844, "learning_rate": 8e-05, "loss": 1.69, "step": 1305 }, { "epoch": 0.22298104831825166, "grad_norm": 0.45455464720726013, "learning_rate": 8e-05, "loss": 1.6109, "step": 1306 }, { "epoch": 0.2231517841898583, "grad_norm": 0.46900948882102966, "learning_rate": 8e-05, "loss": 1.682, "step": 1307 }, { "epoch": 0.2233225200614649, "grad_norm": 0.45370814204216003, "learning_rate": 8e-05, "loss": 1.6507, "step": 1308 }, { "epoch": 0.22349325593307154, "grad_norm": 0.46461474895477295, "learning_rate": 8e-05, "loss": 1.8067, "step": 1309 }, { "epoch": 0.22366399180467816, "grad_norm": 0.48245877027511597, "learning_rate": 8e-05, "loss": 1.775, "step": 1310 }, { "epoch": 0.2238347276762848, "grad_norm": 0.49169817566871643, "learning_rate": 8e-05, "loss": 1.5881, "step": 1311 }, { "epoch": 0.22400546354789141, "grad_norm": 0.48882168531417847, "learning_rate": 8e-05, "loss": 1.6415, "step": 1312 }, { "epoch": 0.22417619941949804, "grad_norm": 0.49520814418792725, "learning_rate": 8e-05, "loss": 1.68, "step": 1313 }, { "epoch": 0.22434693529110467, "grad_norm": 0.42200741171836853, "learning_rate": 8e-05, "loss": 1.5844, "step": 1314 }, { "epoch": 0.2245176711627113, "grad_norm": 0.4359530210494995, "learning_rate": 8e-05, "loss": 1.5285, "step": 1315 }, { "epoch": 0.22468840703431792, "grad_norm": 0.43584680557250977, "learning_rate": 8e-05, "loss": 1.6478, "step": 1316 }, { "epoch": 0.22485914290592454, "grad_norm": 0.5251561403274536, "learning_rate": 8e-05, "loss": 1.7551, "step": 1317 }, { "epoch": 0.22502987877753117, "grad_norm": 0.5080098509788513, "learning_rate": 8e-05, "loss": 2.0261, "step": 1318 }, { "epoch": 0.2252006146491378, "grad_norm": 0.46105217933654785, "learning_rate": 8e-05, "loss": 1.7142, "step": 1319 }, { "epoch": 0.22537135052074442, "grad_norm": 0.4335024654865265, "learning_rate": 8e-05, "loss": 1.6128, "step": 1320 }, { "epoch": 0.22554208639235104, "grad_norm": 0.4571915566921234, "learning_rate": 8e-05, "loss": 1.8193, "step": 1321 }, { "epoch": 0.22571282226395767, "grad_norm": 0.4509830176830292, "learning_rate": 8e-05, "loss": 1.7063, "step": 1322 }, { "epoch": 0.2258835581355643, "grad_norm": 0.41856834292411804, "learning_rate": 8e-05, "loss": 1.6231, "step": 1323 }, { "epoch": 0.22605429400717092, "grad_norm": 0.45287835597991943, "learning_rate": 8e-05, "loss": 1.6774, "step": 1324 }, { "epoch": 0.22622502987877754, "grad_norm": 0.46507278084754944, "learning_rate": 8e-05, "loss": 1.8518, "step": 1325 }, { "epoch": 0.22639576575038414, "grad_norm": 0.4482403099536896, "learning_rate": 8e-05, "loss": 1.7812, "step": 1326 }, { "epoch": 0.22656650162199077, "grad_norm": 0.47900182008743286, "learning_rate": 8e-05, "loss": 1.9219, "step": 1327 }, { "epoch": 0.2267372374935974, "grad_norm": 0.4767565131187439, "learning_rate": 8e-05, "loss": 1.7594, "step": 1328 }, { "epoch": 0.22690797336520402, "grad_norm": 0.4479288160800934, "learning_rate": 8e-05, "loss": 1.7125, "step": 1329 }, { "epoch": 0.22707870923681064, "grad_norm": 0.45299115777015686, "learning_rate": 8e-05, "loss": 1.6224, "step": 1330 }, { "epoch": 0.22724944510841727, "grad_norm": 0.44788169860839844, "learning_rate": 8e-05, "loss": 1.6768, "step": 1331 }, { "epoch": 0.2274201809800239, "grad_norm": 0.4486752152442932, "learning_rate": 8e-05, "loss": 1.7148, "step": 1332 }, { "epoch": 0.22759091685163052, "grad_norm": 0.47046583890914917, "learning_rate": 8e-05, "loss": 1.8413, "step": 1333 }, { "epoch": 0.22776165272323715, "grad_norm": 0.4958024322986603, "learning_rate": 8e-05, "loss": 1.9265, "step": 1334 }, { "epoch": 0.22793238859484377, "grad_norm": 0.5038763284683228, "learning_rate": 8e-05, "loss": 1.6554, "step": 1335 }, { "epoch": 0.2281031244664504, "grad_norm": 0.4888961911201477, "learning_rate": 8e-05, "loss": 2.0102, "step": 1336 }, { "epoch": 0.22827386033805702, "grad_norm": 0.44518187642097473, "learning_rate": 8e-05, "loss": 1.7714, "step": 1337 }, { "epoch": 0.22844459620966365, "grad_norm": 0.46039512753486633, "learning_rate": 8e-05, "loss": 1.7816, "step": 1338 }, { "epoch": 0.22861533208127027, "grad_norm": 0.45455074310302734, "learning_rate": 8e-05, "loss": 1.8277, "step": 1339 }, { "epoch": 0.2287860679528769, "grad_norm": 0.4532495439052582, "learning_rate": 8e-05, "loss": 1.6194, "step": 1340 }, { "epoch": 0.22895680382448352, "grad_norm": 0.45629507303237915, "learning_rate": 8e-05, "loss": 1.843, "step": 1341 }, { "epoch": 0.22912753969609015, "grad_norm": 0.45584532618522644, "learning_rate": 8e-05, "loss": 1.6907, "step": 1342 }, { "epoch": 0.22929827556769677, "grad_norm": 0.4941563904285431, "learning_rate": 8e-05, "loss": 1.9341, "step": 1343 }, { "epoch": 0.2294690114393034, "grad_norm": 0.5015853047370911, "learning_rate": 8e-05, "loss": 1.8175, "step": 1344 }, { "epoch": 0.22963974731091003, "grad_norm": 0.4501447081565857, "learning_rate": 8e-05, "loss": 1.6125, "step": 1345 }, { "epoch": 0.22981048318251665, "grad_norm": 0.421179860830307, "learning_rate": 8e-05, "loss": 1.5077, "step": 1346 }, { "epoch": 0.22998121905412328, "grad_norm": 0.44239068031311035, "learning_rate": 8e-05, "loss": 1.7704, "step": 1347 }, { "epoch": 0.2301519549257299, "grad_norm": 0.5041502118110657, "learning_rate": 8e-05, "loss": 1.6422, "step": 1348 }, { "epoch": 0.23032269079733653, "grad_norm": 0.4503922164440155, "learning_rate": 8e-05, "loss": 1.791, "step": 1349 }, { "epoch": 0.23049342666894315, "grad_norm": 0.5080987215042114, "learning_rate": 8e-05, "loss": 1.8724, "step": 1350 }, { "epoch": 0.23066416254054978, "grad_norm": 0.504002571105957, "learning_rate": 8e-05, "loss": 1.7699, "step": 1351 }, { "epoch": 0.2308348984121564, "grad_norm": 0.4363219738006592, "learning_rate": 8e-05, "loss": 1.5927, "step": 1352 }, { "epoch": 0.23100563428376303, "grad_norm": 0.44268617033958435, "learning_rate": 8e-05, "loss": 1.664, "step": 1353 }, { "epoch": 0.23117637015536965, "grad_norm": 0.4628807008266449, "learning_rate": 8e-05, "loss": 1.7471, "step": 1354 }, { "epoch": 0.23134710602697628, "grad_norm": 0.4531369209289551, "learning_rate": 8e-05, "loss": 1.6487, "step": 1355 }, { "epoch": 0.2315178418985829, "grad_norm": 0.47215506434440613, "learning_rate": 8e-05, "loss": 1.9103, "step": 1356 }, { "epoch": 0.23168857777018953, "grad_norm": 0.4868447482585907, "learning_rate": 8e-05, "loss": 1.7566, "step": 1357 }, { "epoch": 0.23185931364179613, "grad_norm": 0.43566063046455383, "learning_rate": 8e-05, "loss": 1.4744, "step": 1358 }, { "epoch": 0.23203004951340275, "grad_norm": 0.4651631712913513, "learning_rate": 8e-05, "loss": 1.8373, "step": 1359 }, { "epoch": 0.23220078538500938, "grad_norm": 0.47947216033935547, "learning_rate": 8e-05, "loss": 1.7887, "step": 1360 }, { "epoch": 0.232371521256616, "grad_norm": 0.46876633167266846, "learning_rate": 8e-05, "loss": 1.7119, "step": 1361 }, { "epoch": 0.23254225712822263, "grad_norm": 0.4458506405353546, "learning_rate": 8e-05, "loss": 1.7631, "step": 1362 }, { "epoch": 0.23271299299982925, "grad_norm": 0.45883309841156006, "learning_rate": 8e-05, "loss": 1.7958, "step": 1363 }, { "epoch": 0.23288372887143588, "grad_norm": 0.4489084482192993, "learning_rate": 8e-05, "loss": 1.7071, "step": 1364 }, { "epoch": 0.2330544647430425, "grad_norm": 0.42886102199554443, "learning_rate": 8e-05, "loss": 1.7319, "step": 1365 }, { "epoch": 0.23322520061464913, "grad_norm": 0.43516480922698975, "learning_rate": 8e-05, "loss": 1.6373, "step": 1366 }, { "epoch": 0.23339593648625576, "grad_norm": 0.47618368268013, "learning_rate": 8e-05, "loss": 1.9898, "step": 1367 }, { "epoch": 0.23356667235786238, "grad_norm": 0.48623600602149963, "learning_rate": 8e-05, "loss": 1.9531, "step": 1368 }, { "epoch": 0.233737408229469, "grad_norm": 0.4519324004650116, "learning_rate": 8e-05, "loss": 1.7649, "step": 1369 }, { "epoch": 0.23390814410107563, "grad_norm": 0.43098169565200806, "learning_rate": 8e-05, "loss": 1.7654, "step": 1370 }, { "epoch": 0.23407887997268226, "grad_norm": 0.44700387120246887, "learning_rate": 8e-05, "loss": 1.5128, "step": 1371 }, { "epoch": 0.23424961584428888, "grad_norm": 0.4534705877304077, "learning_rate": 8e-05, "loss": 1.7095, "step": 1372 }, { "epoch": 0.2344203517158955, "grad_norm": 0.4406139850616455, "learning_rate": 8e-05, "loss": 1.5622, "step": 1373 }, { "epoch": 0.23459108758750213, "grad_norm": 0.4496249258518219, "learning_rate": 8e-05, "loss": 1.8091, "step": 1374 }, { "epoch": 0.23476182345910876, "grad_norm": 0.4171314239501953, "learning_rate": 8e-05, "loss": 1.3531, "step": 1375 }, { "epoch": 0.23493255933071538, "grad_norm": 0.4801623821258545, "learning_rate": 8e-05, "loss": 1.6623, "step": 1376 }, { "epoch": 0.235103295202322, "grad_norm": 0.45930761098861694, "learning_rate": 8e-05, "loss": 1.7469, "step": 1377 }, { "epoch": 0.23527403107392864, "grad_norm": 0.4459248185157776, "learning_rate": 8e-05, "loss": 1.7214, "step": 1378 }, { "epoch": 0.23544476694553526, "grad_norm": 0.46064332127571106, "learning_rate": 8e-05, "loss": 1.6086, "step": 1379 }, { "epoch": 0.2356155028171419, "grad_norm": 0.47128233313560486, "learning_rate": 8e-05, "loss": 1.8758, "step": 1380 }, { "epoch": 0.2357862386887485, "grad_norm": 0.4940902888774872, "learning_rate": 8e-05, "loss": 1.646, "step": 1381 }, { "epoch": 0.23595697456035514, "grad_norm": 0.5020127296447754, "learning_rate": 8e-05, "loss": 1.5411, "step": 1382 }, { "epoch": 0.23612771043196176, "grad_norm": 0.4456776976585388, "learning_rate": 8e-05, "loss": 1.6068, "step": 1383 }, { "epoch": 0.2362984463035684, "grad_norm": 0.44684141874313354, "learning_rate": 8e-05, "loss": 1.8318, "step": 1384 }, { "epoch": 0.236469182175175, "grad_norm": 0.4690016210079193, "learning_rate": 8e-05, "loss": 1.9044, "step": 1385 }, { "epoch": 0.23663991804678164, "grad_norm": 0.46104907989501953, "learning_rate": 8e-05, "loss": 1.6964, "step": 1386 }, { "epoch": 0.23681065391838826, "grad_norm": 0.4646522104740143, "learning_rate": 8e-05, "loss": 1.722, "step": 1387 }, { "epoch": 0.2369813897899949, "grad_norm": 0.4655124545097351, "learning_rate": 8e-05, "loss": 1.848, "step": 1388 }, { "epoch": 0.23715212566160151, "grad_norm": 0.49427521228790283, "learning_rate": 8e-05, "loss": 1.9542, "step": 1389 }, { "epoch": 0.23732286153320814, "grad_norm": 0.4779433310031891, "learning_rate": 8e-05, "loss": 2.0011, "step": 1390 }, { "epoch": 0.23749359740481474, "grad_norm": 0.447837769985199, "learning_rate": 8e-05, "loss": 1.7177, "step": 1391 }, { "epoch": 0.23766433327642136, "grad_norm": 0.440499871969223, "learning_rate": 8e-05, "loss": 1.6677, "step": 1392 }, { "epoch": 0.237835069148028, "grad_norm": 0.44730064272880554, "learning_rate": 8e-05, "loss": 1.6919, "step": 1393 }, { "epoch": 0.23800580501963461, "grad_norm": 0.447333961725235, "learning_rate": 8e-05, "loss": 1.5918, "step": 1394 }, { "epoch": 0.23817654089124124, "grad_norm": 0.46496838331222534, "learning_rate": 8e-05, "loss": 1.6576, "step": 1395 }, { "epoch": 0.23834727676284787, "grad_norm": 0.4709150791168213, "learning_rate": 8e-05, "loss": 1.7809, "step": 1396 }, { "epoch": 0.2385180126344545, "grad_norm": 0.4825021028518677, "learning_rate": 8e-05, "loss": 1.9762, "step": 1397 }, { "epoch": 0.23868874850606112, "grad_norm": 0.4817480146884918, "learning_rate": 8e-05, "loss": 1.7974, "step": 1398 }, { "epoch": 0.23885948437766774, "grad_norm": 0.440911203622818, "learning_rate": 8e-05, "loss": 1.6788, "step": 1399 }, { "epoch": 0.23903022024927437, "grad_norm": 0.44602376222610474, "learning_rate": 8e-05, "loss": 1.6833, "step": 1400 }, { "epoch": 0.239200956120881, "grad_norm": 0.4694054126739502, "learning_rate": 8e-05, "loss": 1.8557, "step": 1401 }, { "epoch": 0.23937169199248762, "grad_norm": 0.5382222533226013, "learning_rate": 8e-05, "loss": 1.8719, "step": 1402 }, { "epoch": 0.23954242786409424, "grad_norm": 0.4806840717792511, "learning_rate": 8e-05, "loss": 1.8045, "step": 1403 }, { "epoch": 0.23971316373570087, "grad_norm": 0.43654757738113403, "learning_rate": 8e-05, "loss": 1.7197, "step": 1404 }, { "epoch": 0.2398838996073075, "grad_norm": 0.44001296162605286, "learning_rate": 8e-05, "loss": 1.5252, "step": 1405 }, { "epoch": 0.24005463547891412, "grad_norm": 0.46210426092147827, "learning_rate": 8e-05, "loss": 1.5605, "step": 1406 }, { "epoch": 0.24022537135052074, "grad_norm": 0.44686582684516907, "learning_rate": 8e-05, "loss": 1.6255, "step": 1407 }, { "epoch": 0.24039610722212737, "grad_norm": 0.5917032361030579, "learning_rate": 8e-05, "loss": 1.8917, "step": 1408 }, { "epoch": 0.240566843093734, "grad_norm": 0.4760607182979584, "learning_rate": 8e-05, "loss": 1.6122, "step": 1409 }, { "epoch": 0.24073757896534062, "grad_norm": 0.44657638669013977, "learning_rate": 8e-05, "loss": 1.5943, "step": 1410 }, { "epoch": 0.24090831483694725, "grad_norm": 0.44896557927131653, "learning_rate": 8e-05, "loss": 1.6656, "step": 1411 }, { "epoch": 0.24107905070855387, "grad_norm": 0.4905566871166229, "learning_rate": 8e-05, "loss": 2.0182, "step": 1412 }, { "epoch": 0.2412497865801605, "grad_norm": 0.464622437953949, "learning_rate": 8e-05, "loss": 1.797, "step": 1413 }, { "epoch": 0.24142052245176712, "grad_norm": 0.4899992048740387, "learning_rate": 8e-05, "loss": 2.0143, "step": 1414 }, { "epoch": 0.24159125832337375, "grad_norm": 0.42877304553985596, "learning_rate": 8e-05, "loss": 1.5757, "step": 1415 }, { "epoch": 0.24176199419498037, "grad_norm": 0.4396146833896637, "learning_rate": 8e-05, "loss": 1.6192, "step": 1416 }, { "epoch": 0.241932730066587, "grad_norm": 0.47054070234298706, "learning_rate": 8e-05, "loss": 1.8171, "step": 1417 }, { "epoch": 0.24210346593819362, "grad_norm": 0.4540466368198395, "learning_rate": 8e-05, "loss": 1.7407, "step": 1418 }, { "epoch": 0.24227420180980025, "grad_norm": 0.44658389687538147, "learning_rate": 8e-05, "loss": 1.6441, "step": 1419 }, { "epoch": 0.24244493768140687, "grad_norm": 0.4483252167701721, "learning_rate": 8e-05, "loss": 1.8054, "step": 1420 }, { "epoch": 0.2426156735530135, "grad_norm": 0.43310409784317017, "learning_rate": 8e-05, "loss": 1.5692, "step": 1421 }, { "epoch": 0.24278640942462013, "grad_norm": 0.44163981080055237, "learning_rate": 8e-05, "loss": 1.5581, "step": 1422 }, { "epoch": 0.24295714529622675, "grad_norm": 0.46107298135757446, "learning_rate": 8e-05, "loss": 1.7687, "step": 1423 }, { "epoch": 0.24312788116783335, "grad_norm": 0.45935970544815063, "learning_rate": 8e-05, "loss": 1.8269, "step": 1424 }, { "epoch": 0.24329861703943997, "grad_norm": 0.5505486726760864, "learning_rate": 8e-05, "loss": 1.8203, "step": 1425 }, { "epoch": 0.2434693529110466, "grad_norm": 0.44584232568740845, "learning_rate": 8e-05, "loss": 1.7958, "step": 1426 }, { "epoch": 0.24364008878265322, "grad_norm": 0.45638802647590637, "learning_rate": 8e-05, "loss": 1.6314, "step": 1427 }, { "epoch": 0.24381082465425985, "grad_norm": 0.49535274505615234, "learning_rate": 8e-05, "loss": 1.9576, "step": 1428 }, { "epoch": 0.24398156052586648, "grad_norm": 0.4616897404193878, "learning_rate": 8e-05, "loss": 1.7209, "step": 1429 }, { "epoch": 0.2441522963974731, "grad_norm": 0.4322304427623749, "learning_rate": 8e-05, "loss": 1.6275, "step": 1430 }, { "epoch": 0.24432303226907973, "grad_norm": 0.4821236729621887, "learning_rate": 8e-05, "loss": 1.7117, "step": 1431 }, { "epoch": 0.24449376814068635, "grad_norm": 0.46829262375831604, "learning_rate": 8e-05, "loss": 1.7514, "step": 1432 }, { "epoch": 0.24466450401229298, "grad_norm": 0.4965200424194336, "learning_rate": 8e-05, "loss": 1.9623, "step": 1433 }, { "epoch": 0.2448352398838996, "grad_norm": 0.4489729404449463, "learning_rate": 8e-05, "loss": 1.7089, "step": 1434 }, { "epoch": 0.24500597575550623, "grad_norm": 0.460237979888916, "learning_rate": 8e-05, "loss": 1.7849, "step": 1435 }, { "epoch": 0.24517671162711285, "grad_norm": 0.4677680730819702, "learning_rate": 8e-05, "loss": 1.7616, "step": 1436 }, { "epoch": 0.24534744749871948, "grad_norm": 0.48224759101867676, "learning_rate": 8e-05, "loss": 1.773, "step": 1437 }, { "epoch": 0.2455181833703261, "grad_norm": 0.4772965908050537, "learning_rate": 8e-05, "loss": 1.7776, "step": 1438 }, { "epoch": 0.24568891924193273, "grad_norm": 0.47731176018714905, "learning_rate": 8e-05, "loss": 1.8442, "step": 1439 }, { "epoch": 0.24585965511353935, "grad_norm": 0.49973687529563904, "learning_rate": 8e-05, "loss": 1.8529, "step": 1440 }, { "epoch": 0.24603039098514598, "grad_norm": 0.43765705823898315, "learning_rate": 8e-05, "loss": 1.2617, "step": 1441 }, { "epoch": 0.2462011268567526, "grad_norm": 0.4769539535045624, "learning_rate": 8e-05, "loss": 1.7911, "step": 1442 }, { "epoch": 0.24637186272835923, "grad_norm": 0.45270058512687683, "learning_rate": 8e-05, "loss": 1.7911, "step": 1443 }, { "epoch": 0.24654259859996586, "grad_norm": 0.4546486437320709, "learning_rate": 8e-05, "loss": 1.7022, "step": 1444 }, { "epoch": 0.24671333447157248, "grad_norm": 0.47902950644493103, "learning_rate": 8e-05, "loss": 1.7483, "step": 1445 }, { "epoch": 0.2468840703431791, "grad_norm": 0.5309086441993713, "learning_rate": 8e-05, "loss": 1.8391, "step": 1446 }, { "epoch": 0.24705480621478573, "grad_norm": 0.4960760176181793, "learning_rate": 8e-05, "loss": 1.8856, "step": 1447 }, { "epoch": 0.24722554208639236, "grad_norm": 0.44523605704307556, "learning_rate": 8e-05, "loss": 1.6674, "step": 1448 }, { "epoch": 0.24739627795799898, "grad_norm": 0.4517652094364166, "learning_rate": 8e-05, "loss": 1.4763, "step": 1449 }, { "epoch": 0.2475670138296056, "grad_norm": 0.526369571685791, "learning_rate": 8e-05, "loss": 1.9151, "step": 1450 }, { "epoch": 0.24773774970121223, "grad_norm": 0.4597453474998474, "learning_rate": 8e-05, "loss": 1.762, "step": 1451 }, { "epoch": 0.24790848557281886, "grad_norm": 0.4775269329547882, "learning_rate": 8e-05, "loss": 1.8102, "step": 1452 }, { "epoch": 0.24807922144442549, "grad_norm": 0.465261846780777, "learning_rate": 8e-05, "loss": 1.7395, "step": 1453 }, { "epoch": 0.2482499573160321, "grad_norm": 0.46572116017341614, "learning_rate": 8e-05, "loss": 1.7559, "step": 1454 }, { "epoch": 0.24842069318763874, "grad_norm": 0.47411972284317017, "learning_rate": 8e-05, "loss": 1.5463, "step": 1455 }, { "epoch": 0.24859142905924536, "grad_norm": 0.4290204644203186, "learning_rate": 8e-05, "loss": 1.5768, "step": 1456 }, { "epoch": 0.24876216493085196, "grad_norm": 0.5074236392974854, "learning_rate": 8e-05, "loss": 1.6735, "step": 1457 }, { "epoch": 0.24893290080245858, "grad_norm": 0.474200576543808, "learning_rate": 8e-05, "loss": 1.7285, "step": 1458 }, { "epoch": 0.2491036366740652, "grad_norm": 0.4270091652870178, "learning_rate": 8e-05, "loss": 1.5882, "step": 1459 }, { "epoch": 0.24927437254567184, "grad_norm": 0.47300565242767334, "learning_rate": 8e-05, "loss": 1.758, "step": 1460 }, { "epoch": 0.24944510841727846, "grad_norm": 0.46187761425971985, "learning_rate": 8e-05, "loss": 1.729, "step": 1461 }, { "epoch": 0.24961584428888509, "grad_norm": 0.45753613114356995, "learning_rate": 8e-05, "loss": 1.6571, "step": 1462 }, { "epoch": 0.2497865801604917, "grad_norm": 0.48651930689811707, "learning_rate": 8e-05, "loss": 1.9174, "step": 1463 }, { "epoch": 0.24995731603209834, "grad_norm": 0.42692577838897705, "learning_rate": 8e-05, "loss": 1.4745, "step": 1464 }, { "epoch": 0.25012805190370496, "grad_norm": 0.4740496277809143, "learning_rate": 8e-05, "loss": 1.8912, "step": 1465 }, { "epoch": 0.2502987877753116, "grad_norm": 0.43415209650993347, "learning_rate": 8e-05, "loss": 1.6775, "step": 1466 }, { "epoch": 0.2504695236469182, "grad_norm": 0.48874714970588684, "learning_rate": 8e-05, "loss": 1.8037, "step": 1467 }, { "epoch": 0.25064025951852487, "grad_norm": 0.44115206599235535, "learning_rate": 8e-05, "loss": 1.5814, "step": 1468 }, { "epoch": 0.25081099539013146, "grad_norm": 0.4127998650074005, "learning_rate": 8e-05, "loss": 1.4475, "step": 1469 }, { "epoch": 0.2509817312617381, "grad_norm": 0.47522905468940735, "learning_rate": 8e-05, "loss": 1.6111, "step": 1470 }, { "epoch": 0.2511524671333447, "grad_norm": 0.43897897005081177, "learning_rate": 8e-05, "loss": 1.7241, "step": 1471 }, { "epoch": 0.25132320300495137, "grad_norm": 0.45677176117897034, "learning_rate": 8e-05, "loss": 1.5905, "step": 1472 }, { "epoch": 0.25149393887655797, "grad_norm": 0.4480850398540497, "learning_rate": 8e-05, "loss": 1.8006, "step": 1473 }, { "epoch": 0.25166467474816456, "grad_norm": 0.4795920252799988, "learning_rate": 8e-05, "loss": 1.8331, "step": 1474 }, { "epoch": 0.2518354106197712, "grad_norm": 0.48657429218292236, "learning_rate": 8e-05, "loss": 1.8426, "step": 1475 }, { "epoch": 0.2520061464913778, "grad_norm": 0.46248331665992737, "learning_rate": 8e-05, "loss": 1.6776, "step": 1476 }, { "epoch": 0.25217688236298447, "grad_norm": 0.48157691955566406, "learning_rate": 8e-05, "loss": 1.5953, "step": 1477 }, { "epoch": 0.25234761823459106, "grad_norm": 0.45289215445518494, "learning_rate": 8e-05, "loss": 1.5695, "step": 1478 }, { "epoch": 0.2525183541061977, "grad_norm": 0.46873611211776733, "learning_rate": 8e-05, "loss": 1.6152, "step": 1479 }, { "epoch": 0.2526890899778043, "grad_norm": 0.508687436580658, "learning_rate": 8e-05, "loss": 1.9612, "step": 1480 }, { "epoch": 0.25285982584941097, "grad_norm": 0.4642130136489868, "learning_rate": 8e-05, "loss": 1.8107, "step": 1481 }, { "epoch": 0.25303056172101757, "grad_norm": 0.488258421421051, "learning_rate": 8e-05, "loss": 1.8321, "step": 1482 }, { "epoch": 0.2532012975926242, "grad_norm": 0.4482232630252838, "learning_rate": 8e-05, "loss": 1.6733, "step": 1483 }, { "epoch": 0.2533720334642308, "grad_norm": 0.45923903584480286, "learning_rate": 8e-05, "loss": 1.8127, "step": 1484 }, { "epoch": 0.25354276933583747, "grad_norm": 0.4389362633228302, "learning_rate": 8e-05, "loss": 1.7223, "step": 1485 }, { "epoch": 0.25371350520744407, "grad_norm": 0.48416557908058167, "learning_rate": 8e-05, "loss": 1.868, "step": 1486 }, { "epoch": 0.2538842410790507, "grad_norm": 0.4848282039165497, "learning_rate": 8e-05, "loss": 1.8073, "step": 1487 }, { "epoch": 0.2540549769506573, "grad_norm": 0.4251391589641571, "learning_rate": 8e-05, "loss": 1.5683, "step": 1488 }, { "epoch": 0.25422571282226397, "grad_norm": 0.48194971680641174, "learning_rate": 8e-05, "loss": 1.775, "step": 1489 }, { "epoch": 0.25439644869387057, "grad_norm": 0.4817880690097809, "learning_rate": 8e-05, "loss": 1.8269, "step": 1490 }, { "epoch": 0.2545671845654772, "grad_norm": 0.468232125043869, "learning_rate": 8e-05, "loss": 1.6103, "step": 1491 }, { "epoch": 0.2547379204370838, "grad_norm": 0.44875475764274597, "learning_rate": 8e-05, "loss": 1.5831, "step": 1492 }, { "epoch": 0.2549086563086905, "grad_norm": 0.46827152371406555, "learning_rate": 8e-05, "loss": 1.7652, "step": 1493 }, { "epoch": 0.25507939218029707, "grad_norm": 0.4801056981086731, "learning_rate": 8e-05, "loss": 2.0366, "step": 1494 }, { "epoch": 0.2552501280519037, "grad_norm": 0.4596904218196869, "learning_rate": 8e-05, "loss": 1.8237, "step": 1495 }, { "epoch": 0.2554208639235103, "grad_norm": 0.4654986560344696, "learning_rate": 8e-05, "loss": 1.887, "step": 1496 }, { "epoch": 0.255591599795117, "grad_norm": 0.4441089332103729, "learning_rate": 8e-05, "loss": 1.659, "step": 1497 }, { "epoch": 0.2557623356667236, "grad_norm": 0.4931698143482208, "learning_rate": 8e-05, "loss": 1.7752, "step": 1498 }, { "epoch": 0.2559330715383302, "grad_norm": 0.4295077919960022, "learning_rate": 8e-05, "loss": 1.5936, "step": 1499 }, { "epoch": 0.2561038074099368, "grad_norm": 0.45535269379615784, "learning_rate": 8e-05, "loss": 1.6545, "step": 1500 }, { "epoch": 0.2562745432815435, "grad_norm": 0.4311533272266388, "learning_rate": 8e-05, "loss": 1.6957, "step": 1501 }, { "epoch": 0.2564452791531501, "grad_norm": 0.4715679883956909, "learning_rate": 8e-05, "loss": 1.833, "step": 1502 }, { "epoch": 0.2566160150247567, "grad_norm": 0.475820928812027, "learning_rate": 8e-05, "loss": 1.6028, "step": 1503 }, { "epoch": 0.2567867508963633, "grad_norm": 0.5740247368812561, "learning_rate": 8e-05, "loss": 1.7404, "step": 1504 }, { "epoch": 0.25695748676797, "grad_norm": 0.47386202216148376, "learning_rate": 8e-05, "loss": 1.888, "step": 1505 }, { "epoch": 0.2571282226395766, "grad_norm": 0.444698303937912, "learning_rate": 8e-05, "loss": 1.7384, "step": 1506 }, { "epoch": 0.2572989585111832, "grad_norm": 0.47479692101478577, "learning_rate": 8e-05, "loss": 1.4774, "step": 1507 }, { "epoch": 0.2574696943827898, "grad_norm": 0.46353641152381897, "learning_rate": 8e-05, "loss": 1.664, "step": 1508 }, { "epoch": 0.2576404302543964, "grad_norm": 0.45346876978874207, "learning_rate": 8e-05, "loss": 1.6977, "step": 1509 }, { "epoch": 0.2578111661260031, "grad_norm": 0.43778517842292786, "learning_rate": 8e-05, "loss": 1.6141, "step": 1510 }, { "epoch": 0.2579819019976097, "grad_norm": 0.595479428768158, "learning_rate": 8e-05, "loss": 1.7479, "step": 1511 }, { "epoch": 0.25815263786921633, "grad_norm": 0.5295188426971436, "learning_rate": 8e-05, "loss": 1.6263, "step": 1512 }, { "epoch": 0.2583233737408229, "grad_norm": 0.4657239317893982, "learning_rate": 8e-05, "loss": 1.6429, "step": 1513 }, { "epoch": 0.2584941096124296, "grad_norm": 0.47978851199150085, "learning_rate": 8e-05, "loss": 1.8022, "step": 1514 }, { "epoch": 0.2586648454840362, "grad_norm": 0.4499625265598297, "learning_rate": 8e-05, "loss": 1.6877, "step": 1515 }, { "epoch": 0.25883558135564283, "grad_norm": 0.48673757910728455, "learning_rate": 8e-05, "loss": 1.7487, "step": 1516 }, { "epoch": 0.2590063172272494, "grad_norm": 0.4334790110588074, "learning_rate": 8e-05, "loss": 1.594, "step": 1517 }, { "epoch": 0.2591770530988561, "grad_norm": 0.49861037731170654, "learning_rate": 8e-05, "loss": 1.92, "step": 1518 }, { "epoch": 0.2593477889704627, "grad_norm": 0.5455080270767212, "learning_rate": 8e-05, "loss": 1.8496, "step": 1519 }, { "epoch": 0.25951852484206933, "grad_norm": 0.5056236386299133, "learning_rate": 8e-05, "loss": 1.6432, "step": 1520 }, { "epoch": 0.25968926071367593, "grad_norm": 0.4658260643482208, "learning_rate": 8e-05, "loss": 1.603, "step": 1521 }, { "epoch": 0.2598599965852826, "grad_norm": 0.4835836887359619, "learning_rate": 8e-05, "loss": 1.7183, "step": 1522 }, { "epoch": 0.2600307324568892, "grad_norm": 0.4593888521194458, "learning_rate": 8e-05, "loss": 1.7133, "step": 1523 }, { "epoch": 0.26020146832849583, "grad_norm": 0.4513009488582611, "learning_rate": 8e-05, "loss": 1.6923, "step": 1524 }, { "epoch": 0.26037220420010243, "grad_norm": 0.5005597472190857, "learning_rate": 8e-05, "loss": 1.7735, "step": 1525 }, { "epoch": 0.2605429400717091, "grad_norm": 0.4907269775867462, "learning_rate": 8e-05, "loss": 1.6895, "step": 1526 }, { "epoch": 0.2607136759433157, "grad_norm": 0.4578991234302521, "learning_rate": 8e-05, "loss": 1.6985, "step": 1527 }, { "epoch": 0.26088441181492233, "grad_norm": 0.4561558961868286, "learning_rate": 8e-05, "loss": 1.6619, "step": 1528 }, { "epoch": 0.26105514768652893, "grad_norm": 0.44838017225265503, "learning_rate": 8e-05, "loss": 1.5577, "step": 1529 }, { "epoch": 0.2612258835581356, "grad_norm": 0.4868425726890564, "learning_rate": 8e-05, "loss": 1.7482, "step": 1530 }, { "epoch": 0.2613966194297422, "grad_norm": 0.5140555500984192, "learning_rate": 8e-05, "loss": 1.6861, "step": 1531 }, { "epoch": 0.26156735530134884, "grad_norm": 0.4956618845462799, "learning_rate": 8e-05, "loss": 1.8505, "step": 1532 }, { "epoch": 0.26173809117295543, "grad_norm": 0.4723648428916931, "learning_rate": 8e-05, "loss": 1.7403, "step": 1533 }, { "epoch": 0.2619088270445621, "grad_norm": 0.4368148744106293, "learning_rate": 8e-05, "loss": 1.6088, "step": 1534 }, { "epoch": 0.2620795629161687, "grad_norm": 0.4597669839859009, "learning_rate": 8e-05, "loss": 1.6624, "step": 1535 }, { "epoch": 0.26225029878777534, "grad_norm": 0.4537868797779083, "learning_rate": 8e-05, "loss": 1.715, "step": 1536 }, { "epoch": 0.26242103465938194, "grad_norm": 0.5196936726570129, "learning_rate": 8e-05, "loss": 1.9084, "step": 1537 }, { "epoch": 0.26259177053098853, "grad_norm": 0.4504498243331909, "learning_rate": 8e-05, "loss": 1.5989, "step": 1538 }, { "epoch": 0.2627625064025952, "grad_norm": 0.43897855281829834, "learning_rate": 8e-05, "loss": 1.637, "step": 1539 }, { "epoch": 0.2629332422742018, "grad_norm": 0.44508156180381775, "learning_rate": 8e-05, "loss": 1.5107, "step": 1540 }, { "epoch": 0.26310397814580844, "grad_norm": 0.46743929386138916, "learning_rate": 8e-05, "loss": 1.7744, "step": 1541 }, { "epoch": 0.26327471401741503, "grad_norm": 0.45907101035118103, "learning_rate": 8e-05, "loss": 1.805, "step": 1542 }, { "epoch": 0.2634454498890217, "grad_norm": 0.4398573040962219, "learning_rate": 8e-05, "loss": 1.6385, "step": 1543 }, { "epoch": 0.2636161857606283, "grad_norm": 0.47005048394203186, "learning_rate": 8e-05, "loss": 1.729, "step": 1544 }, { "epoch": 0.26378692163223494, "grad_norm": 0.47655534744262695, "learning_rate": 8e-05, "loss": 1.4964, "step": 1545 }, { "epoch": 0.26395765750384154, "grad_norm": 0.48293808102607727, "learning_rate": 8e-05, "loss": 1.6161, "step": 1546 }, { "epoch": 0.2641283933754482, "grad_norm": 0.4613310992717743, "learning_rate": 8e-05, "loss": 1.6981, "step": 1547 }, { "epoch": 0.2642991292470548, "grad_norm": 0.45580053329467773, "learning_rate": 8e-05, "loss": 1.7062, "step": 1548 }, { "epoch": 0.26446986511866144, "grad_norm": 0.463114857673645, "learning_rate": 8e-05, "loss": 1.695, "step": 1549 }, { "epoch": 0.26464060099026804, "grad_norm": 0.43424081802368164, "learning_rate": 8e-05, "loss": 1.5973, "step": 1550 }, { "epoch": 0.2648113368618747, "grad_norm": 0.4792413115501404, "learning_rate": 8e-05, "loss": 1.7708, "step": 1551 }, { "epoch": 0.2649820727334813, "grad_norm": 0.4450448751449585, "learning_rate": 8e-05, "loss": 1.798, "step": 1552 }, { "epoch": 0.26515280860508794, "grad_norm": 0.43676337599754333, "learning_rate": 8e-05, "loss": 1.5727, "step": 1553 }, { "epoch": 0.26532354447669454, "grad_norm": 0.47638964653015137, "learning_rate": 8e-05, "loss": 1.8169, "step": 1554 }, { "epoch": 0.2654942803483012, "grad_norm": 0.44688543677330017, "learning_rate": 8e-05, "loss": 1.6223, "step": 1555 }, { "epoch": 0.2656650162199078, "grad_norm": 0.45689624547958374, "learning_rate": 8e-05, "loss": 1.741, "step": 1556 }, { "epoch": 0.26583575209151444, "grad_norm": 0.4593023359775543, "learning_rate": 8e-05, "loss": 1.7364, "step": 1557 }, { "epoch": 0.26600648796312104, "grad_norm": 0.4342665672302246, "learning_rate": 8e-05, "loss": 1.6161, "step": 1558 }, { "epoch": 0.2661772238347277, "grad_norm": 0.5187017917633057, "learning_rate": 8e-05, "loss": 1.7258, "step": 1559 }, { "epoch": 0.2663479597063343, "grad_norm": 0.46390289068222046, "learning_rate": 8e-05, "loss": 1.6792, "step": 1560 }, { "epoch": 0.26651869557794095, "grad_norm": 0.44367387890815735, "learning_rate": 8e-05, "loss": 1.5716, "step": 1561 }, { "epoch": 0.26668943144954754, "grad_norm": 0.45633143186569214, "learning_rate": 8e-05, "loss": 1.6269, "step": 1562 }, { "epoch": 0.2668601673211542, "grad_norm": 0.432868093252182, "learning_rate": 8e-05, "loss": 1.7329, "step": 1563 }, { "epoch": 0.2670309031927608, "grad_norm": 0.4454665780067444, "learning_rate": 8e-05, "loss": 1.8279, "step": 1564 }, { "epoch": 0.26720163906436745, "grad_norm": 0.47819629311561584, "learning_rate": 8e-05, "loss": 1.6936, "step": 1565 }, { "epoch": 0.26737237493597404, "grad_norm": 0.4410143494606018, "learning_rate": 8e-05, "loss": 1.689, "step": 1566 }, { "epoch": 0.2675431108075807, "grad_norm": 0.4492010176181793, "learning_rate": 8e-05, "loss": 1.6823, "step": 1567 }, { "epoch": 0.2677138466791873, "grad_norm": 0.43376222252845764, "learning_rate": 8e-05, "loss": 1.7399, "step": 1568 }, { "epoch": 0.26788458255079395, "grad_norm": 0.5141605138778687, "learning_rate": 8e-05, "loss": 1.9153, "step": 1569 }, { "epoch": 0.26805531842240055, "grad_norm": 0.494798868894577, "learning_rate": 8e-05, "loss": 1.9218, "step": 1570 }, { "epoch": 0.26822605429400714, "grad_norm": 0.46099698543548584, "learning_rate": 8e-05, "loss": 1.876, "step": 1571 }, { "epoch": 0.2683967901656138, "grad_norm": 0.4571555554866791, "learning_rate": 8e-05, "loss": 1.8387, "step": 1572 }, { "epoch": 0.2685675260372204, "grad_norm": 0.4657588303089142, "learning_rate": 8e-05, "loss": 1.8365, "step": 1573 }, { "epoch": 0.26873826190882705, "grad_norm": 0.507112443447113, "learning_rate": 8e-05, "loss": 1.8409, "step": 1574 }, { "epoch": 0.26890899778043365, "grad_norm": 0.49921607971191406, "learning_rate": 8e-05, "loss": 1.7051, "step": 1575 }, { "epoch": 0.2690797336520403, "grad_norm": 0.4895377457141876, "learning_rate": 8e-05, "loss": 1.7149, "step": 1576 }, { "epoch": 0.2692504695236469, "grad_norm": 0.5588774085044861, "learning_rate": 8e-05, "loss": 1.838, "step": 1577 }, { "epoch": 0.26942120539525355, "grad_norm": 0.46514061093330383, "learning_rate": 8e-05, "loss": 1.7096, "step": 1578 }, { "epoch": 0.26959194126686015, "grad_norm": 0.43270841240882874, "learning_rate": 8e-05, "loss": 1.4824, "step": 1579 }, { "epoch": 0.2697626771384668, "grad_norm": 0.46187517046928406, "learning_rate": 8e-05, "loss": 1.6769, "step": 1580 }, { "epoch": 0.2699334130100734, "grad_norm": 0.4493083953857422, "learning_rate": 8e-05, "loss": 1.8013, "step": 1581 }, { "epoch": 0.27010414888168005, "grad_norm": 0.44827502965927124, "learning_rate": 8e-05, "loss": 1.6242, "step": 1582 }, { "epoch": 0.27027488475328665, "grad_norm": 0.47460314631462097, "learning_rate": 8e-05, "loss": 1.8133, "step": 1583 }, { "epoch": 0.2704456206248933, "grad_norm": 0.46251875162124634, "learning_rate": 8e-05, "loss": 1.7943, "step": 1584 }, { "epoch": 0.2706163564964999, "grad_norm": 0.46181726455688477, "learning_rate": 8e-05, "loss": 1.7873, "step": 1585 }, { "epoch": 0.27078709236810655, "grad_norm": 0.43819934129714966, "learning_rate": 8e-05, "loss": 1.4397, "step": 1586 }, { "epoch": 0.27095782823971315, "grad_norm": 0.5400148630142212, "learning_rate": 8e-05, "loss": 1.6858, "step": 1587 }, { "epoch": 0.2711285641113198, "grad_norm": 0.5476415157318115, "learning_rate": 8e-05, "loss": 1.6639, "step": 1588 }, { "epoch": 0.2712992999829264, "grad_norm": 0.4880874752998352, "learning_rate": 8e-05, "loss": 1.7454, "step": 1589 }, { "epoch": 0.27147003585453305, "grad_norm": 0.503110408782959, "learning_rate": 8e-05, "loss": 1.8175, "step": 1590 }, { "epoch": 0.27164077172613965, "grad_norm": 0.4837026596069336, "learning_rate": 8e-05, "loss": 1.7355, "step": 1591 }, { "epoch": 0.2718115075977463, "grad_norm": 0.4322516918182373, "learning_rate": 8e-05, "loss": 1.6583, "step": 1592 }, { "epoch": 0.2719822434693529, "grad_norm": 0.4649880528450012, "learning_rate": 8e-05, "loss": 1.7558, "step": 1593 }, { "epoch": 0.27215297934095956, "grad_norm": 0.4774116277694702, "learning_rate": 8e-05, "loss": 1.8312, "step": 1594 }, { "epoch": 0.27232371521256615, "grad_norm": 0.42121145129203796, "learning_rate": 8e-05, "loss": 1.6233, "step": 1595 }, { "epoch": 0.2724944510841728, "grad_norm": 0.5109352469444275, "learning_rate": 8e-05, "loss": 1.7573, "step": 1596 }, { "epoch": 0.2726651869557794, "grad_norm": 0.49431997537612915, "learning_rate": 8e-05, "loss": 1.684, "step": 1597 }, { "epoch": 0.27283592282738606, "grad_norm": 0.4429910480976105, "learning_rate": 8e-05, "loss": 1.7105, "step": 1598 }, { "epoch": 0.27300665869899265, "grad_norm": 0.48552629351615906, "learning_rate": 8e-05, "loss": 1.8789, "step": 1599 }, { "epoch": 0.2731773945705993, "grad_norm": 0.45991435647010803, "learning_rate": 8e-05, "loss": 1.5981, "step": 1600 }, { "epoch": 0.2733481304422059, "grad_norm": 0.46999505162239075, "learning_rate": 8e-05, "loss": 1.5525, "step": 1601 }, { "epoch": 0.27351886631381256, "grad_norm": 0.4954102337360382, "learning_rate": 8e-05, "loss": 1.8134, "step": 1602 }, { "epoch": 0.27368960218541916, "grad_norm": 0.4399009346961975, "learning_rate": 8e-05, "loss": 1.6594, "step": 1603 }, { "epoch": 0.27386033805702575, "grad_norm": 0.4869663119316101, "learning_rate": 8e-05, "loss": 1.6896, "step": 1604 }, { "epoch": 0.2740310739286324, "grad_norm": 0.4931175112724304, "learning_rate": 8e-05, "loss": 1.7291, "step": 1605 }, { "epoch": 0.274201809800239, "grad_norm": 0.46451008319854736, "learning_rate": 8e-05, "loss": 1.719, "step": 1606 }, { "epoch": 0.27437254567184566, "grad_norm": 0.45005446672439575, "learning_rate": 8e-05, "loss": 1.7225, "step": 1607 }, { "epoch": 0.27454328154345226, "grad_norm": 0.4577392339706421, "learning_rate": 8e-05, "loss": 1.6259, "step": 1608 }, { "epoch": 0.2747140174150589, "grad_norm": 0.47988778352737427, "learning_rate": 8e-05, "loss": 1.6701, "step": 1609 }, { "epoch": 0.2748847532866655, "grad_norm": 0.43838903307914734, "learning_rate": 8e-05, "loss": 1.596, "step": 1610 }, { "epoch": 0.27505548915827216, "grad_norm": 0.42638707160949707, "learning_rate": 8e-05, "loss": 1.6452, "step": 1611 }, { "epoch": 0.27522622502987876, "grad_norm": 0.45078518986701965, "learning_rate": 8e-05, "loss": 1.6517, "step": 1612 }, { "epoch": 0.2753969609014854, "grad_norm": 0.4469480514526367, "learning_rate": 8e-05, "loss": 1.6708, "step": 1613 }, { "epoch": 0.275567696773092, "grad_norm": 0.4455205202102661, "learning_rate": 8e-05, "loss": 1.6794, "step": 1614 }, { "epoch": 0.27573843264469866, "grad_norm": 0.45533713698387146, "learning_rate": 8e-05, "loss": 1.7302, "step": 1615 }, { "epoch": 0.27590916851630526, "grad_norm": 0.47743311524391174, "learning_rate": 8e-05, "loss": 1.8999, "step": 1616 }, { "epoch": 0.2760799043879119, "grad_norm": 0.4906444847583771, "learning_rate": 8e-05, "loss": 1.7871, "step": 1617 }, { "epoch": 0.2762506402595185, "grad_norm": 0.4617570638656616, "learning_rate": 8e-05, "loss": 1.7297, "step": 1618 }, { "epoch": 0.27642137613112516, "grad_norm": 0.46680310368537903, "learning_rate": 8e-05, "loss": 1.6354, "step": 1619 }, { "epoch": 0.27659211200273176, "grad_norm": 0.49134647846221924, "learning_rate": 8e-05, "loss": 1.7142, "step": 1620 }, { "epoch": 0.2767628478743384, "grad_norm": 0.45954087376594543, "learning_rate": 8e-05, "loss": 1.6331, "step": 1621 }, { "epoch": 0.276933583745945, "grad_norm": 0.43581119179725647, "learning_rate": 8e-05, "loss": 1.6143, "step": 1622 }, { "epoch": 0.27710431961755166, "grad_norm": 0.4439903199672699, "learning_rate": 8e-05, "loss": 1.7557, "step": 1623 }, { "epoch": 0.27727505548915826, "grad_norm": 0.42626023292541504, "learning_rate": 8e-05, "loss": 1.5316, "step": 1624 }, { "epoch": 0.2774457913607649, "grad_norm": 0.4549587368965149, "learning_rate": 8e-05, "loss": 1.8646, "step": 1625 }, { "epoch": 0.2776165272323715, "grad_norm": 0.4526881277561188, "learning_rate": 8e-05, "loss": 1.772, "step": 1626 }, { "epoch": 0.27778726310397817, "grad_norm": 0.4710768163204193, "learning_rate": 8e-05, "loss": 1.7137, "step": 1627 }, { "epoch": 0.27795799897558476, "grad_norm": 0.4626278579235077, "learning_rate": 8e-05, "loss": 1.5567, "step": 1628 }, { "epoch": 0.2781287348471914, "grad_norm": 0.48504939675331116, "learning_rate": 8e-05, "loss": 1.6659, "step": 1629 }, { "epoch": 0.278299470718798, "grad_norm": 0.46209263801574707, "learning_rate": 8e-05, "loss": 1.7125, "step": 1630 }, { "epoch": 0.27847020659040467, "grad_norm": 0.4432145059108734, "learning_rate": 8e-05, "loss": 1.5988, "step": 1631 }, { "epoch": 0.27864094246201127, "grad_norm": 0.44664859771728516, "learning_rate": 8e-05, "loss": 1.6857, "step": 1632 }, { "epoch": 0.2788116783336179, "grad_norm": 0.4396354854106903, "learning_rate": 8e-05, "loss": 1.5326, "step": 1633 }, { "epoch": 0.2789824142052245, "grad_norm": 0.45460736751556396, "learning_rate": 8e-05, "loss": 1.7045, "step": 1634 }, { "epoch": 0.27915315007683117, "grad_norm": 0.4497687518596649, "learning_rate": 8e-05, "loss": 1.7262, "step": 1635 }, { "epoch": 0.27932388594843777, "grad_norm": 0.45364999771118164, "learning_rate": 8e-05, "loss": 1.7227, "step": 1636 }, { "epoch": 0.27949462182004436, "grad_norm": 0.4364113509654999, "learning_rate": 8e-05, "loss": 1.5539, "step": 1637 }, { "epoch": 0.279665357691651, "grad_norm": 0.473797470331192, "learning_rate": 8e-05, "loss": 1.6315, "step": 1638 }, { "epoch": 0.2798360935632576, "grad_norm": 0.463115394115448, "learning_rate": 8e-05, "loss": 1.8457, "step": 1639 }, { "epoch": 0.28000682943486427, "grad_norm": 0.4542536437511444, "learning_rate": 8e-05, "loss": 1.6233, "step": 1640 }, { "epoch": 0.28017756530647087, "grad_norm": 0.4237498641014099, "learning_rate": 8e-05, "loss": 1.6828, "step": 1641 }, { "epoch": 0.2803483011780775, "grad_norm": 0.4852365553379059, "learning_rate": 8e-05, "loss": 1.8248, "step": 1642 }, { "epoch": 0.2805190370496841, "grad_norm": 0.4478296637535095, "learning_rate": 8e-05, "loss": 1.6247, "step": 1643 }, { "epoch": 0.28068977292129077, "grad_norm": 0.4511982500553131, "learning_rate": 8e-05, "loss": 1.691, "step": 1644 }, { "epoch": 0.28086050879289737, "grad_norm": 0.506106972694397, "learning_rate": 8e-05, "loss": 1.8181, "step": 1645 }, { "epoch": 0.281031244664504, "grad_norm": 0.4334641098976135, "learning_rate": 8e-05, "loss": 1.6606, "step": 1646 }, { "epoch": 0.2812019805361106, "grad_norm": 0.4845578074455261, "learning_rate": 8e-05, "loss": 1.7056, "step": 1647 }, { "epoch": 0.28137271640771727, "grad_norm": 0.5252092480659485, "learning_rate": 8e-05, "loss": 1.849, "step": 1648 }, { "epoch": 0.28154345227932387, "grad_norm": 0.46454888582229614, "learning_rate": 8e-05, "loss": 1.7107, "step": 1649 }, { "epoch": 0.2817141881509305, "grad_norm": 0.4618401825428009, "learning_rate": 8e-05, "loss": 1.6062, "step": 1650 }, { "epoch": 0.2818849240225371, "grad_norm": 0.441863477230072, "learning_rate": 8e-05, "loss": 1.7822, "step": 1651 }, { "epoch": 0.2820556598941438, "grad_norm": 0.43069204688072205, "learning_rate": 8e-05, "loss": 1.7933, "step": 1652 }, { "epoch": 0.28222639576575037, "grad_norm": 0.491485059261322, "learning_rate": 8e-05, "loss": 1.7797, "step": 1653 }, { "epoch": 0.282397131637357, "grad_norm": 0.5368055105209351, "learning_rate": 8e-05, "loss": 1.8286, "step": 1654 }, { "epoch": 0.2825678675089636, "grad_norm": 0.45581451058387756, "learning_rate": 8e-05, "loss": 1.7169, "step": 1655 }, { "epoch": 0.2827386033805703, "grad_norm": 0.4418584406375885, "learning_rate": 8e-05, "loss": 1.7562, "step": 1656 }, { "epoch": 0.2829093392521769, "grad_norm": 0.47766366600990295, "learning_rate": 8e-05, "loss": 1.7629, "step": 1657 }, { "epoch": 0.2830800751237835, "grad_norm": 0.49151811003685, "learning_rate": 8e-05, "loss": 1.9315, "step": 1658 }, { "epoch": 0.2832508109953901, "grad_norm": 0.44391706585884094, "learning_rate": 8e-05, "loss": 1.7407, "step": 1659 }, { "epoch": 0.2834215468669968, "grad_norm": 0.4346827566623688, "learning_rate": 8e-05, "loss": 1.7108, "step": 1660 }, { "epoch": 0.2835922827386034, "grad_norm": 0.46341919898986816, "learning_rate": 8e-05, "loss": 1.6589, "step": 1661 }, { "epoch": 0.28376301861021, "grad_norm": 0.4593949317932129, "learning_rate": 8e-05, "loss": 1.5864, "step": 1662 }, { "epoch": 0.2839337544818166, "grad_norm": 0.4569384753704071, "learning_rate": 8e-05, "loss": 1.6334, "step": 1663 }, { "epoch": 0.2841044903534233, "grad_norm": 0.5555171370506287, "learning_rate": 8e-05, "loss": 1.9902, "step": 1664 }, { "epoch": 0.2842752262250299, "grad_norm": 0.43992993235588074, "learning_rate": 8e-05, "loss": 1.5747, "step": 1665 }, { "epoch": 0.28444596209663653, "grad_norm": 0.4771362841129303, "learning_rate": 8e-05, "loss": 1.8466, "step": 1666 }, { "epoch": 0.2846166979682431, "grad_norm": 0.4606800377368927, "learning_rate": 8e-05, "loss": 1.573, "step": 1667 }, { "epoch": 0.2847874338398498, "grad_norm": 0.4450996220111847, "learning_rate": 8e-05, "loss": 1.5699, "step": 1668 }, { "epoch": 0.2849581697114564, "grad_norm": 0.4894962012767792, "learning_rate": 8e-05, "loss": 1.8681, "step": 1669 }, { "epoch": 0.285128905583063, "grad_norm": 0.445442795753479, "learning_rate": 8e-05, "loss": 1.8283, "step": 1670 }, { "epoch": 0.28529964145466963, "grad_norm": 0.4531092047691345, "learning_rate": 8e-05, "loss": 1.7238, "step": 1671 }, { "epoch": 0.2854703773262762, "grad_norm": 0.4916914999485016, "learning_rate": 8e-05, "loss": 1.6534, "step": 1672 }, { "epoch": 0.2856411131978829, "grad_norm": 0.5262187719345093, "learning_rate": 8e-05, "loss": 2.1388, "step": 1673 }, { "epoch": 0.2858118490694895, "grad_norm": 0.4797658324241638, "learning_rate": 8e-05, "loss": 1.8045, "step": 1674 }, { "epoch": 0.28598258494109613, "grad_norm": 0.4381534457206726, "learning_rate": 8e-05, "loss": 1.6447, "step": 1675 }, { "epoch": 0.2861533208127027, "grad_norm": 0.4805016815662384, "learning_rate": 8e-05, "loss": 1.7207, "step": 1676 }, { "epoch": 0.2863240566843094, "grad_norm": 0.45608630776405334, "learning_rate": 8e-05, "loss": 1.691, "step": 1677 }, { "epoch": 0.286494792555916, "grad_norm": 0.504036009311676, "learning_rate": 8e-05, "loss": 1.7402, "step": 1678 }, { "epoch": 0.28666552842752263, "grad_norm": 0.4788033664226532, "learning_rate": 8e-05, "loss": 1.9942, "step": 1679 }, { "epoch": 0.28683626429912923, "grad_norm": 0.45475250482559204, "learning_rate": 8e-05, "loss": 1.7459, "step": 1680 }, { "epoch": 0.2870070001707359, "grad_norm": 0.4710817337036133, "learning_rate": 8e-05, "loss": 1.5954, "step": 1681 }, { "epoch": 0.2871777360423425, "grad_norm": 0.4852932095527649, "learning_rate": 8e-05, "loss": 1.7153, "step": 1682 }, { "epoch": 0.28734847191394913, "grad_norm": 0.4617898464202881, "learning_rate": 8e-05, "loss": 1.6262, "step": 1683 }, { "epoch": 0.28751920778555573, "grad_norm": 0.4153468608856201, "learning_rate": 8e-05, "loss": 1.6156, "step": 1684 }, { "epoch": 0.2876899436571624, "grad_norm": 0.463250994682312, "learning_rate": 8e-05, "loss": 1.8098, "step": 1685 }, { "epoch": 0.287860679528769, "grad_norm": 0.4427756667137146, "learning_rate": 8e-05, "loss": 1.7523, "step": 1686 }, { "epoch": 0.28803141540037563, "grad_norm": 0.4407213628292084, "learning_rate": 8e-05, "loss": 1.6232, "step": 1687 }, { "epoch": 0.28820215127198223, "grad_norm": 0.4604834020137787, "learning_rate": 8e-05, "loss": 1.6613, "step": 1688 }, { "epoch": 0.2883728871435889, "grad_norm": 0.4669470191001892, "learning_rate": 8e-05, "loss": 1.6542, "step": 1689 }, { "epoch": 0.2885436230151955, "grad_norm": 0.4409142732620239, "learning_rate": 8e-05, "loss": 1.623, "step": 1690 }, { "epoch": 0.28871435888680214, "grad_norm": 0.4705773890018463, "learning_rate": 8e-05, "loss": 1.7314, "step": 1691 }, { "epoch": 0.28888509475840873, "grad_norm": 0.43847280740737915, "learning_rate": 8e-05, "loss": 1.5989, "step": 1692 }, { "epoch": 0.2890558306300154, "grad_norm": 0.49963927268981934, "learning_rate": 8e-05, "loss": 1.8298, "step": 1693 }, { "epoch": 0.289226566501622, "grad_norm": 0.49244287610054016, "learning_rate": 8e-05, "loss": 1.6457, "step": 1694 }, { "epoch": 0.28939730237322864, "grad_norm": 0.4581116735935211, "learning_rate": 8e-05, "loss": 1.8516, "step": 1695 }, { "epoch": 0.28956803824483524, "grad_norm": 0.4674279987812042, "learning_rate": 8e-05, "loss": 1.6749, "step": 1696 }, { "epoch": 0.2897387741164419, "grad_norm": 0.46406012773513794, "learning_rate": 8e-05, "loss": 1.7427, "step": 1697 }, { "epoch": 0.2899095099880485, "grad_norm": 0.438199520111084, "learning_rate": 8e-05, "loss": 1.6182, "step": 1698 }, { "epoch": 0.29008024585965514, "grad_norm": 0.533038318157196, "learning_rate": 8e-05, "loss": 1.6252, "step": 1699 }, { "epoch": 0.29025098173126174, "grad_norm": 0.5212662220001221, "learning_rate": 8e-05, "loss": 1.6847, "step": 1700 }, { "epoch": 0.2904217176028684, "grad_norm": 0.5334706902503967, "learning_rate": 8e-05, "loss": 1.692, "step": 1701 }, { "epoch": 0.290592453474475, "grad_norm": 0.4730949401855469, "learning_rate": 8e-05, "loss": 1.6191, "step": 1702 }, { "epoch": 0.2907631893460816, "grad_norm": 0.46200692653656006, "learning_rate": 8e-05, "loss": 1.63, "step": 1703 }, { "epoch": 0.29093392521768824, "grad_norm": 0.5091993808746338, "learning_rate": 8e-05, "loss": 1.9435, "step": 1704 }, { "epoch": 0.29110466108929484, "grad_norm": 0.46678945422172546, "learning_rate": 8e-05, "loss": 1.703, "step": 1705 }, { "epoch": 0.2912753969609015, "grad_norm": 0.5043109655380249, "learning_rate": 8e-05, "loss": 1.7102, "step": 1706 }, { "epoch": 0.2914461328325081, "grad_norm": 0.5109253525733948, "learning_rate": 8e-05, "loss": 1.8066, "step": 1707 }, { "epoch": 0.29161686870411474, "grad_norm": 0.47604069113731384, "learning_rate": 8e-05, "loss": 1.6776, "step": 1708 }, { "epoch": 0.29178760457572134, "grad_norm": 0.5071348547935486, "learning_rate": 8e-05, "loss": 1.7806, "step": 1709 }, { "epoch": 0.291958340447328, "grad_norm": 0.4663616418838501, "learning_rate": 8e-05, "loss": 1.7466, "step": 1710 }, { "epoch": 0.2921290763189346, "grad_norm": 0.4584963619709015, "learning_rate": 8e-05, "loss": 1.7687, "step": 1711 }, { "epoch": 0.29229981219054124, "grad_norm": 0.5053565502166748, "learning_rate": 8e-05, "loss": 1.8496, "step": 1712 }, { "epoch": 0.29247054806214784, "grad_norm": 0.48187246918678284, "learning_rate": 8e-05, "loss": 1.7743, "step": 1713 }, { "epoch": 0.2926412839337545, "grad_norm": 0.5052699446678162, "learning_rate": 8e-05, "loss": 1.857, "step": 1714 }, { "epoch": 0.2928120198053611, "grad_norm": 0.4405006468296051, "learning_rate": 8e-05, "loss": 1.7208, "step": 1715 }, { "epoch": 0.29298275567696774, "grad_norm": 0.4562983512878418, "learning_rate": 8e-05, "loss": 1.6238, "step": 1716 }, { "epoch": 0.29315349154857434, "grad_norm": 0.45098358392715454, "learning_rate": 8e-05, "loss": 1.7535, "step": 1717 }, { "epoch": 0.293324227420181, "grad_norm": 0.45628821849823, "learning_rate": 8e-05, "loss": 1.759, "step": 1718 }, { "epoch": 0.2934949632917876, "grad_norm": 0.44430607557296753, "learning_rate": 8e-05, "loss": 1.6621, "step": 1719 }, { "epoch": 0.29366569916339424, "grad_norm": 0.4588455855846405, "learning_rate": 8e-05, "loss": 1.9085, "step": 1720 }, { "epoch": 0.29383643503500084, "grad_norm": 0.4654273986816406, "learning_rate": 8e-05, "loss": 1.6667, "step": 1721 }, { "epoch": 0.2940071709066075, "grad_norm": 0.5048471093177795, "learning_rate": 8e-05, "loss": 1.7539, "step": 1722 }, { "epoch": 0.2941779067782141, "grad_norm": 0.5083041191101074, "learning_rate": 8e-05, "loss": 1.681, "step": 1723 }, { "epoch": 0.29434864264982075, "grad_norm": 0.4613249897956848, "learning_rate": 8e-05, "loss": 1.8498, "step": 1724 }, { "epoch": 0.29451937852142734, "grad_norm": 0.47101473808288574, "learning_rate": 8e-05, "loss": 1.713, "step": 1725 }, { "epoch": 0.294690114393034, "grad_norm": 0.4632411599159241, "learning_rate": 8e-05, "loss": 1.8187, "step": 1726 }, { "epoch": 0.2948608502646406, "grad_norm": 0.47957465052604675, "learning_rate": 8e-05, "loss": 1.7863, "step": 1727 }, { "epoch": 0.29503158613624725, "grad_norm": 0.44779446721076965, "learning_rate": 8e-05, "loss": 1.5933, "step": 1728 }, { "epoch": 0.29520232200785385, "grad_norm": 0.48290175199508667, "learning_rate": 8e-05, "loss": 1.7812, "step": 1729 }, { "epoch": 0.2953730578794605, "grad_norm": 0.46204784512519836, "learning_rate": 8e-05, "loss": 1.7865, "step": 1730 }, { "epoch": 0.2955437937510671, "grad_norm": 0.49414366483688354, "learning_rate": 8e-05, "loss": 1.5279, "step": 1731 }, { "epoch": 0.29571452962267375, "grad_norm": 0.5263096690177917, "learning_rate": 8e-05, "loss": 1.7289, "step": 1732 }, { "epoch": 0.29588526549428035, "grad_norm": 0.4752468168735504, "learning_rate": 8e-05, "loss": 1.7048, "step": 1733 }, { "epoch": 0.296056001365887, "grad_norm": 0.5121238231658936, "learning_rate": 8e-05, "loss": 1.8788, "step": 1734 }, { "epoch": 0.2962267372374936, "grad_norm": 0.4649833142757416, "learning_rate": 8e-05, "loss": 1.5928, "step": 1735 }, { "epoch": 0.2963974731091002, "grad_norm": 0.501901388168335, "learning_rate": 8e-05, "loss": 1.8029, "step": 1736 }, { "epoch": 0.29656820898070685, "grad_norm": 0.43837085366249084, "learning_rate": 8e-05, "loss": 1.6339, "step": 1737 }, { "epoch": 0.29673894485231345, "grad_norm": 0.454603374004364, "learning_rate": 8e-05, "loss": 1.7095, "step": 1738 }, { "epoch": 0.2969096807239201, "grad_norm": 0.4827982485294342, "learning_rate": 8e-05, "loss": 1.7129, "step": 1739 }, { "epoch": 0.2970804165955267, "grad_norm": 0.4156411290168762, "learning_rate": 8e-05, "loss": 1.4909, "step": 1740 }, { "epoch": 0.29725115246713335, "grad_norm": 0.4631652235984802, "learning_rate": 8e-05, "loss": 1.6075, "step": 1741 }, { "epoch": 0.29742188833873995, "grad_norm": 0.48221030831336975, "learning_rate": 8e-05, "loss": 1.7186, "step": 1742 }, { "epoch": 0.2975926242103466, "grad_norm": 0.45653098821640015, "learning_rate": 8e-05, "loss": 1.7551, "step": 1743 }, { "epoch": 0.2977633600819532, "grad_norm": 0.4537773132324219, "learning_rate": 8e-05, "loss": 1.7451, "step": 1744 }, { "epoch": 0.29793409595355985, "grad_norm": 0.46289128065109253, "learning_rate": 8e-05, "loss": 1.8211, "step": 1745 }, { "epoch": 0.29810483182516645, "grad_norm": 0.44398775696754456, "learning_rate": 8e-05, "loss": 1.7966, "step": 1746 }, { "epoch": 0.2982755676967731, "grad_norm": 0.453146368265152, "learning_rate": 8e-05, "loss": 1.6606, "step": 1747 }, { "epoch": 0.2984463035683797, "grad_norm": 0.5248463153839111, "learning_rate": 8e-05, "loss": 1.8975, "step": 1748 }, { "epoch": 0.29861703943998635, "grad_norm": 0.4540226459503174, "learning_rate": 8e-05, "loss": 1.8479, "step": 1749 }, { "epoch": 0.29878777531159295, "grad_norm": 0.49934661388397217, "learning_rate": 8e-05, "loss": 1.7709, "step": 1750 }, { "epoch": 0.2989585111831996, "grad_norm": 0.47629573941230774, "learning_rate": 8e-05, "loss": 1.9185, "step": 1751 }, { "epoch": 0.2991292470548062, "grad_norm": 0.43592336773872375, "learning_rate": 8e-05, "loss": 1.6117, "step": 1752 }, { "epoch": 0.29929998292641286, "grad_norm": 0.4489610493183136, "learning_rate": 8e-05, "loss": 1.568, "step": 1753 }, { "epoch": 0.29947071879801945, "grad_norm": 0.4353593587875366, "learning_rate": 8e-05, "loss": 1.7073, "step": 1754 }, { "epoch": 0.2996414546696261, "grad_norm": 0.4886056184768677, "learning_rate": 8e-05, "loss": 1.743, "step": 1755 }, { "epoch": 0.2998121905412327, "grad_norm": 0.47069793939590454, "learning_rate": 8e-05, "loss": 1.7299, "step": 1756 }, { "epoch": 0.29998292641283936, "grad_norm": 0.47148939967155457, "learning_rate": 8e-05, "loss": 1.7445, "step": 1757 }, { "epoch": 0.30015366228444595, "grad_norm": 0.4590200185775757, "learning_rate": 8e-05, "loss": 1.7379, "step": 1758 }, { "epoch": 0.3003243981560526, "grad_norm": 0.44023841619491577, "learning_rate": 8e-05, "loss": 1.7612, "step": 1759 }, { "epoch": 0.3004951340276592, "grad_norm": 0.5302191972732544, "learning_rate": 8e-05, "loss": 1.6093, "step": 1760 }, { "epoch": 0.30066586989926586, "grad_norm": 0.5012679100036621, "learning_rate": 8e-05, "loss": 1.7494, "step": 1761 }, { "epoch": 0.30083660577087246, "grad_norm": 0.46323317289352417, "learning_rate": 8e-05, "loss": 1.4599, "step": 1762 }, { "epoch": 0.3010073416424791, "grad_norm": 0.4694952368736267, "learning_rate": 8e-05, "loss": 1.7805, "step": 1763 }, { "epoch": 0.3011780775140857, "grad_norm": 0.44524136185646057, "learning_rate": 8e-05, "loss": 1.6204, "step": 1764 }, { "epoch": 0.30134881338569236, "grad_norm": 0.49193480610847473, "learning_rate": 8e-05, "loss": 1.8238, "step": 1765 }, { "epoch": 0.30151954925729896, "grad_norm": 0.4415793716907501, "learning_rate": 8e-05, "loss": 1.6687, "step": 1766 }, { "epoch": 0.30169028512890556, "grad_norm": 0.478651225566864, "learning_rate": 8e-05, "loss": 1.7652, "step": 1767 }, { "epoch": 0.3018610210005122, "grad_norm": 0.4865941107273102, "learning_rate": 8e-05, "loss": 1.8592, "step": 1768 }, { "epoch": 0.3020317568721188, "grad_norm": 0.4679223895072937, "learning_rate": 8e-05, "loss": 1.5586, "step": 1769 }, { "epoch": 0.30220249274372546, "grad_norm": 0.5274567604064941, "learning_rate": 8e-05, "loss": 1.7149, "step": 1770 }, { "epoch": 0.30237322861533206, "grad_norm": 0.4477078318595886, "learning_rate": 8e-05, "loss": 1.6207, "step": 1771 }, { "epoch": 0.3025439644869387, "grad_norm": 0.5097717642784119, "learning_rate": 8e-05, "loss": 1.9658, "step": 1772 }, { "epoch": 0.3027147003585453, "grad_norm": 0.5321958661079407, "learning_rate": 8e-05, "loss": 1.8662, "step": 1773 }, { "epoch": 0.30288543623015196, "grad_norm": 0.4535421133041382, "learning_rate": 8e-05, "loss": 1.5251, "step": 1774 }, { "epoch": 0.30305617210175856, "grad_norm": 0.49565190076828003, "learning_rate": 8e-05, "loss": 1.6815, "step": 1775 }, { "epoch": 0.3032269079733652, "grad_norm": 0.46760210394859314, "learning_rate": 8e-05, "loss": 1.809, "step": 1776 }, { "epoch": 0.3033976438449718, "grad_norm": 0.45839571952819824, "learning_rate": 8e-05, "loss": 1.6136, "step": 1777 }, { "epoch": 0.30356837971657846, "grad_norm": 0.47626128792762756, "learning_rate": 8e-05, "loss": 1.6653, "step": 1778 }, { "epoch": 0.30373911558818506, "grad_norm": 0.43720030784606934, "learning_rate": 8e-05, "loss": 1.6225, "step": 1779 }, { "epoch": 0.3039098514597917, "grad_norm": 0.4689311981201172, "learning_rate": 8e-05, "loss": 1.7147, "step": 1780 }, { "epoch": 0.3040805873313983, "grad_norm": 0.45504194498062134, "learning_rate": 8e-05, "loss": 1.7892, "step": 1781 }, { "epoch": 0.30425132320300496, "grad_norm": 0.47290125489234924, "learning_rate": 8e-05, "loss": 1.7757, "step": 1782 }, { "epoch": 0.30442205907461156, "grad_norm": 0.43989017605781555, "learning_rate": 8e-05, "loss": 1.5014, "step": 1783 }, { "epoch": 0.3045927949462182, "grad_norm": 0.49053317308425903, "learning_rate": 8e-05, "loss": 1.7144, "step": 1784 }, { "epoch": 0.3047635308178248, "grad_norm": 0.4525509774684906, "learning_rate": 8e-05, "loss": 1.7396, "step": 1785 }, { "epoch": 0.30493426668943147, "grad_norm": 0.4351325035095215, "learning_rate": 8e-05, "loss": 1.5936, "step": 1786 }, { "epoch": 0.30510500256103806, "grad_norm": 0.4627186357975006, "learning_rate": 8e-05, "loss": 1.7062, "step": 1787 }, { "epoch": 0.3052757384326447, "grad_norm": 0.4677678048610687, "learning_rate": 8e-05, "loss": 1.8206, "step": 1788 }, { "epoch": 0.3054464743042513, "grad_norm": 0.43497374653816223, "learning_rate": 8e-05, "loss": 1.7931, "step": 1789 }, { "epoch": 0.30561721017585797, "grad_norm": 0.4284355044364929, "learning_rate": 8e-05, "loss": 1.5289, "step": 1790 }, { "epoch": 0.30578794604746456, "grad_norm": 0.44523701071739197, "learning_rate": 8e-05, "loss": 1.7647, "step": 1791 }, { "epoch": 0.3059586819190712, "grad_norm": 0.4731326401233673, "learning_rate": 8e-05, "loss": 1.7808, "step": 1792 }, { "epoch": 0.3061294177906778, "grad_norm": 0.45552337169647217, "learning_rate": 8e-05, "loss": 1.6882, "step": 1793 }, { "epoch": 0.30630015366228447, "grad_norm": 0.47729751467704773, "learning_rate": 8e-05, "loss": 1.7823, "step": 1794 }, { "epoch": 0.30647088953389107, "grad_norm": 0.4952051639556885, "learning_rate": 8e-05, "loss": 1.7466, "step": 1795 }, { "epoch": 0.3066416254054977, "grad_norm": 0.4492247998714447, "learning_rate": 8e-05, "loss": 1.7294, "step": 1796 }, { "epoch": 0.3068123612771043, "grad_norm": 0.5252705216407776, "learning_rate": 8e-05, "loss": 1.9098, "step": 1797 }, { "epoch": 0.30698309714871097, "grad_norm": 0.4647979140281677, "learning_rate": 8e-05, "loss": 1.8664, "step": 1798 }, { "epoch": 0.30715383302031757, "grad_norm": 0.5148090124130249, "learning_rate": 8e-05, "loss": 1.9944, "step": 1799 }, { "epoch": 0.30732456889192417, "grad_norm": 0.45040416717529297, "learning_rate": 8e-05, "loss": 1.5965, "step": 1800 }, { "epoch": 0.3074953047635308, "grad_norm": 0.47491654753685, "learning_rate": 8e-05, "loss": 1.6023, "step": 1801 }, { "epoch": 0.3076660406351374, "grad_norm": 0.46935343742370605, "learning_rate": 8e-05, "loss": 1.5476, "step": 1802 }, { "epoch": 0.30783677650674407, "grad_norm": 0.46137306094169617, "learning_rate": 8e-05, "loss": 1.7601, "step": 1803 }, { "epoch": 0.30800751237835067, "grad_norm": 0.4400305151939392, "learning_rate": 8e-05, "loss": 1.63, "step": 1804 }, { "epoch": 0.3081782482499573, "grad_norm": 0.46004560589790344, "learning_rate": 8e-05, "loss": 1.7007, "step": 1805 }, { "epoch": 0.3083489841215639, "grad_norm": 0.4536219537258148, "learning_rate": 8e-05, "loss": 1.7504, "step": 1806 }, { "epoch": 0.30851971999317057, "grad_norm": 0.4254974126815796, "learning_rate": 8e-05, "loss": 1.6995, "step": 1807 }, { "epoch": 0.30869045586477717, "grad_norm": 0.4680779278278351, "learning_rate": 8e-05, "loss": 1.7225, "step": 1808 }, { "epoch": 0.3088611917363838, "grad_norm": 0.5138937830924988, "learning_rate": 8e-05, "loss": 1.6816, "step": 1809 }, { "epoch": 0.3090319276079904, "grad_norm": 0.449569970369339, "learning_rate": 8e-05, "loss": 1.8207, "step": 1810 }, { "epoch": 0.3092026634795971, "grad_norm": 0.4760797321796417, "learning_rate": 8e-05, "loss": 1.6699, "step": 1811 }, { "epoch": 0.30937339935120367, "grad_norm": 0.43567976355552673, "learning_rate": 8e-05, "loss": 1.5584, "step": 1812 }, { "epoch": 0.3095441352228103, "grad_norm": 0.48296472430229187, "learning_rate": 8e-05, "loss": 1.7364, "step": 1813 }, { "epoch": 0.3097148710944169, "grad_norm": 0.48725929856300354, "learning_rate": 8e-05, "loss": 1.7023, "step": 1814 }, { "epoch": 0.3098856069660236, "grad_norm": 0.4501340985298157, "learning_rate": 8e-05, "loss": 1.6731, "step": 1815 }, { "epoch": 0.31005634283763017, "grad_norm": 0.4679238498210907, "learning_rate": 8e-05, "loss": 1.6127, "step": 1816 }, { "epoch": 0.3102270787092368, "grad_norm": 0.47578516602516174, "learning_rate": 8e-05, "loss": 1.7645, "step": 1817 }, { "epoch": 0.3103978145808434, "grad_norm": 0.45573294162750244, "learning_rate": 8e-05, "loss": 1.6018, "step": 1818 }, { "epoch": 0.3105685504524501, "grad_norm": 0.4684441089630127, "learning_rate": 8e-05, "loss": 1.64, "step": 1819 }, { "epoch": 0.3107392863240567, "grad_norm": 0.45673125982284546, "learning_rate": 8e-05, "loss": 1.6879, "step": 1820 }, { "epoch": 0.3109100221956633, "grad_norm": 0.4392636716365814, "learning_rate": 8e-05, "loss": 1.6088, "step": 1821 }, { "epoch": 0.3110807580672699, "grad_norm": 0.48075130581855774, "learning_rate": 8e-05, "loss": 1.6845, "step": 1822 }, { "epoch": 0.3112514939388766, "grad_norm": 0.43586552143096924, "learning_rate": 8e-05, "loss": 1.7162, "step": 1823 }, { "epoch": 0.3114222298104832, "grad_norm": 0.4609968662261963, "learning_rate": 8e-05, "loss": 1.7388, "step": 1824 }, { "epoch": 0.31159296568208983, "grad_norm": 0.4612633287906647, "learning_rate": 8e-05, "loss": 1.6695, "step": 1825 }, { "epoch": 0.3117637015536964, "grad_norm": 0.4732525050640106, "learning_rate": 8e-05, "loss": 1.7939, "step": 1826 }, { "epoch": 0.3119344374253031, "grad_norm": 0.45165717601776123, "learning_rate": 8e-05, "loss": 1.63, "step": 1827 }, { "epoch": 0.3121051732969097, "grad_norm": 0.46846261620521545, "learning_rate": 8e-05, "loss": 1.8698, "step": 1828 }, { "epoch": 0.31227590916851633, "grad_norm": 0.48738765716552734, "learning_rate": 8e-05, "loss": 1.8405, "step": 1829 }, { "epoch": 0.31244664504012293, "grad_norm": 0.44526076316833496, "learning_rate": 8e-05, "loss": 1.7222, "step": 1830 }, { "epoch": 0.3126173809117296, "grad_norm": 0.4548918902873993, "learning_rate": 8e-05, "loss": 1.789, "step": 1831 }, { "epoch": 0.3127881167833362, "grad_norm": 0.45914945006370544, "learning_rate": 8e-05, "loss": 1.6092, "step": 1832 }, { "epoch": 0.3129588526549428, "grad_norm": 0.47654423117637634, "learning_rate": 8e-05, "loss": 1.8258, "step": 1833 }, { "epoch": 0.31312958852654943, "grad_norm": 0.45412322878837585, "learning_rate": 8e-05, "loss": 1.6288, "step": 1834 }, { "epoch": 0.313300324398156, "grad_norm": 0.43396657705307007, "learning_rate": 8e-05, "loss": 1.6116, "step": 1835 }, { "epoch": 0.3134710602697627, "grad_norm": 0.4451257288455963, "learning_rate": 8e-05, "loss": 1.6179, "step": 1836 }, { "epoch": 0.3136417961413693, "grad_norm": 0.4618985652923584, "learning_rate": 8e-05, "loss": 1.6935, "step": 1837 }, { "epoch": 0.31381253201297593, "grad_norm": 0.43554234504699707, "learning_rate": 8e-05, "loss": 1.5174, "step": 1838 }, { "epoch": 0.31398326788458253, "grad_norm": 0.4544583261013031, "learning_rate": 8e-05, "loss": 1.6557, "step": 1839 }, { "epoch": 0.3141540037561892, "grad_norm": 0.489035427570343, "learning_rate": 8e-05, "loss": 1.6808, "step": 1840 }, { "epoch": 0.3143247396277958, "grad_norm": 0.45419755578041077, "learning_rate": 8e-05, "loss": 1.6748, "step": 1841 }, { "epoch": 0.31449547549940243, "grad_norm": 0.4600871503353119, "learning_rate": 8e-05, "loss": 1.7916, "step": 1842 }, { "epoch": 0.31466621137100903, "grad_norm": 0.46299582719802856, "learning_rate": 8e-05, "loss": 1.7602, "step": 1843 }, { "epoch": 0.3148369472426157, "grad_norm": 0.44662153720855713, "learning_rate": 8e-05, "loss": 1.7433, "step": 1844 }, { "epoch": 0.3150076831142223, "grad_norm": 0.5171521902084351, "learning_rate": 8e-05, "loss": 1.8792, "step": 1845 }, { "epoch": 0.31517841898582893, "grad_norm": 0.5028080940246582, "learning_rate": 8e-05, "loss": 1.5604, "step": 1846 }, { "epoch": 0.31534915485743553, "grad_norm": 0.48180991411209106, "learning_rate": 8e-05, "loss": 1.7144, "step": 1847 }, { "epoch": 0.3155198907290422, "grad_norm": 0.47995129227638245, "learning_rate": 8e-05, "loss": 1.7051, "step": 1848 }, { "epoch": 0.3156906266006488, "grad_norm": 0.4633718430995941, "learning_rate": 8e-05, "loss": 1.6399, "step": 1849 }, { "epoch": 0.31586136247225544, "grad_norm": 0.5466080904006958, "learning_rate": 8e-05, "loss": 1.6553, "step": 1850 }, { "epoch": 0.31603209834386203, "grad_norm": 0.4920186400413513, "learning_rate": 8e-05, "loss": 1.7278, "step": 1851 }, { "epoch": 0.3162028342154687, "grad_norm": 0.4575703740119934, "learning_rate": 8e-05, "loss": 1.6748, "step": 1852 }, { "epoch": 0.3163735700870753, "grad_norm": 0.47607335448265076, "learning_rate": 8e-05, "loss": 1.6657, "step": 1853 }, { "epoch": 0.31654430595868194, "grad_norm": 0.4928761422634125, "learning_rate": 8e-05, "loss": 1.8334, "step": 1854 }, { "epoch": 0.31671504183028854, "grad_norm": 0.4733491837978363, "learning_rate": 8e-05, "loss": 1.6137, "step": 1855 }, { "epoch": 0.3168857777018952, "grad_norm": 0.46435895562171936, "learning_rate": 8e-05, "loss": 1.6951, "step": 1856 }, { "epoch": 0.3170565135735018, "grad_norm": 0.4468999207019806, "learning_rate": 8e-05, "loss": 1.6518, "step": 1857 }, { "epoch": 0.31722724944510844, "grad_norm": 0.46201545000076294, "learning_rate": 8e-05, "loss": 1.602, "step": 1858 }, { "epoch": 0.31739798531671504, "grad_norm": 0.43686601519584656, "learning_rate": 8e-05, "loss": 1.6922, "step": 1859 }, { "epoch": 0.3175687211883217, "grad_norm": 0.4712730050086975, "learning_rate": 8e-05, "loss": 1.7549, "step": 1860 }, { "epoch": 0.3177394570599283, "grad_norm": 0.46376582980155945, "learning_rate": 8e-05, "loss": 1.7702, "step": 1861 }, { "epoch": 0.31791019293153494, "grad_norm": 0.46891236305236816, "learning_rate": 8e-05, "loss": 1.7362, "step": 1862 }, { "epoch": 0.31808092880314154, "grad_norm": 0.5069165825843811, "learning_rate": 8e-05, "loss": 1.653, "step": 1863 }, { "epoch": 0.3182516646747482, "grad_norm": 0.482729434967041, "learning_rate": 8e-05, "loss": 1.6416, "step": 1864 }, { "epoch": 0.3184224005463548, "grad_norm": 0.4481070935726166, "learning_rate": 8e-05, "loss": 1.5307, "step": 1865 }, { "epoch": 0.3185931364179614, "grad_norm": 0.481286883354187, "learning_rate": 8e-05, "loss": 1.8041, "step": 1866 }, { "epoch": 0.31876387228956804, "grad_norm": 0.4869597554206848, "learning_rate": 8e-05, "loss": 1.6363, "step": 1867 }, { "epoch": 0.31893460816117464, "grad_norm": 0.4710119366645813, "learning_rate": 8e-05, "loss": 1.5902, "step": 1868 }, { "epoch": 0.3191053440327813, "grad_norm": 0.4842173457145691, "learning_rate": 8e-05, "loss": 1.8601, "step": 1869 }, { "epoch": 0.3192760799043879, "grad_norm": 0.4491094648838043, "learning_rate": 8e-05, "loss": 1.6848, "step": 1870 }, { "epoch": 0.31944681577599454, "grad_norm": 0.4386301040649414, "learning_rate": 8e-05, "loss": 1.6861, "step": 1871 }, { "epoch": 0.31961755164760114, "grad_norm": 0.4882320165634155, "learning_rate": 8e-05, "loss": 1.787, "step": 1872 }, { "epoch": 0.3197882875192078, "grad_norm": 0.44143980741500854, "learning_rate": 8e-05, "loss": 1.6115, "step": 1873 }, { "epoch": 0.3199590233908144, "grad_norm": 0.44633689522743225, "learning_rate": 8e-05, "loss": 1.6226, "step": 1874 }, { "epoch": 0.32012975926242104, "grad_norm": 0.47218018770217896, "learning_rate": 8e-05, "loss": 1.6995, "step": 1875 }, { "epoch": 0.32030049513402764, "grad_norm": 0.42806264758110046, "learning_rate": 8e-05, "loss": 1.6117, "step": 1876 }, { "epoch": 0.3204712310056343, "grad_norm": 0.47656309604644775, "learning_rate": 8e-05, "loss": 1.7315, "step": 1877 }, { "epoch": 0.3206419668772409, "grad_norm": 0.4769209325313568, "learning_rate": 8e-05, "loss": 1.8632, "step": 1878 }, { "epoch": 0.32081270274884754, "grad_norm": 0.4554208219051361, "learning_rate": 8e-05, "loss": 1.6832, "step": 1879 }, { "epoch": 0.32098343862045414, "grad_norm": 0.5119964480400085, "learning_rate": 8e-05, "loss": 1.8556, "step": 1880 }, { "epoch": 0.3211541744920608, "grad_norm": 0.42933353781700134, "learning_rate": 8e-05, "loss": 1.6202, "step": 1881 }, { "epoch": 0.3213249103636674, "grad_norm": 0.4032427966594696, "learning_rate": 8e-05, "loss": 1.5034, "step": 1882 }, { "epoch": 0.32149564623527405, "grad_norm": 0.4614081382751465, "learning_rate": 8e-05, "loss": 1.7459, "step": 1883 }, { "epoch": 0.32166638210688064, "grad_norm": 0.42099443078041077, "learning_rate": 8e-05, "loss": 1.6044, "step": 1884 }, { "epoch": 0.3218371179784873, "grad_norm": 0.46545934677124023, "learning_rate": 8e-05, "loss": 1.6832, "step": 1885 }, { "epoch": 0.3220078538500939, "grad_norm": 0.4823605418205261, "learning_rate": 8e-05, "loss": 1.6935, "step": 1886 }, { "epoch": 0.32217858972170055, "grad_norm": 0.4231642484664917, "learning_rate": 8e-05, "loss": 1.4679, "step": 1887 }, { "epoch": 0.32234932559330715, "grad_norm": 0.523083508014679, "learning_rate": 8e-05, "loss": 1.715, "step": 1888 }, { "epoch": 0.3225200614649138, "grad_norm": 0.46157923340797424, "learning_rate": 8e-05, "loss": 1.6192, "step": 1889 }, { "epoch": 0.3226907973365204, "grad_norm": 0.4917651414871216, "learning_rate": 8e-05, "loss": 1.8866, "step": 1890 }, { "epoch": 0.32286153320812705, "grad_norm": 0.4680168330669403, "learning_rate": 8e-05, "loss": 1.7627, "step": 1891 }, { "epoch": 0.32303226907973365, "grad_norm": 0.500500500202179, "learning_rate": 8e-05, "loss": 1.7354, "step": 1892 }, { "epoch": 0.3232030049513403, "grad_norm": 0.4579605758190155, "learning_rate": 8e-05, "loss": 1.6978, "step": 1893 }, { "epoch": 0.3233737408229469, "grad_norm": 0.43540331721305847, "learning_rate": 8e-05, "loss": 1.5907, "step": 1894 }, { "epoch": 0.32354447669455355, "grad_norm": 0.4657589793205261, "learning_rate": 8e-05, "loss": 1.7155, "step": 1895 }, { "epoch": 0.32371521256616015, "grad_norm": 0.4444985091686249, "learning_rate": 8e-05, "loss": 1.5512, "step": 1896 }, { "epoch": 0.3238859484377668, "grad_norm": 0.46785518527030945, "learning_rate": 8e-05, "loss": 1.7385, "step": 1897 }, { "epoch": 0.3240566843093734, "grad_norm": 0.4438624382019043, "learning_rate": 8e-05, "loss": 1.6261, "step": 1898 }, { "epoch": 0.32422742018098, "grad_norm": 0.45461636781692505, "learning_rate": 8e-05, "loss": 1.609, "step": 1899 }, { "epoch": 0.32439815605258665, "grad_norm": 0.4826289415359497, "learning_rate": 8e-05, "loss": 1.8125, "step": 1900 }, { "epoch": 0.32456889192419325, "grad_norm": 0.4518076479434967, "learning_rate": 8e-05, "loss": 1.6234, "step": 1901 }, { "epoch": 0.3247396277957999, "grad_norm": 0.45679622888565063, "learning_rate": 8e-05, "loss": 1.6877, "step": 1902 }, { "epoch": 0.3249103636674065, "grad_norm": 0.48572200536727905, "learning_rate": 8e-05, "loss": 1.7704, "step": 1903 }, { "epoch": 0.32508109953901315, "grad_norm": 0.47857537865638733, "learning_rate": 8e-05, "loss": 1.7513, "step": 1904 }, { "epoch": 0.32525183541061975, "grad_norm": 0.5307595729827881, "learning_rate": 8e-05, "loss": 1.8692, "step": 1905 }, { "epoch": 0.3254225712822264, "grad_norm": 0.467401385307312, "learning_rate": 8e-05, "loss": 1.6707, "step": 1906 }, { "epoch": 0.325593307153833, "grad_norm": 0.47158485651016235, "learning_rate": 8e-05, "loss": 1.6609, "step": 1907 }, { "epoch": 0.32576404302543965, "grad_norm": 0.5198235511779785, "learning_rate": 8e-05, "loss": 1.6309, "step": 1908 }, { "epoch": 0.32593477889704625, "grad_norm": 0.472156286239624, "learning_rate": 8e-05, "loss": 1.7385, "step": 1909 }, { "epoch": 0.3261055147686529, "grad_norm": 0.455342173576355, "learning_rate": 8e-05, "loss": 1.7176, "step": 1910 }, { "epoch": 0.3262762506402595, "grad_norm": 0.5010833740234375, "learning_rate": 8e-05, "loss": 1.8372, "step": 1911 }, { "epoch": 0.32644698651186616, "grad_norm": 0.43889832496643066, "learning_rate": 8e-05, "loss": 1.629, "step": 1912 }, { "epoch": 0.32661772238347275, "grad_norm": 0.45082831382751465, "learning_rate": 8e-05, "loss": 1.7549, "step": 1913 }, { "epoch": 0.3267884582550794, "grad_norm": 0.5156621336936951, "learning_rate": 8e-05, "loss": 1.678, "step": 1914 }, { "epoch": 0.326959194126686, "grad_norm": 0.47655031085014343, "learning_rate": 8e-05, "loss": 1.8896, "step": 1915 }, { "epoch": 0.32712992999829266, "grad_norm": 0.543146550655365, "learning_rate": 8e-05, "loss": 1.8637, "step": 1916 }, { "epoch": 0.32730066586989925, "grad_norm": 0.45966148376464844, "learning_rate": 8e-05, "loss": 1.7168, "step": 1917 }, { "epoch": 0.3274714017415059, "grad_norm": 0.49897849559783936, "learning_rate": 8e-05, "loss": 1.8373, "step": 1918 }, { "epoch": 0.3276421376131125, "grad_norm": 0.46889203786849976, "learning_rate": 8e-05, "loss": 1.6978, "step": 1919 }, { "epoch": 0.32781287348471916, "grad_norm": 0.4474010169506073, "learning_rate": 8e-05, "loss": 1.6414, "step": 1920 }, { "epoch": 0.32798360935632576, "grad_norm": 0.4717528820037842, "learning_rate": 8e-05, "loss": 1.7217, "step": 1921 }, { "epoch": 0.3281543452279324, "grad_norm": 0.42631182074546814, "learning_rate": 8e-05, "loss": 1.6106, "step": 1922 }, { "epoch": 0.328325081099539, "grad_norm": 0.4606059491634369, "learning_rate": 8e-05, "loss": 1.6414, "step": 1923 }, { "epoch": 0.32849581697114566, "grad_norm": 0.6056921482086182, "learning_rate": 8e-05, "loss": 1.6987, "step": 1924 }, { "epoch": 0.32866655284275226, "grad_norm": 0.4687308073043823, "learning_rate": 8e-05, "loss": 1.6242, "step": 1925 }, { "epoch": 0.3288372887143589, "grad_norm": 0.48305413126945496, "learning_rate": 8e-05, "loss": 1.6859, "step": 1926 }, { "epoch": 0.3290080245859655, "grad_norm": 0.45511922240257263, "learning_rate": 8e-05, "loss": 1.7201, "step": 1927 }, { "epoch": 0.32917876045757216, "grad_norm": 0.4641616642475128, "learning_rate": 8e-05, "loss": 1.568, "step": 1928 }, { "epoch": 0.32934949632917876, "grad_norm": 0.45328670740127563, "learning_rate": 8e-05, "loss": 1.6835, "step": 1929 }, { "epoch": 0.3295202322007854, "grad_norm": 0.49716901779174805, "learning_rate": 8e-05, "loss": 2.0702, "step": 1930 }, { "epoch": 0.329690968072392, "grad_norm": 0.4805561900138855, "learning_rate": 8e-05, "loss": 1.7601, "step": 1931 }, { "epoch": 0.3298617039439986, "grad_norm": 0.5099261403083801, "learning_rate": 8e-05, "loss": 1.8642, "step": 1932 }, { "epoch": 0.33003243981560526, "grad_norm": 0.5259479880332947, "learning_rate": 8e-05, "loss": 1.8842, "step": 1933 }, { "epoch": 0.33020317568721186, "grad_norm": 0.49327895045280457, "learning_rate": 8e-05, "loss": 1.6884, "step": 1934 }, { "epoch": 0.3303739115588185, "grad_norm": 0.45628976821899414, "learning_rate": 8e-05, "loss": 1.6488, "step": 1935 }, { "epoch": 0.3305446474304251, "grad_norm": 0.4335923194885254, "learning_rate": 8e-05, "loss": 1.5595, "step": 1936 }, { "epoch": 0.33071538330203176, "grad_norm": 0.5014899969100952, "learning_rate": 8e-05, "loss": 1.8371, "step": 1937 }, { "epoch": 0.33088611917363836, "grad_norm": 0.559735894203186, "learning_rate": 8e-05, "loss": 1.6984, "step": 1938 }, { "epoch": 0.331056855045245, "grad_norm": 0.4748379588127136, "learning_rate": 8e-05, "loss": 1.614, "step": 1939 }, { "epoch": 0.3312275909168516, "grad_norm": 0.49055010080337524, "learning_rate": 8e-05, "loss": 1.9736, "step": 1940 }, { "epoch": 0.33139832678845826, "grad_norm": 0.4907020926475525, "learning_rate": 8e-05, "loss": 1.8762, "step": 1941 }, { "epoch": 0.33156906266006486, "grad_norm": 0.5281545519828796, "learning_rate": 8e-05, "loss": 1.8067, "step": 1942 }, { "epoch": 0.3317397985316715, "grad_norm": 0.4559493660926819, "learning_rate": 8e-05, "loss": 1.8112, "step": 1943 }, { "epoch": 0.3319105344032781, "grad_norm": 0.44826024770736694, "learning_rate": 8e-05, "loss": 1.6961, "step": 1944 }, { "epoch": 0.33208127027488477, "grad_norm": 0.47942978143692017, "learning_rate": 8e-05, "loss": 1.8258, "step": 1945 }, { "epoch": 0.33225200614649136, "grad_norm": 0.4631817638874054, "learning_rate": 8e-05, "loss": 1.874, "step": 1946 }, { "epoch": 0.332422742018098, "grad_norm": 0.45535627007484436, "learning_rate": 8e-05, "loss": 1.7334, "step": 1947 }, { "epoch": 0.3325934778897046, "grad_norm": 0.46410563588142395, "learning_rate": 8e-05, "loss": 1.65, "step": 1948 }, { "epoch": 0.33276421376131127, "grad_norm": 0.4227137565612793, "learning_rate": 8e-05, "loss": 1.4266, "step": 1949 }, { "epoch": 0.33293494963291786, "grad_norm": 0.4973393976688385, "learning_rate": 8e-05, "loss": 1.7824, "step": 1950 }, { "epoch": 0.3331056855045245, "grad_norm": 0.5230864882469177, "learning_rate": 8e-05, "loss": 1.7772, "step": 1951 }, { "epoch": 0.3332764213761311, "grad_norm": 0.47836005687713623, "learning_rate": 8e-05, "loss": 1.6894, "step": 1952 }, { "epoch": 0.33344715724773777, "grad_norm": 0.43310362100601196, "learning_rate": 8e-05, "loss": 1.6788, "step": 1953 }, { "epoch": 0.33361789311934437, "grad_norm": 0.4530152976512909, "learning_rate": 8e-05, "loss": 1.7764, "step": 1954 }, { "epoch": 0.333788628990951, "grad_norm": 0.46465250849723816, "learning_rate": 8e-05, "loss": 1.8604, "step": 1955 }, { "epoch": 0.3339593648625576, "grad_norm": 0.5347518920898438, "learning_rate": 8e-05, "loss": 1.6985, "step": 1956 }, { "epoch": 0.33413010073416427, "grad_norm": 0.4470164179801941, "learning_rate": 8e-05, "loss": 1.6079, "step": 1957 }, { "epoch": 0.33430083660577087, "grad_norm": 0.4572913646697998, "learning_rate": 8e-05, "loss": 1.6018, "step": 1958 }, { "epoch": 0.3344715724773775, "grad_norm": 0.46929097175598145, "learning_rate": 8e-05, "loss": 1.7103, "step": 1959 }, { "epoch": 0.3346423083489841, "grad_norm": 0.4921543300151825, "learning_rate": 8e-05, "loss": 1.6587, "step": 1960 }, { "epoch": 0.33481304422059077, "grad_norm": 0.5154151320457458, "learning_rate": 8e-05, "loss": 1.7986, "step": 1961 }, { "epoch": 0.33498378009219737, "grad_norm": 0.47366416454315186, "learning_rate": 8e-05, "loss": 1.6116, "step": 1962 }, { "epoch": 0.335154515963804, "grad_norm": 0.48739978671073914, "learning_rate": 8e-05, "loss": 1.7224, "step": 1963 }, { "epoch": 0.3353252518354106, "grad_norm": 0.4867439568042755, "learning_rate": 8e-05, "loss": 1.7706, "step": 1964 }, { "epoch": 0.3354959877070172, "grad_norm": 0.43780067563056946, "learning_rate": 8e-05, "loss": 1.5781, "step": 1965 }, { "epoch": 0.33566672357862387, "grad_norm": 0.5513235926628113, "learning_rate": 8e-05, "loss": 1.8656, "step": 1966 }, { "epoch": 0.33583745945023047, "grad_norm": 0.4885498881340027, "learning_rate": 8e-05, "loss": 1.6892, "step": 1967 }, { "epoch": 0.3360081953218371, "grad_norm": 0.4404436945915222, "learning_rate": 8e-05, "loss": 1.6991, "step": 1968 }, { "epoch": 0.3361789311934437, "grad_norm": 0.42584335803985596, "learning_rate": 8e-05, "loss": 1.6793, "step": 1969 }, { "epoch": 0.3363496670650504, "grad_norm": 0.4603241980075836, "learning_rate": 8e-05, "loss": 1.69, "step": 1970 }, { "epoch": 0.33652040293665697, "grad_norm": 0.45768609642982483, "learning_rate": 8e-05, "loss": 1.7215, "step": 1971 }, { "epoch": 0.3366911388082636, "grad_norm": 0.5296070575714111, "learning_rate": 8e-05, "loss": 2.177, "step": 1972 }, { "epoch": 0.3368618746798702, "grad_norm": 0.43210679292678833, "learning_rate": 8e-05, "loss": 1.6053, "step": 1973 }, { "epoch": 0.3370326105514769, "grad_norm": 0.5244123935699463, "learning_rate": 8e-05, "loss": 1.9582, "step": 1974 }, { "epoch": 0.33720334642308347, "grad_norm": 0.5189810991287231, "learning_rate": 8e-05, "loss": 1.7976, "step": 1975 }, { "epoch": 0.3373740822946901, "grad_norm": 0.47392532229423523, "learning_rate": 8e-05, "loss": 1.6945, "step": 1976 }, { "epoch": 0.3375448181662967, "grad_norm": 0.4500195384025574, "learning_rate": 8e-05, "loss": 1.6605, "step": 1977 }, { "epoch": 0.3377155540379034, "grad_norm": 0.5669680237770081, "learning_rate": 8e-05, "loss": 1.8555, "step": 1978 }, { "epoch": 0.33788628990951, "grad_norm": 0.538758397102356, "learning_rate": 8e-05, "loss": 2.0637, "step": 1979 }, { "epoch": 0.3380570257811166, "grad_norm": 0.4588993787765503, "learning_rate": 8e-05, "loss": 1.6969, "step": 1980 }, { "epoch": 0.3382277616527232, "grad_norm": 0.5195175409317017, "learning_rate": 8e-05, "loss": 1.8417, "step": 1981 }, { "epoch": 0.3383984975243299, "grad_norm": 0.47832581400871277, "learning_rate": 8e-05, "loss": 1.7407, "step": 1982 }, { "epoch": 0.3385692333959365, "grad_norm": 0.5217552185058594, "learning_rate": 8e-05, "loss": 1.7506, "step": 1983 }, { "epoch": 0.33873996926754313, "grad_norm": 0.5111126899719238, "learning_rate": 8e-05, "loss": 1.8828, "step": 1984 }, { "epoch": 0.3389107051391497, "grad_norm": 0.4572914242744446, "learning_rate": 8e-05, "loss": 1.6212, "step": 1985 }, { "epoch": 0.3390814410107564, "grad_norm": 0.4526968002319336, "learning_rate": 8e-05, "loss": 1.7274, "step": 1986 }, { "epoch": 0.339252176882363, "grad_norm": 0.4512120187282562, "learning_rate": 8e-05, "loss": 1.5848, "step": 1987 }, { "epoch": 0.33942291275396963, "grad_norm": 0.4531479775905609, "learning_rate": 8e-05, "loss": 1.6395, "step": 1988 }, { "epoch": 0.3395936486255762, "grad_norm": 0.49920934438705444, "learning_rate": 8e-05, "loss": 1.7915, "step": 1989 }, { "epoch": 0.3397643844971829, "grad_norm": 0.45511648058891296, "learning_rate": 8e-05, "loss": 1.7257, "step": 1990 }, { "epoch": 0.3399351203687895, "grad_norm": 0.4677150845527649, "learning_rate": 8e-05, "loss": 1.6841, "step": 1991 }, { "epoch": 0.34010585624039613, "grad_norm": 0.5579793453216553, "learning_rate": 8e-05, "loss": 1.928, "step": 1992 }, { "epoch": 0.34027659211200273, "grad_norm": 0.4810863733291626, "learning_rate": 8e-05, "loss": 1.6559, "step": 1993 }, { "epoch": 0.3404473279836094, "grad_norm": 0.4607120752334595, "learning_rate": 8e-05, "loss": 1.6712, "step": 1994 }, { "epoch": 0.340618063855216, "grad_norm": 0.46328404545783997, "learning_rate": 8e-05, "loss": 1.5872, "step": 1995 }, { "epoch": 0.3407887997268226, "grad_norm": 0.46395301818847656, "learning_rate": 8e-05, "loss": 1.7578, "step": 1996 }, { "epoch": 0.34095953559842923, "grad_norm": 0.4552900195121765, "learning_rate": 8e-05, "loss": 1.6754, "step": 1997 }, { "epoch": 0.34113027147003583, "grad_norm": 0.4663562476634979, "learning_rate": 8e-05, "loss": 1.6566, "step": 1998 }, { "epoch": 0.3413010073416425, "grad_norm": 0.4467025399208069, "learning_rate": 8e-05, "loss": 1.2243, "step": 1999 }, { "epoch": 0.3414717432132491, "grad_norm": 0.4682059586048126, "learning_rate": 8e-05, "loss": 1.7142, "step": 2000 }, { "epoch": 0.34164247908485573, "grad_norm": 0.45217183232307434, "learning_rate": 8e-05, "loss": 1.6818, "step": 2001 }, { "epoch": 0.34181321495646233, "grad_norm": 0.48879554867744446, "learning_rate": 8e-05, "loss": 1.9251, "step": 2002 }, { "epoch": 0.341983950828069, "grad_norm": 0.4986742436885834, "learning_rate": 8e-05, "loss": 1.7526, "step": 2003 }, { "epoch": 0.3421546866996756, "grad_norm": 0.48586878180503845, "learning_rate": 8e-05, "loss": 1.9094, "step": 2004 }, { "epoch": 0.34232542257128223, "grad_norm": 0.43946221470832825, "learning_rate": 8e-05, "loss": 1.5967, "step": 2005 }, { "epoch": 0.34249615844288883, "grad_norm": 0.49964892864227295, "learning_rate": 8e-05, "loss": 1.6603, "step": 2006 }, { "epoch": 0.3426668943144955, "grad_norm": 0.46628522872924805, "learning_rate": 8e-05, "loss": 1.7002, "step": 2007 }, { "epoch": 0.3428376301861021, "grad_norm": 0.46340104937553406, "learning_rate": 8e-05, "loss": 1.7633, "step": 2008 }, { "epoch": 0.34300836605770874, "grad_norm": 0.49899670481681824, "learning_rate": 8e-05, "loss": 1.6645, "step": 2009 }, { "epoch": 0.34317910192931533, "grad_norm": 0.4325730502605438, "learning_rate": 8e-05, "loss": 1.7695, "step": 2010 }, { "epoch": 0.343349837800922, "grad_norm": 0.48860591650009155, "learning_rate": 8e-05, "loss": 1.8247, "step": 2011 }, { "epoch": 0.3435205736725286, "grad_norm": 0.44568490982055664, "learning_rate": 8e-05, "loss": 1.6277, "step": 2012 }, { "epoch": 0.34369130954413524, "grad_norm": 0.4667462408542633, "learning_rate": 8e-05, "loss": 1.679, "step": 2013 }, { "epoch": 0.34386204541574183, "grad_norm": 0.4645029604434967, "learning_rate": 8e-05, "loss": 1.7999, "step": 2014 }, { "epoch": 0.3440327812873485, "grad_norm": 0.44732391834259033, "learning_rate": 8e-05, "loss": 1.5988, "step": 2015 }, { "epoch": 0.3442035171589551, "grad_norm": 0.4749414920806885, "learning_rate": 8e-05, "loss": 1.8081, "step": 2016 }, { "epoch": 0.34437425303056174, "grad_norm": 0.4997918903827667, "learning_rate": 8e-05, "loss": 1.7819, "step": 2017 }, { "epoch": 0.34454498890216834, "grad_norm": 0.5127208828926086, "learning_rate": 8e-05, "loss": 1.7935, "step": 2018 }, { "epoch": 0.344715724773775, "grad_norm": 0.5001400113105774, "learning_rate": 8e-05, "loss": 1.756, "step": 2019 }, { "epoch": 0.3448864606453816, "grad_norm": 0.45538127422332764, "learning_rate": 8e-05, "loss": 1.7142, "step": 2020 }, { "epoch": 0.34505719651698824, "grad_norm": 0.45046013593673706, "learning_rate": 8e-05, "loss": 1.7287, "step": 2021 }, { "epoch": 0.34522793238859484, "grad_norm": 0.49369731545448303, "learning_rate": 8e-05, "loss": 1.7975, "step": 2022 }, { "epoch": 0.3453986682602015, "grad_norm": 0.4604179859161377, "learning_rate": 8e-05, "loss": 1.7232, "step": 2023 }, { "epoch": 0.3455694041318081, "grad_norm": 0.5044283270835876, "learning_rate": 8e-05, "loss": 1.8572, "step": 2024 }, { "epoch": 0.34574014000341474, "grad_norm": 0.48091068863868713, "learning_rate": 8e-05, "loss": 1.5623, "step": 2025 }, { "epoch": 0.34591087587502134, "grad_norm": 0.4468981921672821, "learning_rate": 8e-05, "loss": 1.7339, "step": 2026 }, { "epoch": 0.346081611746628, "grad_norm": 0.45262858271598816, "learning_rate": 8e-05, "loss": 1.6608, "step": 2027 }, { "epoch": 0.3462523476182346, "grad_norm": 0.46456393599510193, "learning_rate": 8e-05, "loss": 1.6996, "step": 2028 }, { "epoch": 0.3464230834898412, "grad_norm": 0.487901896238327, "learning_rate": 8e-05, "loss": 1.6964, "step": 2029 }, { "epoch": 0.34659381936144784, "grad_norm": 0.5000905394554138, "learning_rate": 8e-05, "loss": 1.7828, "step": 2030 }, { "epoch": 0.34676455523305444, "grad_norm": 0.45887795090675354, "learning_rate": 8e-05, "loss": 1.7273, "step": 2031 }, { "epoch": 0.3469352911046611, "grad_norm": 0.45011165738105774, "learning_rate": 8e-05, "loss": 1.5864, "step": 2032 }, { "epoch": 0.3471060269762677, "grad_norm": 0.4771392047405243, "learning_rate": 8e-05, "loss": 1.7471, "step": 2033 }, { "epoch": 0.34727676284787434, "grad_norm": 0.5213372707366943, "learning_rate": 8e-05, "loss": 1.9387, "step": 2034 }, { "epoch": 0.34744749871948094, "grad_norm": 0.4899488389492035, "learning_rate": 8e-05, "loss": 1.8017, "step": 2035 }, { "epoch": 0.3476182345910876, "grad_norm": 0.4565853178501129, "learning_rate": 8e-05, "loss": 1.7653, "step": 2036 }, { "epoch": 0.3477889704626942, "grad_norm": 0.47103193402290344, "learning_rate": 8e-05, "loss": 1.7749, "step": 2037 }, { "epoch": 0.34795970633430084, "grad_norm": 0.4416932761669159, "learning_rate": 8e-05, "loss": 1.7295, "step": 2038 }, { "epoch": 0.34813044220590744, "grad_norm": 0.4542889893054962, "learning_rate": 8e-05, "loss": 1.7725, "step": 2039 }, { "epoch": 0.3483011780775141, "grad_norm": 0.47980326414108276, "learning_rate": 8e-05, "loss": 1.8297, "step": 2040 }, { "epoch": 0.3484719139491207, "grad_norm": 0.43621334433555603, "learning_rate": 8e-05, "loss": 1.6692, "step": 2041 }, { "epoch": 0.34864264982072735, "grad_norm": 0.43704336881637573, "learning_rate": 8e-05, "loss": 1.6718, "step": 2042 }, { "epoch": 0.34881338569233394, "grad_norm": 0.4446067214012146, "learning_rate": 8e-05, "loss": 1.7428, "step": 2043 }, { "epoch": 0.3489841215639406, "grad_norm": 0.4481487572193146, "learning_rate": 8e-05, "loss": 1.546, "step": 2044 }, { "epoch": 0.3491548574355472, "grad_norm": 0.47868332266807556, "learning_rate": 8e-05, "loss": 1.871, "step": 2045 }, { "epoch": 0.34932559330715385, "grad_norm": 0.4832029938697815, "learning_rate": 8e-05, "loss": 1.6132, "step": 2046 }, { "epoch": 0.34949632917876045, "grad_norm": 0.45494866371154785, "learning_rate": 8e-05, "loss": 1.6915, "step": 2047 }, { "epoch": 0.3496670650503671, "grad_norm": 0.4523508846759796, "learning_rate": 8e-05, "loss": 1.6995, "step": 2048 }, { "epoch": 0.3498378009219737, "grad_norm": 0.4366416931152344, "learning_rate": 8e-05, "loss": 1.6388, "step": 2049 }, { "epoch": 0.35000853679358035, "grad_norm": 0.4462726414203644, "learning_rate": 8e-05, "loss": 1.4962, "step": 2050 }, { "epoch": 0.35017927266518695, "grad_norm": 0.43231672048568726, "learning_rate": 8e-05, "loss": 1.5696, "step": 2051 }, { "epoch": 0.3503500085367936, "grad_norm": 0.46851909160614014, "learning_rate": 8e-05, "loss": 1.7917, "step": 2052 }, { "epoch": 0.3505207444084002, "grad_norm": 0.443303644657135, "learning_rate": 8e-05, "loss": 1.5899, "step": 2053 }, { "epoch": 0.35069148028000685, "grad_norm": 0.4750514626502991, "learning_rate": 8e-05, "loss": 1.7199, "step": 2054 }, { "epoch": 0.35086221615161345, "grad_norm": 0.4276409149169922, "learning_rate": 8e-05, "loss": 1.7596, "step": 2055 }, { "epoch": 0.3510329520232201, "grad_norm": 0.5081743001937866, "learning_rate": 8e-05, "loss": 1.6679, "step": 2056 }, { "epoch": 0.3512036878948267, "grad_norm": 0.47930479049682617, "learning_rate": 8e-05, "loss": 1.6984, "step": 2057 }, { "epoch": 0.35137442376643335, "grad_norm": 0.45400914549827576, "learning_rate": 8e-05, "loss": 1.7031, "step": 2058 }, { "epoch": 0.35154515963803995, "grad_norm": 0.4565233290195465, "learning_rate": 8e-05, "loss": 1.7786, "step": 2059 }, { "epoch": 0.3517158955096466, "grad_norm": 0.45850440859794617, "learning_rate": 8e-05, "loss": 1.7597, "step": 2060 }, { "epoch": 0.3518866313812532, "grad_norm": 0.46595498919487, "learning_rate": 8e-05, "loss": 1.7211, "step": 2061 }, { "epoch": 0.3520573672528598, "grad_norm": 0.46992626786231995, "learning_rate": 8e-05, "loss": 1.8513, "step": 2062 }, { "epoch": 0.35222810312446645, "grad_norm": 0.4678153395652771, "learning_rate": 8e-05, "loss": 1.6014, "step": 2063 }, { "epoch": 0.35239883899607305, "grad_norm": 0.47371941804885864, "learning_rate": 8e-05, "loss": 1.875, "step": 2064 }, { "epoch": 0.3525695748676797, "grad_norm": 0.43740856647491455, "learning_rate": 8e-05, "loss": 1.5145, "step": 2065 }, { "epoch": 0.3527403107392863, "grad_norm": 0.4885193705558777, "learning_rate": 8e-05, "loss": 1.7002, "step": 2066 }, { "epoch": 0.35291104661089295, "grad_norm": 0.46635669469833374, "learning_rate": 8e-05, "loss": 1.7851, "step": 2067 }, { "epoch": 0.35308178248249955, "grad_norm": 0.4991675317287445, "learning_rate": 8e-05, "loss": 1.8558, "step": 2068 }, { "epoch": 0.3532525183541062, "grad_norm": 0.4822664260864258, "learning_rate": 8e-05, "loss": 1.7954, "step": 2069 }, { "epoch": 0.3534232542257128, "grad_norm": 0.4163990616798401, "learning_rate": 8e-05, "loss": 1.0755, "step": 2070 }, { "epoch": 0.35359399009731945, "grad_norm": 0.48030874133110046, "learning_rate": 8e-05, "loss": 1.8236, "step": 2071 }, { "epoch": 0.35376472596892605, "grad_norm": 0.46650952100753784, "learning_rate": 8e-05, "loss": 1.8799, "step": 2072 }, { "epoch": 0.3539354618405327, "grad_norm": 0.4641493856906891, "learning_rate": 8e-05, "loss": 1.7149, "step": 2073 }, { "epoch": 0.3541061977121393, "grad_norm": 0.46915292739868164, "learning_rate": 8e-05, "loss": 1.5866, "step": 2074 }, { "epoch": 0.35427693358374596, "grad_norm": 0.44763389229774475, "learning_rate": 8e-05, "loss": 1.6414, "step": 2075 }, { "epoch": 0.35444766945535255, "grad_norm": 0.45028457045555115, "learning_rate": 8e-05, "loss": 1.5732, "step": 2076 }, { "epoch": 0.3546184053269592, "grad_norm": 0.4615210294723511, "learning_rate": 8e-05, "loss": 1.6589, "step": 2077 }, { "epoch": 0.3547891411985658, "grad_norm": 0.5248599648475647, "learning_rate": 8e-05, "loss": 1.5949, "step": 2078 }, { "epoch": 0.35495987707017246, "grad_norm": 0.4491666257381439, "learning_rate": 8e-05, "loss": 1.5646, "step": 2079 }, { "epoch": 0.35513061294177906, "grad_norm": 0.47784435749053955, "learning_rate": 8e-05, "loss": 1.8641, "step": 2080 }, { "epoch": 0.3553013488133857, "grad_norm": 0.4516776204109192, "learning_rate": 8e-05, "loss": 1.5249, "step": 2081 }, { "epoch": 0.3554720846849923, "grad_norm": 0.44781193137168884, "learning_rate": 8e-05, "loss": 1.8575, "step": 2082 }, { "epoch": 0.35564282055659896, "grad_norm": 0.4651031196117401, "learning_rate": 8e-05, "loss": 1.7043, "step": 2083 }, { "epoch": 0.35581355642820556, "grad_norm": 0.4689962863922119, "learning_rate": 8e-05, "loss": 1.6494, "step": 2084 }, { "epoch": 0.3559842922998122, "grad_norm": 0.49843698740005493, "learning_rate": 8e-05, "loss": 1.6707, "step": 2085 }, { "epoch": 0.3561550281714188, "grad_norm": 0.4720013737678528, "learning_rate": 8e-05, "loss": 1.7209, "step": 2086 }, { "epoch": 0.35632576404302546, "grad_norm": 0.474491149187088, "learning_rate": 8e-05, "loss": 1.7343, "step": 2087 }, { "epoch": 0.35649649991463206, "grad_norm": 0.4483530521392822, "learning_rate": 8e-05, "loss": 1.6508, "step": 2088 }, { "epoch": 0.3566672357862387, "grad_norm": 0.44867628812789917, "learning_rate": 8e-05, "loss": 1.7323, "step": 2089 }, { "epoch": 0.3568379716578453, "grad_norm": 0.5034573078155518, "learning_rate": 8e-05, "loss": 1.9658, "step": 2090 }, { "epoch": 0.35700870752945196, "grad_norm": 0.4780046045780182, "learning_rate": 8e-05, "loss": 1.7925, "step": 2091 }, { "epoch": 0.35717944340105856, "grad_norm": 0.48595672845840454, "learning_rate": 8e-05, "loss": 1.8509, "step": 2092 }, { "epoch": 0.3573501792726652, "grad_norm": 0.4369411766529083, "learning_rate": 8e-05, "loss": 1.64, "step": 2093 }, { "epoch": 0.3575209151442718, "grad_norm": 0.445234477519989, "learning_rate": 8e-05, "loss": 1.6548, "step": 2094 }, { "epoch": 0.3576916510158784, "grad_norm": 0.44147053360939026, "learning_rate": 8e-05, "loss": 1.683, "step": 2095 }, { "epoch": 0.35786238688748506, "grad_norm": 0.5068433880805969, "learning_rate": 8e-05, "loss": 1.6312, "step": 2096 }, { "epoch": 0.35803312275909166, "grad_norm": 0.45070165395736694, "learning_rate": 8e-05, "loss": 1.7201, "step": 2097 }, { "epoch": 0.3582038586306983, "grad_norm": 0.4859742820262909, "learning_rate": 8e-05, "loss": 1.7667, "step": 2098 }, { "epoch": 0.3583745945023049, "grad_norm": 0.47402462363243103, "learning_rate": 8e-05, "loss": 1.7929, "step": 2099 }, { "epoch": 0.35854533037391156, "grad_norm": 0.5448895692825317, "learning_rate": 8e-05, "loss": 1.8017, "step": 2100 }, { "epoch": 0.35871606624551816, "grad_norm": 0.44297710061073303, "learning_rate": 8e-05, "loss": 1.5391, "step": 2101 }, { "epoch": 0.3588868021171248, "grad_norm": 0.4700406491756439, "learning_rate": 8e-05, "loss": 1.6594, "step": 2102 }, { "epoch": 0.3590575379887314, "grad_norm": 0.5054619908332825, "learning_rate": 8e-05, "loss": 2.052, "step": 2103 }, { "epoch": 0.35922827386033807, "grad_norm": 0.4485531747341156, "learning_rate": 8e-05, "loss": 1.7136, "step": 2104 }, { "epoch": 0.35939900973194466, "grad_norm": 0.46100476384162903, "learning_rate": 8e-05, "loss": 1.6419, "step": 2105 }, { "epoch": 0.3595697456035513, "grad_norm": 0.4433172941207886, "learning_rate": 8e-05, "loss": 1.5734, "step": 2106 }, { "epoch": 0.3597404814751579, "grad_norm": 0.45773762464523315, "learning_rate": 8e-05, "loss": 1.6872, "step": 2107 }, { "epoch": 0.35991121734676457, "grad_norm": 0.47448471188545227, "learning_rate": 8e-05, "loss": 1.7607, "step": 2108 }, { "epoch": 0.36008195321837116, "grad_norm": 0.47450846433639526, "learning_rate": 8e-05, "loss": 1.6769, "step": 2109 }, { "epoch": 0.3602526890899778, "grad_norm": 0.5342341661453247, "learning_rate": 8e-05, "loss": 1.736, "step": 2110 }, { "epoch": 0.3604234249615844, "grad_norm": 0.492334246635437, "learning_rate": 8e-05, "loss": 1.7906, "step": 2111 }, { "epoch": 0.36059416083319107, "grad_norm": 0.4473508894443512, "learning_rate": 8e-05, "loss": 1.7202, "step": 2112 }, { "epoch": 0.36076489670479767, "grad_norm": 0.4417788088321686, "learning_rate": 8e-05, "loss": 1.6581, "step": 2113 }, { "epoch": 0.3609356325764043, "grad_norm": 0.47113940119743347, "learning_rate": 8e-05, "loss": 1.6686, "step": 2114 }, { "epoch": 0.3611063684480109, "grad_norm": 0.4361540377140045, "learning_rate": 8e-05, "loss": 1.5588, "step": 2115 }, { "epoch": 0.36127710431961757, "grad_norm": 0.44503673911094666, "learning_rate": 8e-05, "loss": 1.6586, "step": 2116 }, { "epoch": 0.36144784019122417, "grad_norm": 0.4412596523761749, "learning_rate": 8e-05, "loss": 1.6162, "step": 2117 }, { "epoch": 0.3616185760628308, "grad_norm": 0.46373251080513, "learning_rate": 8e-05, "loss": 1.8123, "step": 2118 }, { "epoch": 0.3617893119344374, "grad_norm": 0.5235573649406433, "learning_rate": 8e-05, "loss": 1.8145, "step": 2119 }, { "epoch": 0.36196004780604407, "grad_norm": 0.46262165904045105, "learning_rate": 8e-05, "loss": 1.673, "step": 2120 }, { "epoch": 0.36213078367765067, "grad_norm": 0.4758453965187073, "learning_rate": 8e-05, "loss": 1.7987, "step": 2121 }, { "epoch": 0.3623015195492573, "grad_norm": 0.42847439646720886, "learning_rate": 8e-05, "loss": 1.5684, "step": 2122 }, { "epoch": 0.3624722554208639, "grad_norm": 0.42534139752388, "learning_rate": 8e-05, "loss": 1.5927, "step": 2123 }, { "epoch": 0.3626429912924706, "grad_norm": 0.4305834472179413, "learning_rate": 8e-05, "loss": 1.5466, "step": 2124 }, { "epoch": 0.36281372716407717, "grad_norm": 0.4984394609928131, "learning_rate": 8e-05, "loss": 1.787, "step": 2125 }, { "epoch": 0.3629844630356838, "grad_norm": 0.466020405292511, "learning_rate": 8e-05, "loss": 1.7254, "step": 2126 }, { "epoch": 0.3631551989072904, "grad_norm": 0.4654143750667572, "learning_rate": 8e-05, "loss": 1.7606, "step": 2127 }, { "epoch": 0.363325934778897, "grad_norm": 0.41740378737449646, "learning_rate": 8e-05, "loss": 1.6262, "step": 2128 }, { "epoch": 0.3634966706505037, "grad_norm": 0.5023866891860962, "learning_rate": 8e-05, "loss": 1.9679, "step": 2129 }, { "epoch": 0.36366740652211027, "grad_norm": 0.45131221413612366, "learning_rate": 8e-05, "loss": 1.6725, "step": 2130 }, { "epoch": 0.3638381423937169, "grad_norm": 0.4831066429615021, "learning_rate": 8e-05, "loss": 1.8052, "step": 2131 }, { "epoch": 0.3640088782653235, "grad_norm": 0.46792611479759216, "learning_rate": 8e-05, "loss": 1.7634, "step": 2132 }, { "epoch": 0.3641796141369302, "grad_norm": 0.48729392886161804, "learning_rate": 8e-05, "loss": 1.7293, "step": 2133 }, { "epoch": 0.36435035000853677, "grad_norm": 0.4643469750881195, "learning_rate": 8e-05, "loss": 1.716, "step": 2134 }, { "epoch": 0.3645210858801434, "grad_norm": 0.5032289624214172, "learning_rate": 8e-05, "loss": 1.7386, "step": 2135 }, { "epoch": 0.36469182175175, "grad_norm": 0.4701254963874817, "learning_rate": 8e-05, "loss": 1.8308, "step": 2136 }, { "epoch": 0.3648625576233567, "grad_norm": 0.4873569905757904, "learning_rate": 8e-05, "loss": 1.8814, "step": 2137 }, { "epoch": 0.3650332934949633, "grad_norm": 0.4658535420894623, "learning_rate": 8e-05, "loss": 1.6324, "step": 2138 }, { "epoch": 0.3652040293665699, "grad_norm": 0.45934003591537476, "learning_rate": 8e-05, "loss": 1.6574, "step": 2139 }, { "epoch": 0.3653747652381765, "grad_norm": 0.48520776629447937, "learning_rate": 8e-05, "loss": 1.5829, "step": 2140 }, { "epoch": 0.3655455011097832, "grad_norm": 0.4630596339702606, "learning_rate": 8e-05, "loss": 1.7355, "step": 2141 }, { "epoch": 0.3657162369813898, "grad_norm": 0.4712507426738739, "learning_rate": 8e-05, "loss": 1.7642, "step": 2142 }, { "epoch": 0.36588697285299643, "grad_norm": 0.45912131667137146, "learning_rate": 8e-05, "loss": 1.7538, "step": 2143 }, { "epoch": 0.366057708724603, "grad_norm": 0.5502820611000061, "learning_rate": 8e-05, "loss": 1.8383, "step": 2144 }, { "epoch": 0.3662284445962097, "grad_norm": 0.4894097149372101, "learning_rate": 8e-05, "loss": 1.623, "step": 2145 }, { "epoch": 0.3663991804678163, "grad_norm": 0.44864320755004883, "learning_rate": 8e-05, "loss": 1.7569, "step": 2146 }, { "epoch": 0.36656991633942293, "grad_norm": 0.4791463613510132, "learning_rate": 8e-05, "loss": 1.798, "step": 2147 }, { "epoch": 0.3667406522110295, "grad_norm": 0.4485110342502594, "learning_rate": 8e-05, "loss": 1.6753, "step": 2148 }, { "epoch": 0.3669113880826362, "grad_norm": 0.4559869170188904, "learning_rate": 8e-05, "loss": 1.7276, "step": 2149 }, { "epoch": 0.3670821239542428, "grad_norm": 0.433702290058136, "learning_rate": 8e-05, "loss": 1.638, "step": 2150 }, { "epoch": 0.36725285982584943, "grad_norm": 0.4262603223323822, "learning_rate": 8e-05, "loss": 1.4893, "step": 2151 }, { "epoch": 0.36742359569745603, "grad_norm": 0.5038962364196777, "learning_rate": 8e-05, "loss": 1.8591, "step": 2152 }, { "epoch": 0.3675943315690627, "grad_norm": 0.4718177318572998, "learning_rate": 8e-05, "loss": 1.679, "step": 2153 }, { "epoch": 0.3677650674406693, "grad_norm": 0.5145515203475952, "learning_rate": 8e-05, "loss": 1.8261, "step": 2154 }, { "epoch": 0.36793580331227593, "grad_norm": 0.4361129105091095, "learning_rate": 8e-05, "loss": 1.7092, "step": 2155 }, { "epoch": 0.36810653918388253, "grad_norm": 0.4735560417175293, "learning_rate": 8e-05, "loss": 1.7808, "step": 2156 }, { "epoch": 0.3682772750554892, "grad_norm": 0.46481558680534363, "learning_rate": 8e-05, "loss": 1.597, "step": 2157 }, { "epoch": 0.3684480109270958, "grad_norm": 0.48279818892478943, "learning_rate": 8e-05, "loss": 1.7478, "step": 2158 }, { "epoch": 0.36861874679870243, "grad_norm": 0.44015029072761536, "learning_rate": 8e-05, "loss": 1.5883, "step": 2159 }, { "epoch": 0.36878948267030903, "grad_norm": 0.44287440180778503, "learning_rate": 8e-05, "loss": 1.5424, "step": 2160 }, { "epoch": 0.36896021854191563, "grad_norm": 0.4652153551578522, "learning_rate": 8e-05, "loss": 1.5534, "step": 2161 }, { "epoch": 0.3691309544135223, "grad_norm": 0.4560166597366333, "learning_rate": 8e-05, "loss": 1.8277, "step": 2162 }, { "epoch": 0.3693016902851289, "grad_norm": 0.4983198046684265, "learning_rate": 8e-05, "loss": 1.9224, "step": 2163 }, { "epoch": 0.36947242615673553, "grad_norm": 0.4568823277950287, "learning_rate": 8e-05, "loss": 1.7684, "step": 2164 }, { "epoch": 0.36964316202834213, "grad_norm": 0.4699454605579376, "learning_rate": 8e-05, "loss": 1.5568, "step": 2165 }, { "epoch": 0.3698138978999488, "grad_norm": 0.4712265729904175, "learning_rate": 8e-05, "loss": 1.771, "step": 2166 }, { "epoch": 0.3699846337715554, "grad_norm": 0.44362571835517883, "learning_rate": 8e-05, "loss": 1.599, "step": 2167 }, { "epoch": 0.37015536964316204, "grad_norm": 0.43453508615493774, "learning_rate": 8e-05, "loss": 1.5388, "step": 2168 }, { "epoch": 0.37032610551476863, "grad_norm": 0.48891186714172363, "learning_rate": 8e-05, "loss": 1.7567, "step": 2169 }, { "epoch": 0.3704968413863753, "grad_norm": 0.5203781127929688, "learning_rate": 8e-05, "loss": 1.9089, "step": 2170 }, { "epoch": 0.3706675772579819, "grad_norm": 0.4672975242137909, "learning_rate": 8e-05, "loss": 1.7059, "step": 2171 }, { "epoch": 0.37083831312958854, "grad_norm": 0.4498775005340576, "learning_rate": 8e-05, "loss": 1.5035, "step": 2172 }, { "epoch": 0.37100904900119513, "grad_norm": 0.44661301374435425, "learning_rate": 8e-05, "loss": 1.665, "step": 2173 }, { "epoch": 0.3711797848728018, "grad_norm": 0.48426178097724915, "learning_rate": 8e-05, "loss": 1.7891, "step": 2174 }, { "epoch": 0.3713505207444084, "grad_norm": 0.5056416392326355, "learning_rate": 8e-05, "loss": 1.5971, "step": 2175 }, { "epoch": 0.37152125661601504, "grad_norm": 0.4860306680202484, "learning_rate": 8e-05, "loss": 1.8071, "step": 2176 }, { "epoch": 0.37169199248762164, "grad_norm": 0.46264076232910156, "learning_rate": 8e-05, "loss": 1.8374, "step": 2177 }, { "epoch": 0.3718627283592283, "grad_norm": 0.4609784185886383, "learning_rate": 8e-05, "loss": 1.6338, "step": 2178 }, { "epoch": 0.3720334642308349, "grad_norm": 0.46557772159576416, "learning_rate": 8e-05, "loss": 1.6892, "step": 2179 }, { "epoch": 0.37220420010244154, "grad_norm": 0.453555703163147, "learning_rate": 8e-05, "loss": 1.6319, "step": 2180 }, { "epoch": 0.37237493597404814, "grad_norm": 0.4766259491443634, "learning_rate": 8e-05, "loss": 1.6354, "step": 2181 }, { "epoch": 0.3725456718456548, "grad_norm": 0.4774046540260315, "learning_rate": 8e-05, "loss": 1.7506, "step": 2182 }, { "epoch": 0.3727164077172614, "grad_norm": 0.47020336985588074, "learning_rate": 8e-05, "loss": 1.7449, "step": 2183 }, { "epoch": 0.37288714358886804, "grad_norm": 0.4876503348350525, "learning_rate": 8e-05, "loss": 1.8141, "step": 2184 }, { "epoch": 0.37305787946047464, "grad_norm": 0.46830543875694275, "learning_rate": 8e-05, "loss": 1.8135, "step": 2185 }, { "epoch": 0.3732286153320813, "grad_norm": 0.49408724904060364, "learning_rate": 8e-05, "loss": 1.9158, "step": 2186 }, { "epoch": 0.3733993512036879, "grad_norm": 0.44862988591194153, "learning_rate": 8e-05, "loss": 1.7018, "step": 2187 }, { "epoch": 0.37357008707529454, "grad_norm": 0.4228358864784241, "learning_rate": 8e-05, "loss": 1.4299, "step": 2188 }, { "epoch": 0.37374082294690114, "grad_norm": 0.44844284653663635, "learning_rate": 8e-05, "loss": 1.5747, "step": 2189 }, { "epoch": 0.3739115588185078, "grad_norm": 0.47521308064460754, "learning_rate": 8e-05, "loss": 1.7932, "step": 2190 }, { "epoch": 0.3740822946901144, "grad_norm": 0.46987345814704895, "learning_rate": 8e-05, "loss": 1.6413, "step": 2191 }, { "epoch": 0.37425303056172105, "grad_norm": 0.46471959352493286, "learning_rate": 8e-05, "loss": 1.7161, "step": 2192 }, { "epoch": 0.37442376643332764, "grad_norm": 0.45306211709976196, "learning_rate": 8e-05, "loss": 1.7261, "step": 2193 }, { "epoch": 0.37459450230493424, "grad_norm": 0.4318316876888275, "learning_rate": 8e-05, "loss": 1.6913, "step": 2194 }, { "epoch": 0.3747652381765409, "grad_norm": 0.43113699555397034, "learning_rate": 8e-05, "loss": 1.6162, "step": 2195 }, { "epoch": 0.3749359740481475, "grad_norm": 0.4645802080631256, "learning_rate": 8e-05, "loss": 1.7818, "step": 2196 }, { "epoch": 0.37510670991975414, "grad_norm": 0.48650550842285156, "learning_rate": 8e-05, "loss": 1.8632, "step": 2197 }, { "epoch": 0.37527744579136074, "grad_norm": 0.44973066449165344, "learning_rate": 8e-05, "loss": 1.6775, "step": 2198 }, { "epoch": 0.3754481816629674, "grad_norm": 0.4729710817337036, "learning_rate": 8e-05, "loss": 1.7255, "step": 2199 }, { "epoch": 0.375618917534574, "grad_norm": 0.473898708820343, "learning_rate": 8e-05, "loss": 1.615, "step": 2200 }, { "epoch": 0.37578965340618065, "grad_norm": 0.5298947095870972, "learning_rate": 8e-05, "loss": 1.9143, "step": 2201 }, { "epoch": 0.37596038927778724, "grad_norm": 0.4445017874240875, "learning_rate": 8e-05, "loss": 1.7358, "step": 2202 }, { "epoch": 0.3761311251493939, "grad_norm": 0.46980345249176025, "learning_rate": 8e-05, "loss": 1.7468, "step": 2203 }, { "epoch": 0.3763018610210005, "grad_norm": 0.4911816716194153, "learning_rate": 8e-05, "loss": 1.7605, "step": 2204 }, { "epoch": 0.37647259689260715, "grad_norm": 0.443610280752182, "learning_rate": 8e-05, "loss": 1.7191, "step": 2205 }, { "epoch": 0.37664333276421375, "grad_norm": 0.4507425129413605, "learning_rate": 8e-05, "loss": 1.661, "step": 2206 }, { "epoch": 0.3768140686358204, "grad_norm": 0.5105360746383667, "learning_rate": 8e-05, "loss": 1.6846, "step": 2207 }, { "epoch": 0.376984804507427, "grad_norm": 0.4901541769504547, "learning_rate": 8e-05, "loss": 1.6694, "step": 2208 }, { "epoch": 0.37715554037903365, "grad_norm": 0.4679583013057709, "learning_rate": 8e-05, "loss": 1.6698, "step": 2209 }, { "epoch": 0.37732627625064025, "grad_norm": 0.47855886816978455, "learning_rate": 8e-05, "loss": 1.5646, "step": 2210 }, { "epoch": 0.3774970121222469, "grad_norm": 0.4574141800403595, "learning_rate": 8e-05, "loss": 1.7724, "step": 2211 }, { "epoch": 0.3776677479938535, "grad_norm": 0.42197853326797485, "learning_rate": 8e-05, "loss": 1.5039, "step": 2212 }, { "epoch": 0.37783848386546015, "grad_norm": 0.4922475218772888, "learning_rate": 8e-05, "loss": 1.8386, "step": 2213 }, { "epoch": 0.37800921973706675, "grad_norm": 0.5075274109840393, "learning_rate": 8e-05, "loss": 2.0517, "step": 2214 }, { "epoch": 0.3781799556086734, "grad_norm": 0.4860456585884094, "learning_rate": 8e-05, "loss": 1.8805, "step": 2215 }, { "epoch": 0.37835069148028, "grad_norm": 0.4568541944026947, "learning_rate": 8e-05, "loss": 1.6369, "step": 2216 }, { "epoch": 0.37852142735188665, "grad_norm": 0.4860992431640625, "learning_rate": 8e-05, "loss": 1.7754, "step": 2217 }, { "epoch": 0.37869216322349325, "grad_norm": 0.44778692722320557, "learning_rate": 8e-05, "loss": 1.6627, "step": 2218 }, { "epoch": 0.3788628990950999, "grad_norm": 0.4703764021396637, "learning_rate": 8e-05, "loss": 1.7187, "step": 2219 }, { "epoch": 0.3790336349667065, "grad_norm": 0.4774754047393799, "learning_rate": 8e-05, "loss": 1.743, "step": 2220 }, { "epoch": 0.37920437083831315, "grad_norm": 0.4544217586517334, "learning_rate": 8e-05, "loss": 1.6951, "step": 2221 }, { "epoch": 0.37937510670991975, "grad_norm": 0.47054651379585266, "learning_rate": 8e-05, "loss": 1.7165, "step": 2222 }, { "epoch": 0.3795458425815264, "grad_norm": 0.48552796244621277, "learning_rate": 8e-05, "loss": 1.7425, "step": 2223 }, { "epoch": 0.379716578453133, "grad_norm": 0.4940098226070404, "learning_rate": 8e-05, "loss": 1.7307, "step": 2224 }, { "epoch": 0.37988731432473966, "grad_norm": 0.48313432931900024, "learning_rate": 8e-05, "loss": 1.625, "step": 2225 }, { "epoch": 0.38005805019634625, "grad_norm": 0.44298431277275085, "learning_rate": 8e-05, "loss": 1.6716, "step": 2226 }, { "epoch": 0.38022878606795285, "grad_norm": 0.43253129720687866, "learning_rate": 8e-05, "loss": 1.669, "step": 2227 }, { "epoch": 0.3803995219395595, "grad_norm": 0.44669586420059204, "learning_rate": 8e-05, "loss": 1.8026, "step": 2228 }, { "epoch": 0.3805702578111661, "grad_norm": 0.46050313115119934, "learning_rate": 8e-05, "loss": 1.7214, "step": 2229 }, { "epoch": 0.38074099368277275, "grad_norm": 0.43383511900901794, "learning_rate": 8e-05, "loss": 1.7075, "step": 2230 }, { "epoch": 0.38091172955437935, "grad_norm": 0.4777047336101532, "learning_rate": 8e-05, "loss": 1.751, "step": 2231 }, { "epoch": 0.381082465425986, "grad_norm": 0.4529365599155426, "learning_rate": 8e-05, "loss": 1.7029, "step": 2232 }, { "epoch": 0.3812532012975926, "grad_norm": 0.46839866042137146, "learning_rate": 8e-05, "loss": 1.6745, "step": 2233 }, { "epoch": 0.38142393716919926, "grad_norm": 0.45535141229629517, "learning_rate": 8e-05, "loss": 1.7383, "step": 2234 }, { "epoch": 0.38159467304080585, "grad_norm": 0.5015166997909546, "learning_rate": 8e-05, "loss": 1.8598, "step": 2235 }, { "epoch": 0.3817654089124125, "grad_norm": 0.46225833892822266, "learning_rate": 8e-05, "loss": 1.6186, "step": 2236 }, { "epoch": 0.3819361447840191, "grad_norm": 0.43935152888298035, "learning_rate": 8e-05, "loss": 1.6029, "step": 2237 }, { "epoch": 0.38210688065562576, "grad_norm": 0.4762844741344452, "learning_rate": 8e-05, "loss": 1.6771, "step": 2238 }, { "epoch": 0.38227761652723236, "grad_norm": 0.4768235981464386, "learning_rate": 8e-05, "loss": 1.6911, "step": 2239 }, { "epoch": 0.382448352398839, "grad_norm": 0.47921988368034363, "learning_rate": 8e-05, "loss": 1.7695, "step": 2240 }, { "epoch": 0.3826190882704456, "grad_norm": 0.4591425359249115, "learning_rate": 8e-05, "loss": 1.7324, "step": 2241 }, { "epoch": 0.38278982414205226, "grad_norm": 0.4577582776546478, "learning_rate": 8e-05, "loss": 1.6016, "step": 2242 }, { "epoch": 0.38296056001365886, "grad_norm": 0.4952070415019989, "learning_rate": 8e-05, "loss": 1.8072, "step": 2243 }, { "epoch": 0.3831312958852655, "grad_norm": 0.4336467981338501, "learning_rate": 8e-05, "loss": 1.608, "step": 2244 }, { "epoch": 0.3833020317568721, "grad_norm": 0.4390071630477905, "learning_rate": 8e-05, "loss": 1.5042, "step": 2245 }, { "epoch": 0.38347276762847876, "grad_norm": 0.5433140993118286, "learning_rate": 8e-05, "loss": 1.8471, "step": 2246 }, { "epoch": 0.38364350350008536, "grad_norm": 0.45021122694015503, "learning_rate": 8e-05, "loss": 1.7977, "step": 2247 }, { "epoch": 0.383814239371692, "grad_norm": 0.4746263325214386, "learning_rate": 8e-05, "loss": 1.5338, "step": 2248 }, { "epoch": 0.3839849752432986, "grad_norm": 0.49180281162261963, "learning_rate": 8e-05, "loss": 1.6543, "step": 2249 }, { "epoch": 0.38415571111490526, "grad_norm": 0.5349181294441223, "learning_rate": 8e-05, "loss": 1.6884, "step": 2250 }, { "epoch": 0.38432644698651186, "grad_norm": 0.5282572507858276, "learning_rate": 8e-05, "loss": 1.7524, "step": 2251 }, { "epoch": 0.3844971828581185, "grad_norm": 0.5093258023262024, "learning_rate": 8e-05, "loss": 1.8023, "step": 2252 }, { "epoch": 0.3846679187297251, "grad_norm": 0.542975664138794, "learning_rate": 8e-05, "loss": 1.6735, "step": 2253 }, { "epoch": 0.38483865460133176, "grad_norm": 0.47355738282203674, "learning_rate": 8e-05, "loss": 1.7043, "step": 2254 }, { "epoch": 0.38500939047293836, "grad_norm": 0.518007755279541, "learning_rate": 8e-05, "loss": 1.7807, "step": 2255 }, { "epoch": 0.385180126344545, "grad_norm": 0.44154292345046997, "learning_rate": 8e-05, "loss": 1.6365, "step": 2256 }, { "epoch": 0.3853508622161516, "grad_norm": 0.454536497592926, "learning_rate": 8e-05, "loss": 1.7524, "step": 2257 }, { "epoch": 0.3855215980877582, "grad_norm": 0.4841240644454956, "learning_rate": 8e-05, "loss": 1.7213, "step": 2258 }, { "epoch": 0.38569233395936486, "grad_norm": 0.46902328729629517, "learning_rate": 8e-05, "loss": 1.7683, "step": 2259 }, { "epoch": 0.38586306983097146, "grad_norm": 0.46030059456825256, "learning_rate": 8e-05, "loss": 1.6619, "step": 2260 }, { "epoch": 0.3860338057025781, "grad_norm": 0.47862303256988525, "learning_rate": 8e-05, "loss": 1.7222, "step": 2261 }, { "epoch": 0.3862045415741847, "grad_norm": 0.4619601368904114, "learning_rate": 8e-05, "loss": 1.596, "step": 2262 }, { "epoch": 0.38637527744579137, "grad_norm": 0.479616641998291, "learning_rate": 8e-05, "loss": 1.8965, "step": 2263 }, { "epoch": 0.38654601331739796, "grad_norm": 0.5461448431015015, "learning_rate": 8e-05, "loss": 1.9919, "step": 2264 }, { "epoch": 0.3867167491890046, "grad_norm": 0.49272844195365906, "learning_rate": 8e-05, "loss": 1.6721, "step": 2265 }, { "epoch": 0.3868874850606112, "grad_norm": 0.502036988735199, "learning_rate": 8e-05, "loss": 1.6698, "step": 2266 }, { "epoch": 0.38705822093221787, "grad_norm": 0.47350507974624634, "learning_rate": 8e-05, "loss": 1.6342, "step": 2267 }, { "epoch": 0.38722895680382446, "grad_norm": 0.48932456970214844, "learning_rate": 8e-05, "loss": 1.8031, "step": 2268 }, { "epoch": 0.3873996926754311, "grad_norm": 0.5353527665138245, "learning_rate": 8e-05, "loss": 1.7954, "step": 2269 }, { "epoch": 0.3875704285470377, "grad_norm": 0.43036189675331116, "learning_rate": 8e-05, "loss": 1.5729, "step": 2270 }, { "epoch": 0.38774116441864437, "grad_norm": 0.4510219991207123, "learning_rate": 8e-05, "loss": 1.5484, "step": 2271 }, { "epoch": 0.38791190029025097, "grad_norm": 0.5005018711090088, "learning_rate": 8e-05, "loss": 1.8026, "step": 2272 }, { "epoch": 0.3880826361618576, "grad_norm": 0.45497211813926697, "learning_rate": 8e-05, "loss": 1.7782, "step": 2273 }, { "epoch": 0.3882533720334642, "grad_norm": 0.46395987272262573, "learning_rate": 8e-05, "loss": 1.5993, "step": 2274 }, { "epoch": 0.38842410790507087, "grad_norm": 0.45788806676864624, "learning_rate": 8e-05, "loss": 1.6234, "step": 2275 }, { "epoch": 0.38859484377667747, "grad_norm": 0.47536569833755493, "learning_rate": 8e-05, "loss": 1.6437, "step": 2276 }, { "epoch": 0.3887655796482841, "grad_norm": 0.4604268968105316, "learning_rate": 8e-05, "loss": 1.6691, "step": 2277 }, { "epoch": 0.3889363155198907, "grad_norm": 0.48708033561706543, "learning_rate": 8e-05, "loss": 1.8975, "step": 2278 }, { "epoch": 0.38910705139149737, "grad_norm": 0.5097882747650146, "learning_rate": 8e-05, "loss": 1.6011, "step": 2279 }, { "epoch": 0.38927778726310397, "grad_norm": 0.4617132842540741, "learning_rate": 8e-05, "loss": 1.6564, "step": 2280 }, { "epoch": 0.3894485231347106, "grad_norm": 0.4574528932571411, "learning_rate": 8e-05, "loss": 1.579, "step": 2281 }, { "epoch": 0.3896192590063172, "grad_norm": 0.4873175323009491, "learning_rate": 8e-05, "loss": 1.8923, "step": 2282 }, { "epoch": 0.3897899948779239, "grad_norm": 0.4734964966773987, "learning_rate": 8e-05, "loss": 1.631, "step": 2283 }, { "epoch": 0.38996073074953047, "grad_norm": 0.5059224963188171, "learning_rate": 8e-05, "loss": 1.6465, "step": 2284 }, { "epoch": 0.3901314666211371, "grad_norm": 0.5854185223579407, "learning_rate": 8e-05, "loss": 1.7571, "step": 2285 }, { "epoch": 0.3903022024927437, "grad_norm": 0.4823521375656128, "learning_rate": 8e-05, "loss": 1.7398, "step": 2286 }, { "epoch": 0.3904729383643504, "grad_norm": 0.45883554220199585, "learning_rate": 8e-05, "loss": 1.578, "step": 2287 }, { "epoch": 0.390643674235957, "grad_norm": 0.4434313178062439, "learning_rate": 8e-05, "loss": 1.7075, "step": 2288 }, { "epoch": 0.3908144101075636, "grad_norm": 0.4375695586204529, "learning_rate": 8e-05, "loss": 1.6622, "step": 2289 }, { "epoch": 0.3909851459791702, "grad_norm": 0.5070034265518188, "learning_rate": 8e-05, "loss": 1.6674, "step": 2290 }, { "epoch": 0.3911558818507768, "grad_norm": 0.46279817819595337, "learning_rate": 8e-05, "loss": 1.6664, "step": 2291 }, { "epoch": 0.3913266177223835, "grad_norm": 0.45672035217285156, "learning_rate": 8e-05, "loss": 1.7543, "step": 2292 }, { "epoch": 0.39149735359399007, "grad_norm": 0.45188599824905396, "learning_rate": 8e-05, "loss": 1.6476, "step": 2293 }, { "epoch": 0.3916680894655967, "grad_norm": 0.47473978996276855, "learning_rate": 8e-05, "loss": 1.5415, "step": 2294 }, { "epoch": 0.3918388253372033, "grad_norm": 0.4396112859249115, "learning_rate": 8e-05, "loss": 1.58, "step": 2295 }, { "epoch": 0.39200956120881, "grad_norm": 0.4621641933917999, "learning_rate": 8e-05, "loss": 1.6639, "step": 2296 }, { "epoch": 0.3921802970804166, "grad_norm": 0.4543353319168091, "learning_rate": 8e-05, "loss": 1.55, "step": 2297 }, { "epoch": 0.3923510329520232, "grad_norm": 0.45058169960975647, "learning_rate": 8e-05, "loss": 1.7234, "step": 2298 }, { "epoch": 0.3925217688236298, "grad_norm": 0.4526669681072235, "learning_rate": 8e-05, "loss": 1.7537, "step": 2299 }, { "epoch": 0.3926925046952365, "grad_norm": 0.45522740483283997, "learning_rate": 8e-05, "loss": 1.6903, "step": 2300 }, { "epoch": 0.3928632405668431, "grad_norm": 0.4369031488895416, "learning_rate": 8e-05, "loss": 1.6126, "step": 2301 }, { "epoch": 0.39303397643844973, "grad_norm": 0.4772341549396515, "learning_rate": 8e-05, "loss": 1.6844, "step": 2302 }, { "epoch": 0.3932047123100563, "grad_norm": 0.4508981704711914, "learning_rate": 8e-05, "loss": 1.5649, "step": 2303 }, { "epoch": 0.393375448181663, "grad_norm": 0.4735075831413269, "learning_rate": 8e-05, "loss": 1.8678, "step": 2304 }, { "epoch": 0.3935461840532696, "grad_norm": 0.5015310645103455, "learning_rate": 8e-05, "loss": 1.6575, "step": 2305 }, { "epoch": 0.39371691992487623, "grad_norm": 0.517305314540863, "learning_rate": 8e-05, "loss": 1.9168, "step": 2306 }, { "epoch": 0.3938876557964828, "grad_norm": 0.439157098531723, "learning_rate": 8e-05, "loss": 1.5875, "step": 2307 }, { "epoch": 0.3940583916680895, "grad_norm": 0.4632413685321808, "learning_rate": 8e-05, "loss": 1.7024, "step": 2308 }, { "epoch": 0.3942291275396961, "grad_norm": 0.47725045680999756, "learning_rate": 8e-05, "loss": 1.6734, "step": 2309 }, { "epoch": 0.39439986341130273, "grad_norm": 0.4502771198749542, "learning_rate": 8e-05, "loss": 1.7736, "step": 2310 }, { "epoch": 0.39457059928290933, "grad_norm": 0.4577861428260803, "learning_rate": 8e-05, "loss": 1.7595, "step": 2311 }, { "epoch": 0.394741335154516, "grad_norm": 0.4683159291744232, "learning_rate": 8e-05, "loss": 1.7045, "step": 2312 }, { "epoch": 0.3949120710261226, "grad_norm": 0.4555746614933014, "learning_rate": 8e-05, "loss": 1.6003, "step": 2313 }, { "epoch": 0.39508280689772923, "grad_norm": 0.45230746269226074, "learning_rate": 8e-05, "loss": 1.6817, "step": 2314 }, { "epoch": 0.39525354276933583, "grad_norm": 0.4594647288322449, "learning_rate": 8e-05, "loss": 1.4721, "step": 2315 }, { "epoch": 0.3954242786409425, "grad_norm": 0.4737915098667145, "learning_rate": 8e-05, "loss": 1.6688, "step": 2316 }, { "epoch": 0.3955950145125491, "grad_norm": 0.4650702476501465, "learning_rate": 8e-05, "loss": 1.5185, "step": 2317 }, { "epoch": 0.39576575038415573, "grad_norm": 0.44668570160865784, "learning_rate": 8e-05, "loss": 1.6343, "step": 2318 }, { "epoch": 0.39593648625576233, "grad_norm": 0.4384133219718933, "learning_rate": 8e-05, "loss": 1.6152, "step": 2319 }, { "epoch": 0.396107222127369, "grad_norm": 0.4560842514038086, "learning_rate": 8e-05, "loss": 1.5416, "step": 2320 }, { "epoch": 0.3962779579989756, "grad_norm": 0.4852603077888489, "learning_rate": 8e-05, "loss": 1.8094, "step": 2321 }, { "epoch": 0.39644869387058224, "grad_norm": 0.4547492265701294, "learning_rate": 8e-05, "loss": 1.746, "step": 2322 }, { "epoch": 0.39661942974218883, "grad_norm": 0.46903127431869507, "learning_rate": 8e-05, "loss": 1.8252, "step": 2323 }, { "epoch": 0.39679016561379543, "grad_norm": 0.4376675486564636, "learning_rate": 8e-05, "loss": 1.6379, "step": 2324 }, { "epoch": 0.3969609014854021, "grad_norm": 0.4405995309352875, "learning_rate": 8e-05, "loss": 1.6539, "step": 2325 }, { "epoch": 0.3971316373570087, "grad_norm": 0.4351668357849121, "learning_rate": 8e-05, "loss": 1.5497, "step": 2326 }, { "epoch": 0.39730237322861534, "grad_norm": 0.46969151496887207, "learning_rate": 8e-05, "loss": 1.7055, "step": 2327 }, { "epoch": 0.39747310910022193, "grad_norm": 0.4406990706920624, "learning_rate": 8e-05, "loss": 1.7, "step": 2328 }, { "epoch": 0.3976438449718286, "grad_norm": 0.47240373492240906, "learning_rate": 8e-05, "loss": 1.6698, "step": 2329 }, { "epoch": 0.3978145808434352, "grad_norm": 0.44875645637512207, "learning_rate": 8e-05, "loss": 1.7394, "step": 2330 }, { "epoch": 0.39798531671504184, "grad_norm": 0.491071492433548, "learning_rate": 8e-05, "loss": 1.7356, "step": 2331 }, { "epoch": 0.39815605258664843, "grad_norm": 0.5017666220664978, "learning_rate": 8e-05, "loss": 1.6414, "step": 2332 }, { "epoch": 0.3983267884582551, "grad_norm": 0.448062002658844, "learning_rate": 8e-05, "loss": 1.703, "step": 2333 }, { "epoch": 0.3984975243298617, "grad_norm": 0.48237138986587524, "learning_rate": 8e-05, "loss": 1.9062, "step": 2334 }, { "epoch": 0.39866826020146834, "grad_norm": 0.4614006578922272, "learning_rate": 8e-05, "loss": 1.6233, "step": 2335 }, { "epoch": 0.39883899607307494, "grad_norm": 0.4894557595252991, "learning_rate": 8e-05, "loss": 1.7653, "step": 2336 }, { "epoch": 0.3990097319446816, "grad_norm": 0.47922804951667786, "learning_rate": 8e-05, "loss": 1.8291, "step": 2337 }, { "epoch": 0.3991804678162882, "grad_norm": 0.45868000388145447, "learning_rate": 8e-05, "loss": 1.7211, "step": 2338 }, { "epoch": 0.39935120368789484, "grad_norm": 0.46839776635169983, "learning_rate": 8e-05, "loss": 1.7718, "step": 2339 }, { "epoch": 0.39952193955950144, "grad_norm": 0.4678306579589844, "learning_rate": 8e-05, "loss": 1.6282, "step": 2340 }, { "epoch": 0.3996926754311081, "grad_norm": 0.47147417068481445, "learning_rate": 8e-05, "loss": 1.6856, "step": 2341 }, { "epoch": 0.3998634113027147, "grad_norm": 0.43597811460494995, "learning_rate": 8e-05, "loss": 1.6601, "step": 2342 }, { "epoch": 0.40003414717432134, "grad_norm": 0.46502169966697693, "learning_rate": 8e-05, "loss": 1.7955, "step": 2343 }, { "epoch": 0.40020488304592794, "grad_norm": 0.49708986282348633, "learning_rate": 8e-05, "loss": 1.6977, "step": 2344 }, { "epoch": 0.4003756189175346, "grad_norm": 0.4478561282157898, "learning_rate": 8e-05, "loss": 1.6545, "step": 2345 }, { "epoch": 0.4005463547891412, "grad_norm": 0.47932279109954834, "learning_rate": 8e-05, "loss": 1.6996, "step": 2346 }, { "epoch": 0.40071709066074784, "grad_norm": 0.45497530698776245, "learning_rate": 8e-05, "loss": 1.5608, "step": 2347 }, { "epoch": 0.40088782653235444, "grad_norm": 0.4569617211818695, "learning_rate": 8e-05, "loss": 1.6726, "step": 2348 }, { "epoch": 0.4010585624039611, "grad_norm": 0.44305914640426636, "learning_rate": 8e-05, "loss": 1.6173, "step": 2349 }, { "epoch": 0.4012292982755677, "grad_norm": 0.482025146484375, "learning_rate": 8e-05, "loss": 1.7045, "step": 2350 }, { "epoch": 0.40140003414717434, "grad_norm": 0.4626065790653229, "learning_rate": 8e-05, "loss": 1.6774, "step": 2351 }, { "epoch": 0.40157077001878094, "grad_norm": 0.4451853334903717, "learning_rate": 8e-05, "loss": 1.6051, "step": 2352 }, { "epoch": 0.4017415058903876, "grad_norm": 0.4693995416164398, "learning_rate": 8e-05, "loss": 1.6684, "step": 2353 }, { "epoch": 0.4019122417619942, "grad_norm": 0.45117267966270447, "learning_rate": 8e-05, "loss": 1.6172, "step": 2354 }, { "epoch": 0.40208297763360085, "grad_norm": 0.46482011675834656, "learning_rate": 8e-05, "loss": 1.7356, "step": 2355 }, { "epoch": 0.40225371350520744, "grad_norm": 0.5327745676040649, "learning_rate": 8e-05, "loss": 1.6415, "step": 2356 }, { "epoch": 0.40242444937681404, "grad_norm": 0.4677508771419525, "learning_rate": 8e-05, "loss": 1.6083, "step": 2357 }, { "epoch": 0.4025951852484207, "grad_norm": 0.45142123103141785, "learning_rate": 8e-05, "loss": 1.746, "step": 2358 }, { "epoch": 0.4027659211200273, "grad_norm": 0.4836920201778412, "learning_rate": 8e-05, "loss": 1.766, "step": 2359 }, { "epoch": 0.40293665699163395, "grad_norm": 0.4637686610221863, "learning_rate": 8e-05, "loss": 1.6824, "step": 2360 }, { "epoch": 0.40310739286324054, "grad_norm": 0.479793518781662, "learning_rate": 8e-05, "loss": 1.8347, "step": 2361 }, { "epoch": 0.4032781287348472, "grad_norm": 0.48406270146369934, "learning_rate": 8e-05, "loss": 1.844, "step": 2362 }, { "epoch": 0.4034488646064538, "grad_norm": 0.43597492575645447, "learning_rate": 8e-05, "loss": 1.4633, "step": 2363 }, { "epoch": 0.40361960047806045, "grad_norm": 0.44741836190223694, "learning_rate": 8e-05, "loss": 1.6305, "step": 2364 }, { "epoch": 0.40379033634966705, "grad_norm": 0.48834720253944397, "learning_rate": 8e-05, "loss": 1.3532, "step": 2365 }, { "epoch": 0.4039610722212737, "grad_norm": 0.473214328289032, "learning_rate": 8e-05, "loss": 1.65, "step": 2366 }, { "epoch": 0.4041318080928803, "grad_norm": 0.46694549918174744, "learning_rate": 8e-05, "loss": 1.6074, "step": 2367 }, { "epoch": 0.40430254396448695, "grad_norm": 0.4785877466201782, "learning_rate": 8e-05, "loss": 1.6791, "step": 2368 }, { "epoch": 0.40447327983609355, "grad_norm": 0.4516986906528473, "learning_rate": 8e-05, "loss": 1.6428, "step": 2369 }, { "epoch": 0.4046440157077002, "grad_norm": 0.4655621647834778, "learning_rate": 8e-05, "loss": 1.6175, "step": 2370 }, { "epoch": 0.4048147515793068, "grad_norm": 0.5451213121414185, "learning_rate": 8e-05, "loss": 1.824, "step": 2371 }, { "epoch": 0.40498548745091345, "grad_norm": 0.5149049162864685, "learning_rate": 8e-05, "loss": 1.9638, "step": 2372 }, { "epoch": 0.40515622332252005, "grad_norm": 0.4479779303073883, "learning_rate": 8e-05, "loss": 1.7117, "step": 2373 }, { "epoch": 0.4053269591941267, "grad_norm": 0.45145079493522644, "learning_rate": 8e-05, "loss": 1.4858, "step": 2374 }, { "epoch": 0.4054976950657333, "grad_norm": 0.4581349492073059, "learning_rate": 8e-05, "loss": 1.5855, "step": 2375 }, { "epoch": 0.40566843093733995, "grad_norm": 0.4610206186771393, "learning_rate": 8e-05, "loss": 1.7368, "step": 2376 }, { "epoch": 0.40583916680894655, "grad_norm": 0.48893827199935913, "learning_rate": 8e-05, "loss": 1.8706, "step": 2377 }, { "epoch": 0.4060099026805532, "grad_norm": 0.4898843765258789, "learning_rate": 8e-05, "loss": 1.7496, "step": 2378 }, { "epoch": 0.4061806385521598, "grad_norm": 0.4753839373588562, "learning_rate": 8e-05, "loss": 1.7086, "step": 2379 }, { "epoch": 0.40635137442376645, "grad_norm": 0.501255452632904, "learning_rate": 8e-05, "loss": 1.6921, "step": 2380 }, { "epoch": 0.40652211029537305, "grad_norm": 0.4712652862071991, "learning_rate": 8e-05, "loss": 1.6947, "step": 2381 }, { "epoch": 0.4066928461669797, "grad_norm": 0.47737812995910645, "learning_rate": 8e-05, "loss": 1.7477, "step": 2382 }, { "epoch": 0.4068635820385863, "grad_norm": 0.4665011465549469, "learning_rate": 8e-05, "loss": 1.6632, "step": 2383 }, { "epoch": 0.40703431791019296, "grad_norm": 0.4440247416496277, "learning_rate": 8e-05, "loss": 1.6743, "step": 2384 }, { "epoch": 0.40720505378179955, "grad_norm": 0.465071439743042, "learning_rate": 8e-05, "loss": 1.8269, "step": 2385 }, { "epoch": 0.4073757896534062, "grad_norm": 0.46801233291625977, "learning_rate": 8e-05, "loss": 1.7362, "step": 2386 }, { "epoch": 0.4075465255250128, "grad_norm": 0.4623088836669922, "learning_rate": 8e-05, "loss": 1.8218, "step": 2387 }, { "epoch": 0.40771726139661946, "grad_norm": 0.45201531052589417, "learning_rate": 8e-05, "loss": 1.6585, "step": 2388 }, { "epoch": 0.40788799726822605, "grad_norm": 0.45731353759765625, "learning_rate": 8e-05, "loss": 1.6594, "step": 2389 }, { "epoch": 0.40805873313983265, "grad_norm": 0.4371221363544464, "learning_rate": 8e-05, "loss": 1.6544, "step": 2390 }, { "epoch": 0.4082294690114393, "grad_norm": 0.47018468379974365, "learning_rate": 8e-05, "loss": 1.68, "step": 2391 }, { "epoch": 0.4084002048830459, "grad_norm": 0.43194684386253357, "learning_rate": 8e-05, "loss": 1.5582, "step": 2392 }, { "epoch": 0.40857094075465256, "grad_norm": 0.47571560740470886, "learning_rate": 8e-05, "loss": 1.8085, "step": 2393 }, { "epoch": 0.40874167662625915, "grad_norm": 0.462466835975647, "learning_rate": 8e-05, "loss": 1.5882, "step": 2394 }, { "epoch": 0.4089124124978658, "grad_norm": 0.5047008991241455, "learning_rate": 8e-05, "loss": 1.6305, "step": 2395 }, { "epoch": 0.4090831483694724, "grad_norm": 0.45475438237190247, "learning_rate": 8e-05, "loss": 1.7386, "step": 2396 }, { "epoch": 0.40925388424107906, "grad_norm": 0.443965345621109, "learning_rate": 8e-05, "loss": 1.6976, "step": 2397 }, { "epoch": 0.40942462011268566, "grad_norm": 0.42566514015197754, "learning_rate": 8e-05, "loss": 1.5794, "step": 2398 }, { "epoch": 0.4095953559842923, "grad_norm": 0.4510253667831421, "learning_rate": 8e-05, "loss": 1.5904, "step": 2399 }, { "epoch": 0.4097660918558989, "grad_norm": 0.45022380352020264, "learning_rate": 8e-05, "loss": 1.6574, "step": 2400 }, { "epoch": 0.40993682772750556, "grad_norm": 0.46305152773857117, "learning_rate": 8e-05, "loss": 1.468, "step": 2401 }, { "epoch": 0.41010756359911216, "grad_norm": 0.492990642786026, "learning_rate": 8e-05, "loss": 1.8111, "step": 2402 }, { "epoch": 0.4102782994707188, "grad_norm": 0.5312866568565369, "learning_rate": 8e-05, "loss": 1.613, "step": 2403 }, { "epoch": 0.4104490353423254, "grad_norm": 0.4558365046977997, "learning_rate": 8e-05, "loss": 1.7498, "step": 2404 }, { "epoch": 0.41061977121393206, "grad_norm": 0.44643473625183105, "learning_rate": 8e-05, "loss": 1.5126, "step": 2405 }, { "epoch": 0.41079050708553866, "grad_norm": 0.47051578760147095, "learning_rate": 8e-05, "loss": 1.6129, "step": 2406 }, { "epoch": 0.4109612429571453, "grad_norm": 0.4359566867351532, "learning_rate": 8e-05, "loss": 1.597, "step": 2407 }, { "epoch": 0.4111319788287519, "grad_norm": 0.4677959382534027, "learning_rate": 8e-05, "loss": 1.6697, "step": 2408 }, { "epoch": 0.41130271470035856, "grad_norm": 0.45164528489112854, "learning_rate": 8e-05, "loss": 1.6823, "step": 2409 }, { "epoch": 0.41147345057196516, "grad_norm": 0.4749421775341034, "learning_rate": 8e-05, "loss": 1.5215, "step": 2410 }, { "epoch": 0.4116441864435718, "grad_norm": 0.4616687297821045, "learning_rate": 8e-05, "loss": 1.6623, "step": 2411 }, { "epoch": 0.4118149223151784, "grad_norm": 0.45999908447265625, "learning_rate": 8e-05, "loss": 1.7641, "step": 2412 }, { "epoch": 0.41198565818678506, "grad_norm": 0.4851484000682831, "learning_rate": 8e-05, "loss": 1.6033, "step": 2413 }, { "epoch": 0.41215639405839166, "grad_norm": 0.4461384117603302, "learning_rate": 8e-05, "loss": 1.4872, "step": 2414 }, { "epoch": 0.4123271299299983, "grad_norm": 0.455317884683609, "learning_rate": 8e-05, "loss": 1.7001, "step": 2415 }, { "epoch": 0.4124978658016049, "grad_norm": 0.4651748239994049, "learning_rate": 8e-05, "loss": 1.7644, "step": 2416 }, { "epoch": 0.41266860167321157, "grad_norm": 0.4464469850063324, "learning_rate": 8e-05, "loss": 1.5342, "step": 2417 }, { "epoch": 0.41283933754481816, "grad_norm": 0.4724650979042053, "learning_rate": 8e-05, "loss": 1.7436, "step": 2418 }, { "epoch": 0.4130100734164248, "grad_norm": 0.5463894009590149, "learning_rate": 8e-05, "loss": 1.9823, "step": 2419 }, { "epoch": 0.4131808092880314, "grad_norm": 0.4730560779571533, "learning_rate": 8e-05, "loss": 1.7053, "step": 2420 }, { "epoch": 0.41335154515963807, "grad_norm": 0.4588293731212616, "learning_rate": 8e-05, "loss": 1.6558, "step": 2421 }, { "epoch": 0.41352228103124467, "grad_norm": 0.4567486047744751, "learning_rate": 8e-05, "loss": 1.5668, "step": 2422 }, { "epoch": 0.41369301690285126, "grad_norm": 0.46139177680015564, "learning_rate": 8e-05, "loss": 1.776, "step": 2423 }, { "epoch": 0.4138637527744579, "grad_norm": 0.5327205061912537, "learning_rate": 8e-05, "loss": 1.8589, "step": 2424 }, { "epoch": 0.4140344886460645, "grad_norm": 0.5183351635932922, "learning_rate": 8e-05, "loss": 1.6055, "step": 2425 }, { "epoch": 0.41420522451767117, "grad_norm": 0.4798131585121155, "learning_rate": 8e-05, "loss": 1.7545, "step": 2426 }, { "epoch": 0.41437596038927776, "grad_norm": 0.4512142241001129, "learning_rate": 8e-05, "loss": 1.6989, "step": 2427 }, { "epoch": 0.4145466962608844, "grad_norm": 0.46498796343803406, "learning_rate": 8e-05, "loss": 1.7398, "step": 2428 }, { "epoch": 0.414717432132491, "grad_norm": 0.43480902910232544, "learning_rate": 8e-05, "loss": 1.6513, "step": 2429 }, { "epoch": 0.41488816800409767, "grad_norm": 0.4662635326385498, "learning_rate": 8e-05, "loss": 1.6795, "step": 2430 }, { "epoch": 0.41505890387570427, "grad_norm": 0.46077263355255127, "learning_rate": 8e-05, "loss": 1.5823, "step": 2431 }, { "epoch": 0.4152296397473109, "grad_norm": 0.46118083596229553, "learning_rate": 8e-05, "loss": 1.7091, "step": 2432 }, { "epoch": 0.4154003756189175, "grad_norm": 0.44231101870536804, "learning_rate": 8e-05, "loss": 1.6872, "step": 2433 }, { "epoch": 0.41557111149052417, "grad_norm": 0.4979581832885742, "learning_rate": 8e-05, "loss": 1.8746, "step": 2434 }, { "epoch": 0.41574184736213077, "grad_norm": 0.45096513628959656, "learning_rate": 8e-05, "loss": 1.6162, "step": 2435 }, { "epoch": 0.4159125832337374, "grad_norm": 0.468929260969162, "learning_rate": 8e-05, "loss": 1.7779, "step": 2436 }, { "epoch": 0.416083319105344, "grad_norm": 0.4491850435733795, "learning_rate": 8e-05, "loss": 1.6742, "step": 2437 }, { "epoch": 0.41625405497695067, "grad_norm": 0.47143593430519104, "learning_rate": 8e-05, "loss": 1.6345, "step": 2438 }, { "epoch": 0.41642479084855727, "grad_norm": 0.48872649669647217, "learning_rate": 8e-05, "loss": 1.836, "step": 2439 }, { "epoch": 0.4165955267201639, "grad_norm": 0.4575916528701782, "learning_rate": 8e-05, "loss": 1.7199, "step": 2440 }, { "epoch": 0.4167662625917705, "grad_norm": 0.46970856189727783, "learning_rate": 8e-05, "loss": 1.7096, "step": 2441 }, { "epoch": 0.4169369984633772, "grad_norm": 0.43167564272880554, "learning_rate": 8e-05, "loss": 1.5011, "step": 2442 }, { "epoch": 0.41710773433498377, "grad_norm": 0.4652673006057739, "learning_rate": 8e-05, "loss": 1.7458, "step": 2443 }, { "epoch": 0.4172784702065904, "grad_norm": 0.4916973114013672, "learning_rate": 8e-05, "loss": 1.7861, "step": 2444 }, { "epoch": 0.417449206078197, "grad_norm": 0.47770336270332336, "learning_rate": 8e-05, "loss": 1.4984, "step": 2445 }, { "epoch": 0.4176199419498037, "grad_norm": 0.45609867572784424, "learning_rate": 8e-05, "loss": 1.5316, "step": 2446 }, { "epoch": 0.4177906778214103, "grad_norm": 0.4982661306858063, "learning_rate": 8e-05, "loss": 1.7262, "step": 2447 }, { "epoch": 0.4179614136930169, "grad_norm": 0.46614325046539307, "learning_rate": 8e-05, "loss": 1.6097, "step": 2448 }, { "epoch": 0.4181321495646235, "grad_norm": 0.46124935150146484, "learning_rate": 8e-05, "loss": 1.7835, "step": 2449 }, { "epoch": 0.4183028854362302, "grad_norm": 0.5227929353713989, "learning_rate": 8e-05, "loss": 1.708, "step": 2450 }, { "epoch": 0.4184736213078368, "grad_norm": 0.4872196912765503, "learning_rate": 8e-05, "loss": 1.8558, "step": 2451 }, { "epoch": 0.4186443571794434, "grad_norm": 0.47217321395874023, "learning_rate": 8e-05, "loss": 1.854, "step": 2452 }, { "epoch": 0.41881509305105, "grad_norm": 0.5067691802978516, "learning_rate": 8e-05, "loss": 1.7613, "step": 2453 }, { "epoch": 0.4189858289226567, "grad_norm": 0.4590998589992523, "learning_rate": 8e-05, "loss": 1.6644, "step": 2454 }, { "epoch": 0.4191565647942633, "grad_norm": 0.49835145473480225, "learning_rate": 8e-05, "loss": 1.8867, "step": 2455 }, { "epoch": 0.4193273006658699, "grad_norm": 0.4345166087150574, "learning_rate": 8e-05, "loss": 1.6181, "step": 2456 }, { "epoch": 0.4194980365374765, "grad_norm": 0.4661262333393097, "learning_rate": 8e-05, "loss": 1.6155, "step": 2457 }, { "epoch": 0.4196687724090831, "grad_norm": 0.4628179669380188, "learning_rate": 8e-05, "loss": 1.7093, "step": 2458 }, { "epoch": 0.4198395082806898, "grad_norm": 0.48759785294532776, "learning_rate": 8e-05, "loss": 1.7607, "step": 2459 }, { "epoch": 0.4200102441522964, "grad_norm": 0.4487209618091583, "learning_rate": 8e-05, "loss": 1.6344, "step": 2460 }, { "epoch": 0.42018098002390303, "grad_norm": 0.45797234773635864, "learning_rate": 8e-05, "loss": 1.6617, "step": 2461 }, { "epoch": 0.4203517158955096, "grad_norm": 0.4887731671333313, "learning_rate": 8e-05, "loss": 1.6069, "step": 2462 }, { "epoch": 0.4205224517671163, "grad_norm": 0.47622862458229065, "learning_rate": 8e-05, "loss": 1.7947, "step": 2463 }, { "epoch": 0.4206931876387229, "grad_norm": 0.4990062713623047, "learning_rate": 8e-05, "loss": 1.6147, "step": 2464 }, { "epoch": 0.42086392351032953, "grad_norm": 0.4674553871154785, "learning_rate": 8e-05, "loss": 1.6855, "step": 2465 }, { "epoch": 0.4210346593819361, "grad_norm": 0.45264357328414917, "learning_rate": 8e-05, "loss": 1.7416, "step": 2466 }, { "epoch": 0.4212053952535428, "grad_norm": 0.4907410740852356, "learning_rate": 8e-05, "loss": 1.7917, "step": 2467 }, { "epoch": 0.4213761311251494, "grad_norm": 0.5263738036155701, "learning_rate": 8e-05, "loss": 1.692, "step": 2468 }, { "epoch": 0.42154686699675603, "grad_norm": 0.4644889831542969, "learning_rate": 8e-05, "loss": 1.6159, "step": 2469 }, { "epoch": 0.42171760286836263, "grad_norm": 0.476471483707428, "learning_rate": 8e-05, "loss": 1.6009, "step": 2470 }, { "epoch": 0.4218883387399693, "grad_norm": 0.4561581015586853, "learning_rate": 8e-05, "loss": 1.7035, "step": 2471 }, { "epoch": 0.4220590746115759, "grad_norm": 0.47221919894218445, "learning_rate": 8e-05, "loss": 1.4822, "step": 2472 }, { "epoch": 0.42222981048318253, "grad_norm": 0.49273741245269775, "learning_rate": 8e-05, "loss": 1.8897, "step": 2473 }, { "epoch": 0.42240054635478913, "grad_norm": 0.4465414583683014, "learning_rate": 8e-05, "loss": 1.5857, "step": 2474 }, { "epoch": 0.4225712822263958, "grad_norm": 0.4880433678627014, "learning_rate": 8e-05, "loss": 1.5835, "step": 2475 }, { "epoch": 0.4227420180980024, "grad_norm": 0.4994145929813385, "learning_rate": 8e-05, "loss": 1.6906, "step": 2476 }, { "epoch": 0.42291275396960903, "grad_norm": 0.45598509907722473, "learning_rate": 8e-05, "loss": 1.6402, "step": 2477 }, { "epoch": 0.42308348984121563, "grad_norm": 0.45813074707984924, "learning_rate": 8e-05, "loss": 1.7128, "step": 2478 }, { "epoch": 0.4232542257128223, "grad_norm": 0.4636058807373047, "learning_rate": 8e-05, "loss": 1.7376, "step": 2479 }, { "epoch": 0.4234249615844289, "grad_norm": 0.45494458079338074, "learning_rate": 8e-05, "loss": 1.6831, "step": 2480 }, { "epoch": 0.42359569745603554, "grad_norm": 0.4610213041305542, "learning_rate": 8e-05, "loss": 1.7859, "step": 2481 }, { "epoch": 0.42376643332764213, "grad_norm": 0.4704842269420624, "learning_rate": 8e-05, "loss": 1.6425, "step": 2482 }, { "epoch": 0.4239371691992488, "grad_norm": 0.4808899462223053, "learning_rate": 8e-05, "loss": 1.6728, "step": 2483 }, { "epoch": 0.4241079050708554, "grad_norm": 0.451897531747818, "learning_rate": 8e-05, "loss": 1.7521, "step": 2484 }, { "epoch": 0.42427864094246204, "grad_norm": 0.4692476987838745, "learning_rate": 8e-05, "loss": 1.7537, "step": 2485 }, { "epoch": 0.42444937681406864, "grad_norm": 0.5004152059555054, "learning_rate": 8e-05, "loss": 1.8493, "step": 2486 }, { "epoch": 0.42462011268567523, "grad_norm": 0.4987953007221222, "learning_rate": 8e-05, "loss": 1.6271, "step": 2487 }, { "epoch": 0.4247908485572819, "grad_norm": 0.45357656478881836, "learning_rate": 8e-05, "loss": 1.7088, "step": 2488 }, { "epoch": 0.4249615844288885, "grad_norm": 0.4786326587200165, "learning_rate": 8e-05, "loss": 1.7523, "step": 2489 }, { "epoch": 0.42513232030049514, "grad_norm": 0.4593566060066223, "learning_rate": 8e-05, "loss": 1.8871, "step": 2490 }, { "epoch": 0.42530305617210173, "grad_norm": 0.46692806482315063, "learning_rate": 8e-05, "loss": 1.6876, "step": 2491 }, { "epoch": 0.4254737920437084, "grad_norm": 0.45717862248420715, "learning_rate": 8e-05, "loss": 1.6291, "step": 2492 }, { "epoch": 0.425644527915315, "grad_norm": 0.4787648618221283, "learning_rate": 8e-05, "loss": 1.6951, "step": 2493 }, { "epoch": 0.42581526378692164, "grad_norm": 0.4820668399333954, "learning_rate": 8e-05, "loss": 1.5937, "step": 2494 }, { "epoch": 0.42598599965852824, "grad_norm": 0.4469219744205475, "learning_rate": 8e-05, "loss": 1.8021, "step": 2495 }, { "epoch": 0.4261567355301349, "grad_norm": 0.44716763496398926, "learning_rate": 8e-05, "loss": 1.7182, "step": 2496 }, { "epoch": 0.4263274714017415, "grad_norm": 0.47253647446632385, "learning_rate": 8e-05, "loss": 1.788, "step": 2497 }, { "epoch": 0.42649820727334814, "grad_norm": 0.4480881690979004, "learning_rate": 8e-05, "loss": 1.6965, "step": 2498 }, { "epoch": 0.42666894314495474, "grad_norm": 0.4974229633808136, "learning_rate": 8e-05, "loss": 1.7797, "step": 2499 }, { "epoch": 0.4268396790165614, "grad_norm": 0.4738336205482483, "learning_rate": 8e-05, "loss": 1.6058, "step": 2500 }, { "epoch": 0.427010414888168, "grad_norm": 0.49161669611930847, "learning_rate": 8e-05, "loss": 1.7467, "step": 2501 }, { "epoch": 0.42718115075977464, "grad_norm": 0.45325028896331787, "learning_rate": 8e-05, "loss": 1.7582, "step": 2502 }, { "epoch": 0.42735188663138124, "grad_norm": 0.4597230553627014, "learning_rate": 8e-05, "loss": 1.6446, "step": 2503 }, { "epoch": 0.4275226225029879, "grad_norm": 0.4468061029911041, "learning_rate": 8e-05, "loss": 1.6784, "step": 2504 }, { "epoch": 0.4276933583745945, "grad_norm": 0.4606238603591919, "learning_rate": 8e-05, "loss": 1.6911, "step": 2505 }, { "epoch": 0.42786409424620114, "grad_norm": 0.4341283142566681, "learning_rate": 8e-05, "loss": 1.7375, "step": 2506 }, { "epoch": 0.42803483011780774, "grad_norm": 0.4609305262565613, "learning_rate": 8e-05, "loss": 1.7617, "step": 2507 }, { "epoch": 0.4282055659894144, "grad_norm": 0.45982396602630615, "learning_rate": 8e-05, "loss": 1.4014, "step": 2508 }, { "epoch": 0.428376301861021, "grad_norm": 0.45153939723968506, "learning_rate": 8e-05, "loss": 1.6276, "step": 2509 }, { "epoch": 0.42854703773262764, "grad_norm": 0.45453646779060364, "learning_rate": 8e-05, "loss": 1.7558, "step": 2510 }, { "epoch": 0.42871777360423424, "grad_norm": 0.4714541435241699, "learning_rate": 8e-05, "loss": 1.5675, "step": 2511 }, { "epoch": 0.4288885094758409, "grad_norm": 0.5063548684120178, "learning_rate": 8e-05, "loss": 1.7212, "step": 2512 }, { "epoch": 0.4290592453474475, "grad_norm": 0.46505579352378845, "learning_rate": 8e-05, "loss": 1.6299, "step": 2513 }, { "epoch": 0.42922998121905415, "grad_norm": 0.46373075246810913, "learning_rate": 8e-05, "loss": 1.7877, "step": 2514 }, { "epoch": 0.42940071709066074, "grad_norm": 0.44636496901512146, "learning_rate": 8e-05, "loss": 1.7703, "step": 2515 }, { "epoch": 0.4295714529622674, "grad_norm": 0.47246089577674866, "learning_rate": 8e-05, "loss": 1.6521, "step": 2516 }, { "epoch": 0.429742188833874, "grad_norm": 0.49411797523498535, "learning_rate": 8e-05, "loss": 1.7145, "step": 2517 }, { "epoch": 0.42991292470548065, "grad_norm": 0.44039368629455566, "learning_rate": 8e-05, "loss": 1.5749, "step": 2518 }, { "epoch": 0.43008366057708725, "grad_norm": 0.5084601640701294, "learning_rate": 8e-05, "loss": 1.8974, "step": 2519 }, { "epoch": 0.43025439644869384, "grad_norm": 0.4615045189857483, "learning_rate": 8e-05, "loss": 1.612, "step": 2520 }, { "epoch": 0.4304251323203005, "grad_norm": 0.4273780286312103, "learning_rate": 8e-05, "loss": 1.5886, "step": 2521 }, { "epoch": 0.4305958681919071, "grad_norm": 0.4510349929332733, "learning_rate": 8e-05, "loss": 1.7016, "step": 2522 }, { "epoch": 0.43076660406351375, "grad_norm": 0.46226000785827637, "learning_rate": 8e-05, "loss": 1.5595, "step": 2523 }, { "epoch": 0.43093733993512034, "grad_norm": 0.44252118468284607, "learning_rate": 8e-05, "loss": 1.5669, "step": 2524 }, { "epoch": 0.431108075806727, "grad_norm": 0.4898073375225067, "learning_rate": 8e-05, "loss": 1.6913, "step": 2525 }, { "epoch": 0.4312788116783336, "grad_norm": 0.4794728457927704, "learning_rate": 8e-05, "loss": 1.5969, "step": 2526 }, { "epoch": 0.43144954754994025, "grad_norm": 0.46577247977256775, "learning_rate": 8e-05, "loss": 1.7551, "step": 2527 }, { "epoch": 0.43162028342154685, "grad_norm": 0.4347391426563263, "learning_rate": 8e-05, "loss": 1.5899, "step": 2528 }, { "epoch": 0.4317910192931535, "grad_norm": 0.4489923417568207, "learning_rate": 8e-05, "loss": 1.6296, "step": 2529 }, { "epoch": 0.4319617551647601, "grad_norm": 0.47046706080436707, "learning_rate": 8e-05, "loss": 1.7331, "step": 2530 }, { "epoch": 0.43213249103636675, "grad_norm": 0.48597452044487, "learning_rate": 8e-05, "loss": 1.8725, "step": 2531 }, { "epoch": 0.43230322690797335, "grad_norm": 0.50346440076828, "learning_rate": 8e-05, "loss": 1.6867, "step": 2532 }, { "epoch": 0.43247396277958, "grad_norm": 0.4612138271331787, "learning_rate": 8e-05, "loss": 1.7044, "step": 2533 }, { "epoch": 0.4326446986511866, "grad_norm": 0.45743948221206665, "learning_rate": 8e-05, "loss": 1.3469, "step": 2534 }, { "epoch": 0.43281543452279325, "grad_norm": 0.4908691942691803, "learning_rate": 8e-05, "loss": 1.5266, "step": 2535 }, { "epoch": 0.43298617039439985, "grad_norm": 0.47656434774398804, "learning_rate": 8e-05, "loss": 1.7157, "step": 2536 }, { "epoch": 0.4331569062660065, "grad_norm": 0.4451460838317871, "learning_rate": 8e-05, "loss": 1.5852, "step": 2537 }, { "epoch": 0.4333276421376131, "grad_norm": 0.49213287234306335, "learning_rate": 8e-05, "loss": 1.6367, "step": 2538 }, { "epoch": 0.43349837800921975, "grad_norm": 0.5042041540145874, "learning_rate": 8e-05, "loss": 1.7771, "step": 2539 }, { "epoch": 0.43366911388082635, "grad_norm": 0.445442259311676, "learning_rate": 8e-05, "loss": 1.78, "step": 2540 }, { "epoch": 0.433839849752433, "grad_norm": 0.458783358335495, "learning_rate": 8e-05, "loss": 1.6257, "step": 2541 }, { "epoch": 0.4340105856240396, "grad_norm": 0.48819655179977417, "learning_rate": 8e-05, "loss": 1.8118, "step": 2542 }, { "epoch": 0.43418132149564626, "grad_norm": 0.4538355767726898, "learning_rate": 8e-05, "loss": 1.7608, "step": 2543 }, { "epoch": 0.43435205736725285, "grad_norm": 0.4385939836502075, "learning_rate": 8e-05, "loss": 1.6791, "step": 2544 }, { "epoch": 0.4345227932388595, "grad_norm": 0.490219384431839, "learning_rate": 8e-05, "loss": 1.7959, "step": 2545 }, { "epoch": 0.4346935291104661, "grad_norm": 0.47144219279289246, "learning_rate": 8e-05, "loss": 1.764, "step": 2546 }, { "epoch": 0.43486426498207276, "grad_norm": 0.4712027311325073, "learning_rate": 8e-05, "loss": 1.6478, "step": 2547 }, { "epoch": 0.43503500085367935, "grad_norm": 0.46273303031921387, "learning_rate": 8e-05, "loss": 1.6948, "step": 2548 }, { "epoch": 0.435205736725286, "grad_norm": 0.44699516892433167, "learning_rate": 8e-05, "loss": 1.6709, "step": 2549 }, { "epoch": 0.4353764725968926, "grad_norm": 0.46965527534484863, "learning_rate": 8e-05, "loss": 1.6088, "step": 2550 }, { "epoch": 0.43554720846849926, "grad_norm": 0.4910741448402405, "learning_rate": 8e-05, "loss": 1.8092, "step": 2551 }, { "epoch": 0.43571794434010586, "grad_norm": 0.4435274302959442, "learning_rate": 8e-05, "loss": 1.4478, "step": 2552 }, { "epoch": 0.43588868021171245, "grad_norm": 0.50806725025177, "learning_rate": 8e-05, "loss": 1.8239, "step": 2553 }, { "epoch": 0.4360594160833191, "grad_norm": 0.4656658470630646, "learning_rate": 8e-05, "loss": 1.6711, "step": 2554 }, { "epoch": 0.4362301519549257, "grad_norm": 0.4703902006149292, "learning_rate": 8e-05, "loss": 1.687, "step": 2555 }, { "epoch": 0.43640088782653236, "grad_norm": 0.4831232726573944, "learning_rate": 8e-05, "loss": 1.6919, "step": 2556 }, { "epoch": 0.43657162369813896, "grad_norm": 0.45855823159217834, "learning_rate": 8e-05, "loss": 1.6987, "step": 2557 }, { "epoch": 0.4367423595697456, "grad_norm": 0.47399452328681946, "learning_rate": 8e-05, "loss": 1.7945, "step": 2558 }, { "epoch": 0.4369130954413522, "grad_norm": 0.4580029249191284, "learning_rate": 8e-05, "loss": 1.6618, "step": 2559 }, { "epoch": 0.43708383131295886, "grad_norm": 0.44515088200569153, "learning_rate": 8e-05, "loss": 1.6657, "step": 2560 }, { "epoch": 0.43725456718456546, "grad_norm": 0.47998109459877014, "learning_rate": 8e-05, "loss": 1.9105, "step": 2561 }, { "epoch": 0.4374253030561721, "grad_norm": 0.4984442889690399, "learning_rate": 8e-05, "loss": 1.8693, "step": 2562 }, { "epoch": 0.4375960389277787, "grad_norm": 0.4767996370792389, "learning_rate": 8e-05, "loss": 1.7383, "step": 2563 }, { "epoch": 0.43776677479938536, "grad_norm": 0.4510997533798218, "learning_rate": 8e-05, "loss": 1.6549, "step": 2564 }, { "epoch": 0.43793751067099196, "grad_norm": 0.4607182741165161, "learning_rate": 8e-05, "loss": 1.63, "step": 2565 }, { "epoch": 0.4381082465425986, "grad_norm": 0.4664430320262909, "learning_rate": 8e-05, "loss": 1.6201, "step": 2566 }, { "epoch": 0.4382789824142052, "grad_norm": 0.48048052191734314, "learning_rate": 8e-05, "loss": 1.65, "step": 2567 }, { "epoch": 0.43844971828581186, "grad_norm": 0.4627820551395416, "learning_rate": 8e-05, "loss": 1.6883, "step": 2568 }, { "epoch": 0.43862045415741846, "grad_norm": 0.4991253614425659, "learning_rate": 8e-05, "loss": 1.8694, "step": 2569 }, { "epoch": 0.4387911900290251, "grad_norm": 0.4563300311565399, "learning_rate": 8e-05, "loss": 1.8247, "step": 2570 }, { "epoch": 0.4389619259006317, "grad_norm": 0.4484195411205292, "learning_rate": 8e-05, "loss": 1.7239, "step": 2571 }, { "epoch": 0.43913266177223836, "grad_norm": 0.5757659673690796, "learning_rate": 8e-05, "loss": 1.9145, "step": 2572 }, { "epoch": 0.43930339764384496, "grad_norm": 0.46657466888427734, "learning_rate": 8e-05, "loss": 1.6835, "step": 2573 }, { "epoch": 0.4394741335154516, "grad_norm": 0.4562886655330658, "learning_rate": 8e-05, "loss": 1.6825, "step": 2574 }, { "epoch": 0.4396448693870582, "grad_norm": 0.43500635027885437, "learning_rate": 8e-05, "loss": 1.6814, "step": 2575 }, { "epoch": 0.43981560525866487, "grad_norm": 0.49654272198677063, "learning_rate": 8e-05, "loss": 1.7659, "step": 2576 }, { "epoch": 0.43998634113027146, "grad_norm": 0.5150516629219055, "learning_rate": 8e-05, "loss": 1.9548, "step": 2577 }, { "epoch": 0.4401570770018781, "grad_norm": 0.4936681389808655, "learning_rate": 8e-05, "loss": 1.6371, "step": 2578 }, { "epoch": 0.4403278128734847, "grad_norm": 0.48088112473487854, "learning_rate": 8e-05, "loss": 1.6436, "step": 2579 }, { "epoch": 0.44049854874509137, "grad_norm": 0.46998682618141174, "learning_rate": 8e-05, "loss": 1.6539, "step": 2580 }, { "epoch": 0.44066928461669796, "grad_norm": 0.45522257685661316, "learning_rate": 8e-05, "loss": 1.5378, "step": 2581 }, { "epoch": 0.4408400204883046, "grad_norm": 0.4561886489391327, "learning_rate": 8e-05, "loss": 1.4114, "step": 2582 }, { "epoch": 0.4410107563599112, "grad_norm": 0.48010173439979553, "learning_rate": 8e-05, "loss": 1.7707, "step": 2583 }, { "epoch": 0.44118149223151787, "grad_norm": 0.4603475332260132, "learning_rate": 8e-05, "loss": 1.6617, "step": 2584 }, { "epoch": 0.44135222810312447, "grad_norm": 0.5004491209983826, "learning_rate": 8e-05, "loss": 1.6546, "step": 2585 }, { "epoch": 0.44152296397473106, "grad_norm": 0.44468215107917786, "learning_rate": 8e-05, "loss": 1.7337, "step": 2586 }, { "epoch": 0.4416936998463377, "grad_norm": 0.4367702305316925, "learning_rate": 8e-05, "loss": 1.6739, "step": 2587 }, { "epoch": 0.4418644357179443, "grad_norm": 0.44108837842941284, "learning_rate": 8e-05, "loss": 1.6425, "step": 2588 }, { "epoch": 0.44203517158955097, "grad_norm": 0.4555835723876953, "learning_rate": 8e-05, "loss": 1.6712, "step": 2589 }, { "epoch": 0.44220590746115757, "grad_norm": 0.4902646243572235, "learning_rate": 8e-05, "loss": 1.9573, "step": 2590 }, { "epoch": 0.4423766433327642, "grad_norm": 0.5347006320953369, "learning_rate": 8e-05, "loss": 1.758, "step": 2591 }, { "epoch": 0.4425473792043708, "grad_norm": 0.46403762698173523, "learning_rate": 8e-05, "loss": 1.7285, "step": 2592 }, { "epoch": 0.44271811507597747, "grad_norm": 0.4356669783592224, "learning_rate": 8e-05, "loss": 1.3313, "step": 2593 }, { "epoch": 0.44288885094758407, "grad_norm": 0.48379796743392944, "learning_rate": 8e-05, "loss": 1.889, "step": 2594 }, { "epoch": 0.4430595868191907, "grad_norm": 0.4480961859226227, "learning_rate": 8e-05, "loss": 1.7177, "step": 2595 }, { "epoch": 0.4432303226907973, "grad_norm": 0.4710129499435425, "learning_rate": 8e-05, "loss": 1.7929, "step": 2596 }, { "epoch": 0.44340105856240397, "grad_norm": 0.47627806663513184, "learning_rate": 8e-05, "loss": 1.8041, "step": 2597 }, { "epoch": 0.44357179443401057, "grad_norm": 0.4640841782093048, "learning_rate": 8e-05, "loss": 1.8711, "step": 2598 }, { "epoch": 0.4437425303056172, "grad_norm": 0.45402005314826965, "learning_rate": 8e-05, "loss": 1.7398, "step": 2599 }, { "epoch": 0.4439132661772238, "grad_norm": 0.43594104051589966, "learning_rate": 8e-05, "loss": 1.5579, "step": 2600 }, { "epoch": 0.4440840020488305, "grad_norm": 0.4708251655101776, "learning_rate": 8e-05, "loss": 1.7529, "step": 2601 }, { "epoch": 0.44425473792043707, "grad_norm": 0.4739881157875061, "learning_rate": 8e-05, "loss": 1.8818, "step": 2602 }, { "epoch": 0.4444254737920437, "grad_norm": 0.5117759108543396, "learning_rate": 8e-05, "loss": 1.8305, "step": 2603 }, { "epoch": 0.4445962096636503, "grad_norm": 0.47125405073165894, "learning_rate": 8e-05, "loss": 1.8212, "step": 2604 }, { "epoch": 0.444766945535257, "grad_norm": 0.47544726729393005, "learning_rate": 8e-05, "loss": 1.8147, "step": 2605 }, { "epoch": 0.44493768140686357, "grad_norm": 0.48870259523391724, "learning_rate": 8e-05, "loss": 1.6541, "step": 2606 }, { "epoch": 0.4451084172784702, "grad_norm": 0.4655316472053528, "learning_rate": 8e-05, "loss": 1.7942, "step": 2607 }, { "epoch": 0.4452791531500768, "grad_norm": 0.5860849022865295, "learning_rate": 8e-05, "loss": 1.7616, "step": 2608 }, { "epoch": 0.4454498890216835, "grad_norm": 0.47384020686149597, "learning_rate": 8e-05, "loss": 1.6716, "step": 2609 }, { "epoch": 0.4456206248932901, "grad_norm": 0.47722765803337097, "learning_rate": 8e-05, "loss": 1.7112, "step": 2610 }, { "epoch": 0.4457913607648967, "grad_norm": 0.480053573846817, "learning_rate": 8e-05, "loss": 1.7685, "step": 2611 }, { "epoch": 0.4459620966365033, "grad_norm": 0.48621034622192383, "learning_rate": 8e-05, "loss": 1.567, "step": 2612 }, { "epoch": 0.44613283250811, "grad_norm": 0.4394967257976532, "learning_rate": 8e-05, "loss": 1.7269, "step": 2613 }, { "epoch": 0.4463035683797166, "grad_norm": 0.4735114872455597, "learning_rate": 8e-05, "loss": 1.6964, "step": 2614 }, { "epoch": 0.44647430425132323, "grad_norm": 0.47212135791778564, "learning_rate": 8e-05, "loss": 1.7459, "step": 2615 }, { "epoch": 0.4466450401229298, "grad_norm": 0.490371435880661, "learning_rate": 8e-05, "loss": 1.588, "step": 2616 }, { "epoch": 0.4468157759945365, "grad_norm": 0.5322454571723938, "learning_rate": 8e-05, "loss": 1.6868, "step": 2617 }, { "epoch": 0.4469865118661431, "grad_norm": 0.461782842874527, "learning_rate": 8e-05, "loss": 1.6851, "step": 2618 }, { "epoch": 0.4471572477377497, "grad_norm": 0.455562949180603, "learning_rate": 8e-05, "loss": 1.8124, "step": 2619 }, { "epoch": 0.4473279836093563, "grad_norm": 0.4551364481449127, "learning_rate": 8e-05, "loss": 1.4673, "step": 2620 }, { "epoch": 0.4474987194809629, "grad_norm": 0.5404364466667175, "learning_rate": 8e-05, "loss": 1.7245, "step": 2621 }, { "epoch": 0.4476694553525696, "grad_norm": 0.46826058626174927, "learning_rate": 8e-05, "loss": 1.613, "step": 2622 }, { "epoch": 0.4478401912241762, "grad_norm": 0.4942779242992401, "learning_rate": 8e-05, "loss": 1.546, "step": 2623 }, { "epoch": 0.44801092709578283, "grad_norm": 0.5387442111968994, "learning_rate": 8e-05, "loss": 2.0619, "step": 2624 }, { "epoch": 0.4481816629673894, "grad_norm": 0.5285208225250244, "learning_rate": 8e-05, "loss": 1.862, "step": 2625 }, { "epoch": 0.4483523988389961, "grad_norm": 0.4610133469104767, "learning_rate": 8e-05, "loss": 1.6037, "step": 2626 }, { "epoch": 0.4485231347106027, "grad_norm": 0.4809604585170746, "learning_rate": 8e-05, "loss": 1.723, "step": 2627 }, { "epoch": 0.44869387058220933, "grad_norm": 0.45301559567451477, "learning_rate": 8e-05, "loss": 1.5601, "step": 2628 }, { "epoch": 0.44886460645381593, "grad_norm": 0.4617462456226349, "learning_rate": 8e-05, "loss": 1.6641, "step": 2629 }, { "epoch": 0.4490353423254226, "grad_norm": 0.4701690375804901, "learning_rate": 8e-05, "loss": 1.6633, "step": 2630 }, { "epoch": 0.4492060781970292, "grad_norm": 0.495699942111969, "learning_rate": 8e-05, "loss": 1.7286, "step": 2631 }, { "epoch": 0.44937681406863583, "grad_norm": 0.46543818712234497, "learning_rate": 8e-05, "loss": 1.6674, "step": 2632 }, { "epoch": 0.44954754994024243, "grad_norm": 0.4610511064529419, "learning_rate": 8e-05, "loss": 1.7637, "step": 2633 }, { "epoch": 0.4497182858118491, "grad_norm": 0.4790436327457428, "learning_rate": 8e-05, "loss": 1.6927, "step": 2634 }, { "epoch": 0.4498890216834557, "grad_norm": 0.4701451063156128, "learning_rate": 8e-05, "loss": 1.6245, "step": 2635 }, { "epoch": 0.45005975755506233, "grad_norm": 0.45504477620124817, "learning_rate": 8e-05, "loss": 1.5943, "step": 2636 }, { "epoch": 0.45023049342666893, "grad_norm": 0.4375942647457123, "learning_rate": 8e-05, "loss": 1.7328, "step": 2637 }, { "epoch": 0.4504012292982756, "grad_norm": 0.4440290927886963, "learning_rate": 8e-05, "loss": 1.5565, "step": 2638 }, { "epoch": 0.4505719651698822, "grad_norm": 0.46710970997810364, "learning_rate": 8e-05, "loss": 1.7205, "step": 2639 }, { "epoch": 0.45074270104148884, "grad_norm": 0.4302923381328583, "learning_rate": 8e-05, "loss": 1.5665, "step": 2640 }, { "epoch": 0.45091343691309543, "grad_norm": 0.47111308574676514, "learning_rate": 8e-05, "loss": 1.696, "step": 2641 }, { "epoch": 0.4510841727847021, "grad_norm": 0.48294317722320557, "learning_rate": 8e-05, "loss": 1.8174, "step": 2642 }, { "epoch": 0.4512549086563087, "grad_norm": 0.45135918259620667, "learning_rate": 8e-05, "loss": 1.6996, "step": 2643 }, { "epoch": 0.45142564452791534, "grad_norm": 0.4298174977302551, "learning_rate": 8e-05, "loss": 1.5011, "step": 2644 }, { "epoch": 0.45159638039952194, "grad_norm": 0.48215970396995544, "learning_rate": 8e-05, "loss": 1.6583, "step": 2645 }, { "epoch": 0.4517671162711286, "grad_norm": 0.46612033247947693, "learning_rate": 8e-05, "loss": 1.911, "step": 2646 }, { "epoch": 0.4519378521427352, "grad_norm": 0.45797932147979736, "learning_rate": 8e-05, "loss": 1.6334, "step": 2647 }, { "epoch": 0.45210858801434184, "grad_norm": 0.4692116379737854, "learning_rate": 8e-05, "loss": 1.7218, "step": 2648 }, { "epoch": 0.45227932388594844, "grad_norm": 0.4371168911457062, "learning_rate": 8e-05, "loss": 1.4407, "step": 2649 }, { "epoch": 0.4524500597575551, "grad_norm": 0.46609020233154297, "learning_rate": 8e-05, "loss": 1.7028, "step": 2650 }, { "epoch": 0.4526207956291617, "grad_norm": 0.4749968349933624, "learning_rate": 8e-05, "loss": 1.6383, "step": 2651 }, { "epoch": 0.4527915315007683, "grad_norm": 0.4925970137119293, "learning_rate": 8e-05, "loss": 1.9478, "step": 2652 }, { "epoch": 0.45296226737237494, "grad_norm": 0.4844960570335388, "learning_rate": 8e-05, "loss": 1.6133, "step": 2653 }, { "epoch": 0.45313300324398154, "grad_norm": 0.4626704454421997, "learning_rate": 8e-05, "loss": 1.5884, "step": 2654 }, { "epoch": 0.4533037391155882, "grad_norm": 0.4704859256744385, "learning_rate": 8e-05, "loss": 1.7494, "step": 2655 }, { "epoch": 0.4534744749871948, "grad_norm": 0.45694512128829956, "learning_rate": 8e-05, "loss": 1.6337, "step": 2656 }, { "epoch": 0.45364521085880144, "grad_norm": 0.6035774350166321, "learning_rate": 8e-05, "loss": 2.0169, "step": 2657 }, { "epoch": 0.45381594673040804, "grad_norm": 0.4326131045818329, "learning_rate": 8e-05, "loss": 1.5913, "step": 2658 }, { "epoch": 0.4539866826020147, "grad_norm": 0.4620121419429779, "learning_rate": 8e-05, "loss": 1.4933, "step": 2659 }, { "epoch": 0.4541574184736213, "grad_norm": 0.45801323652267456, "learning_rate": 8e-05, "loss": 1.7492, "step": 2660 }, { "epoch": 0.45432815434522794, "grad_norm": 0.45384591817855835, "learning_rate": 8e-05, "loss": 1.5612, "step": 2661 }, { "epoch": 0.45449889021683454, "grad_norm": 0.4550377428531647, "learning_rate": 8e-05, "loss": 1.5536, "step": 2662 }, { "epoch": 0.4546696260884412, "grad_norm": 0.4707670509815216, "learning_rate": 8e-05, "loss": 1.7378, "step": 2663 }, { "epoch": 0.4548403619600478, "grad_norm": 0.4974104166030884, "learning_rate": 8e-05, "loss": 1.736, "step": 2664 }, { "epoch": 0.45501109783165444, "grad_norm": 0.5741046071052551, "learning_rate": 8e-05, "loss": 1.9488, "step": 2665 }, { "epoch": 0.45518183370326104, "grad_norm": 0.5179641246795654, "learning_rate": 8e-05, "loss": 1.7072, "step": 2666 }, { "epoch": 0.4553525695748677, "grad_norm": 0.4533507525920868, "learning_rate": 8e-05, "loss": 1.6235, "step": 2667 }, { "epoch": 0.4555233054464743, "grad_norm": 0.46079131960868835, "learning_rate": 8e-05, "loss": 1.6813, "step": 2668 }, { "epoch": 0.45569404131808094, "grad_norm": 0.4790368378162384, "learning_rate": 8e-05, "loss": 1.6252, "step": 2669 }, { "epoch": 0.45586477718968754, "grad_norm": 0.43660444021224976, "learning_rate": 8e-05, "loss": 1.6474, "step": 2670 }, { "epoch": 0.4560355130612942, "grad_norm": 0.4751622974872589, "learning_rate": 8e-05, "loss": 1.6587, "step": 2671 }, { "epoch": 0.4562062489329008, "grad_norm": 0.42356327176094055, "learning_rate": 8e-05, "loss": 1.5846, "step": 2672 }, { "epoch": 0.45637698480450745, "grad_norm": 0.4739704430103302, "learning_rate": 8e-05, "loss": 1.6387, "step": 2673 }, { "epoch": 0.45654772067611404, "grad_norm": 0.46332865953445435, "learning_rate": 8e-05, "loss": 1.5468, "step": 2674 }, { "epoch": 0.4567184565477207, "grad_norm": 0.4799434542655945, "learning_rate": 8e-05, "loss": 1.6512, "step": 2675 }, { "epoch": 0.4568891924193273, "grad_norm": 0.48943477869033813, "learning_rate": 8e-05, "loss": 1.7718, "step": 2676 }, { "epoch": 0.45705992829093395, "grad_norm": 0.48598212003707886, "learning_rate": 8e-05, "loss": 1.7995, "step": 2677 }, { "epoch": 0.45723066416254055, "grad_norm": 0.4866950213909149, "learning_rate": 8e-05, "loss": 1.9046, "step": 2678 }, { "epoch": 0.4574014000341472, "grad_norm": 0.4729439318180084, "learning_rate": 8e-05, "loss": 1.7653, "step": 2679 }, { "epoch": 0.4575721359057538, "grad_norm": 0.4602661430835724, "learning_rate": 8e-05, "loss": 1.727, "step": 2680 }, { "epoch": 0.45774287177736045, "grad_norm": 0.4913081228733063, "learning_rate": 8e-05, "loss": 1.6542, "step": 2681 }, { "epoch": 0.45791360764896705, "grad_norm": 0.4957571029663086, "learning_rate": 8e-05, "loss": 1.8823, "step": 2682 }, { "epoch": 0.4580843435205737, "grad_norm": 0.4735201597213745, "learning_rate": 8e-05, "loss": 1.6851, "step": 2683 }, { "epoch": 0.4582550793921803, "grad_norm": 0.46118637919425964, "learning_rate": 8e-05, "loss": 1.7578, "step": 2684 }, { "epoch": 0.4584258152637869, "grad_norm": 0.478422075510025, "learning_rate": 8e-05, "loss": 1.5969, "step": 2685 }, { "epoch": 0.45859655113539355, "grad_norm": 0.4541417062282562, "learning_rate": 8e-05, "loss": 1.6604, "step": 2686 }, { "epoch": 0.45876728700700015, "grad_norm": 0.4723649322986603, "learning_rate": 8e-05, "loss": 1.6489, "step": 2687 }, { "epoch": 0.4589380228786068, "grad_norm": 0.4675385057926178, "learning_rate": 8e-05, "loss": 1.7532, "step": 2688 }, { "epoch": 0.4591087587502134, "grad_norm": 0.5132037401199341, "learning_rate": 8e-05, "loss": 1.6287, "step": 2689 }, { "epoch": 0.45927949462182005, "grad_norm": 0.46517330408096313, "learning_rate": 8e-05, "loss": 1.7498, "step": 2690 }, { "epoch": 0.45945023049342665, "grad_norm": 0.42402905225753784, "learning_rate": 8e-05, "loss": 1.6783, "step": 2691 }, { "epoch": 0.4596209663650333, "grad_norm": 0.4478793442249298, "learning_rate": 8e-05, "loss": 1.6383, "step": 2692 }, { "epoch": 0.4597917022366399, "grad_norm": 0.4676401615142822, "learning_rate": 8e-05, "loss": 1.5635, "step": 2693 }, { "epoch": 0.45996243810824655, "grad_norm": 0.5075092911720276, "learning_rate": 8e-05, "loss": 1.7153, "step": 2694 }, { "epoch": 0.46013317397985315, "grad_norm": 0.4630168080329895, "learning_rate": 8e-05, "loss": 1.6626, "step": 2695 }, { "epoch": 0.4603039098514598, "grad_norm": 0.518678605556488, "learning_rate": 8e-05, "loss": 1.8556, "step": 2696 }, { "epoch": 0.4604746457230664, "grad_norm": 0.43866631388664246, "learning_rate": 8e-05, "loss": 1.5989, "step": 2697 }, { "epoch": 0.46064538159467305, "grad_norm": 0.4884837865829468, "learning_rate": 8e-05, "loss": 1.8182, "step": 2698 }, { "epoch": 0.46081611746627965, "grad_norm": 0.45296981930732727, "learning_rate": 8e-05, "loss": 1.5952, "step": 2699 }, { "epoch": 0.4609868533378863, "grad_norm": 0.5259296298027039, "learning_rate": 8e-05, "loss": 1.8243, "step": 2700 }, { "epoch": 0.4611575892094929, "grad_norm": 0.4833025336265564, "learning_rate": 8e-05, "loss": 1.5373, "step": 2701 }, { "epoch": 0.46132832508109956, "grad_norm": 0.48829683661460876, "learning_rate": 8e-05, "loss": 1.6676, "step": 2702 }, { "epoch": 0.46149906095270615, "grad_norm": 0.46349865198135376, "learning_rate": 8e-05, "loss": 1.5689, "step": 2703 }, { "epoch": 0.4616697968243128, "grad_norm": 0.45718663930892944, "learning_rate": 8e-05, "loss": 1.8719, "step": 2704 }, { "epoch": 0.4618405326959194, "grad_norm": 0.48702242970466614, "learning_rate": 8e-05, "loss": 1.8033, "step": 2705 }, { "epoch": 0.46201126856752606, "grad_norm": 0.4489390254020691, "learning_rate": 8e-05, "loss": 1.4676, "step": 2706 }, { "epoch": 0.46218200443913265, "grad_norm": 0.4747924506664276, "learning_rate": 8e-05, "loss": 1.6485, "step": 2707 }, { "epoch": 0.4623527403107393, "grad_norm": 0.45245543122291565, "learning_rate": 8e-05, "loss": 1.6401, "step": 2708 }, { "epoch": 0.4625234761823459, "grad_norm": 0.4677758812904358, "learning_rate": 8e-05, "loss": 1.6236, "step": 2709 }, { "epoch": 0.46269421205395256, "grad_norm": 0.4499771296977997, "learning_rate": 8e-05, "loss": 1.6793, "step": 2710 }, { "epoch": 0.46286494792555916, "grad_norm": 0.5227884650230408, "learning_rate": 8e-05, "loss": 1.609, "step": 2711 }, { "epoch": 0.4630356837971658, "grad_norm": 0.4927975535392761, "learning_rate": 8e-05, "loss": 1.7089, "step": 2712 }, { "epoch": 0.4632064196687724, "grad_norm": 0.49075058102607727, "learning_rate": 8e-05, "loss": 1.9345, "step": 2713 }, { "epoch": 0.46337715554037906, "grad_norm": 0.4678492546081543, "learning_rate": 8e-05, "loss": 1.5832, "step": 2714 }, { "epoch": 0.46354789141198566, "grad_norm": 0.4979275166988373, "learning_rate": 8e-05, "loss": 1.7179, "step": 2715 }, { "epoch": 0.46371862728359226, "grad_norm": 0.4756011366844177, "learning_rate": 8e-05, "loss": 1.752, "step": 2716 }, { "epoch": 0.4638893631551989, "grad_norm": 0.44694381952285767, "learning_rate": 8e-05, "loss": 1.6567, "step": 2717 }, { "epoch": 0.4640600990268055, "grad_norm": 0.4490819573402405, "learning_rate": 8e-05, "loss": 1.5954, "step": 2718 }, { "epoch": 0.46423083489841216, "grad_norm": 0.4516647458076477, "learning_rate": 8e-05, "loss": 1.7457, "step": 2719 }, { "epoch": 0.46440157077001876, "grad_norm": 0.46965938806533813, "learning_rate": 8e-05, "loss": 1.6913, "step": 2720 }, { "epoch": 0.4645723066416254, "grad_norm": 0.5109792351722717, "learning_rate": 8e-05, "loss": 1.9399, "step": 2721 }, { "epoch": 0.464743042513232, "grad_norm": 0.47832435369491577, "learning_rate": 8e-05, "loss": 1.663, "step": 2722 }, { "epoch": 0.46491377838483866, "grad_norm": 0.4295305907726288, "learning_rate": 8e-05, "loss": 1.5647, "step": 2723 }, { "epoch": 0.46508451425644526, "grad_norm": 0.45818260312080383, "learning_rate": 8e-05, "loss": 1.6967, "step": 2724 }, { "epoch": 0.4652552501280519, "grad_norm": 0.4693359136581421, "learning_rate": 8e-05, "loss": 1.6712, "step": 2725 }, { "epoch": 0.4654259859996585, "grad_norm": 0.4744309186935425, "learning_rate": 8e-05, "loss": 1.8675, "step": 2726 }, { "epoch": 0.46559672187126516, "grad_norm": 0.4706098437309265, "learning_rate": 8e-05, "loss": 1.6918, "step": 2727 }, { "epoch": 0.46576745774287176, "grad_norm": 0.46563684940338135, "learning_rate": 8e-05, "loss": 1.7266, "step": 2728 }, { "epoch": 0.4659381936144784, "grad_norm": 0.47886449098587036, "learning_rate": 8e-05, "loss": 1.9203, "step": 2729 }, { "epoch": 0.466108929486085, "grad_norm": 0.4360644519329071, "learning_rate": 8e-05, "loss": 1.5502, "step": 2730 }, { "epoch": 0.46627966535769166, "grad_norm": 0.47870784997940063, "learning_rate": 8e-05, "loss": 1.681, "step": 2731 }, { "epoch": 0.46645040122929826, "grad_norm": 0.4453476667404175, "learning_rate": 8e-05, "loss": 1.5906, "step": 2732 }, { "epoch": 0.4666211371009049, "grad_norm": 0.49558159708976746, "learning_rate": 8e-05, "loss": 1.7716, "step": 2733 }, { "epoch": 0.4667918729725115, "grad_norm": 0.4612334370613098, "learning_rate": 8e-05, "loss": 1.862, "step": 2734 }, { "epoch": 0.46696260884411817, "grad_norm": 0.4948166310787201, "learning_rate": 8e-05, "loss": 1.8336, "step": 2735 }, { "epoch": 0.46713334471572476, "grad_norm": 0.45333099365234375, "learning_rate": 8e-05, "loss": 1.8533, "step": 2736 }, { "epoch": 0.4673040805873314, "grad_norm": 0.48238569498062134, "learning_rate": 8e-05, "loss": 1.7803, "step": 2737 }, { "epoch": 0.467474816458938, "grad_norm": 0.47213268280029297, "learning_rate": 8e-05, "loss": 1.6977, "step": 2738 }, { "epoch": 0.46764555233054467, "grad_norm": 0.44199755787849426, "learning_rate": 8e-05, "loss": 1.469, "step": 2739 }, { "epoch": 0.46781628820215126, "grad_norm": 0.4732804596424103, "learning_rate": 8e-05, "loss": 1.6659, "step": 2740 }, { "epoch": 0.4679870240737579, "grad_norm": 0.45423540472984314, "learning_rate": 8e-05, "loss": 1.595, "step": 2741 }, { "epoch": 0.4681577599453645, "grad_norm": 0.5095189213752747, "learning_rate": 8e-05, "loss": 1.7885, "step": 2742 }, { "epoch": 0.46832849581697117, "grad_norm": 0.545202374458313, "learning_rate": 8e-05, "loss": 1.9341, "step": 2743 }, { "epoch": 0.46849923168857777, "grad_norm": 0.45267122983932495, "learning_rate": 8e-05, "loss": 1.6859, "step": 2744 }, { "epoch": 0.4686699675601844, "grad_norm": 0.46865248680114746, "learning_rate": 8e-05, "loss": 1.7957, "step": 2745 }, { "epoch": 0.468840703431791, "grad_norm": 0.45831677317619324, "learning_rate": 8e-05, "loss": 1.6308, "step": 2746 }, { "epoch": 0.46901143930339767, "grad_norm": 0.4722541570663452, "learning_rate": 8e-05, "loss": 1.5499, "step": 2747 }, { "epoch": 0.46918217517500427, "grad_norm": 0.4585897624492645, "learning_rate": 8e-05, "loss": 1.6001, "step": 2748 }, { "epoch": 0.46935291104661087, "grad_norm": 0.4258328378200531, "learning_rate": 8e-05, "loss": 1.5444, "step": 2749 }, { "epoch": 0.4695236469182175, "grad_norm": 0.47355690598487854, "learning_rate": 8e-05, "loss": 1.6654, "step": 2750 }, { "epoch": 0.4696943827898241, "grad_norm": 0.49979302287101746, "learning_rate": 8e-05, "loss": 1.6817, "step": 2751 }, { "epoch": 0.46986511866143077, "grad_norm": 0.48460087180137634, "learning_rate": 8e-05, "loss": 1.6818, "step": 2752 }, { "epoch": 0.47003585453303737, "grad_norm": 0.46180808544158936, "learning_rate": 8e-05, "loss": 1.7227, "step": 2753 }, { "epoch": 0.470206590404644, "grad_norm": 0.48055893182754517, "learning_rate": 8e-05, "loss": 1.7664, "step": 2754 }, { "epoch": 0.4703773262762506, "grad_norm": 0.4954647421836853, "learning_rate": 8e-05, "loss": 1.3948, "step": 2755 }, { "epoch": 0.47054806214785727, "grad_norm": 0.5344395637512207, "learning_rate": 8e-05, "loss": 2.0172, "step": 2756 }, { "epoch": 0.47071879801946387, "grad_norm": 0.49231165647506714, "learning_rate": 8e-05, "loss": 1.6612, "step": 2757 }, { "epoch": 0.4708895338910705, "grad_norm": 0.4895164370536804, "learning_rate": 8e-05, "loss": 1.8302, "step": 2758 }, { "epoch": 0.4710602697626771, "grad_norm": 0.46708574891090393, "learning_rate": 8e-05, "loss": 1.7672, "step": 2759 }, { "epoch": 0.4712310056342838, "grad_norm": 0.490065336227417, "learning_rate": 8e-05, "loss": 1.8442, "step": 2760 }, { "epoch": 0.47140174150589037, "grad_norm": 0.5052003860473633, "learning_rate": 8e-05, "loss": 1.8586, "step": 2761 }, { "epoch": 0.471572477377497, "grad_norm": 0.5650134682655334, "learning_rate": 8e-05, "loss": 1.7246, "step": 2762 }, { "epoch": 0.4717432132491036, "grad_norm": 0.5490623116493225, "learning_rate": 8e-05, "loss": 1.9444, "step": 2763 }, { "epoch": 0.4719139491207103, "grad_norm": 0.5009639859199524, "learning_rate": 8e-05, "loss": 1.7166, "step": 2764 }, { "epoch": 0.47208468499231687, "grad_norm": 0.48582419753074646, "learning_rate": 8e-05, "loss": 1.6284, "step": 2765 }, { "epoch": 0.4722554208639235, "grad_norm": 0.4223170876502991, "learning_rate": 8e-05, "loss": 1.4393, "step": 2766 }, { "epoch": 0.4724261567355301, "grad_norm": 0.46623939275741577, "learning_rate": 8e-05, "loss": 1.7582, "step": 2767 }, { "epoch": 0.4725968926071368, "grad_norm": 0.48591262102127075, "learning_rate": 8e-05, "loss": 1.7226, "step": 2768 }, { "epoch": 0.4727676284787434, "grad_norm": 0.4692893624305725, "learning_rate": 8e-05, "loss": 1.6472, "step": 2769 }, { "epoch": 0.47293836435035, "grad_norm": 0.4791190028190613, "learning_rate": 8e-05, "loss": 1.821, "step": 2770 }, { "epoch": 0.4731091002219566, "grad_norm": 0.4493101239204407, "learning_rate": 8e-05, "loss": 1.6907, "step": 2771 }, { "epoch": 0.4732798360935633, "grad_norm": 0.4814390540122986, "learning_rate": 8e-05, "loss": 1.6717, "step": 2772 }, { "epoch": 0.4734505719651699, "grad_norm": 0.44618040323257446, "learning_rate": 8e-05, "loss": 1.6423, "step": 2773 }, { "epoch": 0.47362130783677653, "grad_norm": 0.47304216027259827, "learning_rate": 8e-05, "loss": 1.6657, "step": 2774 }, { "epoch": 0.4737920437083831, "grad_norm": 0.46501898765563965, "learning_rate": 8e-05, "loss": 1.6929, "step": 2775 }, { "epoch": 0.4739627795799898, "grad_norm": 0.4630656838417053, "learning_rate": 8e-05, "loss": 1.3998, "step": 2776 }, { "epoch": 0.4741335154515964, "grad_norm": 0.4170381724834442, "learning_rate": 8e-05, "loss": 1.5661, "step": 2777 }, { "epoch": 0.47430425132320303, "grad_norm": 0.4711603820323944, "learning_rate": 8e-05, "loss": 1.8301, "step": 2778 }, { "epoch": 0.4744749871948096, "grad_norm": 0.48265621066093445, "learning_rate": 8e-05, "loss": 1.6463, "step": 2779 }, { "epoch": 0.4746457230664163, "grad_norm": 0.4525502026081085, "learning_rate": 8e-05, "loss": 1.6032, "step": 2780 }, { "epoch": 0.4748164589380229, "grad_norm": 0.45607852935791016, "learning_rate": 8e-05, "loss": 1.6421, "step": 2781 }, { "epoch": 0.4749871948096295, "grad_norm": 0.5221694111824036, "learning_rate": 8e-05, "loss": 1.7062, "step": 2782 }, { "epoch": 0.47515793068123613, "grad_norm": 0.5164807438850403, "learning_rate": 8e-05, "loss": 1.6822, "step": 2783 }, { "epoch": 0.4753286665528427, "grad_norm": 0.47035858035087585, "learning_rate": 8e-05, "loss": 1.7688, "step": 2784 }, { "epoch": 0.4754994024244494, "grad_norm": 0.46989554166793823, "learning_rate": 8e-05, "loss": 1.7177, "step": 2785 }, { "epoch": 0.475670138296056, "grad_norm": 0.465376079082489, "learning_rate": 8e-05, "loss": 1.6059, "step": 2786 }, { "epoch": 0.47584087416766263, "grad_norm": 0.491696834564209, "learning_rate": 8e-05, "loss": 1.8049, "step": 2787 }, { "epoch": 0.47601161003926923, "grad_norm": 0.4422317445278168, "learning_rate": 8e-05, "loss": 1.55, "step": 2788 }, { "epoch": 0.4761823459108759, "grad_norm": 0.45854201912879944, "learning_rate": 8e-05, "loss": 1.6998, "step": 2789 }, { "epoch": 0.4763530817824825, "grad_norm": 0.4385296702384949, "learning_rate": 8e-05, "loss": 1.6051, "step": 2790 }, { "epoch": 0.47652381765408913, "grad_norm": 0.4599505662918091, "learning_rate": 8e-05, "loss": 1.7444, "step": 2791 }, { "epoch": 0.47669455352569573, "grad_norm": 0.44887596368789673, "learning_rate": 8e-05, "loss": 1.5622, "step": 2792 }, { "epoch": 0.4768652893973024, "grad_norm": 0.5442419648170471, "learning_rate": 8e-05, "loss": 1.7803, "step": 2793 }, { "epoch": 0.477036025268909, "grad_norm": 0.49992305040359497, "learning_rate": 8e-05, "loss": 1.6133, "step": 2794 }, { "epoch": 0.47720676114051563, "grad_norm": 0.4884761869907379, "learning_rate": 8e-05, "loss": 1.9684, "step": 2795 }, { "epoch": 0.47737749701212223, "grad_norm": 0.4662308096885681, "learning_rate": 8e-05, "loss": 1.8231, "step": 2796 }, { "epoch": 0.4775482328837289, "grad_norm": 0.4607981741428375, "learning_rate": 8e-05, "loss": 1.669, "step": 2797 }, { "epoch": 0.4777189687553355, "grad_norm": 0.48921334743499756, "learning_rate": 8e-05, "loss": 1.7831, "step": 2798 }, { "epoch": 0.47788970462694214, "grad_norm": 0.47219356894493103, "learning_rate": 8e-05, "loss": 1.6155, "step": 2799 }, { "epoch": 0.47806044049854873, "grad_norm": 0.5633625388145447, "learning_rate": 8e-05, "loss": 2.0433, "step": 2800 }, { "epoch": 0.4782311763701554, "grad_norm": 0.45140713453292847, "learning_rate": 8e-05, "loss": 1.6427, "step": 2801 }, { "epoch": 0.478401912241762, "grad_norm": 0.469361275434494, "learning_rate": 8e-05, "loss": 1.6592, "step": 2802 }, { "epoch": 0.47857264811336864, "grad_norm": 0.458085834980011, "learning_rate": 8e-05, "loss": 1.7373, "step": 2803 }, { "epoch": 0.47874338398497523, "grad_norm": 0.46897441148757935, "learning_rate": 8e-05, "loss": 1.7319, "step": 2804 }, { "epoch": 0.4789141198565819, "grad_norm": 0.45501524209976196, "learning_rate": 8e-05, "loss": 1.7477, "step": 2805 }, { "epoch": 0.4790848557281885, "grad_norm": 0.464868426322937, "learning_rate": 8e-05, "loss": 1.7298, "step": 2806 }, { "epoch": 0.47925559159979514, "grad_norm": 0.4760870337486267, "learning_rate": 8e-05, "loss": 1.6385, "step": 2807 }, { "epoch": 0.47942632747140174, "grad_norm": 0.4948864281177521, "learning_rate": 8e-05, "loss": 1.6655, "step": 2808 }, { "epoch": 0.4795970633430084, "grad_norm": 0.4857485294342041, "learning_rate": 8e-05, "loss": 1.9561, "step": 2809 }, { "epoch": 0.479767799214615, "grad_norm": 0.45147913694381714, "learning_rate": 8e-05, "loss": 1.5279, "step": 2810 }, { "epoch": 0.47993853508622164, "grad_norm": 0.43635880947113037, "learning_rate": 8e-05, "loss": 1.4954, "step": 2811 }, { "epoch": 0.48010927095782824, "grad_norm": 0.4885260760784149, "learning_rate": 8e-05, "loss": 1.6456, "step": 2812 }, { "epoch": 0.4802800068294349, "grad_norm": 0.46411365270614624, "learning_rate": 8e-05, "loss": 1.8586, "step": 2813 }, { "epoch": 0.4804507427010415, "grad_norm": 0.47470617294311523, "learning_rate": 8e-05, "loss": 1.7553, "step": 2814 }, { "epoch": 0.4806214785726481, "grad_norm": 0.480072557926178, "learning_rate": 8e-05, "loss": 1.679, "step": 2815 }, { "epoch": 0.48079221444425474, "grad_norm": 0.49012014269828796, "learning_rate": 8e-05, "loss": 1.7331, "step": 2816 }, { "epoch": 0.48096295031586134, "grad_norm": 0.4804023206233978, "learning_rate": 8e-05, "loss": 1.7251, "step": 2817 }, { "epoch": 0.481133686187468, "grad_norm": 0.5375509262084961, "learning_rate": 8e-05, "loss": 1.6571, "step": 2818 }, { "epoch": 0.4813044220590746, "grad_norm": 0.4931970238685608, "learning_rate": 8e-05, "loss": 1.5632, "step": 2819 }, { "epoch": 0.48147515793068124, "grad_norm": 0.48406317830085754, "learning_rate": 8e-05, "loss": 1.7509, "step": 2820 }, { "epoch": 0.48164589380228784, "grad_norm": 0.47286882996559143, "learning_rate": 8e-05, "loss": 1.7467, "step": 2821 }, { "epoch": 0.4818166296738945, "grad_norm": 0.5075170993804932, "learning_rate": 8e-05, "loss": 1.788, "step": 2822 }, { "epoch": 0.4819873655455011, "grad_norm": 0.47040504217147827, "learning_rate": 8e-05, "loss": 1.6402, "step": 2823 }, { "epoch": 0.48215810141710774, "grad_norm": 0.4549131393432617, "learning_rate": 8e-05, "loss": 1.6099, "step": 2824 }, { "epoch": 0.48232883728871434, "grad_norm": 0.4689737558364868, "learning_rate": 8e-05, "loss": 1.6022, "step": 2825 }, { "epoch": 0.482499573160321, "grad_norm": 0.4746909737586975, "learning_rate": 8e-05, "loss": 1.5541, "step": 2826 }, { "epoch": 0.4826703090319276, "grad_norm": 0.522488534450531, "learning_rate": 8e-05, "loss": 1.8945, "step": 2827 }, { "epoch": 0.48284104490353424, "grad_norm": 0.4862683117389679, "learning_rate": 8e-05, "loss": 1.8796, "step": 2828 }, { "epoch": 0.48301178077514084, "grad_norm": 0.4979954957962036, "learning_rate": 8e-05, "loss": 1.8849, "step": 2829 }, { "epoch": 0.4831825166467475, "grad_norm": 0.47245359420776367, "learning_rate": 8e-05, "loss": 1.8383, "step": 2830 }, { "epoch": 0.4833532525183541, "grad_norm": 0.4526452422142029, "learning_rate": 8e-05, "loss": 1.6687, "step": 2831 }, { "epoch": 0.48352398838996075, "grad_norm": 0.4951600432395935, "learning_rate": 8e-05, "loss": 1.7104, "step": 2832 }, { "epoch": 0.48369472426156734, "grad_norm": 0.4972224533557892, "learning_rate": 8e-05, "loss": 1.7381, "step": 2833 }, { "epoch": 0.483865460133174, "grad_norm": 0.4538514316082001, "learning_rate": 8e-05, "loss": 1.6776, "step": 2834 }, { "epoch": 0.4840361960047806, "grad_norm": 0.4647708535194397, "learning_rate": 8e-05, "loss": 1.3853, "step": 2835 }, { "epoch": 0.48420693187638725, "grad_norm": 0.4975901246070862, "learning_rate": 8e-05, "loss": 1.6861, "step": 2836 }, { "epoch": 0.48437766774799385, "grad_norm": 0.4766479730606079, "learning_rate": 8e-05, "loss": 1.7953, "step": 2837 }, { "epoch": 0.4845484036196005, "grad_norm": 0.4713963270187378, "learning_rate": 8e-05, "loss": 1.6331, "step": 2838 }, { "epoch": 0.4847191394912071, "grad_norm": 0.4629961848258972, "learning_rate": 8e-05, "loss": 1.4379, "step": 2839 }, { "epoch": 0.48488987536281375, "grad_norm": 0.4631959795951843, "learning_rate": 8e-05, "loss": 1.618, "step": 2840 }, { "epoch": 0.48506061123442035, "grad_norm": 0.4905923902988434, "learning_rate": 8e-05, "loss": 1.8265, "step": 2841 }, { "epoch": 0.485231347106027, "grad_norm": 0.4645066261291504, "learning_rate": 8e-05, "loss": 1.6246, "step": 2842 }, { "epoch": 0.4854020829776336, "grad_norm": 0.45040878653526306, "learning_rate": 8e-05, "loss": 1.5743, "step": 2843 }, { "epoch": 0.48557281884924025, "grad_norm": 0.4920259416103363, "learning_rate": 8e-05, "loss": 1.8086, "step": 2844 }, { "epoch": 0.48574355472084685, "grad_norm": 0.5153072476387024, "learning_rate": 8e-05, "loss": 1.8373, "step": 2845 }, { "epoch": 0.4859142905924535, "grad_norm": 0.45372137427330017, "learning_rate": 8e-05, "loss": 1.6127, "step": 2846 }, { "epoch": 0.4860850264640601, "grad_norm": 0.46845147013664246, "learning_rate": 8e-05, "loss": 1.7259, "step": 2847 }, { "epoch": 0.4862557623356667, "grad_norm": 0.4326455295085907, "learning_rate": 8e-05, "loss": 1.532, "step": 2848 }, { "epoch": 0.48642649820727335, "grad_norm": 0.4975261390209198, "learning_rate": 8e-05, "loss": 1.7646, "step": 2849 }, { "epoch": 0.48659723407887995, "grad_norm": 0.5843642950057983, "learning_rate": 8e-05, "loss": 1.7658, "step": 2850 }, { "epoch": 0.4867679699504866, "grad_norm": 0.5045951008796692, "learning_rate": 8e-05, "loss": 2.0444, "step": 2851 }, { "epoch": 0.4869387058220932, "grad_norm": 0.520639955997467, "learning_rate": 8e-05, "loss": 1.6555, "step": 2852 }, { "epoch": 0.48710944169369985, "grad_norm": 0.46873176097869873, "learning_rate": 8e-05, "loss": 1.7788, "step": 2853 }, { "epoch": 0.48728017756530645, "grad_norm": 0.4670833349227905, "learning_rate": 8e-05, "loss": 1.677, "step": 2854 }, { "epoch": 0.4874509134369131, "grad_norm": 0.45583269000053406, "learning_rate": 8e-05, "loss": 1.6493, "step": 2855 }, { "epoch": 0.4876216493085197, "grad_norm": 0.4926481246948242, "learning_rate": 8e-05, "loss": 1.6418, "step": 2856 }, { "epoch": 0.48779238518012635, "grad_norm": 0.4532318413257599, "learning_rate": 8e-05, "loss": 1.7168, "step": 2857 }, { "epoch": 0.48796312105173295, "grad_norm": 0.5192470550537109, "learning_rate": 8e-05, "loss": 1.765, "step": 2858 }, { "epoch": 0.4881338569233396, "grad_norm": 0.4666150212287903, "learning_rate": 8e-05, "loss": 1.7748, "step": 2859 }, { "epoch": 0.4883045927949462, "grad_norm": 0.4886549711227417, "learning_rate": 8e-05, "loss": 1.4896, "step": 2860 }, { "epoch": 0.48847532866655285, "grad_norm": 0.5131173133850098, "learning_rate": 8e-05, "loss": 1.7565, "step": 2861 }, { "epoch": 0.48864606453815945, "grad_norm": 0.483856737613678, "learning_rate": 8e-05, "loss": 1.5033, "step": 2862 }, { "epoch": 0.4888168004097661, "grad_norm": 0.4572276175022125, "learning_rate": 8e-05, "loss": 1.6348, "step": 2863 }, { "epoch": 0.4889875362813727, "grad_norm": 0.49286699295043945, "learning_rate": 8e-05, "loss": 1.6834, "step": 2864 }, { "epoch": 0.48915827215297936, "grad_norm": 0.48755842447280884, "learning_rate": 8e-05, "loss": 1.5693, "step": 2865 }, { "epoch": 0.48932900802458595, "grad_norm": 0.45375651121139526, "learning_rate": 8e-05, "loss": 1.7016, "step": 2866 }, { "epoch": 0.4894997438961926, "grad_norm": 0.4548616111278534, "learning_rate": 8e-05, "loss": 1.5454, "step": 2867 }, { "epoch": 0.4896704797677992, "grad_norm": 0.5052309632301331, "learning_rate": 8e-05, "loss": 1.6474, "step": 2868 }, { "epoch": 0.48984121563940586, "grad_norm": 0.47989070415496826, "learning_rate": 8e-05, "loss": 1.7397, "step": 2869 }, { "epoch": 0.49001195151101246, "grad_norm": 0.46103179454803467, "learning_rate": 8e-05, "loss": 1.816, "step": 2870 }, { "epoch": 0.4901826873826191, "grad_norm": 0.5205078721046448, "learning_rate": 8e-05, "loss": 1.6779, "step": 2871 }, { "epoch": 0.4903534232542257, "grad_norm": 0.4758618474006653, "learning_rate": 8e-05, "loss": 1.6825, "step": 2872 }, { "epoch": 0.49052415912583236, "grad_norm": 0.4807494878768921, "learning_rate": 8e-05, "loss": 1.6787, "step": 2873 }, { "epoch": 0.49069489499743896, "grad_norm": 0.48035097122192383, "learning_rate": 8e-05, "loss": 1.8344, "step": 2874 }, { "epoch": 0.4908656308690456, "grad_norm": 0.44154641032218933, "learning_rate": 8e-05, "loss": 1.7024, "step": 2875 }, { "epoch": 0.4910363667406522, "grad_norm": 0.4720616638660431, "learning_rate": 8e-05, "loss": 1.8625, "step": 2876 }, { "epoch": 0.49120710261225886, "grad_norm": 0.5255178809165955, "learning_rate": 8e-05, "loss": 1.7205, "step": 2877 }, { "epoch": 0.49137783848386546, "grad_norm": 0.49386414885520935, "learning_rate": 8e-05, "loss": 1.6861, "step": 2878 }, { "epoch": 0.4915485743554721, "grad_norm": 0.43005746603012085, "learning_rate": 8e-05, "loss": 1.4387, "step": 2879 }, { "epoch": 0.4917193102270787, "grad_norm": 0.477679967880249, "learning_rate": 8e-05, "loss": 1.5288, "step": 2880 }, { "epoch": 0.4918900460986853, "grad_norm": 0.5142102241516113, "learning_rate": 8e-05, "loss": 1.7261, "step": 2881 }, { "epoch": 0.49206078197029196, "grad_norm": 0.4902280569076538, "learning_rate": 8e-05, "loss": 1.7102, "step": 2882 }, { "epoch": 0.49223151784189856, "grad_norm": 0.46098312735557556, "learning_rate": 8e-05, "loss": 1.7224, "step": 2883 }, { "epoch": 0.4924022537135052, "grad_norm": 0.4509775638580322, "learning_rate": 8e-05, "loss": 1.7127, "step": 2884 }, { "epoch": 0.4925729895851118, "grad_norm": 0.5631358027458191, "learning_rate": 8e-05, "loss": 1.8306, "step": 2885 }, { "epoch": 0.49274372545671846, "grad_norm": 0.4911383092403412, "learning_rate": 8e-05, "loss": 1.6025, "step": 2886 }, { "epoch": 0.49291446132832506, "grad_norm": 0.5127415657043457, "learning_rate": 8e-05, "loss": 1.7258, "step": 2887 }, { "epoch": 0.4930851971999317, "grad_norm": 0.5535193085670471, "learning_rate": 8e-05, "loss": 1.8283, "step": 2888 }, { "epoch": 0.4932559330715383, "grad_norm": 0.4745803475379944, "learning_rate": 8e-05, "loss": 1.743, "step": 2889 }, { "epoch": 0.49342666894314496, "grad_norm": 0.4630903899669647, "learning_rate": 8e-05, "loss": 1.6534, "step": 2890 }, { "epoch": 0.49359740481475156, "grad_norm": 0.47172456979751587, "learning_rate": 8e-05, "loss": 1.7117, "step": 2891 }, { "epoch": 0.4937681406863582, "grad_norm": 0.4738222062587738, "learning_rate": 8e-05, "loss": 1.7921, "step": 2892 }, { "epoch": 0.4939388765579648, "grad_norm": 0.4842044711112976, "learning_rate": 8e-05, "loss": 1.8377, "step": 2893 }, { "epoch": 0.49410961242957147, "grad_norm": 0.4808140993118286, "learning_rate": 8e-05, "loss": 1.7603, "step": 2894 }, { "epoch": 0.49428034830117806, "grad_norm": 0.4543936550617218, "learning_rate": 8e-05, "loss": 1.6139, "step": 2895 }, { "epoch": 0.4944510841727847, "grad_norm": 0.5439001321792603, "learning_rate": 8e-05, "loss": 1.8979, "step": 2896 }, { "epoch": 0.4946218200443913, "grad_norm": 0.48701196908950806, "learning_rate": 8e-05, "loss": 1.7506, "step": 2897 }, { "epoch": 0.49479255591599797, "grad_norm": 0.4538487195968628, "learning_rate": 8e-05, "loss": 1.7133, "step": 2898 }, { "epoch": 0.49496329178760456, "grad_norm": 0.5141444802284241, "learning_rate": 8e-05, "loss": 1.7446, "step": 2899 }, { "epoch": 0.4951340276592112, "grad_norm": 0.45681527256965637, "learning_rate": 8e-05, "loss": 1.6933, "step": 2900 }, { "epoch": 0.4953047635308178, "grad_norm": 0.46449965238571167, "learning_rate": 8e-05, "loss": 1.8084, "step": 2901 }, { "epoch": 0.49547549940242447, "grad_norm": 0.494157612323761, "learning_rate": 8e-05, "loss": 1.7065, "step": 2902 }, { "epoch": 0.49564623527403107, "grad_norm": 0.48882704973220825, "learning_rate": 8e-05, "loss": 1.8291, "step": 2903 }, { "epoch": 0.4958169711456377, "grad_norm": 0.4535320997238159, "learning_rate": 8e-05, "loss": 1.6476, "step": 2904 }, { "epoch": 0.4959877070172443, "grad_norm": 0.44959503412246704, "learning_rate": 8e-05, "loss": 1.5529, "step": 2905 }, { "epoch": 0.49615844288885097, "grad_norm": 0.5422093272209167, "learning_rate": 8e-05, "loss": 1.6562, "step": 2906 }, { "epoch": 0.49632917876045757, "grad_norm": 0.44377028942108154, "learning_rate": 8e-05, "loss": 1.7061, "step": 2907 }, { "epoch": 0.4964999146320642, "grad_norm": 0.44449353218078613, "learning_rate": 8e-05, "loss": 1.6009, "step": 2908 }, { "epoch": 0.4966706505036708, "grad_norm": 0.4890891909599304, "learning_rate": 8e-05, "loss": 1.7504, "step": 2909 }, { "epoch": 0.49684138637527747, "grad_norm": 0.45434415340423584, "learning_rate": 8e-05, "loss": 1.7602, "step": 2910 }, { "epoch": 0.49701212224688407, "grad_norm": 0.4668287932872772, "learning_rate": 8e-05, "loss": 1.7126, "step": 2911 }, { "epoch": 0.4971828581184907, "grad_norm": 0.4987989068031311, "learning_rate": 8e-05, "loss": 1.8053, "step": 2912 }, { "epoch": 0.4973535939900973, "grad_norm": 0.4723316729068756, "learning_rate": 8e-05, "loss": 1.6706, "step": 2913 }, { "epoch": 0.4975243298617039, "grad_norm": 0.4853842258453369, "learning_rate": 8e-05, "loss": 1.6963, "step": 2914 }, { "epoch": 0.49769506573331057, "grad_norm": 0.4926818311214447, "learning_rate": 8e-05, "loss": 1.7255, "step": 2915 }, { "epoch": 0.49786580160491717, "grad_norm": 0.47684401273727417, "learning_rate": 8e-05, "loss": 1.7348, "step": 2916 }, { "epoch": 0.4980365374765238, "grad_norm": 0.4749840199947357, "learning_rate": 8e-05, "loss": 1.7, "step": 2917 }, { "epoch": 0.4982072733481304, "grad_norm": 0.46517643332481384, "learning_rate": 8e-05, "loss": 1.6624, "step": 2918 }, { "epoch": 0.4983780092197371, "grad_norm": 0.4882124960422516, "learning_rate": 8e-05, "loss": 1.7154, "step": 2919 }, { "epoch": 0.49854874509134367, "grad_norm": 0.4745650887489319, "learning_rate": 8e-05, "loss": 1.7835, "step": 2920 }, { "epoch": 0.4987194809629503, "grad_norm": 0.44033342599868774, "learning_rate": 8e-05, "loss": 1.5284, "step": 2921 }, { "epoch": 0.4988902168345569, "grad_norm": 0.470261812210083, "learning_rate": 8e-05, "loss": 1.7708, "step": 2922 }, { "epoch": 0.4990609527061636, "grad_norm": 0.4623255431652069, "learning_rate": 8e-05, "loss": 1.5551, "step": 2923 }, { "epoch": 0.49923168857777017, "grad_norm": 0.48429611325263977, "learning_rate": 8e-05, "loss": 1.6204, "step": 2924 }, { "epoch": 0.4994024244493768, "grad_norm": 0.4563884735107422, "learning_rate": 8e-05, "loss": 1.7762, "step": 2925 }, { "epoch": 0.4995731603209834, "grad_norm": 0.45088106393814087, "learning_rate": 8e-05, "loss": 1.6287, "step": 2926 }, { "epoch": 0.4997438961925901, "grad_norm": 0.46165338158607483, "learning_rate": 8e-05, "loss": 1.7375, "step": 2927 }, { "epoch": 0.4999146320641967, "grad_norm": 0.5027192234992981, "learning_rate": 8e-05, "loss": 1.6942, "step": 2928 }, { "epoch": 0.5000853679358033, "grad_norm": 0.46143120527267456, "learning_rate": 8e-05, "loss": 1.5995, "step": 2929 }, { "epoch": 0.5002561038074099, "grad_norm": 0.4538455605506897, "learning_rate": 8e-05, "loss": 1.7252, "step": 2930 }, { "epoch": 0.5004268396790166, "grad_norm": 0.4785248637199402, "learning_rate": 8e-05, "loss": 1.5278, "step": 2931 }, { "epoch": 0.5005975755506232, "grad_norm": 0.44984501600265503, "learning_rate": 8e-05, "loss": 1.6154, "step": 2932 }, { "epoch": 0.5007683114222298, "grad_norm": 0.4750569760799408, "learning_rate": 8e-05, "loss": 1.6124, "step": 2933 }, { "epoch": 0.5009390472938364, "grad_norm": 0.5059394240379333, "learning_rate": 8e-05, "loss": 1.745, "step": 2934 }, { "epoch": 0.5011097831654431, "grad_norm": 0.4923405349254608, "learning_rate": 8e-05, "loss": 1.4748, "step": 2935 }, { "epoch": 0.5012805190370497, "grad_norm": 0.4743569493293762, "learning_rate": 8e-05, "loss": 1.6398, "step": 2936 }, { "epoch": 0.5014512549086563, "grad_norm": 0.471627801656723, "learning_rate": 8e-05, "loss": 1.7648, "step": 2937 }, { "epoch": 0.5016219907802629, "grad_norm": 0.4856167137622833, "learning_rate": 8e-05, "loss": 1.8869, "step": 2938 }, { "epoch": 0.5017927266518696, "grad_norm": 0.49059751629829407, "learning_rate": 8e-05, "loss": 1.7756, "step": 2939 }, { "epoch": 0.5019634625234762, "grad_norm": 0.4850859045982361, "learning_rate": 8e-05, "loss": 1.7618, "step": 2940 }, { "epoch": 0.5021341983950828, "grad_norm": 0.4588680565357208, "learning_rate": 8e-05, "loss": 1.8702, "step": 2941 }, { "epoch": 0.5023049342666894, "grad_norm": 0.4265677332878113, "learning_rate": 8e-05, "loss": 1.5198, "step": 2942 }, { "epoch": 0.5024756701382961, "grad_norm": 0.4688941538333893, "learning_rate": 8e-05, "loss": 1.7771, "step": 2943 }, { "epoch": 0.5026464060099027, "grad_norm": 0.45092153549194336, "learning_rate": 8e-05, "loss": 1.7076, "step": 2944 }, { "epoch": 0.5028171418815093, "grad_norm": 0.44515925645828247, "learning_rate": 8e-05, "loss": 1.7214, "step": 2945 }, { "epoch": 0.5029878777531159, "grad_norm": 0.4960660934448242, "learning_rate": 8e-05, "loss": 1.835, "step": 2946 }, { "epoch": 0.5031586136247226, "grad_norm": 0.48396891355514526, "learning_rate": 8e-05, "loss": 1.8816, "step": 2947 }, { "epoch": 0.5033293494963291, "grad_norm": 0.4683492183685303, "learning_rate": 8e-05, "loss": 1.745, "step": 2948 }, { "epoch": 0.5035000853679358, "grad_norm": 0.5017387866973877, "learning_rate": 8e-05, "loss": 1.867, "step": 2949 }, { "epoch": 0.5036708212395424, "grad_norm": 0.4724352955818176, "learning_rate": 8e-05, "loss": 1.7962, "step": 2950 }, { "epoch": 0.5038415571111491, "grad_norm": 0.4810575544834137, "learning_rate": 8e-05, "loss": 1.7173, "step": 2951 }, { "epoch": 0.5040122929827556, "grad_norm": 0.4713374972343445, "learning_rate": 8e-05, "loss": 1.7383, "step": 2952 }, { "epoch": 0.5041830288543623, "grad_norm": 0.4572106599807739, "learning_rate": 8e-05, "loss": 1.7592, "step": 2953 }, { "epoch": 0.5043537647259689, "grad_norm": 0.4755952060222626, "learning_rate": 8e-05, "loss": 1.743, "step": 2954 }, { "epoch": 0.5045245005975756, "grad_norm": 0.6874768733978271, "learning_rate": 8e-05, "loss": 1.6842, "step": 2955 }, { "epoch": 0.5046952364691821, "grad_norm": 0.4861033856868744, "learning_rate": 8e-05, "loss": 1.5364, "step": 2956 }, { "epoch": 0.5048659723407888, "grad_norm": 0.47667181491851807, "learning_rate": 8e-05, "loss": 1.779, "step": 2957 }, { "epoch": 0.5050367082123954, "grad_norm": 0.5169256329536438, "learning_rate": 8e-05, "loss": 1.7134, "step": 2958 }, { "epoch": 0.5052074440840021, "grad_norm": 0.4834003746509552, "learning_rate": 8e-05, "loss": 1.8269, "step": 2959 }, { "epoch": 0.5053781799556086, "grad_norm": 0.5171499848365784, "learning_rate": 8e-05, "loss": 1.6292, "step": 2960 }, { "epoch": 0.5055489158272153, "grad_norm": 0.46102604269981384, "learning_rate": 8e-05, "loss": 1.5658, "step": 2961 }, { "epoch": 0.5057196516988219, "grad_norm": 0.46823498606681824, "learning_rate": 8e-05, "loss": 1.4696, "step": 2962 }, { "epoch": 0.5058903875704286, "grad_norm": 0.4681349992752075, "learning_rate": 8e-05, "loss": 1.6932, "step": 2963 }, { "epoch": 0.5060611234420351, "grad_norm": 0.48428812623023987, "learning_rate": 8e-05, "loss": 1.7994, "step": 2964 }, { "epoch": 0.5062318593136418, "grad_norm": 0.4595687687397003, "learning_rate": 8e-05, "loss": 1.5966, "step": 2965 }, { "epoch": 0.5064025951852484, "grad_norm": 0.4361281991004944, "learning_rate": 8e-05, "loss": 1.4786, "step": 2966 }, { "epoch": 0.5065733310568551, "grad_norm": 0.46241965889930725, "learning_rate": 8e-05, "loss": 1.6547, "step": 2967 }, { "epoch": 0.5067440669284616, "grad_norm": 0.42521345615386963, "learning_rate": 8e-05, "loss": 1.6597, "step": 2968 }, { "epoch": 0.5069148028000683, "grad_norm": 0.48732078075408936, "learning_rate": 8e-05, "loss": 1.7409, "step": 2969 }, { "epoch": 0.5070855386716749, "grad_norm": 0.5102716088294983, "learning_rate": 8e-05, "loss": 1.7307, "step": 2970 }, { "epoch": 0.5072562745432816, "grad_norm": 0.4584282636642456, "learning_rate": 8e-05, "loss": 1.7659, "step": 2971 }, { "epoch": 0.5074270104148881, "grad_norm": 0.5056310892105103, "learning_rate": 8e-05, "loss": 1.6673, "step": 2972 }, { "epoch": 0.5075977462864948, "grad_norm": 0.478425532579422, "learning_rate": 8e-05, "loss": 1.7091, "step": 2973 }, { "epoch": 0.5077684821581014, "grad_norm": 0.48797088861465454, "learning_rate": 8e-05, "loss": 1.7443, "step": 2974 }, { "epoch": 0.5079392180297081, "grad_norm": 0.5015484690666199, "learning_rate": 8e-05, "loss": 1.7993, "step": 2975 }, { "epoch": 0.5081099539013146, "grad_norm": 0.4791986346244812, "learning_rate": 8e-05, "loss": 1.7399, "step": 2976 }, { "epoch": 0.5082806897729213, "grad_norm": 0.46009930968284607, "learning_rate": 8e-05, "loss": 1.6434, "step": 2977 }, { "epoch": 0.5084514256445279, "grad_norm": 0.4794132113456726, "learning_rate": 8e-05, "loss": 1.7825, "step": 2978 }, { "epoch": 0.5086221615161345, "grad_norm": 0.4591515362262726, "learning_rate": 8e-05, "loss": 1.6352, "step": 2979 }, { "epoch": 0.5087928973877411, "grad_norm": 0.43680357933044434, "learning_rate": 8e-05, "loss": 1.5235, "step": 2980 }, { "epoch": 0.5089636332593478, "grad_norm": 0.48241978883743286, "learning_rate": 8e-05, "loss": 1.8248, "step": 2981 }, { "epoch": 0.5091343691309544, "grad_norm": 0.4402645528316498, "learning_rate": 8e-05, "loss": 1.5782, "step": 2982 }, { "epoch": 0.509305105002561, "grad_norm": 0.46398794651031494, "learning_rate": 8e-05, "loss": 1.708, "step": 2983 }, { "epoch": 0.5094758408741676, "grad_norm": 0.4539839029312134, "learning_rate": 8e-05, "loss": 1.7338, "step": 2984 }, { "epoch": 0.5096465767457743, "grad_norm": 0.524556577205658, "learning_rate": 8e-05, "loss": 1.5352, "step": 2985 }, { "epoch": 0.509817312617381, "grad_norm": 0.47125130891799927, "learning_rate": 8e-05, "loss": 1.8172, "step": 2986 }, { "epoch": 0.5099880484889875, "grad_norm": 0.5166170001029968, "learning_rate": 8e-05, "loss": 1.8622, "step": 2987 }, { "epoch": 0.5101587843605941, "grad_norm": 0.4542105793952942, "learning_rate": 8e-05, "loss": 1.665, "step": 2988 }, { "epoch": 0.5103295202322008, "grad_norm": 0.5161030292510986, "learning_rate": 8e-05, "loss": 1.73, "step": 2989 }, { "epoch": 0.5105002561038074, "grad_norm": 0.46128028631210327, "learning_rate": 8e-05, "loss": 1.6045, "step": 2990 }, { "epoch": 0.510670991975414, "grad_norm": 0.4717986285686493, "learning_rate": 8e-05, "loss": 1.835, "step": 2991 }, { "epoch": 0.5108417278470206, "grad_norm": 0.47231537103652954, "learning_rate": 8e-05, "loss": 1.6216, "step": 2992 }, { "epoch": 0.5110124637186273, "grad_norm": 0.48478519916534424, "learning_rate": 8e-05, "loss": 1.777, "step": 2993 }, { "epoch": 0.511183199590234, "grad_norm": 0.46695926785469055, "learning_rate": 8e-05, "loss": 1.7274, "step": 2994 }, { "epoch": 0.5113539354618405, "grad_norm": 0.4936000108718872, "learning_rate": 8e-05, "loss": 1.8599, "step": 2995 }, { "epoch": 0.5115246713334471, "grad_norm": 0.4416693449020386, "learning_rate": 8e-05, "loss": 1.6078, "step": 2996 }, { "epoch": 0.5116954072050538, "grad_norm": 0.4868267774581909, "learning_rate": 8e-05, "loss": 1.5339, "step": 2997 }, { "epoch": 0.5118661430766605, "grad_norm": 0.47653087973594666, "learning_rate": 8e-05, "loss": 1.5754, "step": 2998 }, { "epoch": 0.512036878948267, "grad_norm": 0.4521692097187042, "learning_rate": 8e-05, "loss": 1.5331, "step": 2999 }, { "epoch": 0.5122076148198736, "grad_norm": 0.4927699565887451, "learning_rate": 8e-05, "loss": 1.5411, "step": 3000 }, { "epoch": 0.5123783506914803, "grad_norm": 0.49687856435775757, "learning_rate": 8e-05, "loss": 1.7154, "step": 3001 }, { "epoch": 0.512549086563087, "grad_norm": 0.5254907608032227, "learning_rate": 8e-05, "loss": 1.8575, "step": 3002 }, { "epoch": 0.5127198224346935, "grad_norm": 0.5252456665039062, "learning_rate": 8e-05, "loss": 1.7117, "step": 3003 }, { "epoch": 0.5128905583063001, "grad_norm": 0.46823686361312866, "learning_rate": 8e-05, "loss": 1.5193, "step": 3004 }, { "epoch": 0.5130612941779068, "grad_norm": 0.4998099207878113, "learning_rate": 8e-05, "loss": 1.6722, "step": 3005 }, { "epoch": 0.5132320300495135, "grad_norm": 0.47618260979652405, "learning_rate": 8e-05, "loss": 1.7253, "step": 3006 }, { "epoch": 0.51340276592112, "grad_norm": 0.47419315576553345, "learning_rate": 8e-05, "loss": 1.7415, "step": 3007 }, { "epoch": 0.5135735017927266, "grad_norm": 0.4653856158256531, "learning_rate": 8e-05, "loss": 1.7406, "step": 3008 }, { "epoch": 0.5137442376643333, "grad_norm": 0.49567946791648865, "learning_rate": 8e-05, "loss": 1.7812, "step": 3009 }, { "epoch": 0.51391497353594, "grad_norm": 0.4675254225730896, "learning_rate": 8e-05, "loss": 1.6231, "step": 3010 }, { "epoch": 0.5140857094075465, "grad_norm": 0.4541786313056946, "learning_rate": 8e-05, "loss": 1.584, "step": 3011 }, { "epoch": 0.5142564452791532, "grad_norm": 0.4904613196849823, "learning_rate": 8e-05, "loss": 1.6149, "step": 3012 }, { "epoch": 0.5144271811507598, "grad_norm": 0.47716224193573, "learning_rate": 8e-05, "loss": 1.7355, "step": 3013 }, { "epoch": 0.5145979170223663, "grad_norm": 0.46124961972236633, "learning_rate": 8e-05, "loss": 1.6802, "step": 3014 }, { "epoch": 0.514768652893973, "grad_norm": 0.4760732352733612, "learning_rate": 8e-05, "loss": 1.6316, "step": 3015 }, { "epoch": 0.5149393887655797, "grad_norm": 0.5171788334846497, "learning_rate": 8e-05, "loss": 1.7958, "step": 3016 }, { "epoch": 0.5151101246371863, "grad_norm": 0.5120903253555298, "learning_rate": 8e-05, "loss": 1.6598, "step": 3017 }, { "epoch": 0.5152808605087928, "grad_norm": 0.4420072138309479, "learning_rate": 8e-05, "loss": 1.6996, "step": 3018 }, { "epoch": 0.5154515963803995, "grad_norm": 0.4810623526573181, "learning_rate": 8e-05, "loss": 1.8438, "step": 3019 }, { "epoch": 0.5156223322520062, "grad_norm": 0.4285312294960022, "learning_rate": 8e-05, "loss": 1.5943, "step": 3020 }, { "epoch": 0.5157930681236128, "grad_norm": 0.4533747434616089, "learning_rate": 8e-05, "loss": 1.7151, "step": 3021 }, { "epoch": 0.5159638039952194, "grad_norm": 0.43888235092163086, "learning_rate": 8e-05, "loss": 1.5187, "step": 3022 }, { "epoch": 0.516134539866826, "grad_norm": 0.45601505041122437, "learning_rate": 8e-05, "loss": 1.5553, "step": 3023 }, { "epoch": 0.5163052757384327, "grad_norm": 0.4457545280456543, "learning_rate": 8e-05, "loss": 1.5172, "step": 3024 }, { "epoch": 0.5164760116100393, "grad_norm": 0.46725958585739136, "learning_rate": 8e-05, "loss": 1.5196, "step": 3025 }, { "epoch": 0.5166467474816459, "grad_norm": 0.5001928210258484, "learning_rate": 8e-05, "loss": 1.7974, "step": 3026 }, { "epoch": 0.5168174833532525, "grad_norm": 0.4672739505767822, "learning_rate": 8e-05, "loss": 1.6856, "step": 3027 }, { "epoch": 0.5169882192248592, "grad_norm": 0.5220106244087219, "learning_rate": 8e-05, "loss": 1.8427, "step": 3028 }, { "epoch": 0.5171589550964658, "grad_norm": 0.4581565260887146, "learning_rate": 8e-05, "loss": 1.7229, "step": 3029 }, { "epoch": 0.5173296909680724, "grad_norm": 0.4800087511539459, "learning_rate": 8e-05, "loss": 1.5909, "step": 3030 }, { "epoch": 0.517500426839679, "grad_norm": 0.5192506909370422, "learning_rate": 8e-05, "loss": 1.7755, "step": 3031 }, { "epoch": 0.5176711627112857, "grad_norm": 0.4730176329612732, "learning_rate": 8e-05, "loss": 1.8041, "step": 3032 }, { "epoch": 0.5178418985828923, "grad_norm": 0.45754191279411316, "learning_rate": 8e-05, "loss": 1.7759, "step": 3033 }, { "epoch": 0.5180126344544989, "grad_norm": 0.4666658937931061, "learning_rate": 8e-05, "loss": 1.7651, "step": 3034 }, { "epoch": 0.5181833703261055, "grad_norm": 0.4581877887248993, "learning_rate": 8e-05, "loss": 1.6719, "step": 3035 }, { "epoch": 0.5183541061977122, "grad_norm": 0.4763900339603424, "learning_rate": 8e-05, "loss": 1.7532, "step": 3036 }, { "epoch": 0.5185248420693188, "grad_norm": 0.4489060938358307, "learning_rate": 8e-05, "loss": 1.5181, "step": 3037 }, { "epoch": 0.5186955779409254, "grad_norm": 0.4898918569087982, "learning_rate": 8e-05, "loss": 1.6509, "step": 3038 }, { "epoch": 0.518866313812532, "grad_norm": 0.4554068446159363, "learning_rate": 8e-05, "loss": 1.7271, "step": 3039 }, { "epoch": 0.5190370496841387, "grad_norm": 0.46523797512054443, "learning_rate": 8e-05, "loss": 1.7035, "step": 3040 }, { "epoch": 0.5192077855557453, "grad_norm": 0.45822492241859436, "learning_rate": 8e-05, "loss": 1.8437, "step": 3041 }, { "epoch": 0.5193785214273519, "grad_norm": 0.46781644225120544, "learning_rate": 8e-05, "loss": 1.6875, "step": 3042 }, { "epoch": 0.5195492572989585, "grad_norm": 0.44657817482948303, "learning_rate": 8e-05, "loss": 1.617, "step": 3043 }, { "epoch": 0.5197199931705652, "grad_norm": 0.5358601808547974, "learning_rate": 8e-05, "loss": 1.6984, "step": 3044 }, { "epoch": 0.5198907290421717, "grad_norm": 0.46921995282173157, "learning_rate": 8e-05, "loss": 1.7257, "step": 3045 }, { "epoch": 0.5200614649137784, "grad_norm": 0.4593559503555298, "learning_rate": 8e-05, "loss": 1.6559, "step": 3046 }, { "epoch": 0.520232200785385, "grad_norm": 0.47281986474990845, "learning_rate": 8e-05, "loss": 1.8956, "step": 3047 }, { "epoch": 0.5204029366569917, "grad_norm": 0.45302248001098633, "learning_rate": 8e-05, "loss": 1.5898, "step": 3048 }, { "epoch": 0.5205736725285982, "grad_norm": 0.4648543894290924, "learning_rate": 8e-05, "loss": 1.723, "step": 3049 }, { "epoch": 0.5207444084002049, "grad_norm": 0.46502789855003357, "learning_rate": 8e-05, "loss": 1.6323, "step": 3050 }, { "epoch": 0.5209151442718115, "grad_norm": 0.42568159103393555, "learning_rate": 8e-05, "loss": 1.5227, "step": 3051 }, { "epoch": 0.5210858801434182, "grad_norm": 0.4693038761615753, "learning_rate": 8e-05, "loss": 1.6579, "step": 3052 }, { "epoch": 0.5212566160150247, "grad_norm": 0.46511316299438477, "learning_rate": 8e-05, "loss": 1.5655, "step": 3053 }, { "epoch": 0.5214273518866314, "grad_norm": 0.4911282956600189, "learning_rate": 8e-05, "loss": 1.9285, "step": 3054 }, { "epoch": 0.521598087758238, "grad_norm": 0.4462016522884369, "learning_rate": 8e-05, "loss": 1.4651, "step": 3055 }, { "epoch": 0.5217688236298447, "grad_norm": 0.45094361901283264, "learning_rate": 8e-05, "loss": 1.576, "step": 3056 }, { "epoch": 0.5219395595014512, "grad_norm": 0.4596937298774719, "learning_rate": 8e-05, "loss": 1.7351, "step": 3057 }, { "epoch": 0.5221102953730579, "grad_norm": 0.47252222895622253, "learning_rate": 8e-05, "loss": 1.6367, "step": 3058 }, { "epoch": 0.5222810312446645, "grad_norm": 0.45939162373542786, "learning_rate": 8e-05, "loss": 1.6282, "step": 3059 }, { "epoch": 0.5224517671162712, "grad_norm": 0.47160059213638306, "learning_rate": 8e-05, "loss": 1.6095, "step": 3060 }, { "epoch": 0.5226225029878777, "grad_norm": 0.4556399881839752, "learning_rate": 8e-05, "loss": 1.5939, "step": 3061 }, { "epoch": 0.5227932388594844, "grad_norm": 0.422037273645401, "learning_rate": 8e-05, "loss": 1.4816, "step": 3062 }, { "epoch": 0.522963974731091, "grad_norm": 0.5072676539421082, "learning_rate": 8e-05, "loss": 1.7621, "step": 3063 }, { "epoch": 0.5231347106026977, "grad_norm": 0.43973466753959656, "learning_rate": 8e-05, "loss": 1.5991, "step": 3064 }, { "epoch": 0.5233054464743042, "grad_norm": 0.4654005765914917, "learning_rate": 8e-05, "loss": 1.568, "step": 3065 }, { "epoch": 0.5234761823459109, "grad_norm": 0.443205326795578, "learning_rate": 8e-05, "loss": 1.6359, "step": 3066 }, { "epoch": 0.5236469182175175, "grad_norm": 0.48556774854660034, "learning_rate": 8e-05, "loss": 1.712, "step": 3067 }, { "epoch": 0.5238176540891242, "grad_norm": 0.49253761768341064, "learning_rate": 8e-05, "loss": 1.8329, "step": 3068 }, { "epoch": 0.5239883899607307, "grad_norm": 0.4620918333530426, "learning_rate": 8e-05, "loss": 1.7049, "step": 3069 }, { "epoch": 0.5241591258323374, "grad_norm": 0.5562222003936768, "learning_rate": 8e-05, "loss": 1.9583, "step": 3070 }, { "epoch": 0.524329861703944, "grad_norm": 0.462167352437973, "learning_rate": 8e-05, "loss": 1.5947, "step": 3071 }, { "epoch": 0.5245005975755507, "grad_norm": 0.5202651619911194, "learning_rate": 8e-05, "loss": 1.6805, "step": 3072 }, { "epoch": 0.5246713334471572, "grad_norm": 0.44867345690727234, "learning_rate": 8e-05, "loss": 1.4849, "step": 3073 }, { "epoch": 0.5248420693187639, "grad_norm": 0.45303332805633545, "learning_rate": 8e-05, "loss": 1.6679, "step": 3074 }, { "epoch": 0.5250128051903705, "grad_norm": 0.4807845950126648, "learning_rate": 8e-05, "loss": 1.6412, "step": 3075 }, { "epoch": 0.5251835410619771, "grad_norm": 0.5011045336723328, "learning_rate": 8e-05, "loss": 1.7003, "step": 3076 }, { "epoch": 0.5253542769335837, "grad_norm": 0.4641472399234772, "learning_rate": 8e-05, "loss": 1.754, "step": 3077 }, { "epoch": 0.5255250128051904, "grad_norm": 0.4950914978981018, "learning_rate": 8e-05, "loss": 1.6495, "step": 3078 }, { "epoch": 0.525695748676797, "grad_norm": 0.44170624017715454, "learning_rate": 8e-05, "loss": 1.624, "step": 3079 }, { "epoch": 0.5258664845484036, "grad_norm": 0.47776085138320923, "learning_rate": 8e-05, "loss": 1.7037, "step": 3080 }, { "epoch": 0.5260372204200102, "grad_norm": 0.44707053899765015, "learning_rate": 8e-05, "loss": 1.6447, "step": 3081 }, { "epoch": 0.5262079562916169, "grad_norm": 0.4764224886894226, "learning_rate": 8e-05, "loss": 1.6502, "step": 3082 }, { "epoch": 0.5263786921632235, "grad_norm": 0.4905192255973816, "learning_rate": 8e-05, "loss": 1.5779, "step": 3083 }, { "epoch": 0.5265494280348301, "grad_norm": 0.4892081022262573, "learning_rate": 8e-05, "loss": 1.7893, "step": 3084 }, { "epoch": 0.5267201639064367, "grad_norm": 0.4458787143230438, "learning_rate": 8e-05, "loss": 1.5506, "step": 3085 }, { "epoch": 0.5268908997780434, "grad_norm": 0.4772554636001587, "learning_rate": 8e-05, "loss": 1.7728, "step": 3086 }, { "epoch": 0.52706163564965, "grad_norm": 0.47485804557800293, "learning_rate": 8e-05, "loss": 1.6932, "step": 3087 }, { "epoch": 0.5272323715212566, "grad_norm": 0.4497566521167755, "learning_rate": 8e-05, "loss": 1.5932, "step": 3088 }, { "epoch": 0.5274031073928632, "grad_norm": 0.4544573724269867, "learning_rate": 8e-05, "loss": 1.6441, "step": 3089 }, { "epoch": 0.5275738432644699, "grad_norm": 0.4664478003978729, "learning_rate": 8e-05, "loss": 1.7835, "step": 3090 }, { "epoch": 0.5277445791360765, "grad_norm": 0.46521231532096863, "learning_rate": 8e-05, "loss": 1.6244, "step": 3091 }, { "epoch": 0.5279153150076831, "grad_norm": 0.49758511781692505, "learning_rate": 8e-05, "loss": 1.5523, "step": 3092 }, { "epoch": 0.5280860508792897, "grad_norm": 0.5030139684677124, "learning_rate": 8e-05, "loss": 1.8358, "step": 3093 }, { "epoch": 0.5282567867508964, "grad_norm": 0.5193340182304382, "learning_rate": 8e-05, "loss": 1.6669, "step": 3094 }, { "epoch": 0.528427522622503, "grad_norm": 0.48758816719055176, "learning_rate": 8e-05, "loss": 1.7729, "step": 3095 }, { "epoch": 0.5285982584941096, "grad_norm": 0.49531257152557373, "learning_rate": 8e-05, "loss": 1.6601, "step": 3096 }, { "epoch": 0.5287689943657162, "grad_norm": 0.48018723726272583, "learning_rate": 8e-05, "loss": 1.7537, "step": 3097 }, { "epoch": 0.5289397302373229, "grad_norm": 0.4441763460636139, "learning_rate": 8e-05, "loss": 1.6496, "step": 3098 }, { "epoch": 0.5291104661089295, "grad_norm": 0.45262783765792847, "learning_rate": 8e-05, "loss": 1.5052, "step": 3099 }, { "epoch": 0.5292812019805361, "grad_norm": 0.46708300709724426, "learning_rate": 8e-05, "loss": 1.6237, "step": 3100 }, { "epoch": 0.5294519378521427, "grad_norm": 0.5584198236465454, "learning_rate": 8e-05, "loss": 1.812, "step": 3101 }, { "epoch": 0.5296226737237494, "grad_norm": 0.4623683989048004, "learning_rate": 8e-05, "loss": 1.704, "step": 3102 }, { "epoch": 0.529793409595356, "grad_norm": 0.4388664960861206, "learning_rate": 8e-05, "loss": 1.6368, "step": 3103 }, { "epoch": 0.5299641454669626, "grad_norm": 0.49267709255218506, "learning_rate": 8e-05, "loss": 1.8507, "step": 3104 }, { "epoch": 0.5301348813385692, "grad_norm": 0.49554193019866943, "learning_rate": 8e-05, "loss": 1.6301, "step": 3105 }, { "epoch": 0.5303056172101759, "grad_norm": 0.48525819182395935, "learning_rate": 8e-05, "loss": 1.8663, "step": 3106 }, { "epoch": 0.5304763530817825, "grad_norm": 0.4952967166900635, "learning_rate": 8e-05, "loss": 1.6731, "step": 3107 }, { "epoch": 0.5306470889533891, "grad_norm": 0.45838576555252075, "learning_rate": 8e-05, "loss": 1.679, "step": 3108 }, { "epoch": 0.5308178248249957, "grad_norm": 0.45535460114479065, "learning_rate": 8e-05, "loss": 1.6607, "step": 3109 }, { "epoch": 0.5309885606966024, "grad_norm": 0.44999194145202637, "learning_rate": 8e-05, "loss": 1.6861, "step": 3110 }, { "epoch": 0.5311592965682089, "grad_norm": 0.4493943452835083, "learning_rate": 8e-05, "loss": 1.3826, "step": 3111 }, { "epoch": 0.5313300324398156, "grad_norm": 0.4562506377696991, "learning_rate": 8e-05, "loss": 1.6719, "step": 3112 }, { "epoch": 0.5315007683114222, "grad_norm": 0.47013479471206665, "learning_rate": 8e-05, "loss": 1.6946, "step": 3113 }, { "epoch": 0.5316715041830289, "grad_norm": 0.48105794191360474, "learning_rate": 8e-05, "loss": 1.6856, "step": 3114 }, { "epoch": 0.5318422400546354, "grad_norm": 0.4780803918838501, "learning_rate": 8e-05, "loss": 1.7177, "step": 3115 }, { "epoch": 0.5320129759262421, "grad_norm": 0.4390682876110077, "learning_rate": 8e-05, "loss": 1.5447, "step": 3116 }, { "epoch": 0.5321837117978487, "grad_norm": 0.4411123991012573, "learning_rate": 8e-05, "loss": 1.5306, "step": 3117 }, { "epoch": 0.5323544476694554, "grad_norm": 0.471196711063385, "learning_rate": 8e-05, "loss": 1.6941, "step": 3118 }, { "epoch": 0.5325251835410619, "grad_norm": 0.4867100417613983, "learning_rate": 8e-05, "loss": 1.6214, "step": 3119 }, { "epoch": 0.5326959194126686, "grad_norm": 0.5021118521690369, "learning_rate": 8e-05, "loss": 1.7985, "step": 3120 }, { "epoch": 0.5328666552842752, "grad_norm": 0.44731831550598145, "learning_rate": 8e-05, "loss": 1.5944, "step": 3121 }, { "epoch": 0.5330373911558819, "grad_norm": 0.48064547777175903, "learning_rate": 8e-05, "loss": 1.6891, "step": 3122 }, { "epoch": 0.5332081270274884, "grad_norm": 0.45175856351852417, "learning_rate": 8e-05, "loss": 1.6684, "step": 3123 }, { "epoch": 0.5333788628990951, "grad_norm": 0.5032597184181213, "learning_rate": 8e-05, "loss": 1.7902, "step": 3124 }, { "epoch": 0.5335495987707017, "grad_norm": 0.46419867873191833, "learning_rate": 8e-05, "loss": 1.7137, "step": 3125 }, { "epoch": 0.5337203346423084, "grad_norm": 0.47107356786727905, "learning_rate": 8e-05, "loss": 1.8363, "step": 3126 }, { "epoch": 0.5338910705139149, "grad_norm": 0.4563312530517578, "learning_rate": 8e-05, "loss": 1.5638, "step": 3127 }, { "epoch": 0.5340618063855216, "grad_norm": 0.493177592754364, "learning_rate": 8e-05, "loss": 1.6808, "step": 3128 }, { "epoch": 0.5342325422571282, "grad_norm": 0.47253596782684326, "learning_rate": 8e-05, "loss": 1.7323, "step": 3129 }, { "epoch": 0.5344032781287349, "grad_norm": 0.4713157117366791, "learning_rate": 8e-05, "loss": 1.6668, "step": 3130 }, { "epoch": 0.5345740140003414, "grad_norm": 0.4474860727787018, "learning_rate": 8e-05, "loss": 1.735, "step": 3131 }, { "epoch": 0.5347447498719481, "grad_norm": 0.5730111002922058, "learning_rate": 8e-05, "loss": 1.8322, "step": 3132 }, { "epoch": 0.5349154857435547, "grad_norm": 0.540249764919281, "learning_rate": 8e-05, "loss": 1.8709, "step": 3133 }, { "epoch": 0.5350862216151614, "grad_norm": 0.5036017894744873, "learning_rate": 8e-05, "loss": 1.7361, "step": 3134 }, { "epoch": 0.5352569574867679, "grad_norm": 0.49276629090309143, "learning_rate": 8e-05, "loss": 1.6894, "step": 3135 }, { "epoch": 0.5354276933583746, "grad_norm": 0.4319154918193817, "learning_rate": 8e-05, "loss": 1.5802, "step": 3136 }, { "epoch": 0.5355984292299812, "grad_norm": 0.43404924869537354, "learning_rate": 8e-05, "loss": 1.5568, "step": 3137 }, { "epoch": 0.5357691651015879, "grad_norm": 0.49559593200683594, "learning_rate": 8e-05, "loss": 1.6908, "step": 3138 }, { "epoch": 0.5359399009731944, "grad_norm": 0.4633415639400482, "learning_rate": 8e-05, "loss": 1.6297, "step": 3139 }, { "epoch": 0.5361106368448011, "grad_norm": 0.46184831857681274, "learning_rate": 8e-05, "loss": 1.7299, "step": 3140 }, { "epoch": 0.5362813727164077, "grad_norm": 0.5076905488967896, "learning_rate": 8e-05, "loss": 1.4696, "step": 3141 }, { "epoch": 0.5364521085880143, "grad_norm": 0.4695439338684082, "learning_rate": 8e-05, "loss": 1.7348, "step": 3142 }, { "epoch": 0.5366228444596209, "grad_norm": 0.4706817865371704, "learning_rate": 8e-05, "loss": 1.7603, "step": 3143 }, { "epoch": 0.5367935803312276, "grad_norm": 0.4774729609489441, "learning_rate": 8e-05, "loss": 1.6223, "step": 3144 }, { "epoch": 0.5369643162028342, "grad_norm": 0.4810127913951874, "learning_rate": 8e-05, "loss": 1.7691, "step": 3145 }, { "epoch": 0.5371350520744408, "grad_norm": 0.460782527923584, "learning_rate": 8e-05, "loss": 1.6175, "step": 3146 }, { "epoch": 0.5373057879460474, "grad_norm": 0.4737483263015747, "learning_rate": 8e-05, "loss": 1.666, "step": 3147 }, { "epoch": 0.5374765238176541, "grad_norm": 0.44914042949676514, "learning_rate": 8e-05, "loss": 1.5008, "step": 3148 }, { "epoch": 0.5376472596892607, "grad_norm": 0.48232489824295044, "learning_rate": 8e-05, "loss": 1.6595, "step": 3149 }, { "epoch": 0.5378179955608673, "grad_norm": 0.5100310444831848, "learning_rate": 8e-05, "loss": 1.775, "step": 3150 }, { "epoch": 0.5379887314324739, "grad_norm": 0.45110830664634705, "learning_rate": 8e-05, "loss": 1.6066, "step": 3151 }, { "epoch": 0.5381594673040806, "grad_norm": 0.48488762974739075, "learning_rate": 8e-05, "loss": 1.7164, "step": 3152 }, { "epoch": 0.5383302031756872, "grad_norm": 0.4340057075023651, "learning_rate": 8e-05, "loss": 1.6561, "step": 3153 }, { "epoch": 0.5385009390472938, "grad_norm": 0.5103899240493774, "learning_rate": 8e-05, "loss": 1.8866, "step": 3154 }, { "epoch": 0.5386716749189004, "grad_norm": 0.48089632391929626, "learning_rate": 8e-05, "loss": 1.7147, "step": 3155 }, { "epoch": 0.5388424107905071, "grad_norm": 0.45938971638679504, "learning_rate": 8e-05, "loss": 1.607, "step": 3156 }, { "epoch": 0.5390131466621138, "grad_norm": 0.4963492751121521, "learning_rate": 8e-05, "loss": 1.6922, "step": 3157 }, { "epoch": 0.5391838825337203, "grad_norm": 0.4598407745361328, "learning_rate": 8e-05, "loss": 1.4014, "step": 3158 }, { "epoch": 0.539354618405327, "grad_norm": 0.4919363558292389, "learning_rate": 8e-05, "loss": 1.745, "step": 3159 }, { "epoch": 0.5395253542769336, "grad_norm": 0.5041525363922119, "learning_rate": 8e-05, "loss": 1.7108, "step": 3160 }, { "epoch": 0.5396960901485403, "grad_norm": 0.4515829086303711, "learning_rate": 8e-05, "loss": 1.5331, "step": 3161 }, { "epoch": 0.5398668260201468, "grad_norm": 0.47814032435417175, "learning_rate": 8e-05, "loss": 1.7769, "step": 3162 }, { "epoch": 0.5400375618917534, "grad_norm": 0.45417508482933044, "learning_rate": 8e-05, "loss": 1.6678, "step": 3163 }, { "epoch": 0.5402082977633601, "grad_norm": 0.53815096616745, "learning_rate": 8e-05, "loss": 1.8518, "step": 3164 }, { "epoch": 0.5403790336349668, "grad_norm": 0.47144120931625366, "learning_rate": 8e-05, "loss": 1.7903, "step": 3165 }, { "epoch": 0.5405497695065733, "grad_norm": 0.455252081155777, "learning_rate": 8e-05, "loss": 1.6661, "step": 3166 }, { "epoch": 0.54072050537818, "grad_norm": 0.4718973934650421, "learning_rate": 8e-05, "loss": 1.5819, "step": 3167 }, { "epoch": 0.5408912412497866, "grad_norm": 0.4838426113128662, "learning_rate": 8e-05, "loss": 1.5915, "step": 3168 }, { "epoch": 0.5410619771213933, "grad_norm": 0.4842928946018219, "learning_rate": 8e-05, "loss": 1.8107, "step": 3169 }, { "epoch": 0.5412327129929998, "grad_norm": 0.4814947843551636, "learning_rate": 8e-05, "loss": 1.6846, "step": 3170 }, { "epoch": 0.5414034488646065, "grad_norm": 0.4671061933040619, "learning_rate": 8e-05, "loss": 1.5085, "step": 3171 }, { "epoch": 0.5415741847362131, "grad_norm": 0.46838417649269104, "learning_rate": 8e-05, "loss": 1.6215, "step": 3172 }, { "epoch": 0.5417449206078198, "grad_norm": 0.5369250178337097, "learning_rate": 8e-05, "loss": 1.8238, "step": 3173 }, { "epoch": 0.5419156564794263, "grad_norm": 0.4743238687515259, "learning_rate": 8e-05, "loss": 1.7297, "step": 3174 }, { "epoch": 0.542086392351033, "grad_norm": 0.4554976522922516, "learning_rate": 8e-05, "loss": 1.6225, "step": 3175 }, { "epoch": 0.5422571282226396, "grad_norm": 0.4602017402648926, "learning_rate": 8e-05, "loss": 1.7381, "step": 3176 }, { "epoch": 0.5424278640942461, "grad_norm": 0.4822181165218353, "learning_rate": 8e-05, "loss": 1.8134, "step": 3177 }, { "epoch": 0.5425985999658528, "grad_norm": 0.4718601107597351, "learning_rate": 8e-05, "loss": 1.6211, "step": 3178 }, { "epoch": 0.5427693358374595, "grad_norm": 0.5084348320960999, "learning_rate": 8e-05, "loss": 1.9278, "step": 3179 }, { "epoch": 0.5429400717090661, "grad_norm": 0.46307650208473206, "learning_rate": 8e-05, "loss": 1.6389, "step": 3180 }, { "epoch": 0.5431108075806726, "grad_norm": 0.4730548560619354, "learning_rate": 8e-05, "loss": 1.6328, "step": 3181 }, { "epoch": 0.5432815434522793, "grad_norm": 0.5397545695304871, "learning_rate": 8e-05, "loss": 1.9498, "step": 3182 }, { "epoch": 0.543452279323886, "grad_norm": 0.4610382616519928, "learning_rate": 8e-05, "loss": 1.6621, "step": 3183 }, { "epoch": 0.5436230151954926, "grad_norm": 0.4946828782558441, "learning_rate": 8e-05, "loss": 1.6329, "step": 3184 }, { "epoch": 0.5437937510670992, "grad_norm": 0.4916592538356781, "learning_rate": 8e-05, "loss": 1.6765, "step": 3185 }, { "epoch": 0.5439644869387058, "grad_norm": 0.5138195157051086, "learning_rate": 8e-05, "loss": 1.9815, "step": 3186 }, { "epoch": 0.5441352228103125, "grad_norm": 0.4761867821216583, "learning_rate": 8e-05, "loss": 1.6578, "step": 3187 }, { "epoch": 0.5443059586819191, "grad_norm": 0.47480508685112, "learning_rate": 8e-05, "loss": 1.6103, "step": 3188 }, { "epoch": 0.5444766945535257, "grad_norm": 0.4655658006668091, "learning_rate": 8e-05, "loss": 1.7833, "step": 3189 }, { "epoch": 0.5446474304251323, "grad_norm": 0.45511871576309204, "learning_rate": 8e-05, "loss": 1.6706, "step": 3190 }, { "epoch": 0.544818166296739, "grad_norm": 0.4707539975643158, "learning_rate": 8e-05, "loss": 1.6646, "step": 3191 }, { "epoch": 0.5449889021683456, "grad_norm": 0.4434085786342621, "learning_rate": 8e-05, "loss": 1.6211, "step": 3192 }, { "epoch": 0.5451596380399522, "grad_norm": 0.4662170708179474, "learning_rate": 8e-05, "loss": 1.6638, "step": 3193 }, { "epoch": 0.5453303739115588, "grad_norm": 0.45334944128990173, "learning_rate": 8e-05, "loss": 1.5209, "step": 3194 }, { "epoch": 0.5455011097831655, "grad_norm": 0.4761353135108948, "learning_rate": 8e-05, "loss": 1.5422, "step": 3195 }, { "epoch": 0.5456718456547721, "grad_norm": 0.48589351773262024, "learning_rate": 8e-05, "loss": 1.695, "step": 3196 }, { "epoch": 0.5458425815263787, "grad_norm": 0.5432782769203186, "learning_rate": 8e-05, "loss": 1.8266, "step": 3197 }, { "epoch": 0.5460133173979853, "grad_norm": 0.4549084007740021, "learning_rate": 8e-05, "loss": 1.702, "step": 3198 }, { "epoch": 0.546184053269592, "grad_norm": 0.4909738004207611, "learning_rate": 8e-05, "loss": 1.8095, "step": 3199 }, { "epoch": 0.5463547891411986, "grad_norm": 0.4887339770793915, "learning_rate": 8e-05, "loss": 1.5183, "step": 3200 }, { "epoch": 0.5465255250128052, "grad_norm": 0.4791821837425232, "learning_rate": 8e-05, "loss": 1.6571, "step": 3201 }, { "epoch": 0.5466962608844118, "grad_norm": 0.5119158029556274, "learning_rate": 8e-05, "loss": 1.7942, "step": 3202 }, { "epoch": 0.5468669967560185, "grad_norm": 0.45645517110824585, "learning_rate": 8e-05, "loss": 1.5536, "step": 3203 }, { "epoch": 0.5470377326276251, "grad_norm": 0.47702762484550476, "learning_rate": 8e-05, "loss": 1.691, "step": 3204 }, { "epoch": 0.5472084684992317, "grad_norm": 0.45964792370796204, "learning_rate": 8e-05, "loss": 1.5377, "step": 3205 }, { "epoch": 0.5473792043708383, "grad_norm": 0.49799901247024536, "learning_rate": 8e-05, "loss": 1.7128, "step": 3206 }, { "epoch": 0.547549940242445, "grad_norm": 0.5345284938812256, "learning_rate": 8e-05, "loss": 1.6907, "step": 3207 }, { "epoch": 0.5477206761140515, "grad_norm": 0.533539354801178, "learning_rate": 8e-05, "loss": 1.7215, "step": 3208 }, { "epoch": 0.5478914119856582, "grad_norm": 0.46878620982170105, "learning_rate": 8e-05, "loss": 1.7697, "step": 3209 }, { "epoch": 0.5480621478572648, "grad_norm": 0.5013412833213806, "learning_rate": 8e-05, "loss": 1.5899, "step": 3210 }, { "epoch": 0.5482328837288715, "grad_norm": 0.48745232820510864, "learning_rate": 8e-05, "loss": 1.7124, "step": 3211 }, { "epoch": 0.548403619600478, "grad_norm": 0.52227783203125, "learning_rate": 8e-05, "loss": 1.7378, "step": 3212 }, { "epoch": 0.5485743554720847, "grad_norm": 0.4461539387702942, "learning_rate": 8e-05, "loss": 1.606, "step": 3213 }, { "epoch": 0.5487450913436913, "grad_norm": 0.4658341705799103, "learning_rate": 8e-05, "loss": 1.7889, "step": 3214 }, { "epoch": 0.548915827215298, "grad_norm": 0.4837781488895416, "learning_rate": 8e-05, "loss": 1.7305, "step": 3215 }, { "epoch": 0.5490865630869045, "grad_norm": 0.46719491481781006, "learning_rate": 8e-05, "loss": 1.5694, "step": 3216 }, { "epoch": 0.5492572989585112, "grad_norm": 0.4546234607696533, "learning_rate": 8e-05, "loss": 1.3179, "step": 3217 }, { "epoch": 0.5494280348301178, "grad_norm": 0.5014774203300476, "learning_rate": 8e-05, "loss": 1.8477, "step": 3218 }, { "epoch": 0.5495987707017245, "grad_norm": 0.46697306632995605, "learning_rate": 8e-05, "loss": 1.7402, "step": 3219 }, { "epoch": 0.549769506573331, "grad_norm": 0.49721264839172363, "learning_rate": 8e-05, "loss": 1.8222, "step": 3220 }, { "epoch": 0.5499402424449377, "grad_norm": 0.4487724006175995, "learning_rate": 8e-05, "loss": 1.569, "step": 3221 }, { "epoch": 0.5501109783165443, "grad_norm": 0.49756938219070435, "learning_rate": 8e-05, "loss": 1.6835, "step": 3222 }, { "epoch": 0.550281714188151, "grad_norm": 0.48645612597465515, "learning_rate": 8e-05, "loss": 1.6622, "step": 3223 }, { "epoch": 0.5504524500597575, "grad_norm": 0.4620543420314789, "learning_rate": 8e-05, "loss": 1.7229, "step": 3224 }, { "epoch": 0.5506231859313642, "grad_norm": 0.502514123916626, "learning_rate": 8e-05, "loss": 1.7948, "step": 3225 }, { "epoch": 0.5507939218029708, "grad_norm": 0.4844774305820465, "learning_rate": 8e-05, "loss": 1.6695, "step": 3226 }, { "epoch": 0.5509646576745775, "grad_norm": 0.5535739660263062, "learning_rate": 8e-05, "loss": 2.0038, "step": 3227 }, { "epoch": 0.551135393546184, "grad_norm": 0.5101340413093567, "learning_rate": 8e-05, "loss": 1.768, "step": 3228 }, { "epoch": 0.5513061294177907, "grad_norm": 0.48049411177635193, "learning_rate": 8e-05, "loss": 1.5356, "step": 3229 }, { "epoch": 0.5514768652893973, "grad_norm": 0.4841262102127075, "learning_rate": 8e-05, "loss": 1.6437, "step": 3230 }, { "epoch": 0.551647601161004, "grad_norm": 0.47942110896110535, "learning_rate": 8e-05, "loss": 1.7265, "step": 3231 }, { "epoch": 0.5518183370326105, "grad_norm": 0.5248327851295471, "learning_rate": 8e-05, "loss": 1.566, "step": 3232 }, { "epoch": 0.5519890729042172, "grad_norm": 0.4360003173351288, "learning_rate": 8e-05, "loss": 1.5237, "step": 3233 }, { "epoch": 0.5521598087758238, "grad_norm": 0.4660533666610718, "learning_rate": 8e-05, "loss": 1.6251, "step": 3234 }, { "epoch": 0.5523305446474305, "grad_norm": 0.46013393998146057, "learning_rate": 8e-05, "loss": 1.5932, "step": 3235 }, { "epoch": 0.552501280519037, "grad_norm": 0.4792756140232086, "learning_rate": 8e-05, "loss": 1.771, "step": 3236 }, { "epoch": 0.5526720163906437, "grad_norm": 0.44299209117889404, "learning_rate": 8e-05, "loss": 1.6441, "step": 3237 }, { "epoch": 0.5528427522622503, "grad_norm": 0.46385467052459717, "learning_rate": 8e-05, "loss": 1.6696, "step": 3238 }, { "epoch": 0.553013488133857, "grad_norm": 0.4661048948764801, "learning_rate": 8e-05, "loss": 1.7647, "step": 3239 }, { "epoch": 0.5531842240054635, "grad_norm": 0.458232045173645, "learning_rate": 8e-05, "loss": 1.714, "step": 3240 }, { "epoch": 0.5533549598770702, "grad_norm": 0.45860129594802856, "learning_rate": 8e-05, "loss": 1.7294, "step": 3241 }, { "epoch": 0.5535256957486768, "grad_norm": 0.4625643193721771, "learning_rate": 8e-05, "loss": 1.7316, "step": 3242 }, { "epoch": 0.5536964316202834, "grad_norm": 0.46631452441215515, "learning_rate": 8e-05, "loss": 1.6649, "step": 3243 }, { "epoch": 0.55386716749189, "grad_norm": 0.4776540994644165, "learning_rate": 8e-05, "loss": 1.8113, "step": 3244 }, { "epoch": 0.5540379033634967, "grad_norm": 0.4448408782482147, "learning_rate": 8e-05, "loss": 1.6653, "step": 3245 }, { "epoch": 0.5542086392351033, "grad_norm": 0.49318620562553406, "learning_rate": 8e-05, "loss": 1.6158, "step": 3246 }, { "epoch": 0.5543793751067099, "grad_norm": 0.4966879189014435, "learning_rate": 8e-05, "loss": 1.7292, "step": 3247 }, { "epoch": 0.5545501109783165, "grad_norm": 0.4778051972389221, "learning_rate": 8e-05, "loss": 1.785, "step": 3248 }, { "epoch": 0.5547208468499232, "grad_norm": 0.5346766114234924, "learning_rate": 8e-05, "loss": 1.7799, "step": 3249 }, { "epoch": 0.5548915827215298, "grad_norm": 0.48048561811447144, "learning_rate": 8e-05, "loss": 1.8583, "step": 3250 }, { "epoch": 0.5550623185931364, "grad_norm": 0.4511145353317261, "learning_rate": 8e-05, "loss": 1.6591, "step": 3251 }, { "epoch": 0.555233054464743, "grad_norm": 0.5259947180747986, "learning_rate": 8e-05, "loss": 1.8591, "step": 3252 }, { "epoch": 0.5554037903363497, "grad_norm": 0.5269085764884949, "learning_rate": 8e-05, "loss": 1.7915, "step": 3253 }, { "epoch": 0.5555745262079563, "grad_norm": 0.44317835569381714, "learning_rate": 8e-05, "loss": 1.5343, "step": 3254 }, { "epoch": 0.5557452620795629, "grad_norm": 0.48574548959732056, "learning_rate": 8e-05, "loss": 1.6464, "step": 3255 }, { "epoch": 0.5559159979511695, "grad_norm": 0.44950616359710693, "learning_rate": 8e-05, "loss": 1.6343, "step": 3256 }, { "epoch": 0.5560867338227762, "grad_norm": 0.4742732048034668, "learning_rate": 8e-05, "loss": 1.6554, "step": 3257 }, { "epoch": 0.5562574696943828, "grad_norm": 0.47695696353912354, "learning_rate": 8e-05, "loss": 1.8363, "step": 3258 }, { "epoch": 0.5564282055659894, "grad_norm": 0.4843061864376068, "learning_rate": 8e-05, "loss": 1.7303, "step": 3259 }, { "epoch": 0.556598941437596, "grad_norm": 0.4668518006801605, "learning_rate": 8e-05, "loss": 1.5706, "step": 3260 }, { "epoch": 0.5567696773092027, "grad_norm": 0.46490615606307983, "learning_rate": 8e-05, "loss": 1.5624, "step": 3261 }, { "epoch": 0.5569404131808093, "grad_norm": 0.4880453646183014, "learning_rate": 8e-05, "loss": 1.8757, "step": 3262 }, { "epoch": 0.5571111490524159, "grad_norm": 0.5083554983139038, "learning_rate": 8e-05, "loss": 1.8716, "step": 3263 }, { "epoch": 0.5572818849240225, "grad_norm": 0.46769997477531433, "learning_rate": 8e-05, "loss": 1.7896, "step": 3264 }, { "epoch": 0.5574526207956292, "grad_norm": 0.4509050250053406, "learning_rate": 8e-05, "loss": 1.5812, "step": 3265 }, { "epoch": 0.5576233566672358, "grad_norm": 0.4652866721153259, "learning_rate": 8e-05, "loss": 1.6111, "step": 3266 }, { "epoch": 0.5577940925388424, "grad_norm": 0.4807164967060089, "learning_rate": 8e-05, "loss": 1.8382, "step": 3267 }, { "epoch": 0.557964828410449, "grad_norm": 0.4787854850292206, "learning_rate": 8e-05, "loss": 1.6479, "step": 3268 }, { "epoch": 0.5581355642820557, "grad_norm": 0.48182398080825806, "learning_rate": 8e-05, "loss": 1.6722, "step": 3269 }, { "epoch": 0.5583063001536623, "grad_norm": 0.47198486328125, "learning_rate": 8e-05, "loss": 1.6694, "step": 3270 }, { "epoch": 0.5584770360252689, "grad_norm": 0.4601718485355377, "learning_rate": 8e-05, "loss": 1.5036, "step": 3271 }, { "epoch": 0.5586477718968755, "grad_norm": 0.4777688682079315, "learning_rate": 8e-05, "loss": 1.7506, "step": 3272 }, { "epoch": 0.5588185077684822, "grad_norm": 0.4759737551212311, "learning_rate": 8e-05, "loss": 1.4139, "step": 3273 }, { "epoch": 0.5589892436400887, "grad_norm": 0.4717269539833069, "learning_rate": 8e-05, "loss": 1.5854, "step": 3274 }, { "epoch": 0.5591599795116954, "grad_norm": 0.4671514928340912, "learning_rate": 8e-05, "loss": 1.6782, "step": 3275 }, { "epoch": 0.559330715383302, "grad_norm": 0.48542332649230957, "learning_rate": 8e-05, "loss": 1.68, "step": 3276 }, { "epoch": 0.5595014512549087, "grad_norm": 0.5177992582321167, "learning_rate": 8e-05, "loss": 1.6486, "step": 3277 }, { "epoch": 0.5596721871265152, "grad_norm": 0.44660452008247375, "learning_rate": 8e-05, "loss": 1.576, "step": 3278 }, { "epoch": 0.5598429229981219, "grad_norm": 0.44781938195228577, "learning_rate": 8e-05, "loss": 1.5957, "step": 3279 }, { "epoch": 0.5600136588697285, "grad_norm": 0.4684285819530487, "learning_rate": 8e-05, "loss": 1.7393, "step": 3280 }, { "epoch": 0.5601843947413352, "grad_norm": 0.48874831199645996, "learning_rate": 8e-05, "loss": 1.7585, "step": 3281 }, { "epoch": 0.5603551306129417, "grad_norm": 0.4486275017261505, "learning_rate": 8e-05, "loss": 1.5081, "step": 3282 }, { "epoch": 0.5605258664845484, "grad_norm": 0.4656516909599304, "learning_rate": 8e-05, "loss": 1.5617, "step": 3283 }, { "epoch": 0.560696602356155, "grad_norm": 0.45739978551864624, "learning_rate": 8e-05, "loss": 1.6796, "step": 3284 }, { "epoch": 0.5608673382277617, "grad_norm": 0.4560164213180542, "learning_rate": 8e-05, "loss": 1.8188, "step": 3285 }, { "epoch": 0.5610380740993682, "grad_norm": 0.5119224190711975, "learning_rate": 8e-05, "loss": 1.6873, "step": 3286 }, { "epoch": 0.5612088099709749, "grad_norm": 0.45408719778060913, "learning_rate": 8e-05, "loss": 1.6932, "step": 3287 }, { "epoch": 0.5613795458425815, "grad_norm": 0.5101455450057983, "learning_rate": 8e-05, "loss": 1.6622, "step": 3288 }, { "epoch": 0.5615502817141882, "grad_norm": 0.4892732799053192, "learning_rate": 8e-05, "loss": 1.7811, "step": 3289 }, { "epoch": 0.5617210175857947, "grad_norm": 0.4641209542751312, "learning_rate": 8e-05, "loss": 1.5568, "step": 3290 }, { "epoch": 0.5618917534574014, "grad_norm": 0.46146726608276367, "learning_rate": 8e-05, "loss": 1.6691, "step": 3291 }, { "epoch": 0.562062489329008, "grad_norm": 0.45416125655174255, "learning_rate": 8e-05, "loss": 1.5624, "step": 3292 }, { "epoch": 0.5622332252006147, "grad_norm": 0.4542088806629181, "learning_rate": 8e-05, "loss": 1.602, "step": 3293 }, { "epoch": 0.5624039610722212, "grad_norm": 0.46554142236709595, "learning_rate": 8e-05, "loss": 1.6747, "step": 3294 }, { "epoch": 0.5625746969438279, "grad_norm": 0.4832119941711426, "learning_rate": 8e-05, "loss": 1.651, "step": 3295 }, { "epoch": 0.5627454328154345, "grad_norm": 0.4781706929206848, "learning_rate": 8e-05, "loss": 1.6653, "step": 3296 }, { "epoch": 0.5629161686870412, "grad_norm": 0.510077714920044, "learning_rate": 8e-05, "loss": 1.5741, "step": 3297 }, { "epoch": 0.5630869045586477, "grad_norm": 0.47661128640174866, "learning_rate": 8e-05, "loss": 1.7876, "step": 3298 }, { "epoch": 0.5632576404302544, "grad_norm": 0.5422450304031372, "learning_rate": 8e-05, "loss": 1.7932, "step": 3299 }, { "epoch": 0.563428376301861, "grad_norm": 0.5058591961860657, "learning_rate": 8e-05, "loss": 1.8971, "step": 3300 }, { "epoch": 0.5635991121734677, "grad_norm": 0.5209707021713257, "learning_rate": 8e-05, "loss": 1.6991, "step": 3301 }, { "epoch": 0.5637698480450742, "grad_norm": 0.47631025314331055, "learning_rate": 8e-05, "loss": 1.7028, "step": 3302 }, { "epoch": 0.5639405839166809, "grad_norm": 0.49201875925064087, "learning_rate": 8e-05, "loss": 1.7367, "step": 3303 }, { "epoch": 0.5641113197882875, "grad_norm": 0.4787226915359497, "learning_rate": 8e-05, "loss": 1.6719, "step": 3304 }, { "epoch": 0.5642820556598941, "grad_norm": 0.45878514647483826, "learning_rate": 8e-05, "loss": 1.487, "step": 3305 }, { "epoch": 0.5644527915315007, "grad_norm": 0.4647923409938812, "learning_rate": 8e-05, "loss": 1.7508, "step": 3306 }, { "epoch": 0.5646235274031074, "grad_norm": 0.48932355642318726, "learning_rate": 8e-05, "loss": 1.784, "step": 3307 }, { "epoch": 0.564794263274714, "grad_norm": 0.46450307965278625, "learning_rate": 8e-05, "loss": 1.6686, "step": 3308 }, { "epoch": 0.5649649991463206, "grad_norm": 0.44536036252975464, "learning_rate": 8e-05, "loss": 1.4805, "step": 3309 }, { "epoch": 0.5651357350179272, "grad_norm": 0.4665374457836151, "learning_rate": 8e-05, "loss": 1.7399, "step": 3310 }, { "epoch": 0.5653064708895339, "grad_norm": 0.467259019613266, "learning_rate": 8e-05, "loss": 1.6156, "step": 3311 }, { "epoch": 0.5654772067611405, "grad_norm": 0.505056619644165, "learning_rate": 8e-05, "loss": 1.7631, "step": 3312 }, { "epoch": 0.5656479426327471, "grad_norm": 0.4679594337940216, "learning_rate": 8e-05, "loss": 1.5723, "step": 3313 }, { "epoch": 0.5658186785043537, "grad_norm": 0.5423455238342285, "learning_rate": 8e-05, "loss": 1.798, "step": 3314 }, { "epoch": 0.5659894143759604, "grad_norm": 0.46808043122291565, "learning_rate": 8e-05, "loss": 1.7616, "step": 3315 }, { "epoch": 0.566160150247567, "grad_norm": 0.4737641513347626, "learning_rate": 8e-05, "loss": 1.6379, "step": 3316 }, { "epoch": 0.5663308861191736, "grad_norm": 0.5140879154205322, "learning_rate": 8e-05, "loss": 1.7413, "step": 3317 }, { "epoch": 0.5665016219907802, "grad_norm": 0.4850059151649475, "learning_rate": 8e-05, "loss": 1.725, "step": 3318 }, { "epoch": 0.5666723578623869, "grad_norm": 0.5383856296539307, "learning_rate": 8e-05, "loss": 1.8799, "step": 3319 }, { "epoch": 0.5668430937339936, "grad_norm": 0.48086562752723694, "learning_rate": 8e-05, "loss": 1.4892, "step": 3320 }, { "epoch": 0.5670138296056001, "grad_norm": 0.4561636745929718, "learning_rate": 8e-05, "loss": 1.6492, "step": 3321 }, { "epoch": 0.5671845654772067, "grad_norm": 0.47339269518852234, "learning_rate": 8e-05, "loss": 1.768, "step": 3322 }, { "epoch": 0.5673553013488134, "grad_norm": 0.474569171667099, "learning_rate": 8e-05, "loss": 1.7569, "step": 3323 }, { "epoch": 0.56752603722042, "grad_norm": 0.46124163269996643, "learning_rate": 8e-05, "loss": 1.5429, "step": 3324 }, { "epoch": 0.5676967730920266, "grad_norm": 0.4877426326274872, "learning_rate": 8e-05, "loss": 1.7983, "step": 3325 }, { "epoch": 0.5678675089636332, "grad_norm": 0.4717552661895752, "learning_rate": 8e-05, "loss": 1.6408, "step": 3326 }, { "epoch": 0.5680382448352399, "grad_norm": 0.4382312595844269, "learning_rate": 8e-05, "loss": 1.5037, "step": 3327 }, { "epoch": 0.5682089807068466, "grad_norm": 0.47447115182876587, "learning_rate": 8e-05, "loss": 1.6664, "step": 3328 }, { "epoch": 0.5683797165784531, "grad_norm": 0.4842239022254944, "learning_rate": 8e-05, "loss": 1.7312, "step": 3329 }, { "epoch": 0.5685504524500598, "grad_norm": 0.49211472272872925, "learning_rate": 8e-05, "loss": 1.6714, "step": 3330 }, { "epoch": 0.5687211883216664, "grad_norm": 0.5283627510070801, "learning_rate": 8e-05, "loss": 1.5751, "step": 3331 }, { "epoch": 0.5688919241932731, "grad_norm": 0.4942396879196167, "learning_rate": 8e-05, "loss": 1.8411, "step": 3332 }, { "epoch": 0.5690626600648796, "grad_norm": 0.4778619706630707, "learning_rate": 8e-05, "loss": 1.7545, "step": 3333 }, { "epoch": 0.5692333959364863, "grad_norm": 0.5033895969390869, "learning_rate": 8e-05, "loss": 1.8238, "step": 3334 }, { "epoch": 0.5694041318080929, "grad_norm": 0.4671578109264374, "learning_rate": 8e-05, "loss": 1.4968, "step": 3335 }, { "epoch": 0.5695748676796996, "grad_norm": 0.4792187213897705, "learning_rate": 8e-05, "loss": 1.6821, "step": 3336 }, { "epoch": 0.5697456035513061, "grad_norm": 0.4611697196960449, "learning_rate": 8e-05, "loss": 1.715, "step": 3337 }, { "epoch": 0.5699163394229128, "grad_norm": 0.44345250725746155, "learning_rate": 8e-05, "loss": 1.5209, "step": 3338 }, { "epoch": 0.5700870752945194, "grad_norm": 0.5357179045677185, "learning_rate": 8e-05, "loss": 1.8725, "step": 3339 }, { "epoch": 0.570257811166126, "grad_norm": 0.5105072259902954, "learning_rate": 8e-05, "loss": 1.7656, "step": 3340 }, { "epoch": 0.5704285470377326, "grad_norm": 0.4759785532951355, "learning_rate": 8e-05, "loss": 1.6449, "step": 3341 }, { "epoch": 0.5705992829093393, "grad_norm": 0.475008100271225, "learning_rate": 8e-05, "loss": 1.732, "step": 3342 }, { "epoch": 0.5707700187809459, "grad_norm": 0.4781922399997711, "learning_rate": 8e-05, "loss": 1.6486, "step": 3343 }, { "epoch": 0.5709407546525525, "grad_norm": 0.4699609577655792, "learning_rate": 8e-05, "loss": 1.7962, "step": 3344 }, { "epoch": 0.5711114905241591, "grad_norm": 0.4423743486404419, "learning_rate": 8e-05, "loss": 1.5522, "step": 3345 }, { "epoch": 0.5712822263957658, "grad_norm": 0.4678907096385956, "learning_rate": 8e-05, "loss": 1.7383, "step": 3346 }, { "epoch": 0.5714529622673724, "grad_norm": 0.4757753610610962, "learning_rate": 8e-05, "loss": 1.6479, "step": 3347 }, { "epoch": 0.571623698138979, "grad_norm": 0.48473069071769714, "learning_rate": 8e-05, "loss": 1.7452, "step": 3348 }, { "epoch": 0.5717944340105856, "grad_norm": 0.5281333923339844, "learning_rate": 8e-05, "loss": 1.8474, "step": 3349 }, { "epoch": 0.5719651698821923, "grad_norm": 0.4584832191467285, "learning_rate": 8e-05, "loss": 1.5585, "step": 3350 }, { "epoch": 0.5721359057537989, "grad_norm": 0.45103469491004944, "learning_rate": 8e-05, "loss": 1.7253, "step": 3351 }, { "epoch": 0.5723066416254055, "grad_norm": 0.4595406651496887, "learning_rate": 8e-05, "loss": 1.7907, "step": 3352 }, { "epoch": 0.5724773774970121, "grad_norm": 0.51038658618927, "learning_rate": 8e-05, "loss": 1.9174, "step": 3353 }, { "epoch": 0.5726481133686188, "grad_norm": 0.4750240743160248, "learning_rate": 8e-05, "loss": 1.7223, "step": 3354 }, { "epoch": 0.5728188492402254, "grad_norm": 0.48377472162246704, "learning_rate": 8e-05, "loss": 1.6779, "step": 3355 }, { "epoch": 0.572989585111832, "grad_norm": 0.46826860308647156, "learning_rate": 8e-05, "loss": 1.5696, "step": 3356 }, { "epoch": 0.5731603209834386, "grad_norm": 0.5058405995368958, "learning_rate": 8e-05, "loss": 1.8589, "step": 3357 }, { "epoch": 0.5733310568550453, "grad_norm": 0.4433777630329132, "learning_rate": 8e-05, "loss": 1.5077, "step": 3358 }, { "epoch": 0.5735017927266519, "grad_norm": 0.4735387861728668, "learning_rate": 8e-05, "loss": 1.767, "step": 3359 }, { "epoch": 0.5736725285982585, "grad_norm": 0.49106255173683167, "learning_rate": 8e-05, "loss": 1.5678, "step": 3360 }, { "epoch": 0.5738432644698651, "grad_norm": 0.47774678468704224, "learning_rate": 8e-05, "loss": 1.619, "step": 3361 }, { "epoch": 0.5740140003414718, "grad_norm": 0.4595874845981598, "learning_rate": 8e-05, "loss": 1.5158, "step": 3362 }, { "epoch": 0.5741847362130784, "grad_norm": 0.45516496896743774, "learning_rate": 8e-05, "loss": 1.6803, "step": 3363 }, { "epoch": 0.574355472084685, "grad_norm": 0.4626932442188263, "learning_rate": 8e-05, "loss": 1.6491, "step": 3364 }, { "epoch": 0.5745262079562916, "grad_norm": 0.48517102003097534, "learning_rate": 8e-05, "loss": 1.7675, "step": 3365 }, { "epoch": 0.5746969438278983, "grad_norm": 0.47943583130836487, "learning_rate": 8e-05, "loss": 1.7165, "step": 3366 }, { "epoch": 0.5748676796995049, "grad_norm": 0.4786792993545532, "learning_rate": 8e-05, "loss": 1.823, "step": 3367 }, { "epoch": 0.5750384155711115, "grad_norm": 0.4839119017124176, "learning_rate": 8e-05, "loss": 1.6438, "step": 3368 }, { "epoch": 0.5752091514427181, "grad_norm": 0.46348774433135986, "learning_rate": 8e-05, "loss": 1.809, "step": 3369 }, { "epoch": 0.5753798873143248, "grad_norm": 0.4775078296661377, "learning_rate": 8e-05, "loss": 1.8301, "step": 3370 }, { "epoch": 0.5755506231859313, "grad_norm": 0.4795530140399933, "learning_rate": 8e-05, "loss": 1.8679, "step": 3371 }, { "epoch": 0.575721359057538, "grad_norm": 0.49621447920799255, "learning_rate": 8e-05, "loss": 1.7294, "step": 3372 }, { "epoch": 0.5758920949291446, "grad_norm": 0.4966508746147156, "learning_rate": 8e-05, "loss": 1.71, "step": 3373 }, { "epoch": 0.5760628308007513, "grad_norm": 0.4585264325141907, "learning_rate": 8e-05, "loss": 1.7198, "step": 3374 }, { "epoch": 0.5762335666723578, "grad_norm": 0.5031297206878662, "learning_rate": 8e-05, "loss": 1.6585, "step": 3375 }, { "epoch": 0.5764043025439645, "grad_norm": 0.46386852860450745, "learning_rate": 8e-05, "loss": 1.6297, "step": 3376 }, { "epoch": 0.5765750384155711, "grad_norm": 0.6040430068969727, "learning_rate": 8e-05, "loss": 1.5922, "step": 3377 }, { "epoch": 0.5767457742871778, "grad_norm": 0.48569998145103455, "learning_rate": 8e-05, "loss": 1.6548, "step": 3378 }, { "epoch": 0.5769165101587843, "grad_norm": 0.4809260070323944, "learning_rate": 8e-05, "loss": 1.6547, "step": 3379 }, { "epoch": 0.577087246030391, "grad_norm": 0.4923464059829712, "learning_rate": 8e-05, "loss": 1.7379, "step": 3380 }, { "epoch": 0.5772579819019976, "grad_norm": 0.4662120044231415, "learning_rate": 8e-05, "loss": 1.5851, "step": 3381 }, { "epoch": 0.5774287177736043, "grad_norm": 0.47898709774017334, "learning_rate": 8e-05, "loss": 1.709, "step": 3382 }, { "epoch": 0.5775994536452108, "grad_norm": 0.5230194926261902, "learning_rate": 8e-05, "loss": 1.7254, "step": 3383 }, { "epoch": 0.5777701895168175, "grad_norm": 0.4952589273452759, "learning_rate": 8e-05, "loss": 1.7186, "step": 3384 }, { "epoch": 0.5779409253884241, "grad_norm": 0.4692406952381134, "learning_rate": 8e-05, "loss": 1.658, "step": 3385 }, { "epoch": 0.5781116612600308, "grad_norm": 0.4787033498287201, "learning_rate": 8e-05, "loss": 1.6718, "step": 3386 }, { "epoch": 0.5782823971316373, "grad_norm": 0.46520575881004333, "learning_rate": 8e-05, "loss": 1.5449, "step": 3387 }, { "epoch": 0.578453133003244, "grad_norm": 0.47507235407829285, "learning_rate": 8e-05, "loss": 1.7319, "step": 3388 }, { "epoch": 0.5786238688748506, "grad_norm": 0.4906587600708008, "learning_rate": 8e-05, "loss": 1.6539, "step": 3389 }, { "epoch": 0.5787946047464573, "grad_norm": 0.4711267650127411, "learning_rate": 8e-05, "loss": 1.7698, "step": 3390 }, { "epoch": 0.5789653406180638, "grad_norm": 0.4777052402496338, "learning_rate": 8e-05, "loss": 1.849, "step": 3391 }, { "epoch": 0.5791360764896705, "grad_norm": 0.48031359910964966, "learning_rate": 8e-05, "loss": 1.5817, "step": 3392 }, { "epoch": 0.5793068123612771, "grad_norm": 0.4623790681362152, "learning_rate": 8e-05, "loss": 1.6941, "step": 3393 }, { "epoch": 0.5794775482328838, "grad_norm": 0.5103781819343567, "learning_rate": 8e-05, "loss": 1.6927, "step": 3394 }, { "epoch": 0.5796482841044903, "grad_norm": 0.5844423770904541, "learning_rate": 8e-05, "loss": 1.7861, "step": 3395 }, { "epoch": 0.579819019976097, "grad_norm": 0.4905589818954468, "learning_rate": 8e-05, "loss": 1.8106, "step": 3396 }, { "epoch": 0.5799897558477036, "grad_norm": 0.49740836024284363, "learning_rate": 8e-05, "loss": 1.755, "step": 3397 }, { "epoch": 0.5801604917193103, "grad_norm": 0.4892294406890869, "learning_rate": 8e-05, "loss": 1.6397, "step": 3398 }, { "epoch": 0.5803312275909168, "grad_norm": 0.4710220396518707, "learning_rate": 8e-05, "loss": 1.5005, "step": 3399 }, { "epoch": 0.5805019634625235, "grad_norm": 0.4588889479637146, "learning_rate": 8e-05, "loss": 1.581, "step": 3400 }, { "epoch": 0.5806726993341301, "grad_norm": 0.5245676040649414, "learning_rate": 8e-05, "loss": 1.74, "step": 3401 }, { "epoch": 0.5808434352057368, "grad_norm": 0.491338312625885, "learning_rate": 8e-05, "loss": 1.7057, "step": 3402 }, { "epoch": 0.5810141710773433, "grad_norm": 0.47585538029670715, "learning_rate": 8e-05, "loss": 1.737, "step": 3403 }, { "epoch": 0.58118490694895, "grad_norm": 0.4797638952732086, "learning_rate": 8e-05, "loss": 1.6176, "step": 3404 }, { "epoch": 0.5813556428205566, "grad_norm": 0.4825317859649658, "learning_rate": 8e-05, "loss": 1.5219, "step": 3405 }, { "epoch": 0.5815263786921632, "grad_norm": 0.5049483180046082, "learning_rate": 8e-05, "loss": 1.6604, "step": 3406 }, { "epoch": 0.5816971145637698, "grad_norm": 0.4983532428741455, "learning_rate": 8e-05, "loss": 1.7439, "step": 3407 }, { "epoch": 0.5818678504353765, "grad_norm": 0.48963358998298645, "learning_rate": 8e-05, "loss": 1.8161, "step": 3408 }, { "epoch": 0.5820385863069831, "grad_norm": 0.5174717903137207, "learning_rate": 8e-05, "loss": 1.7424, "step": 3409 }, { "epoch": 0.5822093221785897, "grad_norm": 0.46064579486846924, "learning_rate": 8e-05, "loss": 1.8183, "step": 3410 }, { "epoch": 0.5823800580501963, "grad_norm": 0.4762938916683197, "learning_rate": 8e-05, "loss": 1.6717, "step": 3411 }, { "epoch": 0.582550793921803, "grad_norm": 0.4355177879333496, "learning_rate": 8e-05, "loss": 1.5524, "step": 3412 }, { "epoch": 0.5827215297934096, "grad_norm": 0.48198792338371277, "learning_rate": 8e-05, "loss": 1.7774, "step": 3413 }, { "epoch": 0.5828922656650162, "grad_norm": 0.45344310998916626, "learning_rate": 8e-05, "loss": 1.5831, "step": 3414 }, { "epoch": 0.5830630015366228, "grad_norm": 0.4673636853694916, "learning_rate": 8e-05, "loss": 1.6507, "step": 3415 }, { "epoch": 0.5832337374082295, "grad_norm": 0.4659893214702606, "learning_rate": 8e-05, "loss": 1.7258, "step": 3416 }, { "epoch": 0.5834044732798361, "grad_norm": 0.4687820374965668, "learning_rate": 8e-05, "loss": 1.7142, "step": 3417 }, { "epoch": 0.5835752091514427, "grad_norm": 0.5023786425590515, "learning_rate": 8e-05, "loss": 1.715, "step": 3418 }, { "epoch": 0.5837459450230493, "grad_norm": 0.5045672655105591, "learning_rate": 8e-05, "loss": 1.6273, "step": 3419 }, { "epoch": 0.583916680894656, "grad_norm": 0.45375457406044006, "learning_rate": 8e-05, "loss": 1.62, "step": 3420 }, { "epoch": 0.5840874167662626, "grad_norm": 0.49965009093284607, "learning_rate": 8e-05, "loss": 1.7325, "step": 3421 }, { "epoch": 0.5842581526378692, "grad_norm": 0.47659632563591003, "learning_rate": 8e-05, "loss": 1.644, "step": 3422 }, { "epoch": 0.5844288885094758, "grad_norm": 0.4931514263153076, "learning_rate": 8e-05, "loss": 1.6988, "step": 3423 }, { "epoch": 0.5845996243810825, "grad_norm": 0.4623546898365021, "learning_rate": 8e-05, "loss": 1.6105, "step": 3424 }, { "epoch": 0.5847703602526891, "grad_norm": 0.47120004892349243, "learning_rate": 8e-05, "loss": 1.6081, "step": 3425 }, { "epoch": 0.5849410961242957, "grad_norm": 0.47975438833236694, "learning_rate": 8e-05, "loss": 1.7284, "step": 3426 }, { "epoch": 0.5851118319959023, "grad_norm": 0.4485236406326294, "learning_rate": 8e-05, "loss": 1.4583, "step": 3427 }, { "epoch": 0.585282567867509, "grad_norm": 0.44221553206443787, "learning_rate": 8e-05, "loss": 1.5969, "step": 3428 }, { "epoch": 0.5854533037391156, "grad_norm": 0.4654899537563324, "learning_rate": 8e-05, "loss": 1.6806, "step": 3429 }, { "epoch": 0.5856240396107222, "grad_norm": 0.4840885400772095, "learning_rate": 8e-05, "loss": 1.6286, "step": 3430 }, { "epoch": 0.5857947754823288, "grad_norm": 0.5039430856704712, "learning_rate": 8e-05, "loss": 1.774, "step": 3431 }, { "epoch": 0.5859655113539355, "grad_norm": 0.5004041790962219, "learning_rate": 8e-05, "loss": 1.7963, "step": 3432 }, { "epoch": 0.5861362472255421, "grad_norm": 0.47662562131881714, "learning_rate": 8e-05, "loss": 1.8225, "step": 3433 }, { "epoch": 0.5863069830971487, "grad_norm": 0.48688945174217224, "learning_rate": 8e-05, "loss": 1.643, "step": 3434 }, { "epoch": 0.5864777189687553, "grad_norm": 0.4929359555244446, "learning_rate": 8e-05, "loss": 1.8514, "step": 3435 }, { "epoch": 0.586648454840362, "grad_norm": 0.43874591588974, "learning_rate": 8e-05, "loss": 1.5653, "step": 3436 }, { "epoch": 0.5868191907119685, "grad_norm": 0.4838576912879944, "learning_rate": 8e-05, "loss": 1.8776, "step": 3437 }, { "epoch": 0.5869899265835752, "grad_norm": 0.5199116468429565, "learning_rate": 8e-05, "loss": 1.9033, "step": 3438 }, { "epoch": 0.5871606624551818, "grad_norm": 0.4540807902812958, "learning_rate": 8e-05, "loss": 1.7676, "step": 3439 }, { "epoch": 0.5873313983267885, "grad_norm": 0.4520860016345978, "learning_rate": 8e-05, "loss": 1.6085, "step": 3440 }, { "epoch": 0.587502134198395, "grad_norm": 0.49735066294670105, "learning_rate": 8e-05, "loss": 1.7633, "step": 3441 }, { "epoch": 0.5876728700700017, "grad_norm": 0.4595932364463806, "learning_rate": 8e-05, "loss": 1.6338, "step": 3442 }, { "epoch": 0.5878436059416083, "grad_norm": 0.48519036173820496, "learning_rate": 8e-05, "loss": 1.6658, "step": 3443 }, { "epoch": 0.588014341813215, "grad_norm": 0.4694824814796448, "learning_rate": 8e-05, "loss": 1.6311, "step": 3444 }, { "epoch": 0.5881850776848215, "grad_norm": 0.4685525596141815, "learning_rate": 8e-05, "loss": 1.5395, "step": 3445 }, { "epoch": 0.5883558135564282, "grad_norm": 0.4857216477394104, "learning_rate": 8e-05, "loss": 1.7175, "step": 3446 }, { "epoch": 0.5885265494280348, "grad_norm": 0.4785721004009247, "learning_rate": 8e-05, "loss": 1.6562, "step": 3447 }, { "epoch": 0.5886972852996415, "grad_norm": 0.4736572504043579, "learning_rate": 8e-05, "loss": 1.6919, "step": 3448 }, { "epoch": 0.588868021171248, "grad_norm": 0.4625205099582672, "learning_rate": 8e-05, "loss": 1.5984, "step": 3449 }, { "epoch": 0.5890387570428547, "grad_norm": 0.4937761723995209, "learning_rate": 8e-05, "loss": 1.8163, "step": 3450 }, { "epoch": 0.5892094929144613, "grad_norm": 0.4770500063896179, "learning_rate": 8e-05, "loss": 1.667, "step": 3451 }, { "epoch": 0.589380228786068, "grad_norm": 0.4344290494918823, "learning_rate": 8e-05, "loss": 1.5019, "step": 3452 }, { "epoch": 0.5895509646576745, "grad_norm": 0.4578484892845154, "learning_rate": 8e-05, "loss": 1.6024, "step": 3453 }, { "epoch": 0.5897217005292812, "grad_norm": 0.49049705266952515, "learning_rate": 8e-05, "loss": 1.6616, "step": 3454 }, { "epoch": 0.5898924364008878, "grad_norm": 0.4696107804775238, "learning_rate": 8e-05, "loss": 1.5665, "step": 3455 }, { "epoch": 0.5900631722724945, "grad_norm": 0.466412216424942, "learning_rate": 8e-05, "loss": 1.7489, "step": 3456 }, { "epoch": 0.590233908144101, "grad_norm": 0.46453526616096497, "learning_rate": 8e-05, "loss": 1.856, "step": 3457 }, { "epoch": 0.5904046440157077, "grad_norm": 0.48164770007133484, "learning_rate": 8e-05, "loss": 1.7469, "step": 3458 }, { "epoch": 0.5905753798873143, "grad_norm": 0.4851072430610657, "learning_rate": 8e-05, "loss": 1.8998, "step": 3459 }, { "epoch": 0.590746115758921, "grad_norm": 0.4513305723667145, "learning_rate": 8e-05, "loss": 1.666, "step": 3460 }, { "epoch": 0.5909168516305275, "grad_norm": 0.4531025290489197, "learning_rate": 8e-05, "loss": 1.5845, "step": 3461 }, { "epoch": 0.5910875875021342, "grad_norm": 0.49425065517425537, "learning_rate": 8e-05, "loss": 1.7349, "step": 3462 }, { "epoch": 0.5912583233737408, "grad_norm": 0.46004733443260193, "learning_rate": 8e-05, "loss": 1.7909, "step": 3463 }, { "epoch": 0.5914290592453475, "grad_norm": 0.4671190083026886, "learning_rate": 8e-05, "loss": 1.8203, "step": 3464 }, { "epoch": 0.591599795116954, "grad_norm": 0.4583698809146881, "learning_rate": 8e-05, "loss": 1.6385, "step": 3465 }, { "epoch": 0.5917705309885607, "grad_norm": 0.4585220515727997, "learning_rate": 8e-05, "loss": 1.7117, "step": 3466 }, { "epoch": 0.5919412668601673, "grad_norm": 0.4649127721786499, "learning_rate": 8e-05, "loss": 1.6554, "step": 3467 }, { "epoch": 0.592112002731774, "grad_norm": 0.48948773741722107, "learning_rate": 8e-05, "loss": 1.6589, "step": 3468 }, { "epoch": 0.5922827386033805, "grad_norm": 0.4560562074184418, "learning_rate": 8e-05, "loss": 1.623, "step": 3469 }, { "epoch": 0.5924534744749872, "grad_norm": 0.4650895893573761, "learning_rate": 8e-05, "loss": 1.7479, "step": 3470 }, { "epoch": 0.5926242103465938, "grad_norm": 0.4622649848461151, "learning_rate": 8e-05, "loss": 1.6688, "step": 3471 }, { "epoch": 0.5927949462182004, "grad_norm": 0.5190022587776184, "learning_rate": 8e-05, "loss": 1.823, "step": 3472 }, { "epoch": 0.592965682089807, "grad_norm": 0.5120734572410583, "learning_rate": 8e-05, "loss": 1.6667, "step": 3473 }, { "epoch": 0.5931364179614137, "grad_norm": 0.456492155790329, "learning_rate": 8e-05, "loss": 1.5194, "step": 3474 }, { "epoch": 0.5933071538330204, "grad_norm": 0.4564683735370636, "learning_rate": 8e-05, "loss": 1.5017, "step": 3475 }, { "epoch": 0.5934778897046269, "grad_norm": 0.48183396458625793, "learning_rate": 8e-05, "loss": 1.7372, "step": 3476 }, { "epoch": 0.5936486255762335, "grad_norm": 0.5166504979133606, "learning_rate": 8e-05, "loss": 1.7609, "step": 3477 }, { "epoch": 0.5938193614478402, "grad_norm": 0.525559663772583, "learning_rate": 8e-05, "loss": 1.8908, "step": 3478 }, { "epoch": 0.5939900973194469, "grad_norm": 0.5128688812255859, "learning_rate": 8e-05, "loss": 1.6591, "step": 3479 }, { "epoch": 0.5941608331910534, "grad_norm": 0.4981849789619446, "learning_rate": 8e-05, "loss": 1.7606, "step": 3480 }, { "epoch": 0.59433156906266, "grad_norm": 0.46463167667388916, "learning_rate": 8e-05, "loss": 1.5149, "step": 3481 }, { "epoch": 0.5945023049342667, "grad_norm": 0.578113853931427, "learning_rate": 8e-05, "loss": 1.7077, "step": 3482 }, { "epoch": 0.5946730408058734, "grad_norm": 0.5148515105247498, "learning_rate": 8e-05, "loss": 1.6692, "step": 3483 }, { "epoch": 0.5948437766774799, "grad_norm": 0.5329025387763977, "learning_rate": 8e-05, "loss": 1.9012, "step": 3484 }, { "epoch": 0.5950145125490865, "grad_norm": 0.49861395359039307, "learning_rate": 8e-05, "loss": 1.7114, "step": 3485 }, { "epoch": 0.5951852484206932, "grad_norm": 0.5110282897949219, "learning_rate": 8e-05, "loss": 1.7751, "step": 3486 }, { "epoch": 0.5953559842922999, "grad_norm": 0.4730503261089325, "learning_rate": 8e-05, "loss": 1.6542, "step": 3487 }, { "epoch": 0.5955267201639064, "grad_norm": 0.5000714659690857, "learning_rate": 8e-05, "loss": 1.7369, "step": 3488 }, { "epoch": 0.595697456035513, "grad_norm": 0.5006521344184875, "learning_rate": 8e-05, "loss": 1.7243, "step": 3489 }, { "epoch": 0.5958681919071197, "grad_norm": 0.49334681034088135, "learning_rate": 8e-05, "loss": 1.7883, "step": 3490 }, { "epoch": 0.5960389277787264, "grad_norm": 0.4648458659648895, "learning_rate": 8e-05, "loss": 1.5846, "step": 3491 }, { "epoch": 0.5962096636503329, "grad_norm": 0.5221567153930664, "learning_rate": 8e-05, "loss": 1.7158, "step": 3492 }, { "epoch": 0.5963803995219396, "grad_norm": 0.46805107593536377, "learning_rate": 8e-05, "loss": 1.5998, "step": 3493 }, { "epoch": 0.5965511353935462, "grad_norm": 0.47967949509620667, "learning_rate": 8e-05, "loss": 1.6365, "step": 3494 }, { "epoch": 0.5967218712651529, "grad_norm": 0.48217159509658813, "learning_rate": 8e-05, "loss": 1.9379, "step": 3495 }, { "epoch": 0.5968926071367594, "grad_norm": 0.5162557363510132, "learning_rate": 8e-05, "loss": 1.6539, "step": 3496 }, { "epoch": 0.597063343008366, "grad_norm": 0.4687598943710327, "learning_rate": 8e-05, "loss": 1.7522, "step": 3497 }, { "epoch": 0.5972340788799727, "grad_norm": 0.49611449241638184, "learning_rate": 8e-05, "loss": 1.6076, "step": 3498 }, { "epoch": 0.5974048147515794, "grad_norm": 0.48148542642593384, "learning_rate": 8e-05, "loss": 1.7433, "step": 3499 }, { "epoch": 0.5975755506231859, "grad_norm": 0.4765561521053314, "learning_rate": 8e-05, "loss": 1.7269, "step": 3500 }, { "epoch": 0.5977462864947926, "grad_norm": 0.45740994811058044, "learning_rate": 8e-05, "loss": 1.5222, "step": 3501 }, { "epoch": 0.5979170223663992, "grad_norm": 0.4800482392311096, "learning_rate": 8e-05, "loss": 1.766, "step": 3502 }, { "epoch": 0.5980877582380058, "grad_norm": 0.4413750469684601, "learning_rate": 8e-05, "loss": 1.5058, "step": 3503 }, { "epoch": 0.5982584941096124, "grad_norm": 0.4768763780593872, "learning_rate": 8e-05, "loss": 1.6944, "step": 3504 }, { "epoch": 0.5984292299812191, "grad_norm": 0.5238616466522217, "learning_rate": 8e-05, "loss": 1.8398, "step": 3505 }, { "epoch": 0.5985999658528257, "grad_norm": 0.4822927713394165, "learning_rate": 8e-05, "loss": 1.5721, "step": 3506 }, { "epoch": 0.5987707017244323, "grad_norm": 0.44294825196266174, "learning_rate": 8e-05, "loss": 1.6295, "step": 3507 }, { "epoch": 0.5989414375960389, "grad_norm": 0.5319304466247559, "learning_rate": 8e-05, "loss": 1.7107, "step": 3508 }, { "epoch": 0.5991121734676456, "grad_norm": 0.4742663502693176, "learning_rate": 8e-05, "loss": 1.6897, "step": 3509 }, { "epoch": 0.5992829093392522, "grad_norm": 0.4991008937358856, "learning_rate": 8e-05, "loss": 1.669, "step": 3510 }, { "epoch": 0.5994536452108588, "grad_norm": 0.4575789272785187, "learning_rate": 8e-05, "loss": 1.5775, "step": 3511 }, { "epoch": 0.5996243810824654, "grad_norm": 0.49806931614875793, "learning_rate": 8e-05, "loss": 1.8324, "step": 3512 }, { "epoch": 0.5997951169540721, "grad_norm": 0.4575517177581787, "learning_rate": 8e-05, "loss": 1.5348, "step": 3513 }, { "epoch": 0.5999658528256787, "grad_norm": 0.49491384625434875, "learning_rate": 8e-05, "loss": 1.3297, "step": 3514 }, { "epoch": 0.6001365886972853, "grad_norm": 0.43592965602874756, "learning_rate": 8e-05, "loss": 1.5604, "step": 3515 }, { "epoch": 0.6003073245688919, "grad_norm": 0.4768159091472626, "learning_rate": 8e-05, "loss": 1.5243, "step": 3516 }, { "epoch": 0.6004780604404986, "grad_norm": 0.4884272515773773, "learning_rate": 8e-05, "loss": 1.7416, "step": 3517 }, { "epoch": 0.6006487963121052, "grad_norm": 0.4640974700450897, "learning_rate": 8e-05, "loss": 1.5891, "step": 3518 }, { "epoch": 0.6008195321837118, "grad_norm": 0.4943418800830841, "learning_rate": 8e-05, "loss": 1.7274, "step": 3519 }, { "epoch": 0.6009902680553184, "grad_norm": 0.49270308017730713, "learning_rate": 8e-05, "loss": 1.7777, "step": 3520 }, { "epoch": 0.6011610039269251, "grad_norm": 0.4922667443752289, "learning_rate": 8e-05, "loss": 1.8309, "step": 3521 }, { "epoch": 0.6013317397985317, "grad_norm": 0.47801387310028076, "learning_rate": 8e-05, "loss": 1.7761, "step": 3522 }, { "epoch": 0.6015024756701383, "grad_norm": 0.47582393884658813, "learning_rate": 8e-05, "loss": 1.8169, "step": 3523 }, { "epoch": 0.6016732115417449, "grad_norm": 0.5243566632270813, "learning_rate": 8e-05, "loss": 1.6403, "step": 3524 }, { "epoch": 0.6018439474133516, "grad_norm": 0.4520963430404663, "learning_rate": 8e-05, "loss": 1.6177, "step": 3525 }, { "epoch": 0.6020146832849582, "grad_norm": 0.521947979927063, "learning_rate": 8e-05, "loss": 1.938, "step": 3526 }, { "epoch": 0.6021854191565648, "grad_norm": 0.47112172842025757, "learning_rate": 8e-05, "loss": 1.5579, "step": 3527 }, { "epoch": 0.6023561550281714, "grad_norm": 0.49592286348342896, "learning_rate": 8e-05, "loss": 1.7407, "step": 3528 }, { "epoch": 0.6025268908997781, "grad_norm": 0.46759334206581116, "learning_rate": 8e-05, "loss": 1.6198, "step": 3529 }, { "epoch": 0.6026976267713847, "grad_norm": 0.47949889302253723, "learning_rate": 8e-05, "loss": 1.6396, "step": 3530 }, { "epoch": 0.6028683626429913, "grad_norm": 0.44208648800849915, "learning_rate": 8e-05, "loss": 1.563, "step": 3531 }, { "epoch": 0.6030390985145979, "grad_norm": 0.4822370707988739, "learning_rate": 8e-05, "loss": 1.7287, "step": 3532 }, { "epoch": 0.6032098343862046, "grad_norm": 0.5375351309776306, "learning_rate": 8e-05, "loss": 1.9766, "step": 3533 }, { "epoch": 0.6033805702578111, "grad_norm": 0.4597059190273285, "learning_rate": 8e-05, "loss": 1.6734, "step": 3534 }, { "epoch": 0.6035513061294178, "grad_norm": 0.4833257794380188, "learning_rate": 8e-05, "loss": 1.667, "step": 3535 }, { "epoch": 0.6037220420010244, "grad_norm": 0.44714096188545227, "learning_rate": 8e-05, "loss": 1.5603, "step": 3536 }, { "epoch": 0.6038927778726311, "grad_norm": 0.5016980767250061, "learning_rate": 8e-05, "loss": 1.6556, "step": 3537 }, { "epoch": 0.6040635137442376, "grad_norm": 0.4415661692619324, "learning_rate": 8e-05, "loss": 1.5389, "step": 3538 }, { "epoch": 0.6042342496158443, "grad_norm": 0.5156210660934448, "learning_rate": 8e-05, "loss": 1.7457, "step": 3539 }, { "epoch": 0.6044049854874509, "grad_norm": 0.47376400232315063, "learning_rate": 8e-05, "loss": 1.7616, "step": 3540 }, { "epoch": 0.6045757213590576, "grad_norm": 0.4532049596309662, "learning_rate": 8e-05, "loss": 1.5363, "step": 3541 }, { "epoch": 0.6047464572306641, "grad_norm": 0.4856002926826477, "learning_rate": 8e-05, "loss": 1.708, "step": 3542 }, { "epoch": 0.6049171931022708, "grad_norm": 0.49275070428848267, "learning_rate": 8e-05, "loss": 1.6592, "step": 3543 }, { "epoch": 0.6050879289738774, "grad_norm": 0.4345633089542389, "learning_rate": 8e-05, "loss": 1.5523, "step": 3544 }, { "epoch": 0.6052586648454841, "grad_norm": 0.5105178952217102, "learning_rate": 8e-05, "loss": 1.8057, "step": 3545 }, { "epoch": 0.6054294007170906, "grad_norm": 0.4903339743614197, "learning_rate": 8e-05, "loss": 1.7352, "step": 3546 }, { "epoch": 0.6056001365886973, "grad_norm": 0.4706353545188904, "learning_rate": 8e-05, "loss": 1.5949, "step": 3547 }, { "epoch": 0.6057708724603039, "grad_norm": 0.48059263825416565, "learning_rate": 8e-05, "loss": 1.7452, "step": 3548 }, { "epoch": 0.6059416083319106, "grad_norm": 0.4764838218688965, "learning_rate": 8e-05, "loss": 1.7561, "step": 3549 }, { "epoch": 0.6061123442035171, "grad_norm": 0.48139381408691406, "learning_rate": 8e-05, "loss": 1.7263, "step": 3550 }, { "epoch": 0.6062830800751238, "grad_norm": 0.48190557956695557, "learning_rate": 8e-05, "loss": 1.5943, "step": 3551 }, { "epoch": 0.6064538159467304, "grad_norm": 0.5197940468788147, "learning_rate": 8e-05, "loss": 1.7815, "step": 3552 }, { "epoch": 0.6066245518183371, "grad_norm": 0.4713306427001953, "learning_rate": 8e-05, "loss": 1.7038, "step": 3553 }, { "epoch": 0.6067952876899436, "grad_norm": 0.4906572103500366, "learning_rate": 8e-05, "loss": 1.6615, "step": 3554 }, { "epoch": 0.6069660235615503, "grad_norm": 0.5033541321754456, "learning_rate": 8e-05, "loss": 1.7889, "step": 3555 }, { "epoch": 0.6071367594331569, "grad_norm": 0.4728256165981293, "learning_rate": 8e-05, "loss": 1.6597, "step": 3556 }, { "epoch": 0.6073074953047636, "grad_norm": 0.4686400294303894, "learning_rate": 8e-05, "loss": 1.621, "step": 3557 }, { "epoch": 0.6074782311763701, "grad_norm": 0.48040640354156494, "learning_rate": 8e-05, "loss": 1.715, "step": 3558 }, { "epoch": 0.6076489670479768, "grad_norm": 0.4863763451576233, "learning_rate": 8e-05, "loss": 1.669, "step": 3559 }, { "epoch": 0.6078197029195834, "grad_norm": 0.49719005823135376, "learning_rate": 8e-05, "loss": 1.7062, "step": 3560 }, { "epoch": 0.6079904387911901, "grad_norm": 0.5659756064414978, "learning_rate": 8e-05, "loss": 2.0316, "step": 3561 }, { "epoch": 0.6081611746627966, "grad_norm": 0.4358305335044861, "learning_rate": 8e-05, "loss": 1.4885, "step": 3562 }, { "epoch": 0.6083319105344033, "grad_norm": 0.5344245433807373, "learning_rate": 8e-05, "loss": 1.7302, "step": 3563 }, { "epoch": 0.6085026464060099, "grad_norm": 0.46907931566238403, "learning_rate": 8e-05, "loss": 1.5923, "step": 3564 }, { "epoch": 0.6086733822776166, "grad_norm": 0.4936218559741974, "learning_rate": 8e-05, "loss": 1.7991, "step": 3565 }, { "epoch": 0.6088441181492231, "grad_norm": 0.4992823004722595, "learning_rate": 8e-05, "loss": 1.7, "step": 3566 }, { "epoch": 0.6090148540208298, "grad_norm": 0.5056807398796082, "learning_rate": 8e-05, "loss": 1.7154, "step": 3567 }, { "epoch": 0.6091855898924364, "grad_norm": 0.49611762166023254, "learning_rate": 8e-05, "loss": 1.8252, "step": 3568 }, { "epoch": 0.609356325764043, "grad_norm": 0.4741036593914032, "learning_rate": 8e-05, "loss": 1.6877, "step": 3569 }, { "epoch": 0.6095270616356496, "grad_norm": 0.5012743473052979, "learning_rate": 8e-05, "loss": 1.6566, "step": 3570 }, { "epoch": 0.6096977975072563, "grad_norm": 0.5474607944488525, "learning_rate": 8e-05, "loss": 1.7313, "step": 3571 }, { "epoch": 0.6098685333788629, "grad_norm": 0.49567940831184387, "learning_rate": 8e-05, "loss": 1.6799, "step": 3572 }, { "epoch": 0.6100392692504695, "grad_norm": 0.4998388886451721, "learning_rate": 8e-05, "loss": 1.4995, "step": 3573 }, { "epoch": 0.6102100051220761, "grad_norm": 0.47105517983436584, "learning_rate": 8e-05, "loss": 1.5318, "step": 3574 }, { "epoch": 0.6103807409936828, "grad_norm": 0.4959363341331482, "learning_rate": 8e-05, "loss": 1.7333, "step": 3575 }, { "epoch": 0.6105514768652894, "grad_norm": 0.46408888697624207, "learning_rate": 8e-05, "loss": 1.6222, "step": 3576 }, { "epoch": 0.610722212736896, "grad_norm": 0.5025663375854492, "learning_rate": 8e-05, "loss": 1.7732, "step": 3577 }, { "epoch": 0.6108929486085026, "grad_norm": 0.5026647448539734, "learning_rate": 8e-05, "loss": 1.8418, "step": 3578 }, { "epoch": 0.6110636844801093, "grad_norm": 0.48884838819503784, "learning_rate": 8e-05, "loss": 1.7935, "step": 3579 }, { "epoch": 0.6112344203517159, "grad_norm": 0.47250959277153015, "learning_rate": 8e-05, "loss": 1.3557, "step": 3580 }, { "epoch": 0.6114051562233225, "grad_norm": 0.4945369064807892, "learning_rate": 8e-05, "loss": 1.7237, "step": 3581 }, { "epoch": 0.6115758920949291, "grad_norm": 0.5061044692993164, "learning_rate": 8e-05, "loss": 1.8459, "step": 3582 }, { "epoch": 0.6117466279665358, "grad_norm": 0.4444868564605713, "learning_rate": 8e-05, "loss": 1.7102, "step": 3583 }, { "epoch": 0.6119173638381424, "grad_norm": 0.45306888222694397, "learning_rate": 8e-05, "loss": 1.7069, "step": 3584 }, { "epoch": 0.612088099709749, "grad_norm": 0.498258501291275, "learning_rate": 8e-05, "loss": 1.7551, "step": 3585 }, { "epoch": 0.6122588355813556, "grad_norm": 0.5349040031433105, "learning_rate": 8e-05, "loss": 1.7908, "step": 3586 }, { "epoch": 0.6124295714529623, "grad_norm": 0.5594348311424255, "learning_rate": 8e-05, "loss": 1.8286, "step": 3587 }, { "epoch": 0.6126003073245689, "grad_norm": 0.465854287147522, "learning_rate": 8e-05, "loss": 1.6157, "step": 3588 }, { "epoch": 0.6127710431961755, "grad_norm": 0.5157387852668762, "learning_rate": 8e-05, "loss": 1.8198, "step": 3589 }, { "epoch": 0.6129417790677821, "grad_norm": 0.5187594294548035, "learning_rate": 8e-05, "loss": 1.9338, "step": 3590 }, { "epoch": 0.6131125149393888, "grad_norm": 0.47006911039352417, "learning_rate": 8e-05, "loss": 1.6796, "step": 3591 }, { "epoch": 0.6132832508109954, "grad_norm": 0.47206392884254456, "learning_rate": 8e-05, "loss": 1.7581, "step": 3592 }, { "epoch": 0.613453986682602, "grad_norm": 0.45472872257232666, "learning_rate": 8e-05, "loss": 1.659, "step": 3593 }, { "epoch": 0.6136247225542086, "grad_norm": 0.47536173462867737, "learning_rate": 8e-05, "loss": 1.4844, "step": 3594 }, { "epoch": 0.6137954584258153, "grad_norm": 0.4637840688228607, "learning_rate": 8e-05, "loss": 1.6919, "step": 3595 }, { "epoch": 0.6139661942974219, "grad_norm": 0.44415533542633057, "learning_rate": 8e-05, "loss": 1.5571, "step": 3596 }, { "epoch": 0.6141369301690285, "grad_norm": 0.46350541710853577, "learning_rate": 8e-05, "loss": 1.7216, "step": 3597 }, { "epoch": 0.6143076660406351, "grad_norm": 0.4935669004917145, "learning_rate": 8e-05, "loss": 1.6519, "step": 3598 }, { "epoch": 0.6144784019122418, "grad_norm": 0.463347464799881, "learning_rate": 8e-05, "loss": 1.5562, "step": 3599 }, { "epoch": 0.6146491377838483, "grad_norm": 0.4832221567630768, "learning_rate": 8e-05, "loss": 1.6194, "step": 3600 }, { "epoch": 0.614819873655455, "grad_norm": 0.440946489572525, "learning_rate": 8e-05, "loss": 1.6035, "step": 3601 }, { "epoch": 0.6149906095270616, "grad_norm": 0.4613446593284607, "learning_rate": 8e-05, "loss": 1.551, "step": 3602 }, { "epoch": 0.6151613453986683, "grad_norm": 0.4685191512107849, "learning_rate": 8e-05, "loss": 1.6408, "step": 3603 }, { "epoch": 0.6153320812702748, "grad_norm": 0.50385981798172, "learning_rate": 8e-05, "loss": 1.6538, "step": 3604 }, { "epoch": 0.6155028171418815, "grad_norm": 0.5013015270233154, "learning_rate": 8e-05, "loss": 1.7083, "step": 3605 }, { "epoch": 0.6156735530134881, "grad_norm": 0.4603270888328552, "learning_rate": 8e-05, "loss": 1.7709, "step": 3606 }, { "epoch": 0.6158442888850948, "grad_norm": 0.5423723459243774, "learning_rate": 8e-05, "loss": 1.9289, "step": 3607 }, { "epoch": 0.6160150247567013, "grad_norm": 0.46839433908462524, "learning_rate": 8e-05, "loss": 1.7553, "step": 3608 }, { "epoch": 0.616185760628308, "grad_norm": 0.4807577133178711, "learning_rate": 8e-05, "loss": 1.555, "step": 3609 }, { "epoch": 0.6163564964999146, "grad_norm": 0.47258874773979187, "learning_rate": 8e-05, "loss": 1.571, "step": 3610 }, { "epoch": 0.6165272323715213, "grad_norm": 0.4649566411972046, "learning_rate": 8e-05, "loss": 1.6595, "step": 3611 }, { "epoch": 0.6166979682431278, "grad_norm": 0.47475358843803406, "learning_rate": 8e-05, "loss": 1.6829, "step": 3612 }, { "epoch": 0.6168687041147345, "grad_norm": 0.5248875617980957, "learning_rate": 8e-05, "loss": 1.9632, "step": 3613 }, { "epoch": 0.6170394399863411, "grad_norm": 0.455124169588089, "learning_rate": 8e-05, "loss": 1.5633, "step": 3614 }, { "epoch": 0.6172101758579478, "grad_norm": 0.45371341705322266, "learning_rate": 8e-05, "loss": 1.7781, "step": 3615 }, { "epoch": 0.6173809117295543, "grad_norm": 0.47034841775894165, "learning_rate": 8e-05, "loss": 1.6362, "step": 3616 }, { "epoch": 0.617551647601161, "grad_norm": 0.4816347658634186, "learning_rate": 8e-05, "loss": 1.6464, "step": 3617 }, { "epoch": 0.6177223834727676, "grad_norm": 0.459091454744339, "learning_rate": 8e-05, "loss": 1.6931, "step": 3618 }, { "epoch": 0.6178931193443743, "grad_norm": 0.48002302646636963, "learning_rate": 8e-05, "loss": 1.754, "step": 3619 }, { "epoch": 0.6180638552159808, "grad_norm": 0.4874262809753418, "learning_rate": 8e-05, "loss": 1.6733, "step": 3620 }, { "epoch": 0.6182345910875875, "grad_norm": 0.46832382678985596, "learning_rate": 8e-05, "loss": 1.6508, "step": 3621 }, { "epoch": 0.6184053269591941, "grad_norm": 0.4863508641719818, "learning_rate": 8e-05, "loss": 1.5613, "step": 3622 }, { "epoch": 0.6185760628308008, "grad_norm": 0.5274993181228638, "learning_rate": 8e-05, "loss": 1.6965, "step": 3623 }, { "epoch": 0.6187467987024073, "grad_norm": 0.5069921612739563, "learning_rate": 8e-05, "loss": 1.7582, "step": 3624 }, { "epoch": 0.618917534574014, "grad_norm": 0.47482460737228394, "learning_rate": 8e-05, "loss": 1.615, "step": 3625 }, { "epoch": 0.6190882704456206, "grad_norm": 0.4653390347957611, "learning_rate": 8e-05, "loss": 1.6566, "step": 3626 }, { "epoch": 0.6192590063172273, "grad_norm": 0.5093048810958862, "learning_rate": 8e-05, "loss": 1.7498, "step": 3627 }, { "epoch": 0.6194297421888338, "grad_norm": 0.48366987705230713, "learning_rate": 8e-05, "loss": 1.6827, "step": 3628 }, { "epoch": 0.6196004780604405, "grad_norm": 0.49050846695899963, "learning_rate": 8e-05, "loss": 1.7396, "step": 3629 }, { "epoch": 0.6197712139320471, "grad_norm": 0.48732608556747437, "learning_rate": 8e-05, "loss": 1.5397, "step": 3630 }, { "epoch": 0.6199419498036538, "grad_norm": 0.502745509147644, "learning_rate": 8e-05, "loss": 1.6737, "step": 3631 }, { "epoch": 0.6201126856752603, "grad_norm": 0.46871206164360046, "learning_rate": 8e-05, "loss": 1.6956, "step": 3632 }, { "epoch": 0.620283421546867, "grad_norm": 0.5119975209236145, "learning_rate": 8e-05, "loss": 1.7994, "step": 3633 }, { "epoch": 0.6204541574184737, "grad_norm": 0.542486846446991, "learning_rate": 8e-05, "loss": 1.6976, "step": 3634 }, { "epoch": 0.6206248932900802, "grad_norm": 0.4785175025463104, "learning_rate": 8e-05, "loss": 1.7608, "step": 3635 }, { "epoch": 0.6207956291616868, "grad_norm": 0.46062374114990234, "learning_rate": 8e-05, "loss": 1.5654, "step": 3636 }, { "epoch": 0.6209663650332935, "grad_norm": 0.4795078635215759, "learning_rate": 8e-05, "loss": 1.7354, "step": 3637 }, { "epoch": 0.6211371009049002, "grad_norm": 0.4723726212978363, "learning_rate": 8e-05, "loss": 1.6175, "step": 3638 }, { "epoch": 0.6213078367765067, "grad_norm": 0.4623079001903534, "learning_rate": 8e-05, "loss": 1.7292, "step": 3639 }, { "epoch": 0.6214785726481133, "grad_norm": 0.4922284781932831, "learning_rate": 8e-05, "loss": 1.6128, "step": 3640 }, { "epoch": 0.62164930851972, "grad_norm": 0.4738790988922119, "learning_rate": 8e-05, "loss": 1.6908, "step": 3641 }, { "epoch": 0.6218200443913267, "grad_norm": 0.4896087050437927, "learning_rate": 8e-05, "loss": 1.4774, "step": 3642 }, { "epoch": 0.6219907802629332, "grad_norm": 0.48951807618141174, "learning_rate": 8e-05, "loss": 1.7056, "step": 3643 }, { "epoch": 0.6221615161345398, "grad_norm": 0.44506269693374634, "learning_rate": 8e-05, "loss": 1.5284, "step": 3644 }, { "epoch": 0.6223322520061465, "grad_norm": 0.43746691942214966, "learning_rate": 8e-05, "loss": 1.6395, "step": 3645 }, { "epoch": 0.6225029878777532, "grad_norm": 0.4757804274559021, "learning_rate": 8e-05, "loss": 1.6356, "step": 3646 }, { "epoch": 0.6226737237493597, "grad_norm": 0.48596593737602234, "learning_rate": 8e-05, "loss": 1.6784, "step": 3647 }, { "epoch": 0.6228444596209664, "grad_norm": 0.5163431763648987, "learning_rate": 8e-05, "loss": 1.8421, "step": 3648 }, { "epoch": 0.623015195492573, "grad_norm": 0.5153772234916687, "learning_rate": 8e-05, "loss": 1.9034, "step": 3649 }, { "epoch": 0.6231859313641797, "grad_norm": 0.5053626298904419, "learning_rate": 8e-05, "loss": 1.8046, "step": 3650 }, { "epoch": 0.6233566672357862, "grad_norm": 0.4634280800819397, "learning_rate": 8e-05, "loss": 1.8136, "step": 3651 }, { "epoch": 0.6235274031073929, "grad_norm": 0.4634631276130676, "learning_rate": 8e-05, "loss": 1.7389, "step": 3652 }, { "epoch": 0.6236981389789995, "grad_norm": 0.5226365923881531, "learning_rate": 8e-05, "loss": 1.7859, "step": 3653 }, { "epoch": 0.6238688748506062, "grad_norm": 0.45591750741004944, "learning_rate": 8e-05, "loss": 1.8894, "step": 3654 }, { "epoch": 0.6240396107222127, "grad_norm": 0.4595072269439697, "learning_rate": 8e-05, "loss": 1.6108, "step": 3655 }, { "epoch": 0.6242103465938194, "grad_norm": 0.4515448212623596, "learning_rate": 8e-05, "loss": 1.6978, "step": 3656 }, { "epoch": 0.624381082465426, "grad_norm": 0.6775045394897461, "learning_rate": 8e-05, "loss": 1.8703, "step": 3657 }, { "epoch": 0.6245518183370327, "grad_norm": 0.4938437044620514, "learning_rate": 8e-05, "loss": 1.7375, "step": 3658 }, { "epoch": 0.6247225542086392, "grad_norm": 0.46429723501205444, "learning_rate": 8e-05, "loss": 1.5627, "step": 3659 }, { "epoch": 0.6248932900802459, "grad_norm": 0.5161936283111572, "learning_rate": 8e-05, "loss": 1.7683, "step": 3660 }, { "epoch": 0.6250640259518525, "grad_norm": 0.45418956875801086, "learning_rate": 8e-05, "loss": 1.7152, "step": 3661 }, { "epoch": 0.6252347618234592, "grad_norm": 0.4869216978549957, "learning_rate": 8e-05, "loss": 1.7418, "step": 3662 }, { "epoch": 0.6254054976950657, "grad_norm": 0.49390023946762085, "learning_rate": 8e-05, "loss": 1.6176, "step": 3663 }, { "epoch": 0.6255762335666724, "grad_norm": 0.47901585698127747, "learning_rate": 8e-05, "loss": 1.8068, "step": 3664 }, { "epoch": 0.625746969438279, "grad_norm": 0.4745546877384186, "learning_rate": 8e-05, "loss": 1.6015, "step": 3665 }, { "epoch": 0.6259177053098856, "grad_norm": 0.4647641181945801, "learning_rate": 8e-05, "loss": 1.6502, "step": 3666 }, { "epoch": 0.6260884411814922, "grad_norm": 0.4850284457206726, "learning_rate": 8e-05, "loss": 1.6803, "step": 3667 }, { "epoch": 0.6262591770530989, "grad_norm": 0.47881361842155457, "learning_rate": 8e-05, "loss": 1.5782, "step": 3668 }, { "epoch": 0.6264299129247055, "grad_norm": 0.49172264337539673, "learning_rate": 8e-05, "loss": 1.8038, "step": 3669 }, { "epoch": 0.626600648796312, "grad_norm": 0.5786006450653076, "learning_rate": 8e-05, "loss": 2.116, "step": 3670 }, { "epoch": 0.6267713846679187, "grad_norm": 0.44553059339523315, "learning_rate": 8e-05, "loss": 1.4298, "step": 3671 }, { "epoch": 0.6269421205395254, "grad_norm": 0.4702492952346802, "learning_rate": 8e-05, "loss": 1.7131, "step": 3672 }, { "epoch": 0.627112856411132, "grad_norm": 0.4807460904121399, "learning_rate": 8e-05, "loss": 1.541, "step": 3673 }, { "epoch": 0.6272835922827386, "grad_norm": 0.5579769611358643, "learning_rate": 8e-05, "loss": 1.6805, "step": 3674 }, { "epoch": 0.6274543281543452, "grad_norm": 0.4438091218471527, "learning_rate": 8e-05, "loss": 1.6397, "step": 3675 }, { "epoch": 0.6276250640259519, "grad_norm": 0.49296143651008606, "learning_rate": 8e-05, "loss": 1.6118, "step": 3676 }, { "epoch": 0.6277957998975585, "grad_norm": 0.47838345170021057, "learning_rate": 8e-05, "loss": 1.581, "step": 3677 }, { "epoch": 0.6279665357691651, "grad_norm": 0.5210538506507874, "learning_rate": 8e-05, "loss": 1.8027, "step": 3678 }, { "epoch": 0.6281372716407717, "grad_norm": 0.5034611225128174, "learning_rate": 8e-05, "loss": 1.6141, "step": 3679 }, { "epoch": 0.6283080075123784, "grad_norm": 0.4738728106021881, "learning_rate": 8e-05, "loss": 1.5392, "step": 3680 }, { "epoch": 0.628478743383985, "grad_norm": 0.45102933049201965, "learning_rate": 8e-05, "loss": 1.5066, "step": 3681 }, { "epoch": 0.6286494792555916, "grad_norm": 0.4849122166633606, "learning_rate": 8e-05, "loss": 1.8511, "step": 3682 }, { "epoch": 0.6288202151271982, "grad_norm": 0.45644649863243103, "learning_rate": 8e-05, "loss": 1.6726, "step": 3683 }, { "epoch": 0.6289909509988049, "grad_norm": 0.4579260051250458, "learning_rate": 8e-05, "loss": 1.6304, "step": 3684 }, { "epoch": 0.6291616868704115, "grad_norm": 0.4562786817550659, "learning_rate": 8e-05, "loss": 1.4087, "step": 3685 }, { "epoch": 0.6293324227420181, "grad_norm": 0.44929853081703186, "learning_rate": 8e-05, "loss": 1.5717, "step": 3686 }, { "epoch": 0.6295031586136247, "grad_norm": 0.47466567158699036, "learning_rate": 8e-05, "loss": 1.6298, "step": 3687 }, { "epoch": 0.6296738944852314, "grad_norm": 0.46335139870643616, "learning_rate": 8e-05, "loss": 1.6896, "step": 3688 }, { "epoch": 0.629844630356838, "grad_norm": 0.4548349380493164, "learning_rate": 8e-05, "loss": 1.5479, "step": 3689 }, { "epoch": 0.6300153662284446, "grad_norm": 0.46010738611221313, "learning_rate": 8e-05, "loss": 1.6381, "step": 3690 }, { "epoch": 0.6301861021000512, "grad_norm": 0.4880732297897339, "learning_rate": 8e-05, "loss": 1.5702, "step": 3691 }, { "epoch": 0.6303568379716579, "grad_norm": 0.4916342496871948, "learning_rate": 8e-05, "loss": 1.7083, "step": 3692 }, { "epoch": 0.6305275738432645, "grad_norm": 0.5206868648529053, "learning_rate": 8e-05, "loss": 1.671, "step": 3693 }, { "epoch": 0.6306983097148711, "grad_norm": 0.4714127480983734, "learning_rate": 8e-05, "loss": 1.7517, "step": 3694 }, { "epoch": 0.6308690455864777, "grad_norm": 0.5010907649993896, "learning_rate": 8e-05, "loss": 1.6485, "step": 3695 }, { "epoch": 0.6310397814580844, "grad_norm": 0.5123007297515869, "learning_rate": 8e-05, "loss": 1.7701, "step": 3696 }, { "epoch": 0.631210517329691, "grad_norm": 0.5146669149398804, "learning_rate": 8e-05, "loss": 1.8624, "step": 3697 }, { "epoch": 0.6313812532012976, "grad_norm": 0.5609452724456787, "learning_rate": 8e-05, "loss": 1.7237, "step": 3698 }, { "epoch": 0.6315519890729042, "grad_norm": 0.47957009077072144, "learning_rate": 8e-05, "loss": 1.5668, "step": 3699 }, { "epoch": 0.6317227249445109, "grad_norm": 0.4739811420440674, "learning_rate": 8e-05, "loss": 1.7712, "step": 3700 }, { "epoch": 0.6318934608161174, "grad_norm": 0.5340419411659241, "learning_rate": 8e-05, "loss": 1.697, "step": 3701 }, { "epoch": 0.6320641966877241, "grad_norm": 0.4827873110771179, "learning_rate": 8e-05, "loss": 1.7193, "step": 3702 }, { "epoch": 0.6322349325593307, "grad_norm": 0.5340578556060791, "learning_rate": 8e-05, "loss": 1.7562, "step": 3703 }, { "epoch": 0.6324056684309374, "grad_norm": 0.47711876034736633, "learning_rate": 8e-05, "loss": 1.7431, "step": 3704 }, { "epoch": 0.6325764043025439, "grad_norm": 0.46689069271087646, "learning_rate": 8e-05, "loss": 1.6156, "step": 3705 }, { "epoch": 0.6327471401741506, "grad_norm": 0.4545641839504242, "learning_rate": 8e-05, "loss": 1.6217, "step": 3706 }, { "epoch": 0.6329178760457572, "grad_norm": 0.47017380595207214, "learning_rate": 8e-05, "loss": 1.68, "step": 3707 }, { "epoch": 0.6330886119173639, "grad_norm": 0.4619688391685486, "learning_rate": 8e-05, "loss": 1.7301, "step": 3708 }, { "epoch": 0.6332593477889704, "grad_norm": 0.4923812747001648, "learning_rate": 8e-05, "loss": 1.8269, "step": 3709 }, { "epoch": 0.6334300836605771, "grad_norm": 0.45837244391441345, "learning_rate": 8e-05, "loss": 1.7097, "step": 3710 }, { "epoch": 0.6336008195321837, "grad_norm": 0.4968366026878357, "learning_rate": 8e-05, "loss": 1.7054, "step": 3711 }, { "epoch": 0.6337715554037904, "grad_norm": 0.45160186290740967, "learning_rate": 8e-05, "loss": 1.5275, "step": 3712 }, { "epoch": 0.6339422912753969, "grad_norm": 0.4771499037742615, "learning_rate": 8e-05, "loss": 1.6466, "step": 3713 }, { "epoch": 0.6341130271470036, "grad_norm": 0.5117605328559875, "learning_rate": 8e-05, "loss": 1.7951, "step": 3714 }, { "epoch": 0.6342837630186102, "grad_norm": 0.507673442363739, "learning_rate": 8e-05, "loss": 1.7107, "step": 3715 }, { "epoch": 0.6344544988902169, "grad_norm": 0.46454116702079773, "learning_rate": 8e-05, "loss": 1.6985, "step": 3716 }, { "epoch": 0.6346252347618234, "grad_norm": 0.5068217515945435, "learning_rate": 8e-05, "loss": 1.8246, "step": 3717 }, { "epoch": 0.6347959706334301, "grad_norm": 0.495211124420166, "learning_rate": 8e-05, "loss": 1.739, "step": 3718 }, { "epoch": 0.6349667065050367, "grad_norm": 0.48935845494270325, "learning_rate": 8e-05, "loss": 1.7025, "step": 3719 }, { "epoch": 0.6351374423766434, "grad_norm": 0.4809333384037018, "learning_rate": 8e-05, "loss": 1.604, "step": 3720 }, { "epoch": 0.6353081782482499, "grad_norm": 0.47525864839553833, "learning_rate": 8e-05, "loss": 1.608, "step": 3721 }, { "epoch": 0.6354789141198566, "grad_norm": 0.49894461035728455, "learning_rate": 8e-05, "loss": 1.8546, "step": 3722 }, { "epoch": 0.6356496499914632, "grad_norm": 0.465804785490036, "learning_rate": 8e-05, "loss": 1.6979, "step": 3723 }, { "epoch": 0.6358203858630699, "grad_norm": 0.44439560174942017, "learning_rate": 8e-05, "loss": 1.6056, "step": 3724 }, { "epoch": 0.6359911217346764, "grad_norm": 0.476237416267395, "learning_rate": 8e-05, "loss": 1.776, "step": 3725 }, { "epoch": 0.6361618576062831, "grad_norm": 0.5525131225585938, "learning_rate": 8e-05, "loss": 1.7058, "step": 3726 }, { "epoch": 0.6363325934778897, "grad_norm": 0.46907711029052734, "learning_rate": 8e-05, "loss": 1.647, "step": 3727 }, { "epoch": 0.6365033293494964, "grad_norm": 0.4811820983886719, "learning_rate": 8e-05, "loss": 1.6769, "step": 3728 }, { "epoch": 0.6366740652211029, "grad_norm": 0.5082493424415588, "learning_rate": 8e-05, "loss": 1.7779, "step": 3729 }, { "epoch": 0.6368448010927096, "grad_norm": 0.4779623746871948, "learning_rate": 8e-05, "loss": 1.4886, "step": 3730 }, { "epoch": 0.6370155369643162, "grad_norm": 0.49868783354759216, "learning_rate": 8e-05, "loss": 1.7667, "step": 3731 }, { "epoch": 0.6371862728359228, "grad_norm": 0.47799021005630493, "learning_rate": 8e-05, "loss": 1.7072, "step": 3732 }, { "epoch": 0.6373570087075294, "grad_norm": 0.47706952691078186, "learning_rate": 8e-05, "loss": 1.7058, "step": 3733 }, { "epoch": 0.6375277445791361, "grad_norm": 0.4770442843437195, "learning_rate": 8e-05, "loss": 1.6432, "step": 3734 }, { "epoch": 0.6376984804507427, "grad_norm": 0.4757190942764282, "learning_rate": 8e-05, "loss": 1.8076, "step": 3735 }, { "epoch": 0.6378692163223493, "grad_norm": 0.46735870838165283, "learning_rate": 8e-05, "loss": 1.6893, "step": 3736 }, { "epoch": 0.6380399521939559, "grad_norm": 0.4747326374053955, "learning_rate": 8e-05, "loss": 1.6901, "step": 3737 }, { "epoch": 0.6382106880655626, "grad_norm": 0.48366066813468933, "learning_rate": 8e-05, "loss": 1.7547, "step": 3738 }, { "epoch": 0.6383814239371692, "grad_norm": 0.49297094345092773, "learning_rate": 8e-05, "loss": 1.5985, "step": 3739 }, { "epoch": 0.6385521598087758, "grad_norm": 0.4673383831977844, "learning_rate": 8e-05, "loss": 1.7299, "step": 3740 }, { "epoch": 0.6387228956803824, "grad_norm": 0.46083107590675354, "learning_rate": 8e-05, "loss": 1.7536, "step": 3741 }, { "epoch": 0.6388936315519891, "grad_norm": 0.4571163058280945, "learning_rate": 8e-05, "loss": 1.6925, "step": 3742 }, { "epoch": 0.6390643674235957, "grad_norm": 0.4725416898727417, "learning_rate": 8e-05, "loss": 1.7491, "step": 3743 }, { "epoch": 0.6392351032952023, "grad_norm": 0.48273006081581116, "learning_rate": 8e-05, "loss": 1.8626, "step": 3744 }, { "epoch": 0.6394058391668089, "grad_norm": 0.442257285118103, "learning_rate": 8e-05, "loss": 1.5895, "step": 3745 }, { "epoch": 0.6395765750384156, "grad_norm": 0.4794732928276062, "learning_rate": 8e-05, "loss": 1.7279, "step": 3746 }, { "epoch": 0.6397473109100222, "grad_norm": 0.517711341381073, "learning_rate": 8e-05, "loss": 1.6707, "step": 3747 }, { "epoch": 0.6399180467816288, "grad_norm": 0.4869144558906555, "learning_rate": 8e-05, "loss": 1.6972, "step": 3748 }, { "epoch": 0.6400887826532354, "grad_norm": 0.465300589799881, "learning_rate": 8e-05, "loss": 1.6418, "step": 3749 }, { "epoch": 0.6402595185248421, "grad_norm": 0.4784269332885742, "learning_rate": 8e-05, "loss": 1.6368, "step": 3750 }, { "epoch": 0.6404302543964487, "grad_norm": 0.460898220539093, "learning_rate": 8e-05, "loss": 1.6095, "step": 3751 }, { "epoch": 0.6406009902680553, "grad_norm": 0.48043668270111084, "learning_rate": 8e-05, "loss": 1.5875, "step": 3752 }, { "epoch": 0.6407717261396619, "grad_norm": 0.4844098389148712, "learning_rate": 8e-05, "loss": 1.7093, "step": 3753 }, { "epoch": 0.6409424620112686, "grad_norm": 0.43447375297546387, "learning_rate": 8e-05, "loss": 1.6362, "step": 3754 }, { "epoch": 0.6411131978828752, "grad_norm": 0.4642336964607239, "learning_rate": 8e-05, "loss": 1.7295, "step": 3755 }, { "epoch": 0.6412839337544818, "grad_norm": 0.47348663210868835, "learning_rate": 8e-05, "loss": 1.5424, "step": 3756 }, { "epoch": 0.6414546696260884, "grad_norm": 0.4833225607872009, "learning_rate": 8e-05, "loss": 1.6395, "step": 3757 }, { "epoch": 0.6416254054976951, "grad_norm": 0.4494099020957947, "learning_rate": 8e-05, "loss": 1.5295, "step": 3758 }, { "epoch": 0.6417961413693017, "grad_norm": 0.48259997367858887, "learning_rate": 8e-05, "loss": 1.6759, "step": 3759 }, { "epoch": 0.6419668772409083, "grad_norm": 0.4686848223209381, "learning_rate": 8e-05, "loss": 1.41, "step": 3760 }, { "epoch": 0.6421376131125149, "grad_norm": 0.5138399004936218, "learning_rate": 8e-05, "loss": 1.8132, "step": 3761 }, { "epoch": 0.6423083489841216, "grad_norm": 0.4754614233970642, "learning_rate": 8e-05, "loss": 1.6266, "step": 3762 }, { "epoch": 0.6424790848557281, "grad_norm": 0.5250380039215088, "learning_rate": 8e-05, "loss": 1.7796, "step": 3763 }, { "epoch": 0.6426498207273348, "grad_norm": 0.4868388772010803, "learning_rate": 8e-05, "loss": 1.5046, "step": 3764 }, { "epoch": 0.6428205565989414, "grad_norm": 0.47237449884414673, "learning_rate": 8e-05, "loss": 1.6482, "step": 3765 }, { "epoch": 0.6429912924705481, "grad_norm": 0.4521397352218628, "learning_rate": 8e-05, "loss": 1.5062, "step": 3766 }, { "epoch": 0.6431620283421546, "grad_norm": 0.4957989752292633, "learning_rate": 8e-05, "loss": 1.8315, "step": 3767 }, { "epoch": 0.6433327642137613, "grad_norm": 0.4596697688102722, "learning_rate": 8e-05, "loss": 1.6042, "step": 3768 }, { "epoch": 0.6435035000853679, "grad_norm": 0.5364698171615601, "learning_rate": 8e-05, "loss": 1.8058, "step": 3769 }, { "epoch": 0.6436742359569746, "grad_norm": 0.4689209759235382, "learning_rate": 8e-05, "loss": 1.6143, "step": 3770 }, { "epoch": 0.6438449718285811, "grad_norm": 0.4537004828453064, "learning_rate": 8e-05, "loss": 1.5008, "step": 3771 }, { "epoch": 0.6440157077001878, "grad_norm": 0.4866876006126404, "learning_rate": 8e-05, "loss": 1.5964, "step": 3772 }, { "epoch": 0.6441864435717944, "grad_norm": 0.514151394367218, "learning_rate": 8e-05, "loss": 1.8984, "step": 3773 }, { "epoch": 0.6443571794434011, "grad_norm": 0.48934754729270935, "learning_rate": 8e-05, "loss": 1.6809, "step": 3774 }, { "epoch": 0.6445279153150076, "grad_norm": 0.46427470445632935, "learning_rate": 8e-05, "loss": 1.6493, "step": 3775 }, { "epoch": 0.6446986511866143, "grad_norm": 0.4920779764652252, "learning_rate": 8e-05, "loss": 1.7333, "step": 3776 }, { "epoch": 0.6448693870582209, "grad_norm": 0.464932382106781, "learning_rate": 8e-05, "loss": 1.7951, "step": 3777 }, { "epoch": 0.6450401229298276, "grad_norm": 0.4500455856323242, "learning_rate": 8e-05, "loss": 1.617, "step": 3778 }, { "epoch": 0.6452108588014341, "grad_norm": 0.4953109323978424, "learning_rate": 8e-05, "loss": 1.7006, "step": 3779 }, { "epoch": 0.6453815946730408, "grad_norm": 0.49335113167762756, "learning_rate": 8e-05, "loss": 1.7413, "step": 3780 }, { "epoch": 0.6455523305446474, "grad_norm": 0.4499549865722656, "learning_rate": 8e-05, "loss": 1.5843, "step": 3781 }, { "epoch": 0.6457230664162541, "grad_norm": 0.46894150972366333, "learning_rate": 8e-05, "loss": 1.6599, "step": 3782 }, { "epoch": 0.6458938022878606, "grad_norm": 0.46169906854629517, "learning_rate": 8e-05, "loss": 1.7447, "step": 3783 }, { "epoch": 0.6460645381594673, "grad_norm": 0.4961356818675995, "learning_rate": 8e-05, "loss": 1.8556, "step": 3784 }, { "epoch": 0.646235274031074, "grad_norm": 0.4685845673084259, "learning_rate": 8e-05, "loss": 1.7167, "step": 3785 }, { "epoch": 0.6464060099026806, "grad_norm": 0.44313791394233704, "learning_rate": 8e-05, "loss": 1.428, "step": 3786 }, { "epoch": 0.6465767457742871, "grad_norm": 0.4811614751815796, "learning_rate": 8e-05, "loss": 1.5927, "step": 3787 }, { "epoch": 0.6467474816458938, "grad_norm": 0.4503854215145111, "learning_rate": 8e-05, "loss": 1.5966, "step": 3788 }, { "epoch": 0.6469182175175004, "grad_norm": 0.4947664439678192, "learning_rate": 8e-05, "loss": 1.6546, "step": 3789 }, { "epoch": 0.6470889533891071, "grad_norm": 0.4668845236301422, "learning_rate": 8e-05, "loss": 1.6723, "step": 3790 }, { "epoch": 0.6472596892607136, "grad_norm": 0.46392467617988586, "learning_rate": 8e-05, "loss": 1.5773, "step": 3791 }, { "epoch": 0.6474304251323203, "grad_norm": 0.45686015486717224, "learning_rate": 8e-05, "loss": 1.7337, "step": 3792 }, { "epoch": 0.647601161003927, "grad_norm": 0.47123271226882935, "learning_rate": 8e-05, "loss": 1.7698, "step": 3793 }, { "epoch": 0.6477718968755336, "grad_norm": 0.5032098889350891, "learning_rate": 8e-05, "loss": 1.7864, "step": 3794 }, { "epoch": 0.6479426327471401, "grad_norm": 0.49791035056114197, "learning_rate": 8e-05, "loss": 1.6538, "step": 3795 }, { "epoch": 0.6481133686187468, "grad_norm": 0.4895378351211548, "learning_rate": 8e-05, "loss": 1.6862, "step": 3796 }, { "epoch": 0.6482841044903535, "grad_norm": 0.45550477504730225, "learning_rate": 8e-05, "loss": 1.6071, "step": 3797 }, { "epoch": 0.64845484036196, "grad_norm": 0.4899066984653473, "learning_rate": 8e-05, "loss": 1.6746, "step": 3798 }, { "epoch": 0.6486255762335666, "grad_norm": 0.5203737020492554, "learning_rate": 8e-05, "loss": 1.996, "step": 3799 }, { "epoch": 0.6487963121051733, "grad_norm": 0.45978203415870667, "learning_rate": 8e-05, "loss": 1.7252, "step": 3800 }, { "epoch": 0.64896704797678, "grad_norm": 0.5090150237083435, "learning_rate": 8e-05, "loss": 1.6914, "step": 3801 }, { "epoch": 0.6491377838483865, "grad_norm": 0.4966232180595398, "learning_rate": 8e-05, "loss": 1.8036, "step": 3802 }, { "epoch": 0.6493085197199931, "grad_norm": 0.488521009683609, "learning_rate": 8e-05, "loss": 1.7812, "step": 3803 }, { "epoch": 0.6494792555915998, "grad_norm": 0.46800628304481506, "learning_rate": 8e-05, "loss": 1.7713, "step": 3804 }, { "epoch": 0.6496499914632065, "grad_norm": 0.5287593007087708, "learning_rate": 8e-05, "loss": 1.6136, "step": 3805 }, { "epoch": 0.649820727334813, "grad_norm": 0.4744454324245453, "learning_rate": 8e-05, "loss": 1.4477, "step": 3806 }, { "epoch": 0.6499914632064197, "grad_norm": 0.5632978677749634, "learning_rate": 8e-05, "loss": 1.6664, "step": 3807 }, { "epoch": 0.6501621990780263, "grad_norm": 0.4975838363170624, "learning_rate": 8e-05, "loss": 1.7002, "step": 3808 }, { "epoch": 0.650332934949633, "grad_norm": 0.49298375844955444, "learning_rate": 8e-05, "loss": 1.7395, "step": 3809 }, { "epoch": 0.6505036708212395, "grad_norm": 0.5694046020507812, "learning_rate": 8e-05, "loss": 1.8408, "step": 3810 }, { "epoch": 0.6506744066928462, "grad_norm": 0.457670122385025, "learning_rate": 8e-05, "loss": 1.5231, "step": 3811 }, { "epoch": 0.6508451425644528, "grad_norm": 0.48328909277915955, "learning_rate": 8e-05, "loss": 1.7441, "step": 3812 }, { "epoch": 0.6510158784360595, "grad_norm": 0.4983653128147125, "learning_rate": 8e-05, "loss": 1.7976, "step": 3813 }, { "epoch": 0.651186614307666, "grad_norm": 0.5430862903594971, "learning_rate": 8e-05, "loss": 1.8604, "step": 3814 }, { "epoch": 0.6513573501792727, "grad_norm": 0.49610471725463867, "learning_rate": 8e-05, "loss": 1.7555, "step": 3815 }, { "epoch": 0.6515280860508793, "grad_norm": 0.4694805145263672, "learning_rate": 8e-05, "loss": 1.5948, "step": 3816 }, { "epoch": 0.651698821922486, "grad_norm": 0.4743117094039917, "learning_rate": 8e-05, "loss": 1.6952, "step": 3817 }, { "epoch": 0.6518695577940925, "grad_norm": 0.4504849910736084, "learning_rate": 8e-05, "loss": 1.59, "step": 3818 }, { "epoch": 0.6520402936656992, "grad_norm": 0.4381217062473297, "learning_rate": 8e-05, "loss": 1.6459, "step": 3819 }, { "epoch": 0.6522110295373058, "grad_norm": 0.45864424109458923, "learning_rate": 8e-05, "loss": 1.749, "step": 3820 }, { "epoch": 0.6523817654089125, "grad_norm": 0.47822123765945435, "learning_rate": 8e-05, "loss": 1.7469, "step": 3821 }, { "epoch": 0.652552501280519, "grad_norm": 0.5333239436149597, "learning_rate": 8e-05, "loss": 1.8359, "step": 3822 }, { "epoch": 0.6527232371521257, "grad_norm": 0.49493059515953064, "learning_rate": 8e-05, "loss": 1.5066, "step": 3823 }, { "epoch": 0.6528939730237323, "grad_norm": 0.5050250291824341, "learning_rate": 8e-05, "loss": 1.7872, "step": 3824 }, { "epoch": 0.653064708895339, "grad_norm": 0.4938141703605652, "learning_rate": 8e-05, "loss": 1.8464, "step": 3825 }, { "epoch": 0.6532354447669455, "grad_norm": 0.453774094581604, "learning_rate": 8e-05, "loss": 1.695, "step": 3826 }, { "epoch": 0.6534061806385522, "grad_norm": 0.4846480190753937, "learning_rate": 8e-05, "loss": 1.6248, "step": 3827 }, { "epoch": 0.6535769165101588, "grad_norm": 0.49256888031959534, "learning_rate": 8e-05, "loss": 1.6065, "step": 3828 }, { "epoch": 0.6537476523817654, "grad_norm": 0.44941678643226624, "learning_rate": 8e-05, "loss": 1.5716, "step": 3829 }, { "epoch": 0.653918388253372, "grad_norm": 0.46591243147850037, "learning_rate": 8e-05, "loss": 1.7491, "step": 3830 }, { "epoch": 0.6540891241249787, "grad_norm": 0.46941256523132324, "learning_rate": 8e-05, "loss": 1.6867, "step": 3831 }, { "epoch": 0.6542598599965853, "grad_norm": 0.5115786790847778, "learning_rate": 8e-05, "loss": 1.7441, "step": 3832 }, { "epoch": 0.6544305958681919, "grad_norm": 0.48724931478500366, "learning_rate": 8e-05, "loss": 1.641, "step": 3833 }, { "epoch": 0.6546013317397985, "grad_norm": 0.48608729243278503, "learning_rate": 8e-05, "loss": 1.7085, "step": 3834 }, { "epoch": 0.6547720676114052, "grad_norm": 0.4888821840286255, "learning_rate": 8e-05, "loss": 1.7431, "step": 3835 }, { "epoch": 0.6549428034830118, "grad_norm": 0.46289941668510437, "learning_rate": 8e-05, "loss": 1.6096, "step": 3836 }, { "epoch": 0.6551135393546184, "grad_norm": 0.5026370882987976, "learning_rate": 8e-05, "loss": 1.9346, "step": 3837 }, { "epoch": 0.655284275226225, "grad_norm": 0.4707992672920227, "learning_rate": 8e-05, "loss": 1.7476, "step": 3838 }, { "epoch": 0.6554550110978317, "grad_norm": 0.46912306547164917, "learning_rate": 8e-05, "loss": 1.5837, "step": 3839 }, { "epoch": 0.6556257469694383, "grad_norm": 0.4804471433162689, "learning_rate": 8e-05, "loss": 1.7008, "step": 3840 }, { "epoch": 0.6557964828410449, "grad_norm": 0.4996829926967621, "learning_rate": 8e-05, "loss": 1.8582, "step": 3841 }, { "epoch": 0.6559672187126515, "grad_norm": 0.5130153298377991, "learning_rate": 8e-05, "loss": 1.6819, "step": 3842 }, { "epoch": 0.6561379545842582, "grad_norm": 0.46501246094703674, "learning_rate": 8e-05, "loss": 1.5843, "step": 3843 }, { "epoch": 0.6563086904558648, "grad_norm": 0.4744492173194885, "learning_rate": 8e-05, "loss": 1.7893, "step": 3844 }, { "epoch": 0.6564794263274714, "grad_norm": 0.4547916650772095, "learning_rate": 8e-05, "loss": 1.643, "step": 3845 }, { "epoch": 0.656650162199078, "grad_norm": 0.4895142614841461, "learning_rate": 8e-05, "loss": 1.5183, "step": 3846 }, { "epoch": 0.6568208980706847, "grad_norm": 0.47048553824424744, "learning_rate": 8e-05, "loss": 1.6741, "step": 3847 }, { "epoch": 0.6569916339422913, "grad_norm": 0.5102158784866333, "learning_rate": 8e-05, "loss": 1.8225, "step": 3848 }, { "epoch": 0.6571623698138979, "grad_norm": 0.48490479588508606, "learning_rate": 8e-05, "loss": 1.8506, "step": 3849 }, { "epoch": 0.6573331056855045, "grad_norm": 0.49866795539855957, "learning_rate": 8e-05, "loss": 1.7034, "step": 3850 }, { "epoch": 0.6575038415571112, "grad_norm": 0.5064531564712524, "learning_rate": 8e-05, "loss": 1.7037, "step": 3851 }, { "epoch": 0.6576745774287178, "grad_norm": 0.5070176124572754, "learning_rate": 8e-05, "loss": 1.8874, "step": 3852 }, { "epoch": 0.6578453133003244, "grad_norm": 0.4912669062614441, "learning_rate": 8e-05, "loss": 1.7445, "step": 3853 }, { "epoch": 0.658016049171931, "grad_norm": 0.4661029279232025, "learning_rate": 8e-05, "loss": 1.7227, "step": 3854 }, { "epoch": 0.6581867850435377, "grad_norm": 0.48735588788986206, "learning_rate": 8e-05, "loss": 1.68, "step": 3855 }, { "epoch": 0.6583575209151443, "grad_norm": 0.5316823720932007, "learning_rate": 8e-05, "loss": 1.7059, "step": 3856 }, { "epoch": 0.6585282567867509, "grad_norm": 0.5437746644020081, "learning_rate": 8e-05, "loss": 1.8628, "step": 3857 }, { "epoch": 0.6586989926583575, "grad_norm": 0.5427407026290894, "learning_rate": 8e-05, "loss": 1.637, "step": 3858 }, { "epoch": 0.6588697285299642, "grad_norm": 0.4925040304660797, "learning_rate": 8e-05, "loss": 1.5003, "step": 3859 }, { "epoch": 0.6590404644015708, "grad_norm": 0.49911487102508545, "learning_rate": 8e-05, "loss": 1.8496, "step": 3860 }, { "epoch": 0.6592112002731774, "grad_norm": 0.4511476457118988, "learning_rate": 8e-05, "loss": 1.5936, "step": 3861 }, { "epoch": 0.659381936144784, "grad_norm": 0.4936142861843109, "learning_rate": 8e-05, "loss": 1.6378, "step": 3862 }, { "epoch": 0.6595526720163907, "grad_norm": 0.5642780065536499, "learning_rate": 8e-05, "loss": 1.7675, "step": 3863 }, { "epoch": 0.6597234078879972, "grad_norm": 0.4943787753582001, "learning_rate": 8e-05, "loss": 1.6556, "step": 3864 }, { "epoch": 0.6598941437596039, "grad_norm": 0.4690496325492859, "learning_rate": 8e-05, "loss": 1.6514, "step": 3865 }, { "epoch": 0.6600648796312105, "grad_norm": 0.4904584288597107, "learning_rate": 8e-05, "loss": 1.7972, "step": 3866 }, { "epoch": 0.6602356155028172, "grad_norm": 0.45764702558517456, "learning_rate": 8e-05, "loss": 1.8645, "step": 3867 }, { "epoch": 0.6604063513744237, "grad_norm": 0.45227083563804626, "learning_rate": 8e-05, "loss": 1.5008, "step": 3868 }, { "epoch": 0.6605770872460304, "grad_norm": 0.5463624596595764, "learning_rate": 8e-05, "loss": 1.7055, "step": 3869 }, { "epoch": 0.660747823117637, "grad_norm": 0.475459486246109, "learning_rate": 8e-05, "loss": 1.6366, "step": 3870 }, { "epoch": 0.6609185589892437, "grad_norm": 0.4910357594490051, "learning_rate": 8e-05, "loss": 1.726, "step": 3871 }, { "epoch": 0.6610892948608502, "grad_norm": 0.45290350914001465, "learning_rate": 8e-05, "loss": 1.5814, "step": 3872 }, { "epoch": 0.6612600307324569, "grad_norm": 0.4737846851348877, "learning_rate": 8e-05, "loss": 1.7589, "step": 3873 }, { "epoch": 0.6614307666040635, "grad_norm": 0.46012622117996216, "learning_rate": 8e-05, "loss": 1.7236, "step": 3874 }, { "epoch": 0.6616015024756702, "grad_norm": 0.49739697575569153, "learning_rate": 8e-05, "loss": 1.6926, "step": 3875 }, { "epoch": 0.6617722383472767, "grad_norm": 0.4672851264476776, "learning_rate": 8e-05, "loss": 1.6764, "step": 3876 }, { "epoch": 0.6619429742188834, "grad_norm": 0.488751083612442, "learning_rate": 8e-05, "loss": 1.7731, "step": 3877 }, { "epoch": 0.66211371009049, "grad_norm": 0.5382850766181946, "learning_rate": 8e-05, "loss": 1.9044, "step": 3878 }, { "epoch": 0.6622844459620967, "grad_norm": 0.46834439039230347, "learning_rate": 8e-05, "loss": 1.7082, "step": 3879 }, { "epoch": 0.6624551818337032, "grad_norm": 0.5069261789321899, "learning_rate": 8e-05, "loss": 1.6736, "step": 3880 }, { "epoch": 0.6626259177053099, "grad_norm": 0.48261746764183044, "learning_rate": 8e-05, "loss": 1.5392, "step": 3881 }, { "epoch": 0.6627966535769165, "grad_norm": 0.5037386417388916, "learning_rate": 8e-05, "loss": 1.691, "step": 3882 }, { "epoch": 0.6629673894485232, "grad_norm": 0.46479156613349915, "learning_rate": 8e-05, "loss": 1.6805, "step": 3883 }, { "epoch": 0.6631381253201297, "grad_norm": 0.4809530973434448, "learning_rate": 8e-05, "loss": 1.7808, "step": 3884 }, { "epoch": 0.6633088611917364, "grad_norm": 0.48075586557388306, "learning_rate": 8e-05, "loss": 1.7447, "step": 3885 }, { "epoch": 0.663479597063343, "grad_norm": 0.4745989143848419, "learning_rate": 8e-05, "loss": 1.5743, "step": 3886 }, { "epoch": 0.6636503329349497, "grad_norm": 0.47843050956726074, "learning_rate": 8e-05, "loss": 1.789, "step": 3887 }, { "epoch": 0.6638210688065562, "grad_norm": 0.4789399206638336, "learning_rate": 8e-05, "loss": 1.6762, "step": 3888 }, { "epoch": 0.6639918046781629, "grad_norm": 0.4758577346801758, "learning_rate": 8e-05, "loss": 1.5357, "step": 3889 }, { "epoch": 0.6641625405497695, "grad_norm": 0.4852323532104492, "learning_rate": 8e-05, "loss": 1.4659, "step": 3890 }, { "epoch": 0.6643332764213762, "grad_norm": 0.4865403175354004, "learning_rate": 8e-05, "loss": 1.6629, "step": 3891 }, { "epoch": 0.6645040122929827, "grad_norm": 0.49600982666015625, "learning_rate": 8e-05, "loss": 1.6863, "step": 3892 }, { "epoch": 0.6646747481645894, "grad_norm": 0.4787088632583618, "learning_rate": 8e-05, "loss": 1.5799, "step": 3893 }, { "epoch": 0.664845484036196, "grad_norm": 0.4891430735588074, "learning_rate": 8e-05, "loss": 1.75, "step": 3894 }, { "epoch": 0.6650162199078026, "grad_norm": 0.48326346278190613, "learning_rate": 8e-05, "loss": 1.6897, "step": 3895 }, { "epoch": 0.6651869557794092, "grad_norm": 0.5034224390983582, "learning_rate": 8e-05, "loss": 1.7285, "step": 3896 }, { "epoch": 0.6653576916510159, "grad_norm": 0.5236946940422058, "learning_rate": 8e-05, "loss": 1.7834, "step": 3897 }, { "epoch": 0.6655284275226225, "grad_norm": 0.4559002220630646, "learning_rate": 8e-05, "loss": 1.581, "step": 3898 }, { "epoch": 0.6656991633942291, "grad_norm": 0.5220513343811035, "learning_rate": 8e-05, "loss": 1.6984, "step": 3899 }, { "epoch": 0.6658698992658357, "grad_norm": 0.481773316860199, "learning_rate": 8e-05, "loss": 1.7012, "step": 3900 }, { "epoch": 0.6660406351374424, "grad_norm": 0.45318281650543213, "learning_rate": 8e-05, "loss": 1.4997, "step": 3901 }, { "epoch": 0.666211371009049, "grad_norm": 0.5659430623054504, "learning_rate": 8e-05, "loss": 1.7745, "step": 3902 }, { "epoch": 0.6663821068806556, "grad_norm": 0.4941857159137726, "learning_rate": 8e-05, "loss": 1.6544, "step": 3903 }, { "epoch": 0.6665528427522622, "grad_norm": 0.4649876058101654, "learning_rate": 8e-05, "loss": 1.5903, "step": 3904 }, { "epoch": 0.6667235786238689, "grad_norm": 0.4935015141963959, "learning_rate": 8e-05, "loss": 1.8103, "step": 3905 }, { "epoch": 0.6668943144954755, "grad_norm": 0.4570172429084778, "learning_rate": 8e-05, "loss": 1.7232, "step": 3906 }, { "epoch": 0.6670650503670821, "grad_norm": 0.47966697812080383, "learning_rate": 8e-05, "loss": 1.6653, "step": 3907 }, { "epoch": 0.6672357862386887, "grad_norm": 0.47747674584388733, "learning_rate": 8e-05, "loss": 1.6683, "step": 3908 }, { "epoch": 0.6674065221102954, "grad_norm": 0.5179832577705383, "learning_rate": 8e-05, "loss": 1.5488, "step": 3909 }, { "epoch": 0.667577257981902, "grad_norm": 0.47368350625038147, "learning_rate": 8e-05, "loss": 1.6015, "step": 3910 }, { "epoch": 0.6677479938535086, "grad_norm": 0.49542635679244995, "learning_rate": 8e-05, "loss": 1.779, "step": 3911 }, { "epoch": 0.6679187297251152, "grad_norm": 0.4729382395744324, "learning_rate": 8e-05, "loss": 1.636, "step": 3912 }, { "epoch": 0.6680894655967219, "grad_norm": 0.5171646475791931, "learning_rate": 8e-05, "loss": 1.9235, "step": 3913 }, { "epoch": 0.6682602014683285, "grad_norm": 0.4831501543521881, "learning_rate": 8e-05, "loss": 1.4689, "step": 3914 }, { "epoch": 0.6684309373399351, "grad_norm": 0.46877017617225647, "learning_rate": 8e-05, "loss": 1.5652, "step": 3915 }, { "epoch": 0.6686016732115417, "grad_norm": 0.5162116289138794, "learning_rate": 8e-05, "loss": 1.5926, "step": 3916 }, { "epoch": 0.6687724090831484, "grad_norm": 0.4828939437866211, "learning_rate": 8e-05, "loss": 1.7265, "step": 3917 }, { "epoch": 0.668943144954755, "grad_norm": 0.4734604060649872, "learning_rate": 8e-05, "loss": 1.6638, "step": 3918 }, { "epoch": 0.6691138808263616, "grad_norm": 0.48475468158721924, "learning_rate": 8e-05, "loss": 1.6779, "step": 3919 }, { "epoch": 0.6692846166979682, "grad_norm": 0.485553503036499, "learning_rate": 8e-05, "loss": 1.5036, "step": 3920 }, { "epoch": 0.6694553525695749, "grad_norm": 0.48615583777427673, "learning_rate": 8e-05, "loss": 1.6699, "step": 3921 }, { "epoch": 0.6696260884411815, "grad_norm": 0.5014272332191467, "learning_rate": 8e-05, "loss": 1.5438, "step": 3922 }, { "epoch": 0.6697968243127881, "grad_norm": 0.48985618352890015, "learning_rate": 8e-05, "loss": 1.7172, "step": 3923 }, { "epoch": 0.6699675601843947, "grad_norm": 0.4539106786251068, "learning_rate": 8e-05, "loss": 1.6829, "step": 3924 }, { "epoch": 0.6701382960560014, "grad_norm": 0.4919623136520386, "learning_rate": 8e-05, "loss": 1.7579, "step": 3925 }, { "epoch": 0.670309031927608, "grad_norm": 0.46310651302337646, "learning_rate": 8e-05, "loss": 1.6669, "step": 3926 }, { "epoch": 0.6704797677992146, "grad_norm": 0.45655184984207153, "learning_rate": 8e-05, "loss": 1.6042, "step": 3927 }, { "epoch": 0.6706505036708212, "grad_norm": 0.5068048238754272, "learning_rate": 8e-05, "loss": 1.716, "step": 3928 }, { "epoch": 0.6708212395424279, "grad_norm": 0.5122467875480652, "learning_rate": 8e-05, "loss": 2.0974, "step": 3929 }, { "epoch": 0.6709919754140344, "grad_norm": 0.5015684962272644, "learning_rate": 8e-05, "loss": 1.8505, "step": 3930 }, { "epoch": 0.6711627112856411, "grad_norm": 0.4766533672809601, "learning_rate": 8e-05, "loss": 1.696, "step": 3931 }, { "epoch": 0.6713334471572477, "grad_norm": 0.5015007257461548, "learning_rate": 8e-05, "loss": 1.8038, "step": 3932 }, { "epoch": 0.6715041830288544, "grad_norm": 0.48028895258903503, "learning_rate": 8e-05, "loss": 1.7161, "step": 3933 }, { "epoch": 0.6716749189004609, "grad_norm": 0.46182796359062195, "learning_rate": 8e-05, "loss": 1.6222, "step": 3934 }, { "epoch": 0.6718456547720676, "grad_norm": 0.4820052981376648, "learning_rate": 8e-05, "loss": 1.7068, "step": 3935 }, { "epoch": 0.6720163906436742, "grad_norm": 0.5070589184761047, "learning_rate": 8e-05, "loss": 1.7168, "step": 3936 }, { "epoch": 0.6721871265152809, "grad_norm": 0.4821137487888336, "learning_rate": 8e-05, "loss": 1.5451, "step": 3937 }, { "epoch": 0.6723578623868874, "grad_norm": 0.5621194839477539, "learning_rate": 8e-05, "loss": 1.623, "step": 3938 }, { "epoch": 0.6725285982584941, "grad_norm": 0.4811907410621643, "learning_rate": 8e-05, "loss": 1.643, "step": 3939 }, { "epoch": 0.6726993341301007, "grad_norm": 0.5503880381584167, "learning_rate": 8e-05, "loss": 1.7706, "step": 3940 }, { "epoch": 0.6728700700017074, "grad_norm": 0.4771977663040161, "learning_rate": 8e-05, "loss": 1.7475, "step": 3941 }, { "epoch": 0.6730408058733139, "grad_norm": 0.46553993225097656, "learning_rate": 8e-05, "loss": 1.7028, "step": 3942 }, { "epoch": 0.6732115417449206, "grad_norm": 0.46167975664138794, "learning_rate": 8e-05, "loss": 1.5597, "step": 3943 }, { "epoch": 0.6733822776165272, "grad_norm": 0.4735279381275177, "learning_rate": 8e-05, "loss": 1.7055, "step": 3944 }, { "epoch": 0.6735530134881339, "grad_norm": 0.492093026638031, "learning_rate": 8e-05, "loss": 1.6681, "step": 3945 }, { "epoch": 0.6737237493597404, "grad_norm": 0.4741564095020294, "learning_rate": 8e-05, "loss": 1.6981, "step": 3946 }, { "epoch": 0.6738944852313471, "grad_norm": 0.4767804443836212, "learning_rate": 8e-05, "loss": 1.7892, "step": 3947 }, { "epoch": 0.6740652211029537, "grad_norm": 0.45200666785240173, "learning_rate": 8e-05, "loss": 1.7149, "step": 3948 }, { "epoch": 0.6742359569745604, "grad_norm": 0.4792502522468567, "learning_rate": 8e-05, "loss": 1.7168, "step": 3949 }, { "epoch": 0.6744066928461669, "grad_norm": 0.5029811859130859, "learning_rate": 8e-05, "loss": 1.6511, "step": 3950 }, { "epoch": 0.6745774287177736, "grad_norm": 0.49014443159103394, "learning_rate": 8e-05, "loss": 1.8379, "step": 3951 }, { "epoch": 0.6747481645893803, "grad_norm": 0.4893297553062439, "learning_rate": 8e-05, "loss": 1.7758, "step": 3952 }, { "epoch": 0.6749189004609869, "grad_norm": 0.47459352016448975, "learning_rate": 8e-05, "loss": 1.7313, "step": 3953 }, { "epoch": 0.6750896363325934, "grad_norm": 0.4993648827075958, "learning_rate": 8e-05, "loss": 1.724, "step": 3954 }, { "epoch": 0.6752603722042001, "grad_norm": 0.5068149566650391, "learning_rate": 8e-05, "loss": 1.6241, "step": 3955 }, { "epoch": 0.6754311080758068, "grad_norm": 0.4566686451435089, "learning_rate": 8e-05, "loss": 1.5947, "step": 3956 }, { "epoch": 0.6756018439474134, "grad_norm": 0.4725482761859894, "learning_rate": 8e-05, "loss": 1.6421, "step": 3957 }, { "epoch": 0.67577257981902, "grad_norm": 0.4537023901939392, "learning_rate": 8e-05, "loss": 1.4854, "step": 3958 }, { "epoch": 0.6759433156906266, "grad_norm": 0.5467269420623779, "learning_rate": 8e-05, "loss": 1.991, "step": 3959 }, { "epoch": 0.6761140515622333, "grad_norm": 0.4598747789859772, "learning_rate": 8e-05, "loss": 1.6658, "step": 3960 }, { "epoch": 0.6762847874338398, "grad_norm": 0.49530029296875, "learning_rate": 8e-05, "loss": 1.6673, "step": 3961 }, { "epoch": 0.6764555233054464, "grad_norm": 0.48978757858276367, "learning_rate": 8e-05, "loss": 1.772, "step": 3962 }, { "epoch": 0.6766262591770531, "grad_norm": 0.5184027552604675, "learning_rate": 8e-05, "loss": 1.7925, "step": 3963 }, { "epoch": 0.6767969950486598, "grad_norm": 0.46830207109451294, "learning_rate": 8e-05, "loss": 1.5746, "step": 3964 }, { "epoch": 0.6769677309202663, "grad_norm": 0.48851829767227173, "learning_rate": 8e-05, "loss": 1.7215, "step": 3965 }, { "epoch": 0.677138466791873, "grad_norm": 0.4907124638557434, "learning_rate": 8e-05, "loss": 1.6429, "step": 3966 }, { "epoch": 0.6773092026634796, "grad_norm": 0.48890188336372375, "learning_rate": 8e-05, "loss": 1.5739, "step": 3967 }, { "epoch": 0.6774799385350863, "grad_norm": 0.5358402729034424, "learning_rate": 8e-05, "loss": 1.8906, "step": 3968 }, { "epoch": 0.6776506744066928, "grad_norm": 0.5251879096031189, "learning_rate": 8e-05, "loss": 1.7396, "step": 3969 }, { "epoch": 0.6778214102782995, "grad_norm": 0.509303867816925, "learning_rate": 8e-05, "loss": 1.7821, "step": 3970 }, { "epoch": 0.6779921461499061, "grad_norm": 0.4794585406780243, "learning_rate": 8e-05, "loss": 1.597, "step": 3971 }, { "epoch": 0.6781628820215128, "grad_norm": 0.5163155794143677, "learning_rate": 8e-05, "loss": 1.7813, "step": 3972 }, { "epoch": 0.6783336178931193, "grad_norm": 0.4756929278373718, "learning_rate": 8e-05, "loss": 1.7528, "step": 3973 }, { "epoch": 0.678504353764726, "grad_norm": 0.46015840768814087, "learning_rate": 8e-05, "loss": 1.6584, "step": 3974 }, { "epoch": 0.6786750896363326, "grad_norm": 0.5542342066764832, "learning_rate": 8e-05, "loss": 1.841, "step": 3975 }, { "epoch": 0.6788458255079393, "grad_norm": 0.50141441822052, "learning_rate": 8e-05, "loss": 1.7638, "step": 3976 }, { "epoch": 0.6790165613795458, "grad_norm": 0.5100142359733582, "learning_rate": 8e-05, "loss": 1.655, "step": 3977 }, { "epoch": 0.6791872972511525, "grad_norm": 0.5517231225967407, "learning_rate": 8e-05, "loss": 1.7175, "step": 3978 }, { "epoch": 0.6793580331227591, "grad_norm": 0.5517465472221375, "learning_rate": 8e-05, "loss": 1.6634, "step": 3979 }, { "epoch": 0.6795287689943658, "grad_norm": 0.44598180055618286, "learning_rate": 8e-05, "loss": 1.5998, "step": 3980 }, { "epoch": 0.6796995048659723, "grad_norm": 0.4806583821773529, "learning_rate": 8e-05, "loss": 1.846, "step": 3981 }, { "epoch": 0.679870240737579, "grad_norm": 0.5516391396522522, "learning_rate": 8e-05, "loss": 1.7795, "step": 3982 }, { "epoch": 0.6800409766091856, "grad_norm": 0.46361738443374634, "learning_rate": 8e-05, "loss": 1.4153, "step": 3983 }, { "epoch": 0.6802117124807923, "grad_norm": 0.43881461024284363, "learning_rate": 8e-05, "loss": 1.6251, "step": 3984 }, { "epoch": 0.6803824483523988, "grad_norm": 0.500708818435669, "learning_rate": 8e-05, "loss": 1.6396, "step": 3985 }, { "epoch": 0.6805531842240055, "grad_norm": 0.45002925395965576, "learning_rate": 8e-05, "loss": 1.5598, "step": 3986 }, { "epoch": 0.6807239200956121, "grad_norm": 0.5362469553947449, "learning_rate": 8e-05, "loss": 1.8165, "step": 3987 }, { "epoch": 0.6808946559672188, "grad_norm": 0.5136599540710449, "learning_rate": 8e-05, "loss": 1.944, "step": 3988 }, { "epoch": 0.6810653918388253, "grad_norm": 0.505623459815979, "learning_rate": 8e-05, "loss": 1.5555, "step": 3989 }, { "epoch": 0.681236127710432, "grad_norm": 0.5473327040672302, "learning_rate": 8e-05, "loss": 1.6135, "step": 3990 }, { "epoch": 0.6814068635820386, "grad_norm": 0.5523894429206848, "learning_rate": 8e-05, "loss": 1.5946, "step": 3991 }, { "epoch": 0.6815775994536452, "grad_norm": 0.4799540042877197, "learning_rate": 8e-05, "loss": 1.6921, "step": 3992 }, { "epoch": 0.6817483353252518, "grad_norm": 0.4617343842983246, "learning_rate": 8e-05, "loss": 1.7267, "step": 3993 }, { "epoch": 0.6819190711968585, "grad_norm": 0.48491543531417847, "learning_rate": 8e-05, "loss": 1.7849, "step": 3994 }, { "epoch": 0.6820898070684651, "grad_norm": 0.5121499300003052, "learning_rate": 8e-05, "loss": 1.664, "step": 3995 }, { "epoch": 0.6822605429400717, "grad_norm": 0.48943066596984863, "learning_rate": 8e-05, "loss": 1.5368, "step": 3996 }, { "epoch": 0.6824312788116783, "grad_norm": 0.519692599773407, "learning_rate": 8e-05, "loss": 1.8907, "step": 3997 }, { "epoch": 0.682602014683285, "grad_norm": 0.5140798687934875, "learning_rate": 8e-05, "loss": 1.8239, "step": 3998 }, { "epoch": 0.6827727505548916, "grad_norm": 0.5111204385757446, "learning_rate": 8e-05, "loss": 1.6325, "step": 3999 }, { "epoch": 0.6829434864264982, "grad_norm": 0.44358476996421814, "learning_rate": 8e-05, "loss": 1.4001, "step": 4000 }, { "epoch": 0.6831142222981048, "grad_norm": 0.5415552854537964, "learning_rate": 8e-05, "loss": 1.8395, "step": 4001 }, { "epoch": 0.6832849581697115, "grad_norm": 0.5034400224685669, "learning_rate": 8e-05, "loss": 1.6349, "step": 4002 }, { "epoch": 0.6834556940413181, "grad_norm": 0.4550827145576477, "learning_rate": 8e-05, "loss": 1.5195, "step": 4003 }, { "epoch": 0.6836264299129247, "grad_norm": 0.4953053891658783, "learning_rate": 8e-05, "loss": 1.9169, "step": 4004 }, { "epoch": 0.6837971657845313, "grad_norm": 0.5067864656448364, "learning_rate": 8e-05, "loss": 1.7502, "step": 4005 }, { "epoch": 0.683967901656138, "grad_norm": 0.4318784773349762, "learning_rate": 8e-05, "loss": 1.4753, "step": 4006 }, { "epoch": 0.6841386375277446, "grad_norm": 0.45616868138313293, "learning_rate": 8e-05, "loss": 1.7286, "step": 4007 }, { "epoch": 0.6843093733993512, "grad_norm": 0.4859888553619385, "learning_rate": 8e-05, "loss": 1.67, "step": 4008 }, { "epoch": 0.6844801092709578, "grad_norm": 0.4828263223171234, "learning_rate": 8e-05, "loss": 1.6673, "step": 4009 }, { "epoch": 0.6846508451425645, "grad_norm": 0.4951271712779999, "learning_rate": 8e-05, "loss": 1.7989, "step": 4010 }, { "epoch": 0.6848215810141711, "grad_norm": 0.4607667624950409, "learning_rate": 8e-05, "loss": 1.573, "step": 4011 }, { "epoch": 0.6849923168857777, "grad_norm": 0.511167585849762, "learning_rate": 8e-05, "loss": 1.7114, "step": 4012 }, { "epoch": 0.6851630527573843, "grad_norm": 0.5400397777557373, "learning_rate": 8e-05, "loss": 1.8133, "step": 4013 }, { "epoch": 0.685333788628991, "grad_norm": 0.4388158321380615, "learning_rate": 8e-05, "loss": 1.582, "step": 4014 }, { "epoch": 0.6855045245005976, "grad_norm": 0.470877081155777, "learning_rate": 8e-05, "loss": 1.541, "step": 4015 }, { "epoch": 0.6856752603722042, "grad_norm": 0.47242793440818787, "learning_rate": 8e-05, "loss": 1.7101, "step": 4016 }, { "epoch": 0.6858459962438108, "grad_norm": 0.45551586151123047, "learning_rate": 8e-05, "loss": 1.6481, "step": 4017 }, { "epoch": 0.6860167321154175, "grad_norm": 0.4872257709503174, "learning_rate": 8e-05, "loss": 1.7498, "step": 4018 }, { "epoch": 0.6861874679870241, "grad_norm": 0.45437055826187134, "learning_rate": 8e-05, "loss": 1.6318, "step": 4019 }, { "epoch": 0.6863582038586307, "grad_norm": 0.47487688064575195, "learning_rate": 8e-05, "loss": 1.6878, "step": 4020 }, { "epoch": 0.6865289397302373, "grad_norm": 0.49101412296295166, "learning_rate": 8e-05, "loss": 1.7009, "step": 4021 }, { "epoch": 0.686699675601844, "grad_norm": 0.4638598561286926, "learning_rate": 8e-05, "loss": 1.6564, "step": 4022 }, { "epoch": 0.6868704114734506, "grad_norm": 0.49182432889938354, "learning_rate": 8e-05, "loss": 1.6044, "step": 4023 }, { "epoch": 0.6870411473450572, "grad_norm": 0.49122655391693115, "learning_rate": 8e-05, "loss": 1.6549, "step": 4024 }, { "epoch": 0.6872118832166638, "grad_norm": 0.5038841366767883, "learning_rate": 8e-05, "loss": 1.7224, "step": 4025 }, { "epoch": 0.6873826190882705, "grad_norm": 0.47384241223335266, "learning_rate": 8e-05, "loss": 1.7068, "step": 4026 }, { "epoch": 0.687553354959877, "grad_norm": 0.44381532073020935, "learning_rate": 8e-05, "loss": 1.6776, "step": 4027 }, { "epoch": 0.6877240908314837, "grad_norm": 0.48991718888282776, "learning_rate": 8e-05, "loss": 1.6417, "step": 4028 }, { "epoch": 0.6878948267030903, "grad_norm": 0.4686380624771118, "learning_rate": 8e-05, "loss": 1.538, "step": 4029 }, { "epoch": 0.688065562574697, "grad_norm": 0.5000553131103516, "learning_rate": 8e-05, "loss": 1.5784, "step": 4030 }, { "epoch": 0.6882362984463035, "grad_norm": 0.5150755643844604, "learning_rate": 8e-05, "loss": 1.7569, "step": 4031 }, { "epoch": 0.6884070343179102, "grad_norm": 0.48916465044021606, "learning_rate": 8e-05, "loss": 1.7974, "step": 4032 }, { "epoch": 0.6885777701895168, "grad_norm": 0.4451548755168915, "learning_rate": 8e-05, "loss": 1.6192, "step": 4033 }, { "epoch": 0.6887485060611235, "grad_norm": 0.48314929008483887, "learning_rate": 8e-05, "loss": 1.6988, "step": 4034 }, { "epoch": 0.68891924193273, "grad_norm": 0.4716232419013977, "learning_rate": 8e-05, "loss": 1.6816, "step": 4035 }, { "epoch": 0.6890899778043367, "grad_norm": 0.4618166983127594, "learning_rate": 8e-05, "loss": 1.802, "step": 4036 }, { "epoch": 0.6892607136759433, "grad_norm": 0.4644584655761719, "learning_rate": 8e-05, "loss": 1.4851, "step": 4037 }, { "epoch": 0.68943144954755, "grad_norm": 0.526676595211029, "learning_rate": 8e-05, "loss": 1.7422, "step": 4038 }, { "epoch": 0.6896021854191565, "grad_norm": 0.4695208668708801, "learning_rate": 8e-05, "loss": 1.6499, "step": 4039 }, { "epoch": 0.6897729212907632, "grad_norm": 0.49057114124298096, "learning_rate": 8e-05, "loss": 1.8483, "step": 4040 }, { "epoch": 0.6899436571623698, "grad_norm": 0.4940665662288666, "learning_rate": 8e-05, "loss": 1.6428, "step": 4041 }, { "epoch": 0.6901143930339765, "grad_norm": 0.49745145440101624, "learning_rate": 8e-05, "loss": 1.7224, "step": 4042 }, { "epoch": 0.690285128905583, "grad_norm": 0.4569994807243347, "learning_rate": 8e-05, "loss": 1.5602, "step": 4043 }, { "epoch": 0.6904558647771897, "grad_norm": 0.4786336421966553, "learning_rate": 8e-05, "loss": 1.9573, "step": 4044 }, { "epoch": 0.6906266006487963, "grad_norm": 0.45453810691833496, "learning_rate": 8e-05, "loss": 1.6419, "step": 4045 }, { "epoch": 0.690797336520403, "grad_norm": 0.45445889234542847, "learning_rate": 8e-05, "loss": 1.7783, "step": 4046 }, { "epoch": 0.6909680723920095, "grad_norm": 0.4863836467266083, "learning_rate": 8e-05, "loss": 1.6965, "step": 4047 }, { "epoch": 0.6911388082636162, "grad_norm": 0.508526623249054, "learning_rate": 8e-05, "loss": 1.7461, "step": 4048 }, { "epoch": 0.6913095441352228, "grad_norm": 0.5083319544792175, "learning_rate": 8e-05, "loss": 1.9235, "step": 4049 }, { "epoch": 0.6914802800068295, "grad_norm": 0.4780542254447937, "learning_rate": 8e-05, "loss": 1.7292, "step": 4050 }, { "epoch": 0.691651015878436, "grad_norm": 0.5074155926704407, "learning_rate": 8e-05, "loss": 2.0181, "step": 4051 }, { "epoch": 0.6918217517500427, "grad_norm": 0.47881612181663513, "learning_rate": 8e-05, "loss": 1.5391, "step": 4052 }, { "epoch": 0.6919924876216493, "grad_norm": 0.5006853342056274, "learning_rate": 8e-05, "loss": 1.7699, "step": 4053 }, { "epoch": 0.692163223493256, "grad_norm": 0.4777608811855316, "learning_rate": 8e-05, "loss": 1.6058, "step": 4054 }, { "epoch": 0.6923339593648625, "grad_norm": 0.46193727850914, "learning_rate": 8e-05, "loss": 1.6775, "step": 4055 }, { "epoch": 0.6925046952364692, "grad_norm": 0.45877915620803833, "learning_rate": 8e-05, "loss": 1.5091, "step": 4056 }, { "epoch": 0.6926754311080758, "grad_norm": 0.5112825632095337, "learning_rate": 8e-05, "loss": 1.9928, "step": 4057 }, { "epoch": 0.6928461669796824, "grad_norm": 0.47346848249435425, "learning_rate": 8e-05, "loss": 1.7309, "step": 4058 }, { "epoch": 0.693016902851289, "grad_norm": 0.4951663017272949, "learning_rate": 8e-05, "loss": 1.6178, "step": 4059 }, { "epoch": 0.6931876387228957, "grad_norm": 0.45764732360839844, "learning_rate": 8e-05, "loss": 1.6049, "step": 4060 }, { "epoch": 0.6933583745945023, "grad_norm": 0.4483105540275574, "learning_rate": 8e-05, "loss": 1.526, "step": 4061 }, { "epoch": 0.6935291104661089, "grad_norm": 0.5433989763259888, "learning_rate": 8e-05, "loss": 1.8073, "step": 4062 }, { "epoch": 0.6936998463377155, "grad_norm": 0.5135810971260071, "learning_rate": 8e-05, "loss": 1.965, "step": 4063 }, { "epoch": 0.6938705822093222, "grad_norm": 0.5160256028175354, "learning_rate": 8e-05, "loss": 1.7295, "step": 4064 }, { "epoch": 0.6940413180809288, "grad_norm": 0.47801482677459717, "learning_rate": 8e-05, "loss": 1.6629, "step": 4065 }, { "epoch": 0.6942120539525354, "grad_norm": 0.44945991039276123, "learning_rate": 8e-05, "loss": 1.5146, "step": 4066 }, { "epoch": 0.694382789824142, "grad_norm": 0.44422340393066406, "learning_rate": 8e-05, "loss": 1.5631, "step": 4067 }, { "epoch": 0.6945535256957487, "grad_norm": 0.5038408637046814, "learning_rate": 8e-05, "loss": 1.7028, "step": 4068 }, { "epoch": 0.6947242615673553, "grad_norm": 0.49982747435569763, "learning_rate": 8e-05, "loss": 1.8633, "step": 4069 }, { "epoch": 0.6948949974389619, "grad_norm": 0.5272482633590698, "learning_rate": 8e-05, "loss": 1.8911, "step": 4070 }, { "epoch": 0.6950657333105685, "grad_norm": 0.4715387225151062, "learning_rate": 8e-05, "loss": 1.6681, "step": 4071 }, { "epoch": 0.6952364691821752, "grad_norm": 0.4834906756877899, "learning_rate": 8e-05, "loss": 1.5387, "step": 4072 }, { "epoch": 0.6954072050537818, "grad_norm": 0.4914827346801758, "learning_rate": 8e-05, "loss": 1.6873, "step": 4073 }, { "epoch": 0.6955779409253884, "grad_norm": 0.5136581063270569, "learning_rate": 8e-05, "loss": 1.8082, "step": 4074 }, { "epoch": 0.695748676796995, "grad_norm": 0.45814692974090576, "learning_rate": 8e-05, "loss": 1.698, "step": 4075 }, { "epoch": 0.6959194126686017, "grad_norm": 0.5016587376594543, "learning_rate": 8e-05, "loss": 1.5894, "step": 4076 }, { "epoch": 0.6960901485402083, "grad_norm": 0.48511824011802673, "learning_rate": 8e-05, "loss": 1.7642, "step": 4077 }, { "epoch": 0.6962608844118149, "grad_norm": 0.4723215401172638, "learning_rate": 8e-05, "loss": 1.7276, "step": 4078 }, { "epoch": 0.6964316202834215, "grad_norm": 0.4683634638786316, "learning_rate": 8e-05, "loss": 1.4668, "step": 4079 }, { "epoch": 0.6966023561550282, "grad_norm": 0.4243675470352173, "learning_rate": 8e-05, "loss": 1.5457, "step": 4080 }, { "epoch": 0.6967730920266348, "grad_norm": 0.5549269318580627, "learning_rate": 8e-05, "loss": 2.1385, "step": 4081 }, { "epoch": 0.6969438278982414, "grad_norm": 0.4313877522945404, "learning_rate": 8e-05, "loss": 1.5072, "step": 4082 }, { "epoch": 0.697114563769848, "grad_norm": 0.4763609766960144, "learning_rate": 8e-05, "loss": 1.6987, "step": 4083 }, { "epoch": 0.6972852996414547, "grad_norm": 0.49328136444091797, "learning_rate": 8e-05, "loss": 1.8603, "step": 4084 }, { "epoch": 0.6974560355130613, "grad_norm": 0.5166590213775635, "learning_rate": 8e-05, "loss": 1.8351, "step": 4085 }, { "epoch": 0.6976267713846679, "grad_norm": 0.4779660105705261, "learning_rate": 8e-05, "loss": 1.8591, "step": 4086 }, { "epoch": 0.6977975072562745, "grad_norm": 0.49259841442108154, "learning_rate": 8e-05, "loss": 1.5827, "step": 4087 }, { "epoch": 0.6979682431278812, "grad_norm": 0.45382365584373474, "learning_rate": 8e-05, "loss": 1.5843, "step": 4088 }, { "epoch": 0.6981389789994878, "grad_norm": 0.4843275547027588, "learning_rate": 8e-05, "loss": 1.766, "step": 4089 }, { "epoch": 0.6983097148710944, "grad_norm": 0.452282190322876, "learning_rate": 8e-05, "loss": 1.7105, "step": 4090 }, { "epoch": 0.698480450742701, "grad_norm": 0.4902511537075043, "learning_rate": 8e-05, "loss": 1.727, "step": 4091 }, { "epoch": 0.6986511866143077, "grad_norm": 0.4827847182750702, "learning_rate": 8e-05, "loss": 1.7775, "step": 4092 }, { "epoch": 0.6988219224859142, "grad_norm": 0.4556988775730133, "learning_rate": 8e-05, "loss": 1.6483, "step": 4093 }, { "epoch": 0.6989926583575209, "grad_norm": 0.475120484828949, "learning_rate": 8e-05, "loss": 1.6712, "step": 4094 }, { "epoch": 0.6991633942291275, "grad_norm": 0.5771563649177551, "learning_rate": 8e-05, "loss": 2.0069, "step": 4095 }, { "epoch": 0.6993341301007342, "grad_norm": 0.4943787157535553, "learning_rate": 8e-05, "loss": 1.7866, "step": 4096 }, { "epoch": 0.6995048659723407, "grad_norm": 0.5327185392379761, "learning_rate": 8e-05, "loss": 1.6653, "step": 4097 }, { "epoch": 0.6996756018439474, "grad_norm": 0.46472957730293274, "learning_rate": 8e-05, "loss": 1.6345, "step": 4098 }, { "epoch": 0.699846337715554, "grad_norm": 0.5025299191474915, "learning_rate": 8e-05, "loss": 1.8962, "step": 4099 }, { "epoch": 0.7000170735871607, "grad_norm": 0.4827367961406708, "learning_rate": 8e-05, "loss": 1.724, "step": 4100 }, { "epoch": 0.7001878094587672, "grad_norm": 0.4874448776245117, "learning_rate": 8e-05, "loss": 1.8347, "step": 4101 }, { "epoch": 0.7003585453303739, "grad_norm": 0.4899786710739136, "learning_rate": 8e-05, "loss": 1.7968, "step": 4102 }, { "epoch": 0.7005292812019805, "grad_norm": 0.4832093119621277, "learning_rate": 8e-05, "loss": 1.7238, "step": 4103 }, { "epoch": 0.7007000170735872, "grad_norm": 0.4744323790073395, "learning_rate": 8e-05, "loss": 1.4796, "step": 4104 }, { "epoch": 0.7008707529451937, "grad_norm": 0.48225879669189453, "learning_rate": 8e-05, "loss": 1.6693, "step": 4105 }, { "epoch": 0.7010414888168004, "grad_norm": 0.451951801776886, "learning_rate": 8e-05, "loss": 1.663, "step": 4106 }, { "epoch": 0.701212224688407, "grad_norm": 0.5025045275688171, "learning_rate": 8e-05, "loss": 1.6565, "step": 4107 }, { "epoch": 0.7013829605600137, "grad_norm": 0.44342392683029175, "learning_rate": 8e-05, "loss": 1.5711, "step": 4108 }, { "epoch": 0.7015536964316202, "grad_norm": 0.47415539622306824, "learning_rate": 8e-05, "loss": 1.5549, "step": 4109 }, { "epoch": 0.7017244323032269, "grad_norm": 0.45175883173942566, "learning_rate": 8e-05, "loss": 1.5598, "step": 4110 }, { "epoch": 0.7018951681748336, "grad_norm": 0.47027891874313354, "learning_rate": 8e-05, "loss": 1.7262, "step": 4111 }, { "epoch": 0.7020659040464402, "grad_norm": 0.5337509512901306, "learning_rate": 8e-05, "loss": 1.5907, "step": 4112 }, { "epoch": 0.7022366399180467, "grad_norm": 0.46237051486968994, "learning_rate": 8e-05, "loss": 1.5432, "step": 4113 }, { "epoch": 0.7024073757896534, "grad_norm": 0.47749003767967224, "learning_rate": 8e-05, "loss": 1.6948, "step": 4114 }, { "epoch": 0.70257811166126, "grad_norm": 0.45590439438819885, "learning_rate": 8e-05, "loss": 1.7777, "step": 4115 }, { "epoch": 0.7027488475328667, "grad_norm": 0.47833240032196045, "learning_rate": 8e-05, "loss": 1.6277, "step": 4116 }, { "epoch": 0.7029195834044732, "grad_norm": 0.4899686872959137, "learning_rate": 8e-05, "loss": 1.5988, "step": 4117 }, { "epoch": 0.7030903192760799, "grad_norm": 0.5294775366783142, "learning_rate": 8e-05, "loss": 1.8073, "step": 4118 }, { "epoch": 0.7032610551476866, "grad_norm": 0.4870297312736511, "learning_rate": 8e-05, "loss": 1.6761, "step": 4119 }, { "epoch": 0.7034317910192932, "grad_norm": 0.4678928554058075, "learning_rate": 8e-05, "loss": 1.5685, "step": 4120 }, { "epoch": 0.7036025268908997, "grad_norm": 0.4708285927772522, "learning_rate": 8e-05, "loss": 1.7143, "step": 4121 }, { "epoch": 0.7037732627625064, "grad_norm": 0.4589964747428894, "learning_rate": 8e-05, "loss": 1.7483, "step": 4122 }, { "epoch": 0.703943998634113, "grad_norm": 0.48900508880615234, "learning_rate": 8e-05, "loss": 1.6196, "step": 4123 }, { "epoch": 0.7041147345057196, "grad_norm": 0.4771624803543091, "learning_rate": 8e-05, "loss": 1.7272, "step": 4124 }, { "epoch": 0.7042854703773263, "grad_norm": 0.5305108428001404, "learning_rate": 8e-05, "loss": 1.7905, "step": 4125 }, { "epoch": 0.7044562062489329, "grad_norm": 0.47745418548583984, "learning_rate": 8e-05, "loss": 1.7149, "step": 4126 }, { "epoch": 0.7046269421205396, "grad_norm": 0.5144758224487305, "learning_rate": 8e-05, "loss": 1.6855, "step": 4127 }, { "epoch": 0.7047976779921461, "grad_norm": 0.5947960615158081, "learning_rate": 8e-05, "loss": 1.6492, "step": 4128 }, { "epoch": 0.7049684138637528, "grad_norm": 0.4793091416358948, "learning_rate": 8e-05, "loss": 1.7697, "step": 4129 }, { "epoch": 0.7051391497353594, "grad_norm": 0.4813482463359833, "learning_rate": 8e-05, "loss": 1.8484, "step": 4130 }, { "epoch": 0.7053098856069661, "grad_norm": 0.470411479473114, "learning_rate": 8e-05, "loss": 1.6819, "step": 4131 }, { "epoch": 0.7054806214785726, "grad_norm": 0.5147767663002014, "learning_rate": 8e-05, "loss": 1.8699, "step": 4132 }, { "epoch": 0.7056513573501793, "grad_norm": 0.47108837962150574, "learning_rate": 8e-05, "loss": 1.6011, "step": 4133 }, { "epoch": 0.7058220932217859, "grad_norm": 0.5074462294578552, "learning_rate": 8e-05, "loss": 1.7089, "step": 4134 }, { "epoch": 0.7059928290933926, "grad_norm": 0.496817946434021, "learning_rate": 8e-05, "loss": 1.6883, "step": 4135 }, { "epoch": 0.7061635649649991, "grad_norm": 0.4844934344291687, "learning_rate": 8e-05, "loss": 1.715, "step": 4136 }, { "epoch": 0.7063343008366058, "grad_norm": 0.4868895709514618, "learning_rate": 8e-05, "loss": 1.772, "step": 4137 }, { "epoch": 0.7065050367082124, "grad_norm": 0.4809473752975464, "learning_rate": 8e-05, "loss": 1.6764, "step": 4138 }, { "epoch": 0.7066757725798191, "grad_norm": 0.494186133146286, "learning_rate": 8e-05, "loss": 1.6941, "step": 4139 }, { "epoch": 0.7068465084514256, "grad_norm": 0.4689372181892395, "learning_rate": 8e-05, "loss": 1.7268, "step": 4140 }, { "epoch": 0.7070172443230323, "grad_norm": 0.5220803022384644, "learning_rate": 8e-05, "loss": 1.9772, "step": 4141 }, { "epoch": 0.7071879801946389, "grad_norm": 0.4411933720111847, "learning_rate": 8e-05, "loss": 1.2594, "step": 4142 }, { "epoch": 0.7073587160662456, "grad_norm": 0.4686204195022583, "learning_rate": 8e-05, "loss": 1.7492, "step": 4143 }, { "epoch": 0.7075294519378521, "grad_norm": 0.4826931953430176, "learning_rate": 8e-05, "loss": 1.7353, "step": 4144 }, { "epoch": 0.7077001878094588, "grad_norm": 0.4677014648914337, "learning_rate": 8e-05, "loss": 1.6071, "step": 4145 }, { "epoch": 0.7078709236810654, "grad_norm": 0.4608941078186035, "learning_rate": 8e-05, "loss": 1.5233, "step": 4146 }, { "epoch": 0.7080416595526721, "grad_norm": 0.45315444469451904, "learning_rate": 8e-05, "loss": 1.6495, "step": 4147 }, { "epoch": 0.7082123954242786, "grad_norm": 0.4958140552043915, "learning_rate": 8e-05, "loss": 1.7246, "step": 4148 }, { "epoch": 0.7083831312958853, "grad_norm": 0.4936577081680298, "learning_rate": 8e-05, "loss": 1.5935, "step": 4149 }, { "epoch": 0.7085538671674919, "grad_norm": 0.5056451559066772, "learning_rate": 8e-05, "loss": 1.7748, "step": 4150 }, { "epoch": 0.7087246030390986, "grad_norm": 0.4667196273803711, "learning_rate": 8e-05, "loss": 1.6678, "step": 4151 }, { "epoch": 0.7088953389107051, "grad_norm": 0.5093197822570801, "learning_rate": 8e-05, "loss": 1.7724, "step": 4152 }, { "epoch": 0.7090660747823118, "grad_norm": 0.47587305307388306, "learning_rate": 8e-05, "loss": 1.548, "step": 4153 }, { "epoch": 0.7092368106539184, "grad_norm": 0.470722496509552, "learning_rate": 8e-05, "loss": 1.6536, "step": 4154 }, { "epoch": 0.7094075465255251, "grad_norm": 0.48239389061927795, "learning_rate": 8e-05, "loss": 1.8035, "step": 4155 }, { "epoch": 0.7095782823971316, "grad_norm": 0.5104386210441589, "learning_rate": 8e-05, "loss": 1.7829, "step": 4156 }, { "epoch": 0.7097490182687383, "grad_norm": 0.5063804388046265, "learning_rate": 8e-05, "loss": 1.7967, "step": 4157 }, { "epoch": 0.7099197541403449, "grad_norm": 0.49566900730133057, "learning_rate": 8e-05, "loss": 1.6866, "step": 4158 }, { "epoch": 0.7100904900119515, "grad_norm": 0.5232430100440979, "learning_rate": 8e-05, "loss": 1.5607, "step": 4159 }, { "epoch": 0.7102612258835581, "grad_norm": 0.5701991319656372, "learning_rate": 8e-05, "loss": 2.0088, "step": 4160 }, { "epoch": 0.7104319617551648, "grad_norm": 0.46724733710289, "learning_rate": 8e-05, "loss": 1.6662, "step": 4161 }, { "epoch": 0.7106026976267714, "grad_norm": 0.47973793745040894, "learning_rate": 8e-05, "loss": 1.6053, "step": 4162 }, { "epoch": 0.710773433498378, "grad_norm": 0.5122907757759094, "learning_rate": 8e-05, "loss": 1.7245, "step": 4163 }, { "epoch": 0.7109441693699846, "grad_norm": 0.4723208546638489, "learning_rate": 8e-05, "loss": 1.7058, "step": 4164 }, { "epoch": 0.7111149052415913, "grad_norm": 0.5048457384109497, "learning_rate": 8e-05, "loss": 1.8108, "step": 4165 }, { "epoch": 0.7112856411131979, "grad_norm": 0.48374509811401367, "learning_rate": 8e-05, "loss": 1.7123, "step": 4166 }, { "epoch": 0.7114563769848045, "grad_norm": 0.5041111707687378, "learning_rate": 8e-05, "loss": 1.709, "step": 4167 }, { "epoch": 0.7116271128564111, "grad_norm": 0.4572782516479492, "learning_rate": 8e-05, "loss": 1.4548, "step": 4168 }, { "epoch": 0.7117978487280178, "grad_norm": 0.4656310975551605, "learning_rate": 8e-05, "loss": 1.639, "step": 4169 }, { "epoch": 0.7119685845996244, "grad_norm": 0.5115905404090881, "learning_rate": 8e-05, "loss": 1.868, "step": 4170 }, { "epoch": 0.712139320471231, "grad_norm": 0.4909868538379669, "learning_rate": 8e-05, "loss": 1.7648, "step": 4171 }, { "epoch": 0.7123100563428376, "grad_norm": 0.4947751760482788, "learning_rate": 8e-05, "loss": 1.6107, "step": 4172 }, { "epoch": 0.7124807922144443, "grad_norm": 0.46595498919487, "learning_rate": 8e-05, "loss": 1.6343, "step": 4173 }, { "epoch": 0.7126515280860509, "grad_norm": 0.460701048374176, "learning_rate": 8e-05, "loss": 1.6139, "step": 4174 }, { "epoch": 0.7128222639576575, "grad_norm": 0.5075146555900574, "learning_rate": 8e-05, "loss": 1.5253, "step": 4175 }, { "epoch": 0.7129929998292641, "grad_norm": 0.5023282170295715, "learning_rate": 8e-05, "loss": 1.7892, "step": 4176 }, { "epoch": 0.7131637357008708, "grad_norm": 0.47536399960517883, "learning_rate": 8e-05, "loss": 1.5813, "step": 4177 }, { "epoch": 0.7133344715724774, "grad_norm": 0.5015875101089478, "learning_rate": 8e-05, "loss": 1.5853, "step": 4178 }, { "epoch": 0.713505207444084, "grad_norm": 0.4623943865299225, "learning_rate": 8e-05, "loss": 1.5792, "step": 4179 }, { "epoch": 0.7136759433156906, "grad_norm": 0.5081355571746826, "learning_rate": 8e-05, "loss": 1.7596, "step": 4180 }, { "epoch": 0.7138466791872973, "grad_norm": 0.4730270802974701, "learning_rate": 8e-05, "loss": 1.7034, "step": 4181 }, { "epoch": 0.7140174150589039, "grad_norm": 0.48214855790138245, "learning_rate": 8e-05, "loss": 1.7426, "step": 4182 }, { "epoch": 0.7141881509305105, "grad_norm": 0.45678335428237915, "learning_rate": 8e-05, "loss": 1.6648, "step": 4183 }, { "epoch": 0.7143588868021171, "grad_norm": 0.5121980905532837, "learning_rate": 8e-05, "loss": 1.7861, "step": 4184 }, { "epoch": 0.7145296226737238, "grad_norm": 0.5034542679786682, "learning_rate": 8e-05, "loss": 1.8681, "step": 4185 }, { "epoch": 0.7147003585453304, "grad_norm": 0.4796687960624695, "learning_rate": 8e-05, "loss": 1.702, "step": 4186 }, { "epoch": 0.714871094416937, "grad_norm": 0.49577537178993225, "learning_rate": 8e-05, "loss": 1.7527, "step": 4187 }, { "epoch": 0.7150418302885436, "grad_norm": 0.46770015358924866, "learning_rate": 8e-05, "loss": 1.6961, "step": 4188 }, { "epoch": 0.7152125661601503, "grad_norm": 0.48463279008865356, "learning_rate": 8e-05, "loss": 1.7665, "step": 4189 }, { "epoch": 0.7153833020317568, "grad_norm": 0.4903533458709717, "learning_rate": 8e-05, "loss": 1.8481, "step": 4190 }, { "epoch": 0.7155540379033635, "grad_norm": 0.47972533106803894, "learning_rate": 8e-05, "loss": 1.7237, "step": 4191 }, { "epoch": 0.7157247737749701, "grad_norm": 0.46631601452827454, "learning_rate": 8e-05, "loss": 1.6074, "step": 4192 }, { "epoch": 0.7158955096465768, "grad_norm": 0.512265145778656, "learning_rate": 8e-05, "loss": 1.7782, "step": 4193 }, { "epoch": 0.7160662455181833, "grad_norm": 0.48795852065086365, "learning_rate": 8e-05, "loss": 1.7284, "step": 4194 }, { "epoch": 0.71623698138979, "grad_norm": 0.4773222506046295, "learning_rate": 8e-05, "loss": 1.6323, "step": 4195 }, { "epoch": 0.7164077172613966, "grad_norm": 0.4561122953891754, "learning_rate": 8e-05, "loss": 1.551, "step": 4196 }, { "epoch": 0.7165784531330033, "grad_norm": 0.5528168082237244, "learning_rate": 8e-05, "loss": 1.983, "step": 4197 }, { "epoch": 0.7167491890046098, "grad_norm": 0.4834960699081421, "learning_rate": 8e-05, "loss": 1.5562, "step": 4198 }, { "epoch": 0.7169199248762165, "grad_norm": 0.5305192470550537, "learning_rate": 8e-05, "loss": 1.905, "step": 4199 }, { "epoch": 0.7170906607478231, "grad_norm": 0.5157716274261475, "learning_rate": 8e-05, "loss": 1.7153, "step": 4200 }, { "epoch": 0.7172613966194298, "grad_norm": 0.4751203954219818, "learning_rate": 8e-05, "loss": 1.4942, "step": 4201 }, { "epoch": 0.7174321324910363, "grad_norm": 0.5476130247116089, "learning_rate": 8e-05, "loss": 1.6425, "step": 4202 }, { "epoch": 0.717602868362643, "grad_norm": 0.4704371690750122, "learning_rate": 8e-05, "loss": 1.7529, "step": 4203 }, { "epoch": 0.7177736042342496, "grad_norm": 0.4489046037197113, "learning_rate": 8e-05, "loss": 1.5545, "step": 4204 }, { "epoch": 0.7179443401058563, "grad_norm": 0.4722289443016052, "learning_rate": 8e-05, "loss": 1.7554, "step": 4205 }, { "epoch": 0.7181150759774628, "grad_norm": 0.44824767112731934, "learning_rate": 8e-05, "loss": 1.7083, "step": 4206 }, { "epoch": 0.7182858118490695, "grad_norm": 0.4859641194343567, "learning_rate": 8e-05, "loss": 1.7988, "step": 4207 }, { "epoch": 0.7184565477206761, "grad_norm": 0.4619232416152954, "learning_rate": 8e-05, "loss": 1.4679, "step": 4208 }, { "epoch": 0.7186272835922828, "grad_norm": 0.47463637590408325, "learning_rate": 8e-05, "loss": 1.6877, "step": 4209 }, { "epoch": 0.7187980194638893, "grad_norm": 0.49557292461395264, "learning_rate": 8e-05, "loss": 1.7084, "step": 4210 }, { "epoch": 0.718968755335496, "grad_norm": 0.5443979501724243, "learning_rate": 8e-05, "loss": 1.7739, "step": 4211 }, { "epoch": 0.7191394912071026, "grad_norm": 0.4961709976196289, "learning_rate": 8e-05, "loss": 1.7735, "step": 4212 }, { "epoch": 0.7193102270787093, "grad_norm": 0.45409631729125977, "learning_rate": 8e-05, "loss": 1.5995, "step": 4213 }, { "epoch": 0.7194809629503158, "grad_norm": 0.4645013213157654, "learning_rate": 8e-05, "loss": 1.6532, "step": 4214 }, { "epoch": 0.7196516988219225, "grad_norm": 0.5128650665283203, "learning_rate": 8e-05, "loss": 1.6566, "step": 4215 }, { "epoch": 0.7198224346935291, "grad_norm": 0.492885947227478, "learning_rate": 8e-05, "loss": 1.5614, "step": 4216 }, { "epoch": 0.7199931705651358, "grad_norm": 0.4543721079826355, "learning_rate": 8e-05, "loss": 1.6094, "step": 4217 }, { "epoch": 0.7201639064367423, "grad_norm": 0.46830686926841736, "learning_rate": 8e-05, "loss": 1.6058, "step": 4218 }, { "epoch": 0.720334642308349, "grad_norm": 0.4601213335990906, "learning_rate": 8e-05, "loss": 1.6165, "step": 4219 }, { "epoch": 0.7205053781799556, "grad_norm": 0.4743505120277405, "learning_rate": 8e-05, "loss": 1.61, "step": 4220 }, { "epoch": 0.7206761140515622, "grad_norm": 0.49741822481155396, "learning_rate": 8e-05, "loss": 1.6801, "step": 4221 }, { "epoch": 0.7208468499231688, "grad_norm": 0.48508670926094055, "learning_rate": 8e-05, "loss": 1.6597, "step": 4222 }, { "epoch": 0.7210175857947755, "grad_norm": 0.5121182799339294, "learning_rate": 8e-05, "loss": 1.7566, "step": 4223 }, { "epoch": 0.7211883216663821, "grad_norm": 0.4673279821872711, "learning_rate": 8e-05, "loss": 1.6889, "step": 4224 }, { "epoch": 0.7213590575379887, "grad_norm": 0.501792311668396, "learning_rate": 8e-05, "loss": 1.6385, "step": 4225 }, { "epoch": 0.7215297934095953, "grad_norm": 0.46602603793144226, "learning_rate": 8e-05, "loss": 1.6274, "step": 4226 }, { "epoch": 0.721700529281202, "grad_norm": 0.4760897755622864, "learning_rate": 8e-05, "loss": 1.7425, "step": 4227 }, { "epoch": 0.7218712651528086, "grad_norm": 0.4779815375804901, "learning_rate": 8e-05, "loss": 1.6553, "step": 4228 }, { "epoch": 0.7220420010244152, "grad_norm": 0.46552911400794983, "learning_rate": 8e-05, "loss": 1.6388, "step": 4229 }, { "epoch": 0.7222127368960218, "grad_norm": 0.5054520964622498, "learning_rate": 8e-05, "loss": 1.4896, "step": 4230 }, { "epoch": 0.7223834727676285, "grad_norm": 0.5198450088500977, "learning_rate": 8e-05, "loss": 1.7847, "step": 4231 }, { "epoch": 0.7225542086392351, "grad_norm": 0.47112971544265747, "learning_rate": 8e-05, "loss": 1.601, "step": 4232 }, { "epoch": 0.7227249445108417, "grad_norm": 0.44683727622032166, "learning_rate": 8e-05, "loss": 1.5824, "step": 4233 }, { "epoch": 0.7228956803824483, "grad_norm": 0.46770212054252625, "learning_rate": 8e-05, "loss": 1.5957, "step": 4234 }, { "epoch": 0.723066416254055, "grad_norm": 0.4896298050880432, "learning_rate": 8e-05, "loss": 1.7418, "step": 4235 }, { "epoch": 0.7232371521256616, "grad_norm": 0.4536491334438324, "learning_rate": 8e-05, "loss": 1.6834, "step": 4236 }, { "epoch": 0.7234078879972682, "grad_norm": 0.4752861261367798, "learning_rate": 8e-05, "loss": 1.7208, "step": 4237 }, { "epoch": 0.7235786238688748, "grad_norm": 0.4466567039489746, "learning_rate": 8e-05, "loss": 1.552, "step": 4238 }, { "epoch": 0.7237493597404815, "grad_norm": 0.5166991353034973, "learning_rate": 8e-05, "loss": 1.6815, "step": 4239 }, { "epoch": 0.7239200956120881, "grad_norm": 0.4849846661090851, "learning_rate": 8e-05, "loss": 1.7267, "step": 4240 }, { "epoch": 0.7240908314836947, "grad_norm": 0.47139859199523926, "learning_rate": 8e-05, "loss": 1.7017, "step": 4241 }, { "epoch": 0.7242615673553013, "grad_norm": 0.5126760601997375, "learning_rate": 8e-05, "loss": 1.6755, "step": 4242 }, { "epoch": 0.724432303226908, "grad_norm": 0.47976475954055786, "learning_rate": 8e-05, "loss": 1.7908, "step": 4243 }, { "epoch": 0.7246030390985146, "grad_norm": 0.5102014541625977, "learning_rate": 8e-05, "loss": 1.5844, "step": 4244 }, { "epoch": 0.7247737749701212, "grad_norm": 0.47990682721138, "learning_rate": 8e-05, "loss": 1.6901, "step": 4245 }, { "epoch": 0.7249445108417278, "grad_norm": 0.49502041935920715, "learning_rate": 8e-05, "loss": 1.7664, "step": 4246 }, { "epoch": 0.7251152467133345, "grad_norm": 0.47887706756591797, "learning_rate": 8e-05, "loss": 1.8842, "step": 4247 }, { "epoch": 0.7252859825849411, "grad_norm": 0.459428608417511, "learning_rate": 8e-05, "loss": 1.7688, "step": 4248 }, { "epoch": 0.7254567184565477, "grad_norm": 0.5027249455451965, "learning_rate": 8e-05, "loss": 1.6617, "step": 4249 }, { "epoch": 0.7256274543281543, "grad_norm": 0.4882931709289551, "learning_rate": 8e-05, "loss": 1.641, "step": 4250 }, { "epoch": 0.725798190199761, "grad_norm": 0.4878803491592407, "learning_rate": 8e-05, "loss": 1.7785, "step": 4251 }, { "epoch": 0.7259689260713676, "grad_norm": 0.49307185411453247, "learning_rate": 8e-05, "loss": 1.8126, "step": 4252 }, { "epoch": 0.7261396619429742, "grad_norm": 0.46029239892959595, "learning_rate": 8e-05, "loss": 1.5076, "step": 4253 }, { "epoch": 0.7263103978145808, "grad_norm": 0.5012288093566895, "learning_rate": 8e-05, "loss": 1.6057, "step": 4254 }, { "epoch": 0.7264811336861875, "grad_norm": 0.47913020849227905, "learning_rate": 8e-05, "loss": 1.6834, "step": 4255 }, { "epoch": 0.726651869557794, "grad_norm": 0.5308405756950378, "learning_rate": 8e-05, "loss": 1.7544, "step": 4256 }, { "epoch": 0.7268226054294007, "grad_norm": 0.46427419781684875, "learning_rate": 8e-05, "loss": 1.5099, "step": 4257 }, { "epoch": 0.7269933413010073, "grad_norm": 0.48122623562812805, "learning_rate": 8e-05, "loss": 1.8763, "step": 4258 }, { "epoch": 0.727164077172614, "grad_norm": 0.4762222170829773, "learning_rate": 8e-05, "loss": 1.8378, "step": 4259 }, { "epoch": 0.7273348130442205, "grad_norm": 0.45296040177345276, "learning_rate": 8e-05, "loss": 1.6918, "step": 4260 }, { "epoch": 0.7275055489158272, "grad_norm": 0.4552997350692749, "learning_rate": 8e-05, "loss": 1.6353, "step": 4261 }, { "epoch": 0.7276762847874338, "grad_norm": 0.5140878558158875, "learning_rate": 8e-05, "loss": 1.9362, "step": 4262 }, { "epoch": 0.7278470206590405, "grad_norm": 0.460858553647995, "learning_rate": 8e-05, "loss": 1.6398, "step": 4263 }, { "epoch": 0.728017756530647, "grad_norm": 0.4986891448497772, "learning_rate": 8e-05, "loss": 1.717, "step": 4264 }, { "epoch": 0.7281884924022537, "grad_norm": 0.4778296947479248, "learning_rate": 8e-05, "loss": 1.7407, "step": 4265 }, { "epoch": 0.7283592282738603, "grad_norm": 0.5058068633079529, "learning_rate": 8e-05, "loss": 1.8721, "step": 4266 }, { "epoch": 0.728529964145467, "grad_norm": 0.45374247431755066, "learning_rate": 8e-05, "loss": 1.6046, "step": 4267 }, { "epoch": 0.7287007000170735, "grad_norm": 0.4710542559623718, "learning_rate": 8e-05, "loss": 1.6071, "step": 4268 }, { "epoch": 0.7288714358886802, "grad_norm": 0.49205100536346436, "learning_rate": 8e-05, "loss": 1.7939, "step": 4269 }, { "epoch": 0.7290421717602869, "grad_norm": 0.47707587480545044, "learning_rate": 8e-05, "loss": 1.7795, "step": 4270 }, { "epoch": 0.7292129076318935, "grad_norm": 0.43678873777389526, "learning_rate": 8e-05, "loss": 1.5815, "step": 4271 }, { "epoch": 0.7293836435035, "grad_norm": 0.47032150626182556, "learning_rate": 8e-05, "loss": 1.6373, "step": 4272 }, { "epoch": 0.7295543793751067, "grad_norm": 0.4882524609565735, "learning_rate": 8e-05, "loss": 1.673, "step": 4273 }, { "epoch": 0.7297251152467134, "grad_norm": 0.47380542755126953, "learning_rate": 8e-05, "loss": 1.6813, "step": 4274 }, { "epoch": 0.72989585111832, "grad_norm": 0.4591231644153595, "learning_rate": 8e-05, "loss": 1.4987, "step": 4275 }, { "epoch": 0.7300665869899265, "grad_norm": 0.4768156111240387, "learning_rate": 8e-05, "loss": 1.757, "step": 4276 }, { "epoch": 0.7302373228615332, "grad_norm": 0.504777729511261, "learning_rate": 8e-05, "loss": 1.7077, "step": 4277 }, { "epoch": 0.7304080587331399, "grad_norm": 0.48445218801498413, "learning_rate": 8e-05, "loss": 1.7887, "step": 4278 }, { "epoch": 0.7305787946047465, "grad_norm": 0.47918400168418884, "learning_rate": 8e-05, "loss": 1.6462, "step": 4279 }, { "epoch": 0.730749530476353, "grad_norm": 0.46769264340400696, "learning_rate": 8e-05, "loss": 1.6347, "step": 4280 }, { "epoch": 0.7309202663479597, "grad_norm": 0.4783823490142822, "learning_rate": 8e-05, "loss": 1.7937, "step": 4281 }, { "epoch": 0.7310910022195664, "grad_norm": 0.4822678565979004, "learning_rate": 8e-05, "loss": 1.8538, "step": 4282 }, { "epoch": 0.731261738091173, "grad_norm": 0.45416203141212463, "learning_rate": 8e-05, "loss": 1.6712, "step": 4283 }, { "epoch": 0.7314324739627796, "grad_norm": 0.5375953316688538, "learning_rate": 8e-05, "loss": 1.7792, "step": 4284 }, { "epoch": 0.7316032098343862, "grad_norm": 0.45842528343200684, "learning_rate": 8e-05, "loss": 1.5286, "step": 4285 }, { "epoch": 0.7317739457059929, "grad_norm": 0.4781549572944641, "learning_rate": 8e-05, "loss": 1.7109, "step": 4286 }, { "epoch": 0.7319446815775994, "grad_norm": 0.4693221151828766, "learning_rate": 8e-05, "loss": 1.6171, "step": 4287 }, { "epoch": 0.732115417449206, "grad_norm": 0.47941577434539795, "learning_rate": 8e-05, "loss": 1.8201, "step": 4288 }, { "epoch": 0.7322861533208127, "grad_norm": 0.5016553997993469, "learning_rate": 8e-05, "loss": 1.791, "step": 4289 }, { "epoch": 0.7324568891924194, "grad_norm": 0.4492838978767395, "learning_rate": 8e-05, "loss": 1.4396, "step": 4290 }, { "epoch": 0.7326276250640259, "grad_norm": 0.4755418598651886, "learning_rate": 8e-05, "loss": 1.6061, "step": 4291 }, { "epoch": 0.7327983609356326, "grad_norm": 0.48798224329948425, "learning_rate": 8e-05, "loss": 1.8319, "step": 4292 }, { "epoch": 0.7329690968072392, "grad_norm": 0.5804129242897034, "learning_rate": 8e-05, "loss": 1.4298, "step": 4293 }, { "epoch": 0.7331398326788459, "grad_norm": 0.4715261459350586, "learning_rate": 8e-05, "loss": 1.4983, "step": 4294 }, { "epoch": 0.7333105685504524, "grad_norm": 0.47278472781181335, "learning_rate": 8e-05, "loss": 1.5751, "step": 4295 }, { "epoch": 0.733481304422059, "grad_norm": 0.48909446597099304, "learning_rate": 8e-05, "loss": 1.521, "step": 4296 }, { "epoch": 0.7336520402936657, "grad_norm": 0.49254339933395386, "learning_rate": 8e-05, "loss": 1.6232, "step": 4297 }, { "epoch": 0.7338227761652724, "grad_norm": 0.4572277069091797, "learning_rate": 8e-05, "loss": 1.4722, "step": 4298 }, { "epoch": 0.7339935120368789, "grad_norm": 0.5114235281944275, "learning_rate": 8e-05, "loss": 1.5789, "step": 4299 }, { "epoch": 0.7341642479084856, "grad_norm": 0.4380512833595276, "learning_rate": 8e-05, "loss": 1.5733, "step": 4300 }, { "epoch": 0.7343349837800922, "grad_norm": 0.45060309767723083, "learning_rate": 8e-05, "loss": 1.6632, "step": 4301 }, { "epoch": 0.7345057196516989, "grad_norm": 0.4747752845287323, "learning_rate": 8e-05, "loss": 1.7885, "step": 4302 }, { "epoch": 0.7346764555233054, "grad_norm": 0.47548025846481323, "learning_rate": 8e-05, "loss": 1.454, "step": 4303 }, { "epoch": 0.7348471913949121, "grad_norm": 0.4610688090324402, "learning_rate": 8e-05, "loss": 1.71, "step": 4304 }, { "epoch": 0.7350179272665187, "grad_norm": 0.4691765606403351, "learning_rate": 8e-05, "loss": 1.693, "step": 4305 }, { "epoch": 0.7351886631381254, "grad_norm": 0.4724983274936676, "learning_rate": 8e-05, "loss": 1.7134, "step": 4306 }, { "epoch": 0.7353593990097319, "grad_norm": 0.5146923065185547, "learning_rate": 8e-05, "loss": 1.7231, "step": 4307 }, { "epoch": 0.7355301348813386, "grad_norm": 0.47671860456466675, "learning_rate": 8e-05, "loss": 1.5547, "step": 4308 }, { "epoch": 0.7357008707529452, "grad_norm": 0.48169052600860596, "learning_rate": 8e-05, "loss": 1.6733, "step": 4309 }, { "epoch": 0.7358716066245519, "grad_norm": 0.47295981645584106, "learning_rate": 8e-05, "loss": 1.5326, "step": 4310 }, { "epoch": 0.7360423424961584, "grad_norm": 0.4934510290622711, "learning_rate": 8e-05, "loss": 1.6348, "step": 4311 }, { "epoch": 0.7362130783677651, "grad_norm": 0.4632241129875183, "learning_rate": 8e-05, "loss": 1.7804, "step": 4312 }, { "epoch": 0.7363838142393717, "grad_norm": 0.4882811903953552, "learning_rate": 8e-05, "loss": 1.6642, "step": 4313 }, { "epoch": 0.7365545501109784, "grad_norm": 0.47313040494918823, "learning_rate": 8e-05, "loss": 1.7818, "step": 4314 }, { "epoch": 0.7367252859825849, "grad_norm": 0.5486581921577454, "learning_rate": 8e-05, "loss": 1.6514, "step": 4315 }, { "epoch": 0.7368960218541916, "grad_norm": 0.47051161527633667, "learning_rate": 8e-05, "loss": 1.5303, "step": 4316 }, { "epoch": 0.7370667577257982, "grad_norm": 0.49160075187683105, "learning_rate": 8e-05, "loss": 1.7633, "step": 4317 }, { "epoch": 0.7372374935974049, "grad_norm": 0.4937325119972229, "learning_rate": 8e-05, "loss": 1.6806, "step": 4318 }, { "epoch": 0.7374082294690114, "grad_norm": 0.43936020135879517, "learning_rate": 8e-05, "loss": 1.5014, "step": 4319 }, { "epoch": 0.7375789653406181, "grad_norm": 0.43613722920417786, "learning_rate": 8e-05, "loss": 1.6101, "step": 4320 }, { "epoch": 0.7377497012122247, "grad_norm": 0.4640974700450897, "learning_rate": 8e-05, "loss": 1.5601, "step": 4321 }, { "epoch": 0.7379204370838313, "grad_norm": 0.4841482937335968, "learning_rate": 8e-05, "loss": 1.6354, "step": 4322 }, { "epoch": 0.7380911729554379, "grad_norm": 0.4670560657978058, "learning_rate": 8e-05, "loss": 1.6971, "step": 4323 }, { "epoch": 0.7382619088270446, "grad_norm": 0.4694540500640869, "learning_rate": 8e-05, "loss": 1.7002, "step": 4324 }, { "epoch": 0.7384326446986512, "grad_norm": 0.4872758090496063, "learning_rate": 8e-05, "loss": 1.6821, "step": 4325 }, { "epoch": 0.7386033805702578, "grad_norm": 0.49906468391418457, "learning_rate": 8e-05, "loss": 1.6217, "step": 4326 }, { "epoch": 0.7387741164418644, "grad_norm": 0.4736610949039459, "learning_rate": 8e-05, "loss": 1.7041, "step": 4327 }, { "epoch": 0.7389448523134711, "grad_norm": 0.45677000284194946, "learning_rate": 8e-05, "loss": 1.5923, "step": 4328 }, { "epoch": 0.7391155881850777, "grad_norm": 0.4706338047981262, "learning_rate": 8e-05, "loss": 1.559, "step": 4329 }, { "epoch": 0.7392863240566843, "grad_norm": 0.5057492256164551, "learning_rate": 8e-05, "loss": 1.7815, "step": 4330 }, { "epoch": 0.7394570599282909, "grad_norm": 0.46279510855674744, "learning_rate": 8e-05, "loss": 1.568, "step": 4331 }, { "epoch": 0.7396277957998976, "grad_norm": 0.5156611800193787, "learning_rate": 8e-05, "loss": 1.831, "step": 4332 }, { "epoch": 0.7397985316715042, "grad_norm": 0.5297399163246155, "learning_rate": 8e-05, "loss": 1.7867, "step": 4333 }, { "epoch": 0.7399692675431108, "grad_norm": 0.4627359211444855, "learning_rate": 8e-05, "loss": 1.6663, "step": 4334 }, { "epoch": 0.7401400034147174, "grad_norm": 0.46081170439720154, "learning_rate": 8e-05, "loss": 1.5979, "step": 4335 }, { "epoch": 0.7403107392863241, "grad_norm": 0.49856990575790405, "learning_rate": 8e-05, "loss": 1.7167, "step": 4336 }, { "epoch": 0.7404814751579307, "grad_norm": 0.508152186870575, "learning_rate": 8e-05, "loss": 1.831, "step": 4337 }, { "epoch": 0.7406522110295373, "grad_norm": 0.46900901198387146, "learning_rate": 8e-05, "loss": 1.6277, "step": 4338 }, { "epoch": 0.7408229469011439, "grad_norm": 0.5351276397705078, "learning_rate": 8e-05, "loss": 1.7046, "step": 4339 }, { "epoch": 0.7409936827727506, "grad_norm": 0.4737755060195923, "learning_rate": 8e-05, "loss": 1.5853, "step": 4340 }, { "epoch": 0.7411644186443572, "grad_norm": 0.48422011733055115, "learning_rate": 8e-05, "loss": 1.5661, "step": 4341 }, { "epoch": 0.7413351545159638, "grad_norm": 0.46728745102882385, "learning_rate": 8e-05, "loss": 1.5851, "step": 4342 }, { "epoch": 0.7415058903875704, "grad_norm": 0.49392351508140564, "learning_rate": 8e-05, "loss": 1.8282, "step": 4343 }, { "epoch": 0.7416766262591771, "grad_norm": 0.49953699111938477, "learning_rate": 8e-05, "loss": 1.8107, "step": 4344 }, { "epoch": 0.7418473621307837, "grad_norm": 0.48436442017555237, "learning_rate": 8e-05, "loss": 1.7338, "step": 4345 }, { "epoch": 0.7420180980023903, "grad_norm": 0.4690072238445282, "learning_rate": 8e-05, "loss": 1.7802, "step": 4346 }, { "epoch": 0.7421888338739969, "grad_norm": 0.48212116956710815, "learning_rate": 8e-05, "loss": 1.5204, "step": 4347 }, { "epoch": 0.7423595697456036, "grad_norm": 0.4741227626800537, "learning_rate": 8e-05, "loss": 1.6372, "step": 4348 }, { "epoch": 0.7425303056172102, "grad_norm": 0.48168739676475525, "learning_rate": 8e-05, "loss": 1.6491, "step": 4349 }, { "epoch": 0.7427010414888168, "grad_norm": 0.5563356876373291, "learning_rate": 8e-05, "loss": 1.6421, "step": 4350 }, { "epoch": 0.7428717773604234, "grad_norm": 0.5295475125312805, "learning_rate": 8e-05, "loss": 1.7879, "step": 4351 }, { "epoch": 0.7430425132320301, "grad_norm": 0.50235915184021, "learning_rate": 8e-05, "loss": 1.9119, "step": 4352 }, { "epoch": 0.7432132491036366, "grad_norm": 0.4539337158203125, "learning_rate": 8e-05, "loss": 1.5535, "step": 4353 }, { "epoch": 0.7433839849752433, "grad_norm": 0.5027146935462952, "learning_rate": 8e-05, "loss": 1.964, "step": 4354 }, { "epoch": 0.7435547208468499, "grad_norm": 0.48124101758003235, "learning_rate": 8e-05, "loss": 1.6082, "step": 4355 }, { "epoch": 0.7437254567184566, "grad_norm": 0.5005484819412231, "learning_rate": 8e-05, "loss": 1.7481, "step": 4356 }, { "epoch": 0.7438961925900631, "grad_norm": 0.4761844277381897, "learning_rate": 8e-05, "loss": 1.5474, "step": 4357 }, { "epoch": 0.7440669284616698, "grad_norm": 0.462914377450943, "learning_rate": 8e-05, "loss": 1.5493, "step": 4358 }, { "epoch": 0.7442376643332764, "grad_norm": 0.45874130725860596, "learning_rate": 8e-05, "loss": 1.6693, "step": 4359 }, { "epoch": 0.7444084002048831, "grad_norm": 0.45297321677207947, "learning_rate": 8e-05, "loss": 1.5749, "step": 4360 }, { "epoch": 0.7445791360764896, "grad_norm": 0.47085344791412354, "learning_rate": 8e-05, "loss": 1.7867, "step": 4361 }, { "epoch": 0.7447498719480963, "grad_norm": 0.48295676708221436, "learning_rate": 8e-05, "loss": 1.7428, "step": 4362 }, { "epoch": 0.7449206078197029, "grad_norm": 0.45850682258605957, "learning_rate": 8e-05, "loss": 1.7158, "step": 4363 }, { "epoch": 0.7450913436913096, "grad_norm": 0.47383713722229004, "learning_rate": 8e-05, "loss": 1.7225, "step": 4364 }, { "epoch": 0.7452620795629161, "grad_norm": 0.4832513630390167, "learning_rate": 8e-05, "loss": 1.6681, "step": 4365 }, { "epoch": 0.7454328154345228, "grad_norm": 0.5270935297012329, "learning_rate": 8e-05, "loss": 1.6905, "step": 4366 }, { "epoch": 0.7456035513061294, "grad_norm": 0.4435289800167084, "learning_rate": 8e-05, "loss": 1.6333, "step": 4367 }, { "epoch": 0.7457742871777361, "grad_norm": 0.4727223515510559, "learning_rate": 8e-05, "loss": 1.6355, "step": 4368 }, { "epoch": 0.7459450230493426, "grad_norm": 0.5183879137039185, "learning_rate": 8e-05, "loss": 1.871, "step": 4369 }, { "epoch": 0.7461157589209493, "grad_norm": 0.4863802492618561, "learning_rate": 8e-05, "loss": 1.6534, "step": 4370 }, { "epoch": 0.7462864947925559, "grad_norm": 0.46008947491645813, "learning_rate": 8e-05, "loss": 1.7392, "step": 4371 }, { "epoch": 0.7464572306641626, "grad_norm": 0.5199511647224426, "learning_rate": 8e-05, "loss": 1.7723, "step": 4372 }, { "epoch": 0.7466279665357691, "grad_norm": 0.5055847764015198, "learning_rate": 8e-05, "loss": 1.8776, "step": 4373 }, { "epoch": 0.7467987024073758, "grad_norm": 0.4946203827857971, "learning_rate": 8e-05, "loss": 1.7222, "step": 4374 }, { "epoch": 0.7469694382789824, "grad_norm": 0.47466427087783813, "learning_rate": 8e-05, "loss": 1.7524, "step": 4375 }, { "epoch": 0.7471401741505891, "grad_norm": 0.5027341246604919, "learning_rate": 8e-05, "loss": 1.7934, "step": 4376 }, { "epoch": 0.7473109100221956, "grad_norm": 0.4615477919578552, "learning_rate": 8e-05, "loss": 1.7634, "step": 4377 }, { "epoch": 0.7474816458938023, "grad_norm": 0.460007905960083, "learning_rate": 8e-05, "loss": 1.6642, "step": 4378 }, { "epoch": 0.7476523817654089, "grad_norm": 0.4922712445259094, "learning_rate": 8e-05, "loss": 1.5657, "step": 4379 }, { "epoch": 0.7478231176370156, "grad_norm": 0.5012286305427551, "learning_rate": 8e-05, "loss": 1.5263, "step": 4380 }, { "epoch": 0.7479938535086221, "grad_norm": 0.4748152792453766, "learning_rate": 8e-05, "loss": 1.3004, "step": 4381 }, { "epoch": 0.7481645893802288, "grad_norm": 0.5022371411323547, "learning_rate": 8e-05, "loss": 1.7098, "step": 4382 }, { "epoch": 0.7483353252518354, "grad_norm": 0.5143356919288635, "learning_rate": 8e-05, "loss": 1.6471, "step": 4383 }, { "epoch": 0.7485060611234421, "grad_norm": 0.48427698016166687, "learning_rate": 8e-05, "loss": 1.7509, "step": 4384 }, { "epoch": 0.7486767969950486, "grad_norm": 0.47302761673927307, "learning_rate": 8e-05, "loss": 1.7105, "step": 4385 }, { "epoch": 0.7488475328666553, "grad_norm": 0.48885661363601685, "learning_rate": 8e-05, "loss": 1.6163, "step": 4386 }, { "epoch": 0.7490182687382619, "grad_norm": 0.46550020575523376, "learning_rate": 8e-05, "loss": 1.6542, "step": 4387 }, { "epoch": 0.7491890046098685, "grad_norm": 0.4585787057876587, "learning_rate": 8e-05, "loss": 1.7369, "step": 4388 }, { "epoch": 0.7493597404814751, "grad_norm": 0.5097677111625671, "learning_rate": 8e-05, "loss": 1.8709, "step": 4389 }, { "epoch": 0.7495304763530818, "grad_norm": 0.4746238589286804, "learning_rate": 8e-05, "loss": 1.8116, "step": 4390 }, { "epoch": 0.7497012122246884, "grad_norm": 0.5121199488639832, "learning_rate": 8e-05, "loss": 1.9512, "step": 4391 }, { "epoch": 0.749871948096295, "grad_norm": 0.5360056161880493, "learning_rate": 8e-05, "loss": 1.5758, "step": 4392 }, { "epoch": 0.7500426839679016, "grad_norm": 0.4922710657119751, "learning_rate": 8e-05, "loss": 1.6665, "step": 4393 }, { "epoch": 0.7502134198395083, "grad_norm": 0.490486741065979, "learning_rate": 8e-05, "loss": 1.7029, "step": 4394 }, { "epoch": 0.7503841557111149, "grad_norm": 0.5260136127471924, "learning_rate": 8e-05, "loss": 1.7181, "step": 4395 }, { "epoch": 0.7505548915827215, "grad_norm": 0.46043625473976135, "learning_rate": 8e-05, "loss": 1.706, "step": 4396 }, { "epoch": 0.7507256274543281, "grad_norm": 0.4572415351867676, "learning_rate": 8e-05, "loss": 1.6185, "step": 4397 }, { "epoch": 0.7508963633259348, "grad_norm": 0.4994569420814514, "learning_rate": 8e-05, "loss": 1.8666, "step": 4398 }, { "epoch": 0.7510670991975414, "grad_norm": 0.48827478289604187, "learning_rate": 8e-05, "loss": 1.6482, "step": 4399 }, { "epoch": 0.751237835069148, "grad_norm": 0.46071064472198486, "learning_rate": 8e-05, "loss": 1.6502, "step": 4400 }, { "epoch": 0.7514085709407546, "grad_norm": 0.4927595853805542, "learning_rate": 8e-05, "loss": 1.6226, "step": 4401 }, { "epoch": 0.7515793068123613, "grad_norm": 0.47678083181381226, "learning_rate": 8e-05, "loss": 1.7247, "step": 4402 }, { "epoch": 0.751750042683968, "grad_norm": 0.4685894250869751, "learning_rate": 8e-05, "loss": 1.6545, "step": 4403 }, { "epoch": 0.7519207785555745, "grad_norm": 0.49959567189216614, "learning_rate": 8e-05, "loss": 1.815, "step": 4404 }, { "epoch": 0.7520915144271811, "grad_norm": 0.49250733852386475, "learning_rate": 8e-05, "loss": 1.9325, "step": 4405 }, { "epoch": 0.7522622502987878, "grad_norm": 0.4569817781448364, "learning_rate": 8e-05, "loss": 1.6133, "step": 4406 }, { "epoch": 0.7524329861703944, "grad_norm": 0.48968130350112915, "learning_rate": 8e-05, "loss": 1.7177, "step": 4407 }, { "epoch": 0.752603722042001, "grad_norm": 0.45341208577156067, "learning_rate": 8e-05, "loss": 1.5907, "step": 4408 }, { "epoch": 0.7527744579136076, "grad_norm": 0.46391797065734863, "learning_rate": 8e-05, "loss": 1.7115, "step": 4409 }, { "epoch": 0.7529451937852143, "grad_norm": 0.4971759021282196, "learning_rate": 8e-05, "loss": 1.7756, "step": 4410 }, { "epoch": 0.753115929656821, "grad_norm": 0.49080657958984375, "learning_rate": 8e-05, "loss": 1.8401, "step": 4411 }, { "epoch": 0.7532866655284275, "grad_norm": 0.49792346358299255, "learning_rate": 8e-05, "loss": 1.5954, "step": 4412 }, { "epoch": 0.7534574014000341, "grad_norm": 0.5109968185424805, "learning_rate": 8e-05, "loss": 1.7817, "step": 4413 }, { "epoch": 0.7536281372716408, "grad_norm": 0.4807204306125641, "learning_rate": 8e-05, "loss": 1.7042, "step": 4414 }, { "epoch": 0.7537988731432474, "grad_norm": 0.49665263295173645, "learning_rate": 8e-05, "loss": 1.8404, "step": 4415 }, { "epoch": 0.753969609014854, "grad_norm": 0.4714358448982239, "learning_rate": 8e-05, "loss": 1.6974, "step": 4416 }, { "epoch": 0.7541403448864606, "grad_norm": 0.44964858889579773, "learning_rate": 8e-05, "loss": 1.4516, "step": 4417 }, { "epoch": 0.7543110807580673, "grad_norm": 0.4814924895763397, "learning_rate": 8e-05, "loss": 1.7074, "step": 4418 }, { "epoch": 0.7544818166296738, "grad_norm": 0.5010007619857788, "learning_rate": 8e-05, "loss": 1.6316, "step": 4419 }, { "epoch": 0.7546525525012805, "grad_norm": 0.48828262090682983, "learning_rate": 8e-05, "loss": 1.7592, "step": 4420 }, { "epoch": 0.7548232883728871, "grad_norm": 0.4625883102416992, "learning_rate": 8e-05, "loss": 1.5891, "step": 4421 }, { "epoch": 0.7549940242444938, "grad_norm": 0.4740544855594635, "learning_rate": 8e-05, "loss": 1.6768, "step": 4422 }, { "epoch": 0.7551647601161003, "grad_norm": 0.4852917790412903, "learning_rate": 8e-05, "loss": 1.6761, "step": 4423 }, { "epoch": 0.755335495987707, "grad_norm": 0.46568265557289124, "learning_rate": 8e-05, "loss": 1.5896, "step": 4424 }, { "epoch": 0.7555062318593136, "grad_norm": 0.473845899105072, "learning_rate": 8e-05, "loss": 1.6886, "step": 4425 }, { "epoch": 0.7556769677309203, "grad_norm": 0.4602227509021759, "learning_rate": 8e-05, "loss": 1.6298, "step": 4426 }, { "epoch": 0.7558477036025268, "grad_norm": 0.4837547540664673, "learning_rate": 8e-05, "loss": 1.7516, "step": 4427 }, { "epoch": 0.7560184394741335, "grad_norm": 0.5403469204902649, "learning_rate": 8e-05, "loss": 1.8562, "step": 4428 }, { "epoch": 0.7561891753457401, "grad_norm": 0.4336179494857788, "learning_rate": 8e-05, "loss": 1.5703, "step": 4429 }, { "epoch": 0.7563599112173468, "grad_norm": 0.5483734607696533, "learning_rate": 8e-05, "loss": 1.671, "step": 4430 }, { "epoch": 0.7565306470889533, "grad_norm": 0.4713594317436218, "learning_rate": 8e-05, "loss": 1.4965, "step": 4431 }, { "epoch": 0.75670138296056, "grad_norm": 0.46816107630729675, "learning_rate": 8e-05, "loss": 1.7632, "step": 4432 }, { "epoch": 0.7568721188321667, "grad_norm": 0.455568790435791, "learning_rate": 8e-05, "loss": 1.6871, "step": 4433 }, { "epoch": 0.7570428547037733, "grad_norm": 0.44552046060562134, "learning_rate": 8e-05, "loss": 1.5035, "step": 4434 }, { "epoch": 0.7572135905753798, "grad_norm": 0.49087271094322205, "learning_rate": 8e-05, "loss": 1.6804, "step": 4435 }, { "epoch": 0.7573843264469865, "grad_norm": 0.5023034811019897, "learning_rate": 8e-05, "loss": 1.453, "step": 4436 }, { "epoch": 0.7575550623185932, "grad_norm": 0.47326982021331787, "learning_rate": 8e-05, "loss": 1.6107, "step": 4437 }, { "epoch": 0.7577257981901998, "grad_norm": 0.47503048181533813, "learning_rate": 8e-05, "loss": 1.5043, "step": 4438 }, { "epoch": 0.7578965340618063, "grad_norm": 0.49386653304100037, "learning_rate": 8e-05, "loss": 1.7888, "step": 4439 }, { "epoch": 0.758067269933413, "grad_norm": 0.47511181235313416, "learning_rate": 8e-05, "loss": 1.501, "step": 4440 }, { "epoch": 0.7582380058050197, "grad_norm": 0.4654957056045532, "learning_rate": 8e-05, "loss": 1.5589, "step": 4441 }, { "epoch": 0.7584087416766263, "grad_norm": 0.5701891779899597, "learning_rate": 8e-05, "loss": 1.8206, "step": 4442 }, { "epoch": 0.7585794775482329, "grad_norm": 0.5076572895050049, "learning_rate": 8e-05, "loss": 1.7105, "step": 4443 }, { "epoch": 0.7587502134198395, "grad_norm": 0.5194302797317505, "learning_rate": 8e-05, "loss": 1.7694, "step": 4444 }, { "epoch": 0.7589209492914462, "grad_norm": 0.48314377665519714, "learning_rate": 8e-05, "loss": 1.672, "step": 4445 }, { "epoch": 0.7590916851630528, "grad_norm": 0.4746382236480713, "learning_rate": 8e-05, "loss": 1.6987, "step": 4446 }, { "epoch": 0.7592624210346594, "grad_norm": 0.5195656418800354, "learning_rate": 8e-05, "loss": 1.6824, "step": 4447 }, { "epoch": 0.759433156906266, "grad_norm": 0.5034726858139038, "learning_rate": 8e-05, "loss": 1.8483, "step": 4448 }, { "epoch": 0.7596038927778727, "grad_norm": 0.45169001817703247, "learning_rate": 8e-05, "loss": 1.6414, "step": 4449 }, { "epoch": 0.7597746286494793, "grad_norm": 0.46494296193122864, "learning_rate": 8e-05, "loss": 1.6724, "step": 4450 }, { "epoch": 0.7599453645210859, "grad_norm": 0.49697792530059814, "learning_rate": 8e-05, "loss": 1.7662, "step": 4451 }, { "epoch": 0.7601161003926925, "grad_norm": 0.4877566397190094, "learning_rate": 8e-05, "loss": 1.7987, "step": 4452 }, { "epoch": 0.7602868362642992, "grad_norm": 0.46319645643234253, "learning_rate": 8e-05, "loss": 1.5045, "step": 4453 }, { "epoch": 0.7604575721359057, "grad_norm": 0.462878942489624, "learning_rate": 8e-05, "loss": 1.6228, "step": 4454 }, { "epoch": 0.7606283080075124, "grad_norm": 0.45624926686286926, "learning_rate": 8e-05, "loss": 1.5267, "step": 4455 }, { "epoch": 0.760799043879119, "grad_norm": 0.4651598036289215, "learning_rate": 8e-05, "loss": 1.7755, "step": 4456 }, { "epoch": 0.7609697797507257, "grad_norm": 0.43957698345184326, "learning_rate": 8e-05, "loss": 1.4731, "step": 4457 }, { "epoch": 0.7611405156223322, "grad_norm": 0.46492767333984375, "learning_rate": 8e-05, "loss": 1.6602, "step": 4458 }, { "epoch": 0.7613112514939389, "grad_norm": 0.49632981419563293, "learning_rate": 8e-05, "loss": 1.4798, "step": 4459 }, { "epoch": 0.7614819873655455, "grad_norm": 0.47802165150642395, "learning_rate": 8e-05, "loss": 1.6312, "step": 4460 }, { "epoch": 0.7616527232371522, "grad_norm": 0.4546307325363159, "learning_rate": 8e-05, "loss": 1.6878, "step": 4461 }, { "epoch": 0.7618234591087587, "grad_norm": 0.5159940719604492, "learning_rate": 8e-05, "loss": 1.7186, "step": 4462 }, { "epoch": 0.7619941949803654, "grad_norm": 0.4897524416446686, "learning_rate": 8e-05, "loss": 1.7001, "step": 4463 }, { "epoch": 0.762164930851972, "grad_norm": 0.4804462790489197, "learning_rate": 8e-05, "loss": 1.703, "step": 4464 }, { "epoch": 0.7623356667235787, "grad_norm": 0.4859493374824524, "learning_rate": 8e-05, "loss": 1.7105, "step": 4465 }, { "epoch": 0.7625064025951852, "grad_norm": 0.4860101342201233, "learning_rate": 8e-05, "loss": 1.6578, "step": 4466 }, { "epoch": 0.7626771384667919, "grad_norm": 0.4828910529613495, "learning_rate": 8e-05, "loss": 1.6064, "step": 4467 }, { "epoch": 0.7628478743383985, "grad_norm": 0.49821653962135315, "learning_rate": 8e-05, "loss": 1.8829, "step": 4468 }, { "epoch": 0.7630186102100052, "grad_norm": 0.5120306015014648, "learning_rate": 8e-05, "loss": 1.6292, "step": 4469 }, { "epoch": 0.7631893460816117, "grad_norm": 0.5153220295906067, "learning_rate": 8e-05, "loss": 1.7175, "step": 4470 }, { "epoch": 0.7633600819532184, "grad_norm": 0.4686887264251709, "learning_rate": 8e-05, "loss": 1.7009, "step": 4471 }, { "epoch": 0.763530817824825, "grad_norm": 0.47975289821624756, "learning_rate": 8e-05, "loss": 1.8265, "step": 4472 }, { "epoch": 0.7637015536964317, "grad_norm": 0.4608544111251831, "learning_rate": 8e-05, "loss": 1.6076, "step": 4473 }, { "epoch": 0.7638722895680382, "grad_norm": 0.47130194306373596, "learning_rate": 8e-05, "loss": 1.7214, "step": 4474 }, { "epoch": 0.7640430254396449, "grad_norm": 0.48048505187034607, "learning_rate": 8e-05, "loss": 1.5749, "step": 4475 }, { "epoch": 0.7642137613112515, "grad_norm": 0.45948976278305054, "learning_rate": 8e-05, "loss": 1.452, "step": 4476 }, { "epoch": 0.7643844971828582, "grad_norm": 0.4839404225349426, "learning_rate": 8e-05, "loss": 1.7744, "step": 4477 }, { "epoch": 0.7645552330544647, "grad_norm": 0.44322553277015686, "learning_rate": 8e-05, "loss": 1.6502, "step": 4478 }, { "epoch": 0.7647259689260714, "grad_norm": 0.48980793356895447, "learning_rate": 8e-05, "loss": 1.7107, "step": 4479 }, { "epoch": 0.764896704797678, "grad_norm": 0.5067513585090637, "learning_rate": 8e-05, "loss": 1.8041, "step": 4480 }, { "epoch": 0.7650674406692847, "grad_norm": 0.4545120894908905, "learning_rate": 8e-05, "loss": 1.4428, "step": 4481 }, { "epoch": 0.7652381765408912, "grad_norm": 0.46014443039894104, "learning_rate": 8e-05, "loss": 1.6554, "step": 4482 }, { "epoch": 0.7654089124124979, "grad_norm": 0.5081507563591003, "learning_rate": 8e-05, "loss": 1.6868, "step": 4483 }, { "epoch": 0.7655796482841045, "grad_norm": 0.5042709112167358, "learning_rate": 8e-05, "loss": 1.6546, "step": 4484 }, { "epoch": 0.7657503841557111, "grad_norm": 0.47780290246009827, "learning_rate": 8e-05, "loss": 1.7243, "step": 4485 }, { "epoch": 0.7659211200273177, "grad_norm": 0.5071755051612854, "learning_rate": 8e-05, "loss": 1.6557, "step": 4486 }, { "epoch": 0.7660918558989244, "grad_norm": 0.5126450657844543, "learning_rate": 8e-05, "loss": 1.6014, "step": 4487 }, { "epoch": 0.766262591770531, "grad_norm": 0.4766642153263092, "learning_rate": 8e-05, "loss": 1.7204, "step": 4488 }, { "epoch": 0.7664333276421376, "grad_norm": 0.4894273281097412, "learning_rate": 8e-05, "loss": 1.8649, "step": 4489 }, { "epoch": 0.7666040635137442, "grad_norm": 0.4991094768047333, "learning_rate": 8e-05, "loss": 1.8478, "step": 4490 }, { "epoch": 0.7667747993853509, "grad_norm": 0.4885282516479492, "learning_rate": 8e-05, "loss": 1.7748, "step": 4491 }, { "epoch": 0.7669455352569575, "grad_norm": 0.49344655871391296, "learning_rate": 8e-05, "loss": 1.7653, "step": 4492 }, { "epoch": 0.7671162711285641, "grad_norm": 0.4910622239112854, "learning_rate": 8e-05, "loss": 1.7872, "step": 4493 }, { "epoch": 0.7672870070001707, "grad_norm": 0.4909657835960388, "learning_rate": 8e-05, "loss": 1.7693, "step": 4494 }, { "epoch": 0.7674577428717774, "grad_norm": 0.506008505821228, "learning_rate": 8e-05, "loss": 1.8785, "step": 4495 }, { "epoch": 0.767628478743384, "grad_norm": 0.490109384059906, "learning_rate": 8e-05, "loss": 1.8393, "step": 4496 }, { "epoch": 0.7677992146149906, "grad_norm": 0.5534494519233704, "learning_rate": 8e-05, "loss": 1.6905, "step": 4497 }, { "epoch": 0.7679699504865972, "grad_norm": 0.5055539011955261, "learning_rate": 8e-05, "loss": 1.6841, "step": 4498 }, { "epoch": 0.7681406863582039, "grad_norm": 0.49471017718315125, "learning_rate": 8e-05, "loss": 1.7697, "step": 4499 }, { "epoch": 0.7683114222298105, "grad_norm": 0.5016489028930664, "learning_rate": 8e-05, "loss": 1.7991, "step": 4500 }, { "epoch": 0.7684821581014171, "grad_norm": 0.48524680733680725, "learning_rate": 8e-05, "loss": 1.6289, "step": 4501 }, { "epoch": 0.7686528939730237, "grad_norm": 0.48416513204574585, "learning_rate": 8e-05, "loss": 1.6899, "step": 4502 }, { "epoch": 0.7688236298446304, "grad_norm": 0.5483559370040894, "learning_rate": 8e-05, "loss": 1.7585, "step": 4503 }, { "epoch": 0.768994365716237, "grad_norm": 0.480787992477417, "learning_rate": 8e-05, "loss": 1.632, "step": 4504 }, { "epoch": 0.7691651015878436, "grad_norm": 0.472707599401474, "learning_rate": 8e-05, "loss": 1.7422, "step": 4505 }, { "epoch": 0.7693358374594502, "grad_norm": 0.45850425958633423, "learning_rate": 8e-05, "loss": 1.493, "step": 4506 }, { "epoch": 0.7695065733310569, "grad_norm": 0.4879100024700165, "learning_rate": 8e-05, "loss": 1.6133, "step": 4507 }, { "epoch": 0.7696773092026635, "grad_norm": 0.4716891348361969, "learning_rate": 8e-05, "loss": 1.6764, "step": 4508 }, { "epoch": 0.7698480450742701, "grad_norm": 0.5818755030632019, "learning_rate": 8e-05, "loss": 1.718, "step": 4509 }, { "epoch": 0.7700187809458767, "grad_norm": 0.5362465977668762, "learning_rate": 8e-05, "loss": 1.7748, "step": 4510 }, { "epoch": 0.7701895168174834, "grad_norm": 0.47864818572998047, "learning_rate": 8e-05, "loss": 1.7582, "step": 4511 }, { "epoch": 0.77036025268909, "grad_norm": 0.5021650195121765, "learning_rate": 8e-05, "loss": 1.6099, "step": 4512 }, { "epoch": 0.7705309885606966, "grad_norm": 0.46591365337371826, "learning_rate": 8e-05, "loss": 1.6485, "step": 4513 }, { "epoch": 0.7707017244323032, "grad_norm": 0.49805620312690735, "learning_rate": 8e-05, "loss": 1.8446, "step": 4514 }, { "epoch": 0.7708724603039099, "grad_norm": 0.49380791187286377, "learning_rate": 8e-05, "loss": 1.7802, "step": 4515 }, { "epoch": 0.7710431961755164, "grad_norm": 0.4918292462825775, "learning_rate": 8e-05, "loss": 1.6601, "step": 4516 }, { "epoch": 0.7712139320471231, "grad_norm": 0.4947502911090851, "learning_rate": 8e-05, "loss": 1.8952, "step": 4517 }, { "epoch": 0.7713846679187297, "grad_norm": 0.5006787776947021, "learning_rate": 8e-05, "loss": 1.6957, "step": 4518 }, { "epoch": 0.7715554037903364, "grad_norm": 0.446748822927475, "learning_rate": 8e-05, "loss": 1.5848, "step": 4519 }, { "epoch": 0.7717261396619429, "grad_norm": 0.4716370105743408, "learning_rate": 8e-05, "loss": 1.5427, "step": 4520 }, { "epoch": 0.7718968755335496, "grad_norm": 0.45984897017478943, "learning_rate": 8e-05, "loss": 1.677, "step": 4521 }, { "epoch": 0.7720676114051562, "grad_norm": 0.46500062942504883, "learning_rate": 8e-05, "loss": 1.5976, "step": 4522 }, { "epoch": 0.7722383472767629, "grad_norm": 0.5254181027412415, "learning_rate": 8e-05, "loss": 1.5726, "step": 4523 }, { "epoch": 0.7724090831483694, "grad_norm": 0.5050808191299438, "learning_rate": 8e-05, "loss": 1.8128, "step": 4524 }, { "epoch": 0.7725798190199761, "grad_norm": 0.49856898188591003, "learning_rate": 8e-05, "loss": 1.7985, "step": 4525 }, { "epoch": 0.7727505548915827, "grad_norm": 0.4924960732460022, "learning_rate": 8e-05, "loss": 1.6999, "step": 4526 }, { "epoch": 0.7729212907631894, "grad_norm": 0.4630880057811737, "learning_rate": 8e-05, "loss": 1.5307, "step": 4527 }, { "epoch": 0.7730920266347959, "grad_norm": 0.5242035984992981, "learning_rate": 8e-05, "loss": 1.7691, "step": 4528 }, { "epoch": 0.7732627625064026, "grad_norm": 0.47423702478408813, "learning_rate": 8e-05, "loss": 1.8113, "step": 4529 }, { "epoch": 0.7734334983780092, "grad_norm": 0.5292898416519165, "learning_rate": 8e-05, "loss": 1.6352, "step": 4530 }, { "epoch": 0.7736042342496159, "grad_norm": 0.46888378262519836, "learning_rate": 8e-05, "loss": 1.7019, "step": 4531 }, { "epoch": 0.7737749701212224, "grad_norm": 0.47725024819374084, "learning_rate": 8e-05, "loss": 1.6068, "step": 4532 }, { "epoch": 0.7739457059928291, "grad_norm": 0.4951931834220886, "learning_rate": 8e-05, "loss": 1.6821, "step": 4533 }, { "epoch": 0.7741164418644357, "grad_norm": 0.4795713722705841, "learning_rate": 8e-05, "loss": 1.7953, "step": 4534 }, { "epoch": 0.7742871777360424, "grad_norm": 0.5036234855651855, "learning_rate": 8e-05, "loss": 1.7499, "step": 4535 }, { "epoch": 0.7744579136076489, "grad_norm": 0.49201998114585876, "learning_rate": 8e-05, "loss": 1.7527, "step": 4536 }, { "epoch": 0.7746286494792556, "grad_norm": 0.49627846479415894, "learning_rate": 8e-05, "loss": 1.6138, "step": 4537 }, { "epoch": 0.7747993853508622, "grad_norm": 0.46936073899269104, "learning_rate": 8e-05, "loss": 1.8208, "step": 4538 }, { "epoch": 0.7749701212224689, "grad_norm": 0.4856550097465515, "learning_rate": 8e-05, "loss": 1.6366, "step": 4539 }, { "epoch": 0.7751408570940754, "grad_norm": 0.4717883765697479, "learning_rate": 8e-05, "loss": 1.5971, "step": 4540 }, { "epoch": 0.7753115929656821, "grad_norm": 0.45169153809547424, "learning_rate": 8e-05, "loss": 1.5855, "step": 4541 }, { "epoch": 0.7754823288372887, "grad_norm": 0.5036446452140808, "learning_rate": 8e-05, "loss": 1.7303, "step": 4542 }, { "epoch": 0.7756530647088954, "grad_norm": 0.5140407681465149, "learning_rate": 8e-05, "loss": 1.6325, "step": 4543 }, { "epoch": 0.7758238005805019, "grad_norm": 0.5180602073669434, "learning_rate": 8e-05, "loss": 1.7098, "step": 4544 }, { "epoch": 0.7759945364521086, "grad_norm": 0.4835319221019745, "learning_rate": 8e-05, "loss": 1.5896, "step": 4545 }, { "epoch": 0.7761652723237152, "grad_norm": 0.47374433279037476, "learning_rate": 8e-05, "loss": 1.5298, "step": 4546 }, { "epoch": 0.7763360081953219, "grad_norm": 0.4982183873653412, "learning_rate": 8e-05, "loss": 1.7098, "step": 4547 }, { "epoch": 0.7765067440669284, "grad_norm": 0.5282678604125977, "learning_rate": 8e-05, "loss": 1.7766, "step": 4548 }, { "epoch": 0.7766774799385351, "grad_norm": 0.4791935682296753, "learning_rate": 8e-05, "loss": 1.7011, "step": 4549 }, { "epoch": 0.7768482158101417, "grad_norm": 0.5191706418991089, "learning_rate": 8e-05, "loss": 1.7683, "step": 4550 }, { "epoch": 0.7770189516817483, "grad_norm": 0.4794354736804962, "learning_rate": 8e-05, "loss": 1.6794, "step": 4551 }, { "epoch": 0.7771896875533549, "grad_norm": 0.49059349298477173, "learning_rate": 8e-05, "loss": 1.739, "step": 4552 }, { "epoch": 0.7773604234249616, "grad_norm": 0.45742031931877136, "learning_rate": 8e-05, "loss": 1.6151, "step": 4553 }, { "epoch": 0.7775311592965682, "grad_norm": 0.4796464443206787, "learning_rate": 8e-05, "loss": 1.8292, "step": 4554 }, { "epoch": 0.7777018951681748, "grad_norm": 0.4657885730266571, "learning_rate": 8e-05, "loss": 1.5686, "step": 4555 }, { "epoch": 0.7778726310397814, "grad_norm": 0.49606308341026306, "learning_rate": 8e-05, "loss": 1.7872, "step": 4556 }, { "epoch": 0.7780433669113881, "grad_norm": 0.4748438894748688, "learning_rate": 8e-05, "loss": 1.8258, "step": 4557 }, { "epoch": 0.7782141027829947, "grad_norm": 0.4821031093597412, "learning_rate": 8e-05, "loss": 1.7239, "step": 4558 }, { "epoch": 0.7783848386546013, "grad_norm": 0.48875680565834045, "learning_rate": 8e-05, "loss": 1.7555, "step": 4559 }, { "epoch": 0.7785555745262079, "grad_norm": 0.47800883650779724, "learning_rate": 8e-05, "loss": 1.6253, "step": 4560 }, { "epoch": 0.7787263103978146, "grad_norm": 0.49324071407318115, "learning_rate": 8e-05, "loss": 1.8049, "step": 4561 }, { "epoch": 0.7788970462694212, "grad_norm": 0.4717057943344116, "learning_rate": 8e-05, "loss": 1.7363, "step": 4562 }, { "epoch": 0.7790677821410278, "grad_norm": 0.4655677080154419, "learning_rate": 8e-05, "loss": 1.5894, "step": 4563 }, { "epoch": 0.7792385180126344, "grad_norm": 0.48449212312698364, "learning_rate": 8e-05, "loss": 1.5576, "step": 4564 }, { "epoch": 0.7794092538842411, "grad_norm": 0.47879642248153687, "learning_rate": 8e-05, "loss": 1.7819, "step": 4565 }, { "epoch": 0.7795799897558477, "grad_norm": 0.5096138715744019, "learning_rate": 8e-05, "loss": 1.6485, "step": 4566 }, { "epoch": 0.7797507256274543, "grad_norm": 0.4991984963417053, "learning_rate": 8e-05, "loss": 1.4902, "step": 4567 }, { "epoch": 0.7799214614990609, "grad_norm": 0.48797371983528137, "learning_rate": 8e-05, "loss": 1.7974, "step": 4568 }, { "epoch": 0.7800921973706676, "grad_norm": 0.4658960998058319, "learning_rate": 8e-05, "loss": 1.545, "step": 4569 }, { "epoch": 0.7802629332422742, "grad_norm": 0.46970853209495544, "learning_rate": 8e-05, "loss": 1.4806, "step": 4570 }, { "epoch": 0.7804336691138808, "grad_norm": 0.490326464176178, "learning_rate": 8e-05, "loss": 1.5363, "step": 4571 }, { "epoch": 0.7806044049854874, "grad_norm": 0.4822893440723419, "learning_rate": 8e-05, "loss": 1.8266, "step": 4572 }, { "epoch": 0.7807751408570941, "grad_norm": 0.49589020013809204, "learning_rate": 8e-05, "loss": 1.81, "step": 4573 }, { "epoch": 0.7809458767287007, "grad_norm": 0.48236986994743347, "learning_rate": 8e-05, "loss": 1.5983, "step": 4574 }, { "epoch": 0.7811166126003073, "grad_norm": 0.5090792179107666, "learning_rate": 8e-05, "loss": 1.813, "step": 4575 }, { "epoch": 0.781287348471914, "grad_norm": 0.4672381579875946, "learning_rate": 8e-05, "loss": 1.6933, "step": 4576 }, { "epoch": 0.7814580843435206, "grad_norm": 0.48527660965919495, "learning_rate": 8e-05, "loss": 1.5913, "step": 4577 }, { "epoch": 0.7816288202151273, "grad_norm": 0.5073494911193848, "learning_rate": 8e-05, "loss": 1.7874, "step": 4578 }, { "epoch": 0.7817995560867338, "grad_norm": 0.5113846063613892, "learning_rate": 8e-05, "loss": 1.759, "step": 4579 }, { "epoch": 0.7819702919583404, "grad_norm": 0.47559675574302673, "learning_rate": 8e-05, "loss": 1.809, "step": 4580 }, { "epoch": 0.7821410278299471, "grad_norm": 0.4697447419166565, "learning_rate": 8e-05, "loss": 1.5702, "step": 4581 }, { "epoch": 0.7823117637015536, "grad_norm": 0.5028384327888489, "learning_rate": 8e-05, "loss": 1.713, "step": 4582 }, { "epoch": 0.7824824995731603, "grad_norm": 0.49480435252189636, "learning_rate": 8e-05, "loss": 1.7169, "step": 4583 }, { "epoch": 0.782653235444767, "grad_norm": 0.4939188063144684, "learning_rate": 8e-05, "loss": 1.7845, "step": 4584 }, { "epoch": 0.7828239713163736, "grad_norm": 0.5005636811256409, "learning_rate": 8e-05, "loss": 1.5949, "step": 4585 }, { "epoch": 0.7829947071879801, "grad_norm": 0.4765579104423523, "learning_rate": 8e-05, "loss": 1.6865, "step": 4586 }, { "epoch": 0.7831654430595868, "grad_norm": 0.5233904719352722, "learning_rate": 8e-05, "loss": 1.8914, "step": 4587 }, { "epoch": 0.7833361789311934, "grad_norm": 0.4716081917285919, "learning_rate": 8e-05, "loss": 1.5798, "step": 4588 }, { "epoch": 0.7835069148028001, "grad_norm": 0.49088728427886963, "learning_rate": 8e-05, "loss": 1.6217, "step": 4589 }, { "epoch": 0.7836776506744066, "grad_norm": 0.4899400770664215, "learning_rate": 8e-05, "loss": 1.7037, "step": 4590 }, { "epoch": 0.7838483865460133, "grad_norm": 0.4710739552974701, "learning_rate": 8e-05, "loss": 1.7251, "step": 4591 }, { "epoch": 0.78401912241762, "grad_norm": 0.45619818568229675, "learning_rate": 8e-05, "loss": 1.6186, "step": 4592 }, { "epoch": 0.7841898582892266, "grad_norm": 0.45902860164642334, "learning_rate": 8e-05, "loss": 1.579, "step": 4593 }, { "epoch": 0.7843605941608331, "grad_norm": 0.5087790489196777, "learning_rate": 8e-05, "loss": 1.8684, "step": 4594 }, { "epoch": 0.7845313300324398, "grad_norm": 0.45453816652297974, "learning_rate": 8e-05, "loss": 1.4063, "step": 4595 }, { "epoch": 0.7847020659040465, "grad_norm": 0.452575147151947, "learning_rate": 8e-05, "loss": 1.5913, "step": 4596 }, { "epoch": 0.7848728017756531, "grad_norm": 0.47772008180618286, "learning_rate": 8e-05, "loss": 1.7112, "step": 4597 }, { "epoch": 0.7850435376472596, "grad_norm": 0.45549604296684265, "learning_rate": 8e-05, "loss": 1.5754, "step": 4598 }, { "epoch": 0.7852142735188663, "grad_norm": 0.508405327796936, "learning_rate": 8e-05, "loss": 1.6696, "step": 4599 }, { "epoch": 0.785385009390473, "grad_norm": 0.4650472104549408, "learning_rate": 8e-05, "loss": 1.7702, "step": 4600 }, { "epoch": 0.7855557452620796, "grad_norm": 0.497589647769928, "learning_rate": 8e-05, "loss": 1.8213, "step": 4601 }, { "epoch": 0.7857264811336861, "grad_norm": 0.5061016082763672, "learning_rate": 8e-05, "loss": 1.7787, "step": 4602 }, { "epoch": 0.7858972170052928, "grad_norm": 0.5230071544647217, "learning_rate": 8e-05, "loss": 1.5374, "step": 4603 }, { "epoch": 0.7860679528768995, "grad_norm": 0.4921020269393921, "learning_rate": 8e-05, "loss": 1.771, "step": 4604 }, { "epoch": 0.7862386887485061, "grad_norm": 0.4444049298763275, "learning_rate": 8e-05, "loss": 1.426, "step": 4605 }, { "epoch": 0.7864094246201127, "grad_norm": 0.5240102410316467, "learning_rate": 8e-05, "loss": 1.7555, "step": 4606 }, { "epoch": 0.7865801604917193, "grad_norm": 0.4630122184753418, "learning_rate": 8e-05, "loss": 1.6948, "step": 4607 }, { "epoch": 0.786750896363326, "grad_norm": 0.545038640499115, "learning_rate": 8e-05, "loss": 1.6501, "step": 4608 }, { "epoch": 0.7869216322349326, "grad_norm": 0.468415766954422, "learning_rate": 8e-05, "loss": 1.6194, "step": 4609 }, { "epoch": 0.7870923681065392, "grad_norm": 0.4543391168117523, "learning_rate": 8e-05, "loss": 1.5694, "step": 4610 }, { "epoch": 0.7872631039781458, "grad_norm": 0.4657517671585083, "learning_rate": 8e-05, "loss": 1.7824, "step": 4611 }, { "epoch": 0.7874338398497525, "grad_norm": 0.4548516273498535, "learning_rate": 8e-05, "loss": 1.5524, "step": 4612 }, { "epoch": 0.7876045757213591, "grad_norm": 0.504684567451477, "learning_rate": 8e-05, "loss": 1.646, "step": 4613 }, { "epoch": 0.7877753115929657, "grad_norm": 0.4553622603416443, "learning_rate": 8e-05, "loss": 1.7098, "step": 4614 }, { "epoch": 0.7879460474645723, "grad_norm": 0.48076578974723816, "learning_rate": 8e-05, "loss": 1.5378, "step": 4615 }, { "epoch": 0.788116783336179, "grad_norm": 0.49771639704704285, "learning_rate": 8e-05, "loss": 1.8187, "step": 4616 }, { "epoch": 0.7882875192077855, "grad_norm": 0.49268123507499695, "learning_rate": 8e-05, "loss": 1.5717, "step": 4617 }, { "epoch": 0.7884582550793922, "grad_norm": 0.5379424095153809, "learning_rate": 8e-05, "loss": 1.8291, "step": 4618 }, { "epoch": 0.7886289909509988, "grad_norm": 0.47191673517227173, "learning_rate": 8e-05, "loss": 1.4571, "step": 4619 }, { "epoch": 0.7887997268226055, "grad_norm": 0.4780910909175873, "learning_rate": 8e-05, "loss": 1.772, "step": 4620 }, { "epoch": 0.788970462694212, "grad_norm": 0.439413845539093, "learning_rate": 8e-05, "loss": 1.3898, "step": 4621 }, { "epoch": 0.7891411985658187, "grad_norm": 0.4988950788974762, "learning_rate": 8e-05, "loss": 1.7377, "step": 4622 }, { "epoch": 0.7893119344374253, "grad_norm": 0.4666007459163666, "learning_rate": 8e-05, "loss": 1.6607, "step": 4623 }, { "epoch": 0.789482670309032, "grad_norm": 0.471858412027359, "learning_rate": 8e-05, "loss": 1.7225, "step": 4624 }, { "epoch": 0.7896534061806385, "grad_norm": 0.5123687386512756, "learning_rate": 8e-05, "loss": 1.5294, "step": 4625 }, { "epoch": 0.7898241420522452, "grad_norm": 0.45420047640800476, "learning_rate": 8e-05, "loss": 1.6189, "step": 4626 }, { "epoch": 0.7899948779238518, "grad_norm": 0.4904423654079437, "learning_rate": 8e-05, "loss": 1.6976, "step": 4627 }, { "epoch": 0.7901656137954585, "grad_norm": 0.49454647302627563, "learning_rate": 8e-05, "loss": 1.6799, "step": 4628 }, { "epoch": 0.790336349667065, "grad_norm": 0.45405617356300354, "learning_rate": 8e-05, "loss": 1.6083, "step": 4629 }, { "epoch": 0.7905070855386717, "grad_norm": 0.560637891292572, "learning_rate": 8e-05, "loss": 1.8647, "step": 4630 }, { "epoch": 0.7906778214102783, "grad_norm": 0.4637416899204254, "learning_rate": 8e-05, "loss": 1.6751, "step": 4631 }, { "epoch": 0.790848557281885, "grad_norm": 0.5160278081893921, "learning_rate": 8e-05, "loss": 1.8976, "step": 4632 }, { "epoch": 0.7910192931534915, "grad_norm": 0.48190420866012573, "learning_rate": 8e-05, "loss": 1.6641, "step": 4633 }, { "epoch": 0.7911900290250982, "grad_norm": 0.4980706572532654, "learning_rate": 8e-05, "loss": 1.6391, "step": 4634 }, { "epoch": 0.7913607648967048, "grad_norm": 0.4512653648853302, "learning_rate": 8e-05, "loss": 1.4074, "step": 4635 }, { "epoch": 0.7915315007683115, "grad_norm": 0.44642409682273865, "learning_rate": 8e-05, "loss": 1.6244, "step": 4636 }, { "epoch": 0.791702236639918, "grad_norm": 0.48453933000564575, "learning_rate": 8e-05, "loss": 1.8241, "step": 4637 }, { "epoch": 0.7918729725115247, "grad_norm": 0.4844171404838562, "learning_rate": 8e-05, "loss": 1.6837, "step": 4638 }, { "epoch": 0.7920437083831313, "grad_norm": 0.4505460560321808, "learning_rate": 8e-05, "loss": 1.6418, "step": 4639 }, { "epoch": 0.792214444254738, "grad_norm": 0.4638948142528534, "learning_rate": 8e-05, "loss": 1.627, "step": 4640 }, { "epoch": 0.7923851801263445, "grad_norm": 0.4848320186138153, "learning_rate": 8e-05, "loss": 1.6623, "step": 4641 }, { "epoch": 0.7925559159979512, "grad_norm": 0.4703969359397888, "learning_rate": 8e-05, "loss": 1.588, "step": 4642 }, { "epoch": 0.7927266518695578, "grad_norm": 0.4786967635154724, "learning_rate": 8e-05, "loss": 1.8389, "step": 4643 }, { "epoch": 0.7928973877411645, "grad_norm": 0.4324696660041809, "learning_rate": 8e-05, "loss": 1.5757, "step": 4644 }, { "epoch": 0.793068123612771, "grad_norm": 0.49308323860168457, "learning_rate": 8e-05, "loss": 1.6545, "step": 4645 }, { "epoch": 0.7932388594843777, "grad_norm": 0.47314414381980896, "learning_rate": 8e-05, "loss": 1.7478, "step": 4646 }, { "epoch": 0.7934095953559843, "grad_norm": 0.4862039089202881, "learning_rate": 8e-05, "loss": 1.8348, "step": 4647 }, { "epoch": 0.7935803312275909, "grad_norm": 0.4977606236934662, "learning_rate": 8e-05, "loss": 1.671, "step": 4648 }, { "epoch": 0.7937510670991975, "grad_norm": 0.47843050956726074, "learning_rate": 8e-05, "loss": 1.7399, "step": 4649 }, { "epoch": 0.7939218029708042, "grad_norm": 0.48051661252975464, "learning_rate": 8e-05, "loss": 1.6949, "step": 4650 }, { "epoch": 0.7940925388424108, "grad_norm": 0.45798829197883606, "learning_rate": 8e-05, "loss": 1.6742, "step": 4651 }, { "epoch": 0.7942632747140174, "grad_norm": 0.4774686098098755, "learning_rate": 8e-05, "loss": 1.5975, "step": 4652 }, { "epoch": 0.794434010585624, "grad_norm": 0.48108306527137756, "learning_rate": 8e-05, "loss": 1.7804, "step": 4653 }, { "epoch": 0.7946047464572307, "grad_norm": 0.4741242229938507, "learning_rate": 8e-05, "loss": 1.7398, "step": 4654 }, { "epoch": 0.7947754823288373, "grad_norm": 0.5152931213378906, "learning_rate": 8e-05, "loss": 1.8048, "step": 4655 }, { "epoch": 0.7949462182004439, "grad_norm": 0.49557560682296753, "learning_rate": 8e-05, "loss": 1.585, "step": 4656 }, { "epoch": 0.7951169540720505, "grad_norm": 0.4975075125694275, "learning_rate": 8e-05, "loss": 1.6087, "step": 4657 }, { "epoch": 0.7952876899436572, "grad_norm": 0.492954283952713, "learning_rate": 8e-05, "loss": 1.6704, "step": 4658 }, { "epoch": 0.7954584258152638, "grad_norm": 0.47675392031669617, "learning_rate": 8e-05, "loss": 1.7728, "step": 4659 }, { "epoch": 0.7956291616868704, "grad_norm": 0.4947381913661957, "learning_rate": 8e-05, "loss": 1.8002, "step": 4660 }, { "epoch": 0.795799897558477, "grad_norm": 0.4574858248233795, "learning_rate": 8e-05, "loss": 1.6237, "step": 4661 }, { "epoch": 0.7959706334300837, "grad_norm": 0.5148410201072693, "learning_rate": 8e-05, "loss": 1.7869, "step": 4662 }, { "epoch": 0.7961413693016903, "grad_norm": 0.4813382029533386, "learning_rate": 8e-05, "loss": 1.4127, "step": 4663 }, { "epoch": 0.7963121051732969, "grad_norm": 0.4655681848526001, "learning_rate": 8e-05, "loss": 1.6221, "step": 4664 }, { "epoch": 0.7964828410449035, "grad_norm": 0.4973985552787781, "learning_rate": 8e-05, "loss": 1.7051, "step": 4665 }, { "epoch": 0.7966535769165102, "grad_norm": 0.4613925516605377, "learning_rate": 8e-05, "loss": 1.5515, "step": 4666 }, { "epoch": 0.7968243127881168, "grad_norm": 0.523714542388916, "learning_rate": 8e-05, "loss": 1.5983, "step": 4667 }, { "epoch": 0.7969950486597234, "grad_norm": 0.5079917907714844, "learning_rate": 8e-05, "loss": 1.7768, "step": 4668 }, { "epoch": 0.79716578453133, "grad_norm": 0.4796507656574249, "learning_rate": 8e-05, "loss": 1.6503, "step": 4669 }, { "epoch": 0.7973365204029367, "grad_norm": 0.4733453392982483, "learning_rate": 8e-05, "loss": 1.7116, "step": 4670 }, { "epoch": 0.7975072562745433, "grad_norm": 0.4634247422218323, "learning_rate": 8e-05, "loss": 1.5707, "step": 4671 }, { "epoch": 0.7976779921461499, "grad_norm": 0.45025375485420227, "learning_rate": 8e-05, "loss": 1.6299, "step": 4672 }, { "epoch": 0.7978487280177565, "grad_norm": 0.506456196308136, "learning_rate": 8e-05, "loss": 1.7236, "step": 4673 }, { "epoch": 0.7980194638893632, "grad_norm": 0.4864477217197418, "learning_rate": 8e-05, "loss": 1.6895, "step": 4674 }, { "epoch": 0.7981901997609698, "grad_norm": 0.5313494205474854, "learning_rate": 8e-05, "loss": 1.713, "step": 4675 }, { "epoch": 0.7983609356325764, "grad_norm": 0.4826977252960205, "learning_rate": 8e-05, "loss": 1.7074, "step": 4676 }, { "epoch": 0.798531671504183, "grad_norm": 0.4727814793586731, "learning_rate": 8e-05, "loss": 1.6469, "step": 4677 }, { "epoch": 0.7987024073757897, "grad_norm": 0.5257948040962219, "learning_rate": 8e-05, "loss": 1.7747, "step": 4678 }, { "epoch": 0.7988731432473963, "grad_norm": 0.4901696741580963, "learning_rate": 8e-05, "loss": 1.7759, "step": 4679 }, { "epoch": 0.7990438791190029, "grad_norm": 0.4615681767463684, "learning_rate": 8e-05, "loss": 1.6221, "step": 4680 }, { "epoch": 0.7992146149906095, "grad_norm": 0.48226502537727356, "learning_rate": 8e-05, "loss": 1.7848, "step": 4681 }, { "epoch": 0.7993853508622162, "grad_norm": 0.5439376831054688, "learning_rate": 8e-05, "loss": 1.7162, "step": 4682 }, { "epoch": 0.7995560867338227, "grad_norm": 0.4455244243144989, "learning_rate": 8e-05, "loss": 1.4666, "step": 4683 }, { "epoch": 0.7997268226054294, "grad_norm": 0.43863925337791443, "learning_rate": 8e-05, "loss": 1.5103, "step": 4684 }, { "epoch": 0.799897558477036, "grad_norm": 0.4825666546821594, "learning_rate": 8e-05, "loss": 1.838, "step": 4685 }, { "epoch": 0.8000682943486427, "grad_norm": 0.501164436340332, "learning_rate": 8e-05, "loss": 1.7206, "step": 4686 }, { "epoch": 0.8002390302202492, "grad_norm": 0.4698594808578491, "learning_rate": 8e-05, "loss": 1.6152, "step": 4687 }, { "epoch": 0.8004097660918559, "grad_norm": 0.4915659725666046, "learning_rate": 8e-05, "loss": 1.5636, "step": 4688 }, { "epoch": 0.8005805019634625, "grad_norm": 0.4841611385345459, "learning_rate": 8e-05, "loss": 1.6448, "step": 4689 }, { "epoch": 0.8007512378350692, "grad_norm": 0.47557783126831055, "learning_rate": 8e-05, "loss": 1.5657, "step": 4690 }, { "epoch": 0.8009219737066757, "grad_norm": 0.5011236071586609, "learning_rate": 8e-05, "loss": 1.4146, "step": 4691 }, { "epoch": 0.8010927095782824, "grad_norm": 0.481790691614151, "learning_rate": 8e-05, "loss": 1.7217, "step": 4692 }, { "epoch": 0.801263445449889, "grad_norm": 0.4761542081832886, "learning_rate": 8e-05, "loss": 1.74, "step": 4693 }, { "epoch": 0.8014341813214957, "grad_norm": 0.49251601099967957, "learning_rate": 8e-05, "loss": 1.8288, "step": 4694 }, { "epoch": 0.8016049171931022, "grad_norm": 0.48326700925827026, "learning_rate": 8e-05, "loss": 1.6125, "step": 4695 }, { "epoch": 0.8017756530647089, "grad_norm": 0.5432422161102295, "learning_rate": 8e-05, "loss": 1.7225, "step": 4696 }, { "epoch": 0.8019463889363155, "grad_norm": 0.47892269492149353, "learning_rate": 8e-05, "loss": 1.8079, "step": 4697 }, { "epoch": 0.8021171248079222, "grad_norm": 0.4836810827255249, "learning_rate": 8e-05, "loss": 1.7036, "step": 4698 }, { "epoch": 0.8022878606795287, "grad_norm": 0.46962976455688477, "learning_rate": 8e-05, "loss": 1.6136, "step": 4699 }, { "epoch": 0.8024585965511354, "grad_norm": 0.4885936677455902, "learning_rate": 8e-05, "loss": 1.7032, "step": 4700 }, { "epoch": 0.802629332422742, "grad_norm": 0.47808265686035156, "learning_rate": 8e-05, "loss": 1.5835, "step": 4701 }, { "epoch": 0.8028000682943487, "grad_norm": 0.46774715185165405, "learning_rate": 8e-05, "loss": 1.4232, "step": 4702 }, { "epoch": 0.8029708041659552, "grad_norm": 0.49926772713661194, "learning_rate": 8e-05, "loss": 1.6022, "step": 4703 }, { "epoch": 0.8031415400375619, "grad_norm": 0.4923655688762665, "learning_rate": 8e-05, "loss": 1.7798, "step": 4704 }, { "epoch": 0.8033122759091685, "grad_norm": 0.4941106140613556, "learning_rate": 8e-05, "loss": 1.7282, "step": 4705 }, { "epoch": 0.8034830117807752, "grad_norm": 0.49168649315834045, "learning_rate": 8e-05, "loss": 1.5436, "step": 4706 }, { "epoch": 0.8036537476523817, "grad_norm": 0.5532305240631104, "learning_rate": 8e-05, "loss": 2.1228, "step": 4707 }, { "epoch": 0.8038244835239884, "grad_norm": 0.5127628445625305, "learning_rate": 8e-05, "loss": 1.6609, "step": 4708 }, { "epoch": 0.803995219395595, "grad_norm": 0.48146873712539673, "learning_rate": 8e-05, "loss": 1.6917, "step": 4709 }, { "epoch": 0.8041659552672017, "grad_norm": 0.4954553544521332, "learning_rate": 8e-05, "loss": 1.7457, "step": 4710 }, { "epoch": 0.8043366911388082, "grad_norm": 0.46162575483322144, "learning_rate": 8e-05, "loss": 1.5515, "step": 4711 }, { "epoch": 0.8045074270104149, "grad_norm": 0.4973834156990051, "learning_rate": 8e-05, "loss": 1.627, "step": 4712 }, { "epoch": 0.8046781628820215, "grad_norm": 0.5363545417785645, "learning_rate": 8e-05, "loss": 1.8571, "step": 4713 }, { "epoch": 0.8048488987536281, "grad_norm": 0.48678067326545715, "learning_rate": 8e-05, "loss": 1.5249, "step": 4714 }, { "epoch": 0.8050196346252347, "grad_norm": 0.5866255760192871, "learning_rate": 8e-05, "loss": 1.9055, "step": 4715 }, { "epoch": 0.8051903704968414, "grad_norm": 0.5174505114555359, "learning_rate": 8e-05, "loss": 1.6089, "step": 4716 }, { "epoch": 0.805361106368448, "grad_norm": 0.48609158396720886, "learning_rate": 8e-05, "loss": 1.882, "step": 4717 }, { "epoch": 0.8055318422400546, "grad_norm": 0.46521618962287903, "learning_rate": 8e-05, "loss": 1.6682, "step": 4718 }, { "epoch": 0.8057025781116612, "grad_norm": 0.46991583704948425, "learning_rate": 8e-05, "loss": 1.801, "step": 4719 }, { "epoch": 0.8058733139832679, "grad_norm": 0.5029054880142212, "learning_rate": 8e-05, "loss": 1.6818, "step": 4720 }, { "epoch": 0.8060440498548745, "grad_norm": 0.4535408616065979, "learning_rate": 8e-05, "loss": 1.5123, "step": 4721 }, { "epoch": 0.8062147857264811, "grad_norm": 0.4998134672641754, "learning_rate": 8e-05, "loss": 1.798, "step": 4722 }, { "epoch": 0.8063855215980877, "grad_norm": 0.5024643540382385, "learning_rate": 8e-05, "loss": 1.5696, "step": 4723 }, { "epoch": 0.8065562574696944, "grad_norm": 0.5006137490272522, "learning_rate": 8e-05, "loss": 1.8469, "step": 4724 }, { "epoch": 0.806726993341301, "grad_norm": 0.49769702553749084, "learning_rate": 8e-05, "loss": 1.5947, "step": 4725 }, { "epoch": 0.8068977292129076, "grad_norm": 0.5043945908546448, "learning_rate": 8e-05, "loss": 1.7665, "step": 4726 }, { "epoch": 0.8070684650845142, "grad_norm": 0.49602657556533813, "learning_rate": 8e-05, "loss": 1.5668, "step": 4727 }, { "epoch": 0.8072392009561209, "grad_norm": 0.4945175051689148, "learning_rate": 8e-05, "loss": 1.5124, "step": 4728 }, { "epoch": 0.8074099368277275, "grad_norm": 0.4931887090206146, "learning_rate": 8e-05, "loss": 1.7333, "step": 4729 }, { "epoch": 0.8075806726993341, "grad_norm": 0.4743255078792572, "learning_rate": 8e-05, "loss": 1.5809, "step": 4730 }, { "epoch": 0.8077514085709407, "grad_norm": 0.4903728663921356, "learning_rate": 8e-05, "loss": 1.6199, "step": 4731 }, { "epoch": 0.8079221444425474, "grad_norm": 0.5284668207168579, "learning_rate": 8e-05, "loss": 1.6311, "step": 4732 }, { "epoch": 0.808092880314154, "grad_norm": 0.4992787539958954, "learning_rate": 8e-05, "loss": 1.8017, "step": 4733 }, { "epoch": 0.8082636161857606, "grad_norm": 0.4612729251384735, "learning_rate": 8e-05, "loss": 1.7068, "step": 4734 }, { "epoch": 0.8084343520573672, "grad_norm": 0.4657500684261322, "learning_rate": 8e-05, "loss": 1.595, "step": 4735 }, { "epoch": 0.8086050879289739, "grad_norm": 0.4750165045261383, "learning_rate": 8e-05, "loss": 1.8003, "step": 4736 }, { "epoch": 0.8087758238005806, "grad_norm": 0.46836674213409424, "learning_rate": 8e-05, "loss": 1.5932, "step": 4737 }, { "epoch": 0.8089465596721871, "grad_norm": 0.4667136073112488, "learning_rate": 8e-05, "loss": 1.7084, "step": 4738 }, { "epoch": 0.8091172955437937, "grad_norm": 0.5206020474433899, "learning_rate": 8e-05, "loss": 1.7348, "step": 4739 }, { "epoch": 0.8092880314154004, "grad_norm": 0.5037229061126709, "learning_rate": 8e-05, "loss": 1.6315, "step": 4740 }, { "epoch": 0.809458767287007, "grad_norm": 0.48651283979415894, "learning_rate": 8e-05, "loss": 1.904, "step": 4741 }, { "epoch": 0.8096295031586136, "grad_norm": 0.4725748598575592, "learning_rate": 8e-05, "loss": 1.7487, "step": 4742 }, { "epoch": 0.8098002390302202, "grad_norm": 0.48934221267700195, "learning_rate": 8e-05, "loss": 1.7172, "step": 4743 }, { "epoch": 0.8099709749018269, "grad_norm": 0.5221983194351196, "learning_rate": 8e-05, "loss": 1.7607, "step": 4744 }, { "epoch": 0.8101417107734334, "grad_norm": 0.46845632791519165, "learning_rate": 8e-05, "loss": 1.5323, "step": 4745 }, { "epoch": 0.8103124466450401, "grad_norm": 0.47632792592048645, "learning_rate": 8e-05, "loss": 1.7364, "step": 4746 }, { "epoch": 0.8104831825166467, "grad_norm": 0.45491722226142883, "learning_rate": 8e-05, "loss": 1.6612, "step": 4747 }, { "epoch": 0.8106539183882534, "grad_norm": 0.4609295427799225, "learning_rate": 8e-05, "loss": 1.4243, "step": 4748 }, { "epoch": 0.81082465425986, "grad_norm": 0.4727425277233124, "learning_rate": 8e-05, "loss": 1.7164, "step": 4749 }, { "epoch": 0.8109953901314666, "grad_norm": 0.5277508497238159, "learning_rate": 8e-05, "loss": 1.9039, "step": 4750 }, { "epoch": 0.8111661260030733, "grad_norm": 0.5020326972007751, "learning_rate": 8e-05, "loss": 1.6662, "step": 4751 }, { "epoch": 0.8113368618746799, "grad_norm": 0.44550541043281555, "learning_rate": 8e-05, "loss": 1.4408, "step": 4752 }, { "epoch": 0.8115075977462864, "grad_norm": 0.4893970489501953, "learning_rate": 8e-05, "loss": 1.6359, "step": 4753 }, { "epoch": 0.8116783336178931, "grad_norm": 0.4860396385192871, "learning_rate": 8e-05, "loss": 1.4583, "step": 4754 }, { "epoch": 0.8118490694894998, "grad_norm": 0.4938044548034668, "learning_rate": 8e-05, "loss": 1.7344, "step": 4755 }, { "epoch": 0.8120198053611064, "grad_norm": 0.47242841124534607, "learning_rate": 8e-05, "loss": 1.6927, "step": 4756 }, { "epoch": 0.812190541232713, "grad_norm": 0.489854097366333, "learning_rate": 8e-05, "loss": 1.6972, "step": 4757 }, { "epoch": 0.8123612771043196, "grad_norm": 0.49132978916168213, "learning_rate": 8e-05, "loss": 1.7461, "step": 4758 }, { "epoch": 0.8125320129759263, "grad_norm": 0.4855135381221771, "learning_rate": 8e-05, "loss": 1.5951, "step": 4759 }, { "epoch": 0.8127027488475329, "grad_norm": 0.48348793387413025, "learning_rate": 8e-05, "loss": 1.6548, "step": 4760 }, { "epoch": 0.8128734847191394, "grad_norm": 0.5003876686096191, "learning_rate": 8e-05, "loss": 1.5846, "step": 4761 }, { "epoch": 0.8130442205907461, "grad_norm": 0.5016365051269531, "learning_rate": 8e-05, "loss": 1.6744, "step": 4762 }, { "epoch": 0.8132149564623528, "grad_norm": 0.4899667501449585, "learning_rate": 8e-05, "loss": 1.6711, "step": 4763 }, { "epoch": 0.8133856923339594, "grad_norm": 0.49521100521087646, "learning_rate": 8e-05, "loss": 1.6778, "step": 4764 }, { "epoch": 0.813556428205566, "grad_norm": 0.4757125675678253, "learning_rate": 8e-05, "loss": 1.6255, "step": 4765 }, { "epoch": 0.8137271640771726, "grad_norm": 0.4833199679851532, "learning_rate": 8e-05, "loss": 1.7279, "step": 4766 }, { "epoch": 0.8138978999487793, "grad_norm": 0.4537424147129059, "learning_rate": 8e-05, "loss": 1.5334, "step": 4767 }, { "epoch": 0.8140686358203859, "grad_norm": 0.49411338567733765, "learning_rate": 8e-05, "loss": 1.6666, "step": 4768 }, { "epoch": 0.8142393716919925, "grad_norm": 0.4719898998737335, "learning_rate": 8e-05, "loss": 1.5023, "step": 4769 }, { "epoch": 0.8144101075635991, "grad_norm": 0.510901153087616, "learning_rate": 8e-05, "loss": 1.7693, "step": 4770 }, { "epoch": 0.8145808434352058, "grad_norm": 0.4854840338230133, "learning_rate": 8e-05, "loss": 1.6681, "step": 4771 }, { "epoch": 0.8147515793068124, "grad_norm": 0.47815752029418945, "learning_rate": 8e-05, "loss": 1.6667, "step": 4772 }, { "epoch": 0.814922315178419, "grad_norm": 0.4911339581012726, "learning_rate": 8e-05, "loss": 1.7637, "step": 4773 }, { "epoch": 0.8150930510500256, "grad_norm": 0.4734748601913452, "learning_rate": 8e-05, "loss": 1.7166, "step": 4774 }, { "epoch": 0.8152637869216323, "grad_norm": 0.4640292525291443, "learning_rate": 8e-05, "loss": 1.7525, "step": 4775 }, { "epoch": 0.8154345227932389, "grad_norm": 0.4809292256832123, "learning_rate": 8e-05, "loss": 1.5574, "step": 4776 }, { "epoch": 0.8156052586648455, "grad_norm": 0.4963778257369995, "learning_rate": 8e-05, "loss": 1.7975, "step": 4777 }, { "epoch": 0.8157759945364521, "grad_norm": 0.48044902086257935, "learning_rate": 8e-05, "loss": 1.8015, "step": 4778 }, { "epoch": 0.8159467304080588, "grad_norm": 0.49667230248451233, "learning_rate": 8e-05, "loss": 1.8126, "step": 4779 }, { "epoch": 0.8161174662796653, "grad_norm": 0.491493284702301, "learning_rate": 8e-05, "loss": 1.6833, "step": 4780 }, { "epoch": 0.816288202151272, "grad_norm": 0.4918260872364044, "learning_rate": 8e-05, "loss": 1.7403, "step": 4781 }, { "epoch": 0.8164589380228786, "grad_norm": 0.4893765449523926, "learning_rate": 8e-05, "loss": 1.4984, "step": 4782 }, { "epoch": 0.8166296738944853, "grad_norm": 0.5029245018959045, "learning_rate": 8e-05, "loss": 1.7149, "step": 4783 }, { "epoch": 0.8168004097660918, "grad_norm": 0.4914396405220032, "learning_rate": 8e-05, "loss": 1.7244, "step": 4784 }, { "epoch": 0.8169711456376985, "grad_norm": 0.49045735597610474, "learning_rate": 8e-05, "loss": 1.7452, "step": 4785 }, { "epoch": 0.8171418815093051, "grad_norm": 0.49825796484947205, "learning_rate": 8e-05, "loss": 1.5286, "step": 4786 }, { "epoch": 0.8173126173809118, "grad_norm": 0.485517293214798, "learning_rate": 8e-05, "loss": 1.5496, "step": 4787 }, { "epoch": 0.8174833532525183, "grad_norm": 0.5424889326095581, "learning_rate": 8e-05, "loss": 1.7177, "step": 4788 }, { "epoch": 0.817654089124125, "grad_norm": 0.4855060577392578, "learning_rate": 8e-05, "loss": 1.6516, "step": 4789 }, { "epoch": 0.8178248249957316, "grad_norm": 0.4868861734867096, "learning_rate": 8e-05, "loss": 1.7731, "step": 4790 }, { "epoch": 0.8179955608673383, "grad_norm": 0.4789245128631592, "learning_rate": 8e-05, "loss": 1.7242, "step": 4791 }, { "epoch": 0.8181662967389448, "grad_norm": 0.46541866660118103, "learning_rate": 8e-05, "loss": 1.6, "step": 4792 }, { "epoch": 0.8183370326105515, "grad_norm": 0.49138760566711426, "learning_rate": 8e-05, "loss": 1.801, "step": 4793 }, { "epoch": 0.8185077684821581, "grad_norm": 0.49702179431915283, "learning_rate": 8e-05, "loss": 1.7448, "step": 4794 }, { "epoch": 0.8186785043537648, "grad_norm": 0.507452130317688, "learning_rate": 8e-05, "loss": 1.8534, "step": 4795 }, { "epoch": 0.8188492402253713, "grad_norm": 0.476664662361145, "learning_rate": 8e-05, "loss": 1.5712, "step": 4796 }, { "epoch": 0.819019976096978, "grad_norm": 0.44364815950393677, "learning_rate": 8e-05, "loss": 1.3938, "step": 4797 }, { "epoch": 0.8191907119685846, "grad_norm": 0.44339120388031006, "learning_rate": 8e-05, "loss": 1.5239, "step": 4798 }, { "epoch": 0.8193614478401913, "grad_norm": 0.5162563323974609, "learning_rate": 8e-05, "loss": 1.8059, "step": 4799 }, { "epoch": 0.8195321837117978, "grad_norm": 0.47735247015953064, "learning_rate": 8e-05, "loss": 1.6149, "step": 4800 }, { "epoch": 0.8197029195834045, "grad_norm": 0.4970512092113495, "learning_rate": 8e-05, "loss": 1.5981, "step": 4801 }, { "epoch": 0.8198736554550111, "grad_norm": 0.4618983268737793, "learning_rate": 8e-05, "loss": 1.3992, "step": 4802 }, { "epoch": 0.8200443913266178, "grad_norm": 0.4703265428543091, "learning_rate": 8e-05, "loss": 1.7087, "step": 4803 }, { "epoch": 0.8202151271982243, "grad_norm": 0.4538809359073639, "learning_rate": 8e-05, "loss": 1.6002, "step": 4804 }, { "epoch": 0.820385863069831, "grad_norm": 0.4962475299835205, "learning_rate": 8e-05, "loss": 1.6006, "step": 4805 }, { "epoch": 0.8205565989414376, "grad_norm": 0.4794214069843292, "learning_rate": 8e-05, "loss": 1.5924, "step": 4806 }, { "epoch": 0.8207273348130443, "grad_norm": 0.46506738662719727, "learning_rate": 8e-05, "loss": 1.7116, "step": 4807 }, { "epoch": 0.8208980706846508, "grad_norm": 0.48868265748023987, "learning_rate": 8e-05, "loss": 1.6601, "step": 4808 }, { "epoch": 0.8210688065562575, "grad_norm": 0.45927396416664124, "learning_rate": 8e-05, "loss": 1.625, "step": 4809 }, { "epoch": 0.8212395424278641, "grad_norm": 0.46479156613349915, "learning_rate": 8e-05, "loss": 1.6464, "step": 4810 }, { "epoch": 0.8214102782994707, "grad_norm": 0.4954288899898529, "learning_rate": 8e-05, "loss": 1.7411, "step": 4811 }, { "epoch": 0.8215810141710773, "grad_norm": 0.5209667086601257, "learning_rate": 8e-05, "loss": 1.9462, "step": 4812 }, { "epoch": 0.821751750042684, "grad_norm": 0.4713740646839142, "learning_rate": 8e-05, "loss": 1.5417, "step": 4813 }, { "epoch": 0.8219224859142906, "grad_norm": 0.4413336217403412, "learning_rate": 8e-05, "loss": 1.3741, "step": 4814 }, { "epoch": 0.8220932217858972, "grad_norm": 0.4786505103111267, "learning_rate": 8e-05, "loss": 1.6304, "step": 4815 }, { "epoch": 0.8222639576575038, "grad_norm": 0.4640769958496094, "learning_rate": 8e-05, "loss": 1.4782, "step": 4816 }, { "epoch": 0.8224346935291105, "grad_norm": 0.4851781725883484, "learning_rate": 8e-05, "loss": 1.6803, "step": 4817 }, { "epoch": 0.8226054294007171, "grad_norm": 0.4782380163669586, "learning_rate": 8e-05, "loss": 1.6684, "step": 4818 }, { "epoch": 0.8227761652723237, "grad_norm": 0.516416072845459, "learning_rate": 8e-05, "loss": 1.7823, "step": 4819 }, { "epoch": 0.8229469011439303, "grad_norm": 0.4987551271915436, "learning_rate": 8e-05, "loss": 1.6156, "step": 4820 }, { "epoch": 0.823117637015537, "grad_norm": 0.5381556153297424, "learning_rate": 8e-05, "loss": 1.7753, "step": 4821 }, { "epoch": 0.8232883728871436, "grad_norm": 0.4758678674697876, "learning_rate": 8e-05, "loss": 1.7583, "step": 4822 }, { "epoch": 0.8234591087587502, "grad_norm": 0.48394325375556946, "learning_rate": 8e-05, "loss": 1.6284, "step": 4823 }, { "epoch": 0.8236298446303568, "grad_norm": 0.5057984590530396, "learning_rate": 8e-05, "loss": 1.5637, "step": 4824 }, { "epoch": 0.8238005805019635, "grad_norm": 0.517059326171875, "learning_rate": 8e-05, "loss": 1.6413, "step": 4825 }, { "epoch": 0.8239713163735701, "grad_norm": 0.44807168841362, "learning_rate": 8e-05, "loss": 1.5698, "step": 4826 }, { "epoch": 0.8241420522451767, "grad_norm": 0.47023525834083557, "learning_rate": 8e-05, "loss": 1.6288, "step": 4827 }, { "epoch": 0.8243127881167833, "grad_norm": 0.51022869348526, "learning_rate": 8e-05, "loss": 1.7003, "step": 4828 }, { "epoch": 0.82448352398839, "grad_norm": 0.48965656757354736, "learning_rate": 8e-05, "loss": 1.6637, "step": 4829 }, { "epoch": 0.8246542598599966, "grad_norm": 0.5072157979011536, "learning_rate": 8e-05, "loss": 1.7372, "step": 4830 }, { "epoch": 0.8248249957316032, "grad_norm": 0.49405378103256226, "learning_rate": 8e-05, "loss": 1.7023, "step": 4831 }, { "epoch": 0.8249957316032098, "grad_norm": 0.4785628914833069, "learning_rate": 8e-05, "loss": 1.5974, "step": 4832 }, { "epoch": 0.8251664674748165, "grad_norm": 0.46570533514022827, "learning_rate": 8e-05, "loss": 1.6671, "step": 4833 }, { "epoch": 0.8253372033464231, "grad_norm": 0.47187021374702454, "learning_rate": 8e-05, "loss": 1.5934, "step": 4834 }, { "epoch": 0.8255079392180297, "grad_norm": 0.5237879157066345, "learning_rate": 8e-05, "loss": 1.7758, "step": 4835 }, { "epoch": 0.8256786750896363, "grad_norm": 0.5042793154716492, "learning_rate": 8e-05, "loss": 1.8929, "step": 4836 }, { "epoch": 0.825849410961243, "grad_norm": 0.4593603312969208, "learning_rate": 8e-05, "loss": 1.7903, "step": 4837 }, { "epoch": 0.8260201468328496, "grad_norm": 0.4753436744213104, "learning_rate": 8e-05, "loss": 1.5391, "step": 4838 }, { "epoch": 0.8261908827044562, "grad_norm": 0.4627128839492798, "learning_rate": 8e-05, "loss": 1.6585, "step": 4839 }, { "epoch": 0.8263616185760628, "grad_norm": 0.45129334926605225, "learning_rate": 8e-05, "loss": 1.5828, "step": 4840 }, { "epoch": 0.8265323544476695, "grad_norm": 0.48552751541137695, "learning_rate": 8e-05, "loss": 1.74, "step": 4841 }, { "epoch": 0.8267030903192761, "grad_norm": 0.4900718629360199, "learning_rate": 8e-05, "loss": 1.9673, "step": 4842 }, { "epoch": 0.8268738261908827, "grad_norm": 0.4693829119205475, "learning_rate": 8e-05, "loss": 1.6342, "step": 4843 }, { "epoch": 0.8270445620624893, "grad_norm": 0.5044471621513367, "learning_rate": 8e-05, "loss": 1.5638, "step": 4844 }, { "epoch": 0.827215297934096, "grad_norm": 0.4768293499946594, "learning_rate": 8e-05, "loss": 1.5914, "step": 4845 }, { "epoch": 0.8273860338057025, "grad_norm": 0.48595166206359863, "learning_rate": 8e-05, "loss": 1.678, "step": 4846 }, { "epoch": 0.8275567696773092, "grad_norm": 0.45785945653915405, "learning_rate": 8e-05, "loss": 1.5645, "step": 4847 }, { "epoch": 0.8277275055489158, "grad_norm": 0.5038360357284546, "learning_rate": 8e-05, "loss": 1.8733, "step": 4848 }, { "epoch": 0.8278982414205225, "grad_norm": 0.5084542632102966, "learning_rate": 8e-05, "loss": 1.7305, "step": 4849 }, { "epoch": 0.828068977292129, "grad_norm": 0.5015466213226318, "learning_rate": 8e-05, "loss": 1.7168, "step": 4850 }, { "epoch": 0.8282397131637357, "grad_norm": 0.5173287987709045, "learning_rate": 8e-05, "loss": 1.8122, "step": 4851 }, { "epoch": 0.8284104490353423, "grad_norm": 0.4900153875350952, "learning_rate": 8e-05, "loss": 1.7503, "step": 4852 }, { "epoch": 0.828581184906949, "grad_norm": 0.5117270946502686, "learning_rate": 8e-05, "loss": 1.6848, "step": 4853 }, { "epoch": 0.8287519207785555, "grad_norm": 0.48045429587364197, "learning_rate": 8e-05, "loss": 1.6467, "step": 4854 }, { "epoch": 0.8289226566501622, "grad_norm": 0.48186108469963074, "learning_rate": 8e-05, "loss": 1.5507, "step": 4855 }, { "epoch": 0.8290933925217688, "grad_norm": 0.4751034677028656, "learning_rate": 8e-05, "loss": 1.5468, "step": 4856 }, { "epoch": 0.8292641283933755, "grad_norm": 0.46612533926963806, "learning_rate": 8e-05, "loss": 1.6853, "step": 4857 }, { "epoch": 0.829434864264982, "grad_norm": 0.45829910039901733, "learning_rate": 8e-05, "loss": 1.6369, "step": 4858 }, { "epoch": 0.8296056001365887, "grad_norm": 0.5280304551124573, "learning_rate": 8e-05, "loss": 1.7704, "step": 4859 }, { "epoch": 0.8297763360081953, "grad_norm": 0.4920056462287903, "learning_rate": 8e-05, "loss": 1.7851, "step": 4860 }, { "epoch": 0.829947071879802, "grad_norm": 0.5014638900756836, "learning_rate": 8e-05, "loss": 1.7903, "step": 4861 }, { "epoch": 0.8301178077514085, "grad_norm": 0.4798939824104309, "learning_rate": 8e-05, "loss": 1.6023, "step": 4862 }, { "epoch": 0.8302885436230152, "grad_norm": 0.5455090403556824, "learning_rate": 8e-05, "loss": 1.7411, "step": 4863 }, { "epoch": 0.8304592794946218, "grad_norm": 0.4669041633605957, "learning_rate": 8e-05, "loss": 1.4965, "step": 4864 }, { "epoch": 0.8306300153662285, "grad_norm": 0.4771013855934143, "learning_rate": 8e-05, "loss": 1.759, "step": 4865 }, { "epoch": 0.830800751237835, "grad_norm": 0.5194916129112244, "learning_rate": 8e-05, "loss": 1.7185, "step": 4866 }, { "epoch": 0.8309714871094417, "grad_norm": 0.4763810932636261, "learning_rate": 8e-05, "loss": 1.5796, "step": 4867 }, { "epoch": 0.8311422229810483, "grad_norm": 0.4600338935852051, "learning_rate": 8e-05, "loss": 1.4985, "step": 4868 }, { "epoch": 0.831312958852655, "grad_norm": 0.4704911708831787, "learning_rate": 8e-05, "loss": 1.5001, "step": 4869 }, { "epoch": 0.8314836947242615, "grad_norm": 0.4580978751182556, "learning_rate": 8e-05, "loss": 1.64, "step": 4870 }, { "epoch": 0.8316544305958682, "grad_norm": 0.4753847122192383, "learning_rate": 8e-05, "loss": 1.6123, "step": 4871 }, { "epoch": 0.8318251664674748, "grad_norm": 0.48762157559394836, "learning_rate": 8e-05, "loss": 1.5712, "step": 4872 }, { "epoch": 0.8319959023390815, "grad_norm": 0.46951794624328613, "learning_rate": 8e-05, "loss": 1.6441, "step": 4873 }, { "epoch": 0.832166638210688, "grad_norm": 0.47635993361473083, "learning_rate": 8e-05, "loss": 1.6543, "step": 4874 }, { "epoch": 0.8323373740822947, "grad_norm": 0.4844836890697479, "learning_rate": 8e-05, "loss": 1.8661, "step": 4875 }, { "epoch": 0.8325081099539013, "grad_norm": 0.5370471477508545, "learning_rate": 8e-05, "loss": 1.7085, "step": 4876 }, { "epoch": 0.8326788458255079, "grad_norm": 0.47938820719718933, "learning_rate": 8e-05, "loss": 1.603, "step": 4877 }, { "epoch": 0.8328495816971145, "grad_norm": 0.4743550419807434, "learning_rate": 8e-05, "loss": 1.7515, "step": 4878 }, { "epoch": 0.8330203175687212, "grad_norm": 0.5965827703475952, "learning_rate": 8e-05, "loss": 2.0094, "step": 4879 }, { "epoch": 0.8331910534403278, "grad_norm": 0.49633949995040894, "learning_rate": 8e-05, "loss": 1.6562, "step": 4880 }, { "epoch": 0.8333617893119344, "grad_norm": 0.5049113035202026, "learning_rate": 8e-05, "loss": 1.7823, "step": 4881 }, { "epoch": 0.833532525183541, "grad_norm": 0.6846257448196411, "learning_rate": 8e-05, "loss": 1.4664, "step": 4882 }, { "epoch": 0.8337032610551477, "grad_norm": 0.468095064163208, "learning_rate": 8e-05, "loss": 1.7039, "step": 4883 }, { "epoch": 0.8338739969267543, "grad_norm": 0.48344743251800537, "learning_rate": 8e-05, "loss": 1.6347, "step": 4884 }, { "epoch": 0.8340447327983609, "grad_norm": 0.4972993731498718, "learning_rate": 8e-05, "loss": 1.6764, "step": 4885 }, { "epoch": 0.8342154686699675, "grad_norm": 0.49302586913108826, "learning_rate": 8e-05, "loss": 1.7466, "step": 4886 }, { "epoch": 0.8343862045415742, "grad_norm": 0.45662564039230347, "learning_rate": 8e-05, "loss": 1.6179, "step": 4887 }, { "epoch": 0.8345569404131808, "grad_norm": 0.5026736259460449, "learning_rate": 8e-05, "loss": 1.8175, "step": 4888 }, { "epoch": 0.8347276762847874, "grad_norm": 0.45027032494544983, "learning_rate": 8e-05, "loss": 1.3246, "step": 4889 }, { "epoch": 0.834898412156394, "grad_norm": 0.4988824427127838, "learning_rate": 8e-05, "loss": 1.7086, "step": 4890 }, { "epoch": 0.8350691480280007, "grad_norm": 0.48561784625053406, "learning_rate": 8e-05, "loss": 1.8424, "step": 4891 }, { "epoch": 0.8352398838996073, "grad_norm": 0.513272225856781, "learning_rate": 8e-05, "loss": 1.7963, "step": 4892 }, { "epoch": 0.8354106197712139, "grad_norm": 0.49061012268066406, "learning_rate": 8e-05, "loss": 1.5712, "step": 4893 }, { "epoch": 0.8355813556428205, "grad_norm": 0.4728465974330902, "learning_rate": 8e-05, "loss": 1.6912, "step": 4894 }, { "epoch": 0.8357520915144272, "grad_norm": 0.4934762120246887, "learning_rate": 8e-05, "loss": 1.678, "step": 4895 }, { "epoch": 0.8359228273860339, "grad_norm": 0.49866169691085815, "learning_rate": 8e-05, "loss": 1.6047, "step": 4896 }, { "epoch": 0.8360935632576404, "grad_norm": 0.48665651679039, "learning_rate": 8e-05, "loss": 1.7397, "step": 4897 }, { "epoch": 0.836264299129247, "grad_norm": 0.4445750415325165, "learning_rate": 8e-05, "loss": 1.5814, "step": 4898 }, { "epoch": 0.8364350350008537, "grad_norm": 0.4642829895019531, "learning_rate": 8e-05, "loss": 1.666, "step": 4899 }, { "epoch": 0.8366057708724604, "grad_norm": 0.48508667945861816, "learning_rate": 8e-05, "loss": 1.431, "step": 4900 }, { "epoch": 0.8367765067440669, "grad_norm": 0.5086689591407776, "learning_rate": 8e-05, "loss": 1.5838, "step": 4901 }, { "epoch": 0.8369472426156735, "grad_norm": 0.5069466829299927, "learning_rate": 8e-05, "loss": 1.8416, "step": 4902 }, { "epoch": 0.8371179784872802, "grad_norm": 0.47102469205856323, "learning_rate": 8e-05, "loss": 1.7087, "step": 4903 }, { "epoch": 0.8372887143588869, "grad_norm": 0.4550751745700836, "learning_rate": 8e-05, "loss": 1.6716, "step": 4904 }, { "epoch": 0.8374594502304934, "grad_norm": 0.46955397725105286, "learning_rate": 8e-05, "loss": 1.6954, "step": 4905 }, { "epoch": 0.8376301861021, "grad_norm": 0.4571993947029114, "learning_rate": 8e-05, "loss": 1.6638, "step": 4906 }, { "epoch": 0.8378009219737067, "grad_norm": 0.5020971298217773, "learning_rate": 8e-05, "loss": 1.5887, "step": 4907 }, { "epoch": 0.8379716578453134, "grad_norm": 0.495318740606308, "learning_rate": 8e-05, "loss": 1.6942, "step": 4908 }, { "epoch": 0.8381423937169199, "grad_norm": 0.48326635360717773, "learning_rate": 8e-05, "loss": 1.794, "step": 4909 }, { "epoch": 0.8383131295885266, "grad_norm": 0.47297486662864685, "learning_rate": 8e-05, "loss": 1.4651, "step": 4910 }, { "epoch": 0.8384838654601332, "grad_norm": 0.47969484329223633, "learning_rate": 8e-05, "loss": 1.7227, "step": 4911 }, { "epoch": 0.8386546013317397, "grad_norm": 0.4658058285713196, "learning_rate": 8e-05, "loss": 1.7257, "step": 4912 }, { "epoch": 0.8388253372033464, "grad_norm": 0.5089539289474487, "learning_rate": 8e-05, "loss": 1.7448, "step": 4913 }, { "epoch": 0.838996073074953, "grad_norm": 0.4780142903327942, "learning_rate": 8e-05, "loss": 1.5546, "step": 4914 }, { "epoch": 0.8391668089465597, "grad_norm": 0.5356048345565796, "learning_rate": 8e-05, "loss": 1.7806, "step": 4915 }, { "epoch": 0.8393375448181662, "grad_norm": 0.49335384368896484, "learning_rate": 8e-05, "loss": 1.7185, "step": 4916 }, { "epoch": 0.8395082806897729, "grad_norm": 0.5365769267082214, "learning_rate": 8e-05, "loss": 1.7791, "step": 4917 }, { "epoch": 0.8396790165613796, "grad_norm": 0.5013848543167114, "learning_rate": 8e-05, "loss": 1.655, "step": 4918 }, { "epoch": 0.8398497524329862, "grad_norm": 0.48643603920936584, "learning_rate": 8e-05, "loss": 1.557, "step": 4919 }, { "epoch": 0.8400204883045927, "grad_norm": 0.46798110008239746, "learning_rate": 8e-05, "loss": 1.64, "step": 4920 }, { "epoch": 0.8401912241761994, "grad_norm": 0.5285088419914246, "learning_rate": 8e-05, "loss": 1.7168, "step": 4921 }, { "epoch": 0.8403619600478061, "grad_norm": 0.514209508895874, "learning_rate": 8e-05, "loss": 1.9429, "step": 4922 }, { "epoch": 0.8405326959194127, "grad_norm": 0.5273070335388184, "learning_rate": 8e-05, "loss": 1.9625, "step": 4923 }, { "epoch": 0.8407034317910193, "grad_norm": 0.5062464475631714, "learning_rate": 8e-05, "loss": 1.6106, "step": 4924 }, { "epoch": 0.8408741676626259, "grad_norm": 0.5061963200569153, "learning_rate": 8e-05, "loss": 1.8305, "step": 4925 }, { "epoch": 0.8410449035342326, "grad_norm": 0.49464407563209534, "learning_rate": 8e-05, "loss": 1.8211, "step": 4926 }, { "epoch": 0.8412156394058392, "grad_norm": 0.4926972985267639, "learning_rate": 8e-05, "loss": 1.8153, "step": 4927 }, { "epoch": 0.8413863752774458, "grad_norm": 0.5230970978736877, "learning_rate": 8e-05, "loss": 1.5779, "step": 4928 }, { "epoch": 0.8415571111490524, "grad_norm": 0.4812949001789093, "learning_rate": 8e-05, "loss": 1.7766, "step": 4929 }, { "epoch": 0.8417278470206591, "grad_norm": 0.590523898601532, "learning_rate": 8e-05, "loss": 2.0687, "step": 4930 }, { "epoch": 0.8418985828922657, "grad_norm": 0.4647871255874634, "learning_rate": 8e-05, "loss": 1.7014, "step": 4931 }, { "epoch": 0.8420693187638723, "grad_norm": 0.485939085483551, "learning_rate": 8e-05, "loss": 1.7754, "step": 4932 }, { "epoch": 0.8422400546354789, "grad_norm": 0.50428307056427, "learning_rate": 8e-05, "loss": 1.8418, "step": 4933 }, { "epoch": 0.8424107905070856, "grad_norm": 0.5203037858009338, "learning_rate": 8e-05, "loss": 1.7831, "step": 4934 }, { "epoch": 0.8425815263786922, "grad_norm": 0.4755784273147583, "learning_rate": 8e-05, "loss": 1.6326, "step": 4935 }, { "epoch": 0.8427522622502988, "grad_norm": 0.45392686128616333, "learning_rate": 8e-05, "loss": 1.4025, "step": 4936 }, { "epoch": 0.8429229981219054, "grad_norm": 0.4829670786857605, "learning_rate": 8e-05, "loss": 1.6076, "step": 4937 }, { "epoch": 0.8430937339935121, "grad_norm": 0.4993061423301697, "learning_rate": 8e-05, "loss": 1.6822, "step": 4938 }, { "epoch": 0.8432644698651187, "grad_norm": 0.5604971647262573, "learning_rate": 8e-05, "loss": 1.7515, "step": 4939 }, { "epoch": 0.8434352057367253, "grad_norm": 0.4835330545902252, "learning_rate": 8e-05, "loss": 1.4409, "step": 4940 }, { "epoch": 0.8436059416083319, "grad_norm": 0.5161312222480774, "learning_rate": 8e-05, "loss": 1.6905, "step": 4941 }, { "epoch": 0.8437766774799386, "grad_norm": 0.46470212936401367, "learning_rate": 8e-05, "loss": 1.6888, "step": 4942 }, { "epoch": 0.8439474133515451, "grad_norm": 0.4646231234073639, "learning_rate": 8e-05, "loss": 1.5422, "step": 4943 }, { "epoch": 0.8441181492231518, "grad_norm": 0.4857289791107178, "learning_rate": 8e-05, "loss": 1.58, "step": 4944 }, { "epoch": 0.8442888850947584, "grad_norm": 0.4611664414405823, "learning_rate": 8e-05, "loss": 1.5305, "step": 4945 }, { "epoch": 0.8444596209663651, "grad_norm": 0.5073329210281372, "learning_rate": 8e-05, "loss": 1.673, "step": 4946 }, { "epoch": 0.8446303568379716, "grad_norm": 0.49827417731285095, "learning_rate": 8e-05, "loss": 1.8038, "step": 4947 }, { "epoch": 0.8448010927095783, "grad_norm": 0.46276387572288513, "learning_rate": 8e-05, "loss": 1.4993, "step": 4948 }, { "epoch": 0.8449718285811849, "grad_norm": 0.4940231740474701, "learning_rate": 8e-05, "loss": 1.6919, "step": 4949 }, { "epoch": 0.8451425644527916, "grad_norm": 0.4908299446105957, "learning_rate": 8e-05, "loss": 1.7206, "step": 4950 }, { "epoch": 0.8453133003243981, "grad_norm": 0.5253572463989258, "learning_rate": 8e-05, "loss": 1.7825, "step": 4951 }, { "epoch": 0.8454840361960048, "grad_norm": 0.4780232906341553, "learning_rate": 8e-05, "loss": 1.7604, "step": 4952 }, { "epoch": 0.8456547720676114, "grad_norm": 0.4947189688682556, "learning_rate": 8e-05, "loss": 1.695, "step": 4953 }, { "epoch": 0.8458255079392181, "grad_norm": 0.46305879950523376, "learning_rate": 8e-05, "loss": 1.4971, "step": 4954 }, { "epoch": 0.8459962438108246, "grad_norm": 0.47366082668304443, "learning_rate": 8e-05, "loss": 1.6969, "step": 4955 }, { "epoch": 0.8461669796824313, "grad_norm": 0.4728732407093048, "learning_rate": 8e-05, "loss": 1.5962, "step": 4956 }, { "epoch": 0.8463377155540379, "grad_norm": 0.5032863616943359, "learning_rate": 8e-05, "loss": 1.7912, "step": 4957 }, { "epoch": 0.8465084514256446, "grad_norm": 0.47103142738342285, "learning_rate": 8e-05, "loss": 1.483, "step": 4958 }, { "epoch": 0.8466791872972511, "grad_norm": 0.5079430341720581, "learning_rate": 8e-05, "loss": 1.4787, "step": 4959 }, { "epoch": 0.8468499231688578, "grad_norm": 0.4654134511947632, "learning_rate": 8e-05, "loss": 1.7193, "step": 4960 }, { "epoch": 0.8470206590404644, "grad_norm": 0.49141642451286316, "learning_rate": 8e-05, "loss": 1.7587, "step": 4961 }, { "epoch": 0.8471913949120711, "grad_norm": 0.48822179436683655, "learning_rate": 8e-05, "loss": 1.6585, "step": 4962 }, { "epoch": 0.8473621307836776, "grad_norm": 0.5425360798835754, "learning_rate": 8e-05, "loss": 1.8859, "step": 4963 }, { "epoch": 0.8475328666552843, "grad_norm": 0.4557400643825531, "learning_rate": 8e-05, "loss": 1.4682, "step": 4964 }, { "epoch": 0.8477036025268909, "grad_norm": 0.5250974297523499, "learning_rate": 8e-05, "loss": 1.6614, "step": 4965 }, { "epoch": 0.8478743383984976, "grad_norm": 0.4806775748729706, "learning_rate": 8e-05, "loss": 1.6719, "step": 4966 }, { "epoch": 0.8480450742701041, "grad_norm": 0.4966818392276764, "learning_rate": 8e-05, "loss": 1.6561, "step": 4967 }, { "epoch": 0.8482158101417108, "grad_norm": 0.49989721179008484, "learning_rate": 8e-05, "loss": 1.8505, "step": 4968 }, { "epoch": 0.8483865460133174, "grad_norm": 0.4823855459690094, "learning_rate": 8e-05, "loss": 1.6669, "step": 4969 }, { "epoch": 0.8485572818849241, "grad_norm": 0.4967973828315735, "learning_rate": 8e-05, "loss": 1.7564, "step": 4970 }, { "epoch": 0.8487280177565306, "grad_norm": 0.48064813017845154, "learning_rate": 8e-05, "loss": 1.7949, "step": 4971 }, { "epoch": 0.8488987536281373, "grad_norm": 0.4712863564491272, "learning_rate": 8e-05, "loss": 1.7102, "step": 4972 }, { "epoch": 0.8490694894997439, "grad_norm": 0.49589642882347107, "learning_rate": 8e-05, "loss": 1.748, "step": 4973 }, { "epoch": 0.8492402253713505, "grad_norm": 0.49743396043777466, "learning_rate": 8e-05, "loss": 1.7638, "step": 4974 }, { "epoch": 0.8494109612429571, "grad_norm": 0.47620120644569397, "learning_rate": 8e-05, "loss": 1.5771, "step": 4975 }, { "epoch": 0.8495816971145638, "grad_norm": 0.5074315071105957, "learning_rate": 8e-05, "loss": 1.6963, "step": 4976 }, { "epoch": 0.8497524329861704, "grad_norm": 0.5333656072616577, "learning_rate": 8e-05, "loss": 1.8122, "step": 4977 }, { "epoch": 0.849923168857777, "grad_norm": 0.4767550528049469, "learning_rate": 8e-05, "loss": 1.6202, "step": 4978 }, { "epoch": 0.8500939047293836, "grad_norm": 0.4713440537452698, "learning_rate": 8e-05, "loss": 1.5508, "step": 4979 }, { "epoch": 0.8502646406009903, "grad_norm": 0.4702150523662567, "learning_rate": 8e-05, "loss": 1.6043, "step": 4980 }, { "epoch": 0.8504353764725969, "grad_norm": 0.5284653902053833, "learning_rate": 8e-05, "loss": 1.6512, "step": 4981 }, { "epoch": 0.8506061123442035, "grad_norm": 0.4858403205871582, "learning_rate": 8e-05, "loss": 1.4734, "step": 4982 }, { "epoch": 0.8507768482158101, "grad_norm": 0.5018724203109741, "learning_rate": 8e-05, "loss": 1.9318, "step": 4983 }, { "epoch": 0.8509475840874168, "grad_norm": 0.4656963646411896, "learning_rate": 8e-05, "loss": 1.7316, "step": 4984 }, { "epoch": 0.8511183199590234, "grad_norm": 0.5253808498382568, "learning_rate": 8e-05, "loss": 1.6906, "step": 4985 }, { "epoch": 0.85128905583063, "grad_norm": 0.47659340500831604, "learning_rate": 8e-05, "loss": 1.6934, "step": 4986 }, { "epoch": 0.8514597917022366, "grad_norm": 0.4941353499889374, "learning_rate": 8e-05, "loss": 1.6314, "step": 4987 }, { "epoch": 0.8516305275738433, "grad_norm": 0.49403324723243713, "learning_rate": 8e-05, "loss": 1.7468, "step": 4988 }, { "epoch": 0.8518012634454499, "grad_norm": 0.5249005556106567, "learning_rate": 8e-05, "loss": 1.7862, "step": 4989 }, { "epoch": 0.8519719993170565, "grad_norm": 0.4713033139705658, "learning_rate": 8e-05, "loss": 1.6066, "step": 4990 }, { "epoch": 0.8521427351886631, "grad_norm": 0.5070139765739441, "learning_rate": 8e-05, "loss": 1.7571, "step": 4991 }, { "epoch": 0.8523134710602698, "grad_norm": 0.45239660143852234, "learning_rate": 8e-05, "loss": 1.5384, "step": 4992 }, { "epoch": 0.8524842069318764, "grad_norm": 0.5018871426582336, "learning_rate": 8e-05, "loss": 1.6888, "step": 4993 }, { "epoch": 0.852654942803483, "grad_norm": 0.4747350215911865, "learning_rate": 8e-05, "loss": 1.6248, "step": 4994 }, { "epoch": 0.8528256786750896, "grad_norm": 0.49039408564567566, "learning_rate": 8e-05, "loss": 1.5979, "step": 4995 }, { "epoch": 0.8529964145466963, "grad_norm": 0.45886024832725525, "learning_rate": 8e-05, "loss": 1.7299, "step": 4996 }, { "epoch": 0.8531671504183029, "grad_norm": 0.45238715410232544, "learning_rate": 8e-05, "loss": 1.3522, "step": 4997 }, { "epoch": 0.8533378862899095, "grad_norm": 0.4720473289489746, "learning_rate": 8e-05, "loss": 1.6391, "step": 4998 }, { "epoch": 0.8535086221615161, "grad_norm": 0.46787160634994507, "learning_rate": 8e-05, "loss": 1.7421, "step": 4999 }, { "epoch": 0.8536793580331228, "grad_norm": 0.46369314193725586, "learning_rate": 8e-05, "loss": 1.6321, "step": 5000 } ], "logging_steps": 1, "max_steps": 5857, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.70749463789568e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }