| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013377926421404682, |
| "grad_norm": 0.5197079181671143, |
| "learning_rate": 0.0, |
| "loss": 4.2636, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.026755852842809364, |
| "grad_norm": 0.5626901984214783, |
| "learning_rate": 4e-05, |
| "loss": 4.3971, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04013377926421405, |
| "grad_norm": 0.5167903304100037, |
| "learning_rate": 8e-05, |
| "loss": 4.3249, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.05351170568561873, |
| "grad_norm": 0.4764951169490814, |
| "learning_rate": 0.00012, |
| "loss": 4.2031, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.06688963210702341, |
| "grad_norm": 0.45488491654396057, |
| "learning_rate": 0.00016, |
| "loss": 4.3914, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0802675585284281, |
| "grad_norm": 0.568274736404419, |
| "learning_rate": 0.0002, |
| "loss": 4.2346, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.09364548494983277, |
| "grad_norm": 0.5974003076553345, |
| "learning_rate": 0.0001999555061179088, |
| "loss": 4.131, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.10702341137123746, |
| "grad_norm": 0.6204471588134766, |
| "learning_rate": 0.00019991101223581757, |
| "loss": 4.2256, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12040133779264214, |
| "grad_norm": 0.7143808603286743, |
| "learning_rate": 0.00019986651835372636, |
| "loss": 3.8449, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.13377926421404682, |
| "grad_norm": 0.7799420356750488, |
| "learning_rate": 0.00019982202447163517, |
| "loss": 4.4301, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.14715719063545152, |
| "grad_norm": 0.8880407214164734, |
| "learning_rate": 0.00019977753058954395, |
| "loss": 4.2266, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1605351170568562, |
| "grad_norm": 0.7776209712028503, |
| "learning_rate": 0.00019973303670745273, |
| "loss": 4.3208, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 0.9125858545303345, |
| "learning_rate": 0.0001996885428253615, |
| "loss": 4.4363, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.18729096989966554, |
| "grad_norm": 0.9000256657600403, |
| "learning_rate": 0.00019964404894327032, |
| "loss": 4.2917, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.20066889632107024, |
| "grad_norm": 0.9995108246803284, |
| "learning_rate": 0.00019959955506117908, |
| "loss": 4.1784, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.2140468227424749, |
| "grad_norm": 0.9209024310112, |
| "learning_rate": 0.0001995550611790879, |
| "loss": 4.7852, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.22742474916387959, |
| "grad_norm": 0.9421981573104858, |
| "learning_rate": 0.00019951056729699667, |
| "loss": 4.8501, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.2408026755852843, |
| "grad_norm": 0.9213201403617859, |
| "learning_rate": 0.00019946607341490545, |
| "loss": 4.7923, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.25418060200668896, |
| "grad_norm": 0.9378194212913513, |
| "learning_rate": 0.00019942157953281423, |
| "loss": 4.9593, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.26755852842809363, |
| "grad_norm": 1.0096492767333984, |
| "learning_rate": 0.00019937708565072304, |
| "loss": 4.7099, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2809364548494983, |
| "grad_norm": 0.8903587460517883, |
| "learning_rate": 0.00019933259176863183, |
| "loss": 4.3746, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.29431438127090304, |
| "grad_norm": 0.7808490991592407, |
| "learning_rate": 0.0001992880978865406, |
| "loss": 4.5873, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.8145670294761658, |
| "learning_rate": 0.0001992436040044494, |
| "loss": 4.7924, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3210702341137124, |
| "grad_norm": 0.7945849299430847, |
| "learning_rate": 0.0001991991101223582, |
| "loss": 4.8881, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.33444816053511706, |
| "grad_norm": 0.7871395349502563, |
| "learning_rate": 0.00019915461624026696, |
| "loss": 4.6922, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.9111238718032837, |
| "learning_rate": 0.00019911012235817577, |
| "loss": 4.9982, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.3612040133779264, |
| "grad_norm": 0.7121369242668152, |
| "learning_rate": 0.00019906562847608455, |
| "loss": 4.4756, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.3745819397993311, |
| "grad_norm": 0.7118422389030457, |
| "learning_rate": 0.00019902113459399333, |
| "loss": 5.1389, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.3879598662207358, |
| "grad_norm": 0.7100292444229126, |
| "learning_rate": 0.0001989766407119021, |
| "loss": 4.7691, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4013377926421405, |
| "grad_norm": 0.708591639995575, |
| "learning_rate": 0.00019893214682981092, |
| "loss": 4.8721, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.41471571906354515, |
| "grad_norm": 0.6711616516113281, |
| "learning_rate": 0.0001988876529477197, |
| "loss": 4.9152, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.4280936454849498, |
| "grad_norm": 0.7158232927322388, |
| "learning_rate": 0.0001988431590656285, |
| "loss": 4.828, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.4414715719063545, |
| "grad_norm": 0.6246087551116943, |
| "learning_rate": 0.00019879866518353727, |
| "loss": 4.8452, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.45484949832775917, |
| "grad_norm": 0.6088873744010925, |
| "learning_rate": 0.00019875417130144608, |
| "loss": 4.9702, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.4682274247491639, |
| "grad_norm": 0.5798126459121704, |
| "learning_rate": 0.00019870967741935483, |
| "loss": 4.9838, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.4816053511705686, |
| "grad_norm": 0.6268919706344604, |
| "learning_rate": 0.00019866518353726364, |
| "loss": 4.7636, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.49498327759197325, |
| "grad_norm": 0.5649904012680054, |
| "learning_rate": 0.00019862068965517243, |
| "loss": 4.506, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5083612040133779, |
| "grad_norm": 0.5947792530059814, |
| "learning_rate": 0.0001985761957730812, |
| "loss": 4.8057, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.6204257011413574, |
| "learning_rate": 0.00019853170189099, |
| "loss": 5.0511, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5351170568561873, |
| "grad_norm": 0.5972265601158142, |
| "learning_rate": 0.0001984872080088988, |
| "loss": 4.924, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5484949832775919, |
| "grad_norm": 0.6117077469825745, |
| "learning_rate": 0.00019844271412680758, |
| "loss": 4.8729, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5618729096989966, |
| "grad_norm": 0.5085508823394775, |
| "learning_rate": 0.00019839822024471637, |
| "loss": 4.3616, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.5752508361204013, |
| "grad_norm": 0.550647497177124, |
| "learning_rate": 0.00019835372636262515, |
| "loss": 5.2512, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.5886287625418061, |
| "grad_norm": 0.48329588770866394, |
| "learning_rate": 0.00019830923248053396, |
| "loss": 4.9501, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6020066889632107, |
| "grad_norm": 0.6313246488571167, |
| "learning_rate": 0.0001982647385984427, |
| "loss": 4.5767, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.5111928582191467, |
| "learning_rate": 0.00019822024471635152, |
| "loss": 4.5586, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6287625418060201, |
| "grad_norm": 0.5264492630958557, |
| "learning_rate": 0.0001981757508342603, |
| "loss": 4.7033, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.6421404682274248, |
| "grad_norm": 0.5058289170265198, |
| "learning_rate": 0.0001981312569521691, |
| "loss": 4.8396, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6555183946488294, |
| "grad_norm": 0.5688439607620239, |
| "learning_rate": 0.00019808676307007787, |
| "loss": 5.1887, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.6688963210702341, |
| "grad_norm": 0.5488842129707336, |
| "learning_rate": 0.00019804226918798665, |
| "loss": 4.6075, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6822742474916388, |
| "grad_norm": 0.5358632206916809, |
| "learning_rate": 0.00019799777530589546, |
| "loss": 5.0205, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.47869494557380676, |
| "learning_rate": 0.00019795328142380422, |
| "loss": 4.8495, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7090301003344481, |
| "grad_norm": 0.49378660321235657, |
| "learning_rate": 0.00019790878754171303, |
| "loss": 4.6563, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7224080267558528, |
| "grad_norm": 0.5167868733406067, |
| "learning_rate": 0.0001978642936596218, |
| "loss": 5.2558, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.7357859531772575, |
| "grad_norm": 0.5230040550231934, |
| "learning_rate": 0.0001978197997775306, |
| "loss": 4.7769, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7491638795986622, |
| "grad_norm": 0.4822310507297516, |
| "learning_rate": 0.00019777530589543937, |
| "loss": 4.9282, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7625418060200669, |
| "grad_norm": 0.500045895576477, |
| "learning_rate": 0.00019773081201334818, |
| "loss": 5.0399, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.7759197324414716, |
| "grad_norm": 0.4740642309188843, |
| "learning_rate": 0.00019768631813125696, |
| "loss": 4.8041, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.7892976588628763, |
| "grad_norm": 0.45918184518814087, |
| "learning_rate": 0.00019764182424916575, |
| "loss": 4.6304, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.802675585284281, |
| "grad_norm": 0.53122878074646, |
| "learning_rate": 0.00019759733036707453, |
| "loss": 4.8377, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8160535117056856, |
| "grad_norm": 0.4925791919231415, |
| "learning_rate": 0.00019755283648498334, |
| "loss": 5.0919, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.8294314381270903, |
| "grad_norm": 0.4777262806892395, |
| "learning_rate": 0.0001975083426028921, |
| "loss": 4.8379, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.842809364548495, |
| "grad_norm": 0.49119675159454346, |
| "learning_rate": 0.0001974638487208009, |
| "loss": 5.0819, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.8561872909698997, |
| "grad_norm": 0.4732685089111328, |
| "learning_rate": 0.00019741935483870969, |
| "loss": 4.8948, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.46269145607948303, |
| "learning_rate": 0.00019737486095661847, |
| "loss": 4.824, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.882943143812709, |
| "grad_norm": 0.49532708525657654, |
| "learning_rate": 0.00019733036707452725, |
| "loss": 4.8986, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.8963210702341137, |
| "grad_norm": 0.5253002643585205, |
| "learning_rate": 0.00019728587319243606, |
| "loss": 4.9073, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.9096989966555183, |
| "grad_norm": 0.5069419145584106, |
| "learning_rate": 0.00019724137931034484, |
| "loss": 4.8962, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.5038817524909973, |
| "learning_rate": 0.00019719688542825363, |
| "loss": 4.8711, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.9364548494983278, |
| "grad_norm": 0.4987100064754486, |
| "learning_rate": 0.0001971523915461624, |
| "loss": 4.8816, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9498327759197325, |
| "grad_norm": 0.47370976209640503, |
| "learning_rate": 0.00019710789766407122, |
| "loss": 4.9675, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.9632107023411371, |
| "grad_norm": 0.5081727504730225, |
| "learning_rate": 0.00019706340378197997, |
| "loss": 4.2768, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.9765886287625418, |
| "grad_norm": 0.45571258664131165, |
| "learning_rate": 0.00019701890989988878, |
| "loss": 4.6182, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.9899665551839465, |
| "grad_norm": 0.5216127634048462, |
| "learning_rate": 0.00019697441601779756, |
| "loss": 4.7126, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5393329858779907, |
| "learning_rate": 0.00019692992213570635, |
| "loss": 4.4919, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.0133779264214047, |
| "grad_norm": 0.4506986737251282, |
| "learning_rate": 0.00019688542825361513, |
| "loss": 4.5089, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.0267558528428093, |
| "grad_norm": 0.4328899085521698, |
| "learning_rate": 0.00019684093437152394, |
| "loss": 4.7518, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.040133779264214, |
| "grad_norm": 0.4397362470626831, |
| "learning_rate": 0.00019679644048943272, |
| "loss": 4.5069, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.0535117056856187, |
| "grad_norm": 0.4604664146900177, |
| "learning_rate": 0.0001967519466073415, |
| "loss": 4.7054, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.0668896321070234, |
| "grad_norm": 0.4398234784603119, |
| "learning_rate": 0.00019670745272525029, |
| "loss": 4.2743, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.080267558528428, |
| "grad_norm": 0.4570735692977905, |
| "learning_rate": 0.0001966629588431591, |
| "loss": 4.8012, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.0936454849498327, |
| "grad_norm": 0.4814144968986511, |
| "learning_rate": 0.00019661846496106785, |
| "loss": 4.6449, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.1070234113712374, |
| "grad_norm": 0.4526231288909912, |
| "learning_rate": 0.00019657397107897666, |
| "loss": 4.5546, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.120401337792642, |
| "grad_norm": 0.4847906827926636, |
| "learning_rate": 0.00019652947719688544, |
| "loss": 4.4421, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.1337792642140467, |
| "grad_norm": 0.5136271715164185, |
| "learning_rate": 0.00019648498331479422, |
| "loss": 4.7136, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.1471571906354514, |
| "grad_norm": 0.49209895730018616, |
| "learning_rate": 0.000196440489432703, |
| "loss": 4.3145, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.160535117056856, |
| "grad_norm": 0.4972032904624939, |
| "learning_rate": 0.00019639599555061182, |
| "loss": 4.0408, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 0.5077862739562988, |
| "learning_rate": 0.0001963515016685206, |
| "loss": 4.4074, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.1872909698996654, |
| "grad_norm": 0.5293861031532288, |
| "learning_rate": 0.00019630700778642935, |
| "loss": 4.5385, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.2006688963210703, |
| "grad_norm": 0.5062645673751831, |
| "learning_rate": 0.00019626251390433816, |
| "loss": 4.5141, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.214046822742475, |
| "grad_norm": 0.49655866622924805, |
| "learning_rate": 0.00019621802002224695, |
| "loss": 4.4765, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.2274247491638797, |
| "grad_norm": 0.6059755086898804, |
| "learning_rate": 0.00019617352614015573, |
| "loss": 4.568, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.2408026755852843, |
| "grad_norm": 0.5442761778831482, |
| "learning_rate": 0.0001961290322580645, |
| "loss": 4.7724, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.254180602006689, |
| "grad_norm": 0.5426056385040283, |
| "learning_rate": 0.00019608453837597332, |
| "loss": 4.5308, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.2675585284280937, |
| "grad_norm": 0.525372326374054, |
| "learning_rate": 0.0001960400444938821, |
| "loss": 4.394, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.2809364548494984, |
| "grad_norm": 0.5407588481903076, |
| "learning_rate": 0.00019599555061179089, |
| "loss": 4.7347, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.294314381270903, |
| "grad_norm": 0.5726659893989563, |
| "learning_rate": 0.00019595105672969967, |
| "loss": 4.9446, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 0.6211283206939697, |
| "learning_rate": 0.00019590656284760848, |
| "loss": 4.697, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.3210702341137124, |
| "grad_norm": 0.5627567172050476, |
| "learning_rate": 0.00019586206896551723, |
| "loss": 4.4892, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.334448160535117, |
| "grad_norm": 0.6174790859222412, |
| "learning_rate": 0.00019581757508342604, |
| "loss": 4.5686, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 0.5586990118026733, |
| "learning_rate": 0.00019577308120133482, |
| "loss": 4.3916, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.3612040133779264, |
| "grad_norm": 0.5655365586280823, |
| "learning_rate": 0.0001957285873192436, |
| "loss": 4.1789, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.374581939799331, |
| "grad_norm": 0.5834594964981079, |
| "learning_rate": 0.0001956840934371524, |
| "loss": 4.3316, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.3879598662207357, |
| "grad_norm": 0.6065447926521301, |
| "learning_rate": 0.0001956395995550612, |
| "loss": 4.5167, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.4013377926421404, |
| "grad_norm": 0.5250216722488403, |
| "learning_rate": 0.00019559510567296998, |
| "loss": 4.1718, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.414715719063545, |
| "grad_norm": 0.5861116051673889, |
| "learning_rate": 0.00019555061179087876, |
| "loss": 4.3077, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.4280936454849498, |
| "grad_norm": 0.6138104796409607, |
| "learning_rate": 0.00019550611790878755, |
| "loss": 4.4748, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.4414715719063544, |
| "grad_norm": 0.6742071509361267, |
| "learning_rate": 0.00019546162402669636, |
| "loss": 4.8769, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.4548494983277591, |
| "grad_norm": 0.6634951233863831, |
| "learning_rate": 0.0001954171301446051, |
| "loss": 4.6423, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.468227424749164, |
| "grad_norm": 0.626646876335144, |
| "learning_rate": 0.00019537263626251392, |
| "loss": 4.4654, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.4816053511705687, |
| "grad_norm": 0.6306963562965393, |
| "learning_rate": 0.0001953281423804227, |
| "loss": 4.7021, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.4949832775919734, |
| "grad_norm": 0.620370626449585, |
| "learning_rate": 0.00019528364849833149, |
| "loss": 4.587, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.508361204013378, |
| "grad_norm": 0.6410287618637085, |
| "learning_rate": 0.00019523915461624027, |
| "loss": 4.8089, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 0.676434338092804, |
| "learning_rate": 0.00019519466073414908, |
| "loss": 4.668, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.5351170568561874, |
| "grad_norm": 0.5756319761276245, |
| "learning_rate": 0.00019515016685205786, |
| "loss": 4.3223, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.548494983277592, |
| "grad_norm": 0.5850693583488464, |
| "learning_rate": 0.00019510567296996664, |
| "loss": 4.2343, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.5618729096989967, |
| "grad_norm": 0.6172360777854919, |
| "learning_rate": 0.00019506117908787542, |
| "loss": 4.6102, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.5752508361204014, |
| "grad_norm": 0.5887568593025208, |
| "learning_rate": 0.00019501668520578423, |
| "loss": 4.8097, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.588628762541806, |
| "grad_norm": 0.5763369798660278, |
| "learning_rate": 0.000194972191323693, |
| "loss": 4.2001, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.6020066889632107, |
| "grad_norm": 0.6158986687660217, |
| "learning_rate": 0.0001949276974416018, |
| "loss": 4.7075, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.5540957450866699, |
| "learning_rate": 0.00019488320355951058, |
| "loss": 4.452, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.62876254180602, |
| "grad_norm": 0.6193795204162598, |
| "learning_rate": 0.00019483870967741936, |
| "loss": 4.4583, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.6421404682274248, |
| "grad_norm": 0.6699966788291931, |
| "learning_rate": 0.00019479421579532815, |
| "loss": 4.3728, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.6555183946488294, |
| "grad_norm": 0.5904677510261536, |
| "learning_rate": 0.00019474972191323696, |
| "loss": 4.5452, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.6688963210702341, |
| "grad_norm": 0.6137760281562805, |
| "learning_rate": 0.00019470522803114574, |
| "loss": 4.2853, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.6822742474916388, |
| "grad_norm": 0.6396192908287048, |
| "learning_rate": 0.00019466073414905452, |
| "loss": 4.4258, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 0.6190487742424011, |
| "learning_rate": 0.0001946162402669633, |
| "loss": 4.9866, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.7090301003344481, |
| "grad_norm": 0.6971675157546997, |
| "learning_rate": 0.0001945717463848721, |
| "loss": 4.2126, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.7224080267558528, |
| "grad_norm": 0.6245931386947632, |
| "learning_rate": 0.00019452725250278087, |
| "loss": 4.8477, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.7357859531772575, |
| "grad_norm": 0.5675052404403687, |
| "learning_rate": 0.00019448275862068965, |
| "loss": 4.4097, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7491638795986622, |
| "grad_norm": 0.6594040393829346, |
| "learning_rate": 0.00019443826473859846, |
| "loss": 4.3747, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.7625418060200668, |
| "grad_norm": 0.6377655267715454, |
| "learning_rate": 0.00019439377085650724, |
| "loss": 4.2733, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.7759197324414715, |
| "grad_norm": 0.6167862415313721, |
| "learning_rate": 0.00019434927697441602, |
| "loss": 4.5694, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.7892976588628762, |
| "grad_norm": 0.577671468257904, |
| "learning_rate": 0.0001943047830923248, |
| "loss": 4.5006, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.8026755852842808, |
| "grad_norm": 0.6361016035079956, |
| "learning_rate": 0.00019426028921023362, |
| "loss": 4.9907, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.8160535117056855, |
| "grad_norm": 0.6445321440696716, |
| "learning_rate": 0.00019421579532814237, |
| "loss": 4.779, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.8294314381270902, |
| "grad_norm": 0.5955402851104736, |
| "learning_rate": 0.00019417130144605118, |
| "loss": 4.6026, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.8428093645484949, |
| "grad_norm": 0.6807080507278442, |
| "learning_rate": 0.00019412680756395996, |
| "loss": 4.7124, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.8561872909698995, |
| "grad_norm": 0.5799288153648376, |
| "learning_rate": 0.00019408231368186875, |
| "loss": 4.0701, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 0.6187757253646851, |
| "learning_rate": 0.00019403781979977753, |
| "loss": 4.705, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.8829431438127089, |
| "grad_norm": 0.6614826917648315, |
| "learning_rate": 0.00019399332591768634, |
| "loss": 4.8146, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.8963210702341136, |
| "grad_norm": 0.6204859614372253, |
| "learning_rate": 0.00019394883203559512, |
| "loss": 4.3041, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.9096989966555182, |
| "grad_norm": 0.6527450680732727, |
| "learning_rate": 0.0001939043381535039, |
| "loss": 4.4493, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.6470615267753601, |
| "learning_rate": 0.00019385984427141268, |
| "loss": 4.7771, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.9364548494983278, |
| "grad_norm": 0.5642555952072144, |
| "learning_rate": 0.0001938153503893215, |
| "loss": 4.3344, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.9498327759197325, |
| "grad_norm": 0.6206467151641846, |
| "learning_rate": 0.00019377085650723025, |
| "loss": 4.2191, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.9632107023411371, |
| "grad_norm": 0.6079016923904419, |
| "learning_rate": 0.00019372636262513906, |
| "loss": 4.7397, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.9765886287625418, |
| "grad_norm": 0.6197662353515625, |
| "learning_rate": 0.00019368186874304784, |
| "loss": 4.5342, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.9899665551839465, |
| "grad_norm": 0.6556297540664673, |
| "learning_rate": 0.00019363737486095662, |
| "loss": 4.6709, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7837930917739868, |
| "learning_rate": 0.0001935928809788654, |
| "loss": 4.6215, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.0133779264214047, |
| "grad_norm": 0.5267267227172852, |
| "learning_rate": 0.00019354838709677422, |
| "loss": 4.2695, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.0267558528428093, |
| "grad_norm": 0.5862157344818115, |
| "learning_rate": 0.000193503893214683, |
| "loss": 4.3702, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.040133779264214, |
| "grad_norm": 0.538254976272583, |
| "learning_rate": 0.00019345939933259178, |
| "loss": 4.3953, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.0535117056856187, |
| "grad_norm": 0.5977053642272949, |
| "learning_rate": 0.00019341490545050056, |
| "loss": 4.2156, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.0668896321070234, |
| "grad_norm": 0.606006383895874, |
| "learning_rate": 0.00019337041156840937, |
| "loss": 4.2802, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.080267558528428, |
| "grad_norm": 0.6071277856826782, |
| "learning_rate": 0.00019332591768631813, |
| "loss": 4.5545, |
| "step": 156 |
| }, |
| { |
| "epoch": 2.0936454849498327, |
| "grad_norm": 0.6281546354293823, |
| "learning_rate": 0.00019328142380422694, |
| "loss": 4.6105, |
| "step": 157 |
| }, |
| { |
| "epoch": 2.1070234113712374, |
| "grad_norm": 0.5703116655349731, |
| "learning_rate": 0.00019323692992213572, |
| "loss": 4.2751, |
| "step": 158 |
| }, |
| { |
| "epoch": 2.120401337792642, |
| "grad_norm": 0.6587452292442322, |
| "learning_rate": 0.0001931924360400445, |
| "loss": 4.6342, |
| "step": 159 |
| }, |
| { |
| "epoch": 2.1337792642140467, |
| "grad_norm": 0.6141905784606934, |
| "learning_rate": 0.00019314794215795328, |
| "loss": 4.4345, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.1471571906354514, |
| "grad_norm": 0.6741939187049866, |
| "learning_rate": 0.0001931034482758621, |
| "loss": 4.0257, |
| "step": 161 |
| }, |
| { |
| "epoch": 2.160535117056856, |
| "grad_norm": 0.6468759179115295, |
| "learning_rate": 0.00019305895439377088, |
| "loss": 4.2313, |
| "step": 162 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 0.6703383326530457, |
| "learning_rate": 0.00019301446051167966, |
| "loss": 4.2164, |
| "step": 163 |
| }, |
| { |
| "epoch": 2.1872909698996654, |
| "grad_norm": 0.710967481136322, |
| "learning_rate": 0.00019296996662958844, |
| "loss": 4.3398, |
| "step": 164 |
| }, |
| { |
| "epoch": 2.20066889632107, |
| "grad_norm": 0.6862124800682068, |
| "learning_rate": 0.00019292547274749725, |
| "loss": 4.3379, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.2140468227424748, |
| "grad_norm": 0.6288430690765381, |
| "learning_rate": 0.000192880978865406, |
| "loss": 4.3487, |
| "step": 166 |
| }, |
| { |
| "epoch": 2.2274247491638794, |
| "grad_norm": 0.6358796954154968, |
| "learning_rate": 0.00019283648498331481, |
| "loss": 4.1656, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.240802675585284, |
| "grad_norm": 0.6818917393684387, |
| "learning_rate": 0.0001927919911012236, |
| "loss": 4.5363, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.254180602006689, |
| "grad_norm": 0.6996105313301086, |
| "learning_rate": 0.00019274749721913238, |
| "loss": 4.3208, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.2675585284280935, |
| "grad_norm": 0.6730326414108276, |
| "learning_rate": 0.00019270300333704116, |
| "loss": 4.1401, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.280936454849498, |
| "grad_norm": 0.7022603750228882, |
| "learning_rate": 0.00019265850945494994, |
| "loss": 4.5761, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.294314381270903, |
| "grad_norm": 0.6525995135307312, |
| "learning_rate": 0.00019261401557285875, |
| "loss": 4.4017, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.7066033482551575, |
| "learning_rate": 0.0001925695216907675, |
| "loss": 4.0037, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.321070234113712, |
| "grad_norm": 0.6708059310913086, |
| "learning_rate": 0.00019252502780867632, |
| "loss": 4.1947, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.334448160535117, |
| "grad_norm": 0.8711172342300415, |
| "learning_rate": 0.0001924805339265851, |
| "loss": 3.9958, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 0.7258634567260742, |
| "learning_rate": 0.00019243604004449388, |
| "loss": 4.4682, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.361204013377926, |
| "grad_norm": 0.7693021893501282, |
| "learning_rate": 0.00019239154616240267, |
| "loss": 4.54, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.374581939799331, |
| "grad_norm": 0.7271276116371155, |
| "learning_rate": 0.00019234705228031148, |
| "loss": 4.2942, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.387959866220736, |
| "grad_norm": 0.6836609244346619, |
| "learning_rate": 0.00019230255839822026, |
| "loss": 4.3099, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.4013377926421406, |
| "grad_norm": 0.731164813041687, |
| "learning_rate": 0.00019225806451612904, |
| "loss": 4.4077, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.4147157190635453, |
| "grad_norm": 0.7575274109840393, |
| "learning_rate": 0.00019221357063403782, |
| "loss": 4.6572, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.42809364548495, |
| "grad_norm": 0.8461325168609619, |
| "learning_rate": 0.00019216907675194663, |
| "loss": 4.4922, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.4414715719063547, |
| "grad_norm": 0.7225251197814941, |
| "learning_rate": 0.0001921245828698554, |
| "loss": 4.0372, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.4548494983277593, |
| "grad_norm": 3.563720703125, |
| "learning_rate": 0.0001920800889877642, |
| "loss": 4.5412, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.468227424749164, |
| "grad_norm": 0.8452121019363403, |
| "learning_rate": 0.00019203559510567298, |
| "loss": 4.4961, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.4816053511705687, |
| "grad_norm": 0.8734024167060852, |
| "learning_rate": 0.00019199110122358176, |
| "loss": 4.0884, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.4949832775919734, |
| "grad_norm": 1.1765823364257812, |
| "learning_rate": 0.00019194660734149054, |
| "loss": 4.2228, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.508361204013378, |
| "grad_norm": 0.750206708908081, |
| "learning_rate": 0.00019190211345939935, |
| "loss": 4.4305, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 0.7574430704116821, |
| "learning_rate": 0.00019185761957730814, |
| "loss": 4.1511, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.5351170568561874, |
| "grad_norm": 0.7105517387390137, |
| "learning_rate": 0.00019181312569521692, |
| "loss": 4.4793, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.548494983277592, |
| "grad_norm": 0.7495557069778442, |
| "learning_rate": 0.0001917686318131257, |
| "loss": 4.1335, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.5618729096989967, |
| "grad_norm": 0.8001168966293335, |
| "learning_rate": 0.0001917241379310345, |
| "loss": 4.7898, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.5752508361204014, |
| "grad_norm": 0.7402104735374451, |
| "learning_rate": 0.00019167964404894327, |
| "loss": 4.4482, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.588628762541806, |
| "grad_norm": 0.748267650604248, |
| "learning_rate": 0.00019163515016685207, |
| "loss": 4.3167, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.6020066889632107, |
| "grad_norm": 0.8291250467300415, |
| "learning_rate": 0.00019159065628476086, |
| "loss": 4.058, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 0.6945542693138123, |
| "learning_rate": 0.00019154616240266964, |
| "loss": 3.9751, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.62876254180602, |
| "grad_norm": 0.7307319045066833, |
| "learning_rate": 0.00019150166852057842, |
| "loss": 4.2736, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.6421404682274248, |
| "grad_norm": 0.7489168047904968, |
| "learning_rate": 0.00019145717463848723, |
| "loss": 4.3075, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.6555183946488294, |
| "grad_norm": 0.9727582931518555, |
| "learning_rate": 0.00019141268075639601, |
| "loss": 4.6474, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.668896321070234, |
| "grad_norm": 0.6776256561279297, |
| "learning_rate": 0.0001913681868743048, |
| "loss": 4.4217, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.682274247491639, |
| "grad_norm": 0.7305111885070801, |
| "learning_rate": 0.00019132369299221358, |
| "loss": 4.2804, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.6956521739130435, |
| "grad_norm": 0.7196978330612183, |
| "learning_rate": 0.0001912791991101224, |
| "loss": 4.3941, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.709030100334448, |
| "grad_norm": 0.7988458871841431, |
| "learning_rate": 0.00019123470522803114, |
| "loss": 4.437, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.722408026755853, |
| "grad_norm": 0.7004797458648682, |
| "learning_rate": 0.00019119021134593995, |
| "loss": 4.4986, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.7357859531772575, |
| "grad_norm": 0.677796483039856, |
| "learning_rate": 0.00019114571746384874, |
| "loss": 4.0851, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.749163879598662, |
| "grad_norm": 0.7527475357055664, |
| "learning_rate": 0.00019110122358175752, |
| "loss": 4.4469, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.762541806020067, |
| "grad_norm": 1.1659115552902222, |
| "learning_rate": 0.0001910567296996663, |
| "loss": 4.3284, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.7759197324414715, |
| "grad_norm": 0.7238364815711975, |
| "learning_rate": 0.0001910122358175751, |
| "loss": 4.2605, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.789297658862876, |
| "grad_norm": 0.7537760734558105, |
| "learning_rate": 0.0001909677419354839, |
| "loss": 4.3775, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.802675585284281, |
| "grad_norm": 0.6874127388000488, |
| "learning_rate": 0.00019092324805339267, |
| "loss": 4.3404, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.8160535117056855, |
| "grad_norm": 0.7045959830284119, |
| "learning_rate": 0.00019087875417130146, |
| "loss": 4.1568, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.82943143812709, |
| "grad_norm": 0.7249194383621216, |
| "learning_rate": 0.00019083426028921027, |
| "loss": 4.1969, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.842809364548495, |
| "grad_norm": 0.8331268429756165, |
| "learning_rate": 0.00019078976640711902, |
| "loss": 4.3169, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.8561872909698995, |
| "grad_norm": 0.7171936631202698, |
| "learning_rate": 0.0001907452725250278, |
| "loss": 4.5123, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.869565217391304, |
| "grad_norm": 0.759919285774231, |
| "learning_rate": 0.0001907007786429366, |
| "loss": 4.5412, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.882943143812709, |
| "grad_norm": 0.7451274991035461, |
| "learning_rate": 0.0001906562847608454, |
| "loss": 4.5253, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.8963210702341136, |
| "grad_norm": 0.6564481258392334, |
| "learning_rate": 0.00019061179087875418, |
| "loss": 4.1092, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.9096989966555182, |
| "grad_norm": 0.7339865565299988, |
| "learning_rate": 0.00019056729699666296, |
| "loss": 4.5092, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 0.7113937735557556, |
| "learning_rate": 0.00019052280311457177, |
| "loss": 4.3355, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.936454849498328, |
| "grad_norm": 0.7306456565856934, |
| "learning_rate": 0.00019047830923248053, |
| "loss": 4.5745, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.9498327759197327, |
| "grad_norm": 0.7971818447113037, |
| "learning_rate": 0.00019043381535038933, |
| "loss": 4.2903, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.9632107023411374, |
| "grad_norm": 0.7757331728935242, |
| "learning_rate": 0.00019038932146829812, |
| "loss": 4.2832, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.976588628762542, |
| "grad_norm": 0.7326288223266602, |
| "learning_rate": 0.0001903448275862069, |
| "loss": 4.2444, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.9899665551839467, |
| "grad_norm": 0.7363834381103516, |
| "learning_rate": 0.00019030033370411568, |
| "loss": 4.6744, |
| "step": 224 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.8835271596908569, |
| "learning_rate": 0.0001902558398220245, |
| "loss": 4.432, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.0133779264214047, |
| "grad_norm": 0.6591921448707581, |
| "learning_rate": 0.00019021134593993327, |
| "loss": 4.1353, |
| "step": 226 |
| }, |
| { |
| "epoch": 3.0267558528428093, |
| "grad_norm": 0.6895263195037842, |
| "learning_rate": 0.00019016685205784206, |
| "loss": 4.1253, |
| "step": 227 |
| }, |
| { |
| "epoch": 3.040133779264214, |
| "grad_norm": 0.6476898789405823, |
| "learning_rate": 0.00019012235817575084, |
| "loss": 4.0354, |
| "step": 228 |
| }, |
| { |
| "epoch": 3.0535117056856187, |
| "grad_norm": 0.6398957967758179, |
| "learning_rate": 0.00019007786429365965, |
| "loss": 4.024, |
| "step": 229 |
| }, |
| { |
| "epoch": 3.0668896321070234, |
| "grad_norm": 0.7483389973640442, |
| "learning_rate": 0.0001900333704115684, |
| "loss": 4.1405, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.080267558528428, |
| "grad_norm": 0.7003724575042725, |
| "learning_rate": 0.0001899888765294772, |
| "loss": 4.3593, |
| "step": 231 |
| }, |
| { |
| "epoch": 3.0936454849498327, |
| "grad_norm": 0.7426732182502747, |
| "learning_rate": 0.000189944382647386, |
| "loss": 4.485, |
| "step": 232 |
| }, |
| { |
| "epoch": 3.1070234113712374, |
| "grad_norm": 0.6957541108131409, |
| "learning_rate": 0.00018989988876529478, |
| "loss": 4.1017, |
| "step": 233 |
| }, |
| { |
| "epoch": 3.120401337792642, |
| "grad_norm": 0.8613067865371704, |
| "learning_rate": 0.00018985539488320356, |
| "loss": 4.3038, |
| "step": 234 |
| }, |
| { |
| "epoch": 3.1337792642140467, |
| "grad_norm": 0.8375754952430725, |
| "learning_rate": 0.00018981090100111237, |
| "loss": 4.4356, |
| "step": 235 |
| }, |
| { |
| "epoch": 3.1471571906354514, |
| "grad_norm": 0.7878522872924805, |
| "learning_rate": 0.00018976640711902115, |
| "loss": 3.9916, |
| "step": 236 |
| }, |
| { |
| "epoch": 3.160535117056856, |
| "grad_norm": 0.7463901042938232, |
| "learning_rate": 0.00018972191323692993, |
| "loss": 3.6761, |
| "step": 237 |
| }, |
| { |
| "epoch": 3.1739130434782608, |
| "grad_norm": 0.7360939979553223, |
| "learning_rate": 0.00018967741935483872, |
| "loss": 3.9573, |
| "step": 238 |
| }, |
| { |
| "epoch": 3.1872909698996654, |
| "grad_norm": 0.891861081123352, |
| "learning_rate": 0.00018963292547274753, |
| "loss": 4.1853, |
| "step": 239 |
| }, |
| { |
| "epoch": 3.20066889632107, |
| "grad_norm": 0.8589549660682678, |
| "learning_rate": 0.00018958843159065628, |
| "loss": 4.0679, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.2140468227424748, |
| "grad_norm": 0.9534163475036621, |
| "learning_rate": 0.0001895439377085651, |
| "loss": 3.732, |
| "step": 241 |
| }, |
| { |
| "epoch": 3.2274247491638794, |
| "grad_norm": 0.8968185186386108, |
| "learning_rate": 0.00018949944382647387, |
| "loss": 4.2217, |
| "step": 242 |
| }, |
| { |
| "epoch": 3.240802675585284, |
| "grad_norm": 0.81589275598526, |
| "learning_rate": 0.00018945494994438266, |
| "loss": 4.428, |
| "step": 243 |
| }, |
| { |
| "epoch": 3.254180602006689, |
| "grad_norm": 0.929050862789154, |
| "learning_rate": 0.00018941045606229144, |
| "loss": 4.3468, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.2675585284280935, |
| "grad_norm": 0.8535035252571106, |
| "learning_rate": 0.00018936596218020025, |
| "loss": 3.8489, |
| "step": 245 |
| }, |
| { |
| "epoch": 3.280936454849498, |
| "grad_norm": 0.9484681487083435, |
| "learning_rate": 0.00018932146829810903, |
| "loss": 4.0132, |
| "step": 246 |
| }, |
| { |
| "epoch": 3.294314381270903, |
| "grad_norm": 0.8190047144889832, |
| "learning_rate": 0.0001892769744160178, |
| "loss": 4.3574, |
| "step": 247 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 0.8764749765396118, |
| "learning_rate": 0.0001892324805339266, |
| "loss": 4.3103, |
| "step": 248 |
| }, |
| { |
| "epoch": 3.321070234113712, |
| "grad_norm": 0.8929185271263123, |
| "learning_rate": 0.0001891879866518354, |
| "loss": 4.3606, |
| "step": 249 |
| }, |
| { |
| "epoch": 3.334448160535117, |
| "grad_norm": 0.9096692204475403, |
| "learning_rate": 0.00018914349276974416, |
| "loss": 4.0047, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.3478260869565215, |
| "grad_norm": 0.885143518447876, |
| "learning_rate": 0.00018909899888765297, |
| "loss": 4.182, |
| "step": 251 |
| }, |
| { |
| "epoch": 3.361204013377926, |
| "grad_norm": 0.7724215984344482, |
| "learning_rate": 0.00018905450500556175, |
| "loss": 3.9529, |
| "step": 252 |
| }, |
| { |
| "epoch": 3.374581939799331, |
| "grad_norm": 0.8351865410804749, |
| "learning_rate": 0.00018901001112347053, |
| "loss": 3.9533, |
| "step": 253 |
| }, |
| { |
| "epoch": 3.387959866220736, |
| "grad_norm": 0.8684999942779541, |
| "learning_rate": 0.00018896551724137932, |
| "loss": 3.8594, |
| "step": 254 |
| }, |
| { |
| "epoch": 3.4013377926421406, |
| "grad_norm": 0.8903334736824036, |
| "learning_rate": 0.0001889210233592881, |
| "loss": 3.9248, |
| "step": 255 |
| }, |
| { |
| "epoch": 3.4147157190635453, |
| "grad_norm": 0.826690137386322, |
| "learning_rate": 0.0001888765294771969, |
| "loss": 4.0389, |
| "step": 256 |
| }, |
| { |
| "epoch": 3.42809364548495, |
| "grad_norm": 0.8306142687797546, |
| "learning_rate": 0.00018883203559510566, |
| "loss": 3.8168, |
| "step": 257 |
| }, |
| { |
| "epoch": 3.4414715719063547, |
| "grad_norm": 0.9032199382781982, |
| "learning_rate": 0.00018878754171301447, |
| "loss": 4.178, |
| "step": 258 |
| }, |
| { |
| "epoch": 3.4548494983277593, |
| "grad_norm": 0.9081966280937195, |
| "learning_rate": 0.00018874304783092326, |
| "loss": 4.2583, |
| "step": 259 |
| }, |
| { |
| "epoch": 3.468227424749164, |
| "grad_norm": 0.8424077033996582, |
| "learning_rate": 0.00018869855394883204, |
| "loss": 4.3285, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.4816053511705687, |
| "grad_norm": 0.8302170038223267, |
| "learning_rate": 0.00018865406006674082, |
| "loss": 4.1346, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.4949832775919734, |
| "grad_norm": 0.8747193217277527, |
| "learning_rate": 0.00018860956618464963, |
| "loss": 4.0747, |
| "step": 262 |
| }, |
| { |
| "epoch": 3.508361204013378, |
| "grad_norm": 0.8613927364349365, |
| "learning_rate": 0.0001885650723025584, |
| "loss": 4.2346, |
| "step": 263 |
| }, |
| { |
| "epoch": 3.5217391304347827, |
| "grad_norm": 0.8321558833122253, |
| "learning_rate": 0.0001885205784204672, |
| "loss": 3.9781, |
| "step": 264 |
| }, |
| { |
| "epoch": 3.5351170568561874, |
| "grad_norm": 0.8961741328239441, |
| "learning_rate": 0.00018847608453837598, |
| "loss": 4.311, |
| "step": 265 |
| }, |
| { |
| "epoch": 3.548494983277592, |
| "grad_norm": 0.7703898549079895, |
| "learning_rate": 0.00018843159065628479, |
| "loss": 4.1163, |
| "step": 266 |
| }, |
| { |
| "epoch": 3.5618729096989967, |
| "grad_norm": 0.880051851272583, |
| "learning_rate": 0.00018838709677419354, |
| "loss": 3.8032, |
| "step": 267 |
| }, |
| { |
| "epoch": 3.5752508361204014, |
| "grad_norm": 0.8287038207054138, |
| "learning_rate": 0.00018834260289210235, |
| "loss": 4.1627, |
| "step": 268 |
| }, |
| { |
| "epoch": 3.588628762541806, |
| "grad_norm": 0.9726569652557373, |
| "learning_rate": 0.00018829810901001113, |
| "loss": 4.4055, |
| "step": 269 |
| }, |
| { |
| "epoch": 3.6020066889632107, |
| "grad_norm": 0.8071132898330688, |
| "learning_rate": 0.00018825361512791992, |
| "loss": 4.1709, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 0.8310988545417786, |
| "learning_rate": 0.0001882091212458287, |
| "loss": 4.2359, |
| "step": 271 |
| }, |
| { |
| "epoch": 3.62876254180602, |
| "grad_norm": 0.8713561296463013, |
| "learning_rate": 0.0001881646273637375, |
| "loss": 4.1247, |
| "step": 272 |
| }, |
| { |
| "epoch": 3.6421404682274248, |
| "grad_norm": 0.8964342474937439, |
| "learning_rate": 0.0001881201334816463, |
| "loss": 4.0794, |
| "step": 273 |
| }, |
| { |
| "epoch": 3.6555183946488294, |
| "grad_norm": 0.9901681542396545, |
| "learning_rate": 0.00018807563959955507, |
| "loss": 4.0217, |
| "step": 274 |
| }, |
| { |
| "epoch": 3.668896321070234, |
| "grad_norm": 0.9279042482376099, |
| "learning_rate": 0.00018803114571746385, |
| "loss": 4.3244, |
| "step": 275 |
| }, |
| { |
| "epoch": 3.682274247491639, |
| "grad_norm": 0.8105964660644531, |
| "learning_rate": 0.00018798665183537266, |
| "loss": 3.9041, |
| "step": 276 |
| }, |
| { |
| "epoch": 3.6956521739130435, |
| "grad_norm": 0.8511622548103333, |
| "learning_rate": 0.00018794215795328142, |
| "loss": 3.8969, |
| "step": 277 |
| }, |
| { |
| "epoch": 3.709030100334448, |
| "grad_norm": 0.9072037935256958, |
| "learning_rate": 0.00018789766407119023, |
| "loss": 4.2185, |
| "step": 278 |
| }, |
| { |
| "epoch": 3.722408026755853, |
| "grad_norm": 0.9792962670326233, |
| "learning_rate": 0.000187853170189099, |
| "loss": 4.1915, |
| "step": 279 |
| }, |
| { |
| "epoch": 3.7357859531772575, |
| "grad_norm": 0.8579828143119812, |
| "learning_rate": 0.0001878086763070078, |
| "loss": 3.8903, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.749163879598662, |
| "grad_norm": 0.9866719841957092, |
| "learning_rate": 0.00018776418242491658, |
| "loss": 4.2022, |
| "step": 281 |
| }, |
| { |
| "epoch": 3.762541806020067, |
| "grad_norm": 0.9251964688301086, |
| "learning_rate": 0.00018771968854282539, |
| "loss": 3.9536, |
| "step": 282 |
| }, |
| { |
| "epoch": 3.7759197324414715, |
| "grad_norm": 1.0300836563110352, |
| "learning_rate": 0.00018767519466073417, |
| "loss": 4.2908, |
| "step": 283 |
| }, |
| { |
| "epoch": 3.789297658862876, |
| "grad_norm": 1.0194575786590576, |
| "learning_rate": 0.00018763070077864295, |
| "loss": 4.1851, |
| "step": 284 |
| }, |
| { |
| "epoch": 3.802675585284281, |
| "grad_norm": 0.8165330290794373, |
| "learning_rate": 0.00018758620689655173, |
| "loss": 4.138, |
| "step": 285 |
| }, |
| { |
| "epoch": 3.8160535117056855, |
| "grad_norm": 1.0104280710220337, |
| "learning_rate": 0.00018754171301446054, |
| "loss": 3.9481, |
| "step": 286 |
| }, |
| { |
| "epoch": 3.82943143812709, |
| "grad_norm": 0.9972538352012634, |
| "learning_rate": 0.0001874972191323693, |
| "loss": 4.3932, |
| "step": 287 |
| }, |
| { |
| "epoch": 3.842809364548495, |
| "grad_norm": 0.96323162317276, |
| "learning_rate": 0.0001874527252502781, |
| "loss": 4.1133, |
| "step": 288 |
| }, |
| { |
| "epoch": 3.8561872909698995, |
| "grad_norm": 0.8500615954399109, |
| "learning_rate": 0.0001874082313681869, |
| "loss": 4.2205, |
| "step": 289 |
| }, |
| { |
| "epoch": 3.869565217391304, |
| "grad_norm": 0.8451250195503235, |
| "learning_rate": 0.00018736373748609567, |
| "loss": 4.1371, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.882943143812709, |
| "grad_norm": 0.9399815201759338, |
| "learning_rate": 0.00018731924360400445, |
| "loss": 4.5237, |
| "step": 291 |
| }, |
| { |
| "epoch": 3.8963210702341136, |
| "grad_norm": 0.8061622977256775, |
| "learning_rate": 0.00018727474972191326, |
| "loss": 4.1033, |
| "step": 292 |
| }, |
| { |
| "epoch": 3.9096989966555182, |
| "grad_norm": 0.7987121343612671, |
| "learning_rate": 0.00018723025583982205, |
| "loss": 3.9311, |
| "step": 293 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 0.9041138291358948, |
| "learning_rate": 0.00018718576195773083, |
| "loss": 4.0252, |
| "step": 294 |
| }, |
| { |
| "epoch": 3.936454849498328, |
| "grad_norm": 1.0002484321594238, |
| "learning_rate": 0.0001871412680756396, |
| "loss": 4.4605, |
| "step": 295 |
| }, |
| { |
| "epoch": 3.9498327759197327, |
| "grad_norm": 0.9991098046302795, |
| "learning_rate": 0.0001870967741935484, |
| "loss": 4.1528, |
| "step": 296 |
| }, |
| { |
| "epoch": 3.9632107023411374, |
| "grad_norm": 1.2179397344589233, |
| "learning_rate": 0.00018705228031145718, |
| "loss": 4.5224, |
| "step": 297 |
| }, |
| { |
| "epoch": 3.976588628762542, |
| "grad_norm": 0.8279774785041809, |
| "learning_rate": 0.00018700778642936596, |
| "loss": 3.9464, |
| "step": 298 |
| }, |
| { |
| "epoch": 3.9899665551839467, |
| "grad_norm": 0.8012803792953491, |
| "learning_rate": 0.00018696329254727477, |
| "loss": 4.0139, |
| "step": 299 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9700272083282471, |
| "learning_rate": 0.00018691879866518355, |
| "loss": 3.8306, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.013377926421405, |
| "grad_norm": 0.7136749625205994, |
| "learning_rate": 0.00018687430478309233, |
| "loss": 3.9253, |
| "step": 301 |
| }, |
| { |
| "epoch": 4.026755852842809, |
| "grad_norm": 0.7885096669197083, |
| "learning_rate": 0.00018682981090100111, |
| "loss": 3.927, |
| "step": 302 |
| }, |
| { |
| "epoch": 4.040133779264214, |
| "grad_norm": 0.7801666855812073, |
| "learning_rate": 0.00018678531701890992, |
| "loss": 3.6482, |
| "step": 303 |
| }, |
| { |
| "epoch": 4.053511705685619, |
| "grad_norm": 0.7857955098152161, |
| "learning_rate": 0.00018674082313681868, |
| "loss": 4.0665, |
| "step": 304 |
| }, |
| { |
| "epoch": 4.066889632107023, |
| "grad_norm": 0.707421064376831, |
| "learning_rate": 0.0001866963292547275, |
| "loss": 3.9142, |
| "step": 305 |
| }, |
| { |
| "epoch": 4.080267558528428, |
| "grad_norm": 0.7936912775039673, |
| "learning_rate": 0.00018665183537263627, |
| "loss": 4.1227, |
| "step": 306 |
| }, |
| { |
| "epoch": 4.093645484949833, |
| "grad_norm": 0.8899754881858826, |
| "learning_rate": 0.00018660734149054505, |
| "loss": 3.7661, |
| "step": 307 |
| }, |
| { |
| "epoch": 4.107023411371237, |
| "grad_norm": 0.7760347723960876, |
| "learning_rate": 0.00018656284760845384, |
| "loss": 3.8921, |
| "step": 308 |
| }, |
| { |
| "epoch": 4.120401337792642, |
| "grad_norm": 0.8672968745231628, |
| "learning_rate": 0.00018651835372636265, |
| "loss": 3.6037, |
| "step": 309 |
| }, |
| { |
| "epoch": 4.133779264214047, |
| "grad_norm": 0.8046863675117493, |
| "learning_rate": 0.0001864738598442714, |
| "loss": 3.9117, |
| "step": 310 |
| }, |
| { |
| "epoch": 4.147157190635451, |
| "grad_norm": 0.9172897934913635, |
| "learning_rate": 0.0001864293659621802, |
| "loss": 3.7229, |
| "step": 311 |
| }, |
| { |
| "epoch": 4.160535117056856, |
| "grad_norm": 0.9616653919219971, |
| "learning_rate": 0.000186384872080089, |
| "loss": 3.8851, |
| "step": 312 |
| }, |
| { |
| "epoch": 4.173913043478261, |
| "grad_norm": 0.9659278988838196, |
| "learning_rate": 0.0001863403781979978, |
| "loss": 4.005, |
| "step": 313 |
| }, |
| { |
| "epoch": 4.187290969899665, |
| "grad_norm": 0.9171205163002014, |
| "learning_rate": 0.00018629588431590656, |
| "loss": 3.8634, |
| "step": 314 |
| }, |
| { |
| "epoch": 4.20066889632107, |
| "grad_norm": 0.9968683123588562, |
| "learning_rate": 0.00018625139043381537, |
| "loss": 3.7321, |
| "step": 315 |
| }, |
| { |
| "epoch": 4.214046822742475, |
| "grad_norm": 0.8762083053588867, |
| "learning_rate": 0.00018620689655172415, |
| "loss": 3.931, |
| "step": 316 |
| }, |
| { |
| "epoch": 4.2274247491638794, |
| "grad_norm": 0.9815887212753296, |
| "learning_rate": 0.00018616240266963293, |
| "loss": 3.9975, |
| "step": 317 |
| }, |
| { |
| "epoch": 4.240802675585284, |
| "grad_norm": 1.0065505504608154, |
| "learning_rate": 0.00018611790878754171, |
| "loss": 3.8364, |
| "step": 318 |
| }, |
| { |
| "epoch": 4.254180602006689, |
| "grad_norm": 0.9785431623458862, |
| "learning_rate": 0.00018607341490545052, |
| "loss": 3.8822, |
| "step": 319 |
| }, |
| { |
| "epoch": 4.2675585284280935, |
| "grad_norm": 1.077799677848816, |
| "learning_rate": 0.00018602892102335928, |
| "loss": 3.8299, |
| "step": 320 |
| }, |
| { |
| "epoch": 4.280936454849498, |
| "grad_norm": 0.8109619617462158, |
| "learning_rate": 0.0001859844271412681, |
| "loss": 3.8096, |
| "step": 321 |
| }, |
| { |
| "epoch": 4.294314381270903, |
| "grad_norm": 0.967856764793396, |
| "learning_rate": 0.00018593993325917687, |
| "loss": 3.8639, |
| "step": 322 |
| }, |
| { |
| "epoch": 4.3076923076923075, |
| "grad_norm": 0.8657905459403992, |
| "learning_rate": 0.00018589543937708568, |
| "loss": 3.7556, |
| "step": 323 |
| }, |
| { |
| "epoch": 4.321070234113712, |
| "grad_norm": 0.9641517400741577, |
| "learning_rate": 0.00018585094549499444, |
| "loss": 3.9702, |
| "step": 324 |
| }, |
| { |
| "epoch": 4.334448160535117, |
| "grad_norm": 0.9664435982704163, |
| "learning_rate": 0.00018580645161290325, |
| "loss": 3.8754, |
| "step": 325 |
| }, |
| { |
| "epoch": 4.3478260869565215, |
| "grad_norm": 0.8322617411613464, |
| "learning_rate": 0.00018576195773081203, |
| "loss": 3.83, |
| "step": 326 |
| }, |
| { |
| "epoch": 4.361204013377926, |
| "grad_norm": 1.0363450050354004, |
| "learning_rate": 0.0001857174638487208, |
| "loss": 3.9825, |
| "step": 327 |
| }, |
| { |
| "epoch": 4.374581939799331, |
| "grad_norm": 1.0125840902328491, |
| "learning_rate": 0.0001856729699666296, |
| "loss": 3.6525, |
| "step": 328 |
| }, |
| { |
| "epoch": 4.3879598662207355, |
| "grad_norm": 0.9922601580619812, |
| "learning_rate": 0.0001856284760845384, |
| "loss": 4.2373, |
| "step": 329 |
| }, |
| { |
| "epoch": 4.40133779264214, |
| "grad_norm": 0.9070426225662231, |
| "learning_rate": 0.00018558398220244716, |
| "loss": 3.9623, |
| "step": 330 |
| }, |
| { |
| "epoch": 4.414715719063545, |
| "grad_norm": 0.9369637370109558, |
| "learning_rate": 0.00018553948832035597, |
| "loss": 3.9297, |
| "step": 331 |
| }, |
| { |
| "epoch": 4.4280936454849495, |
| "grad_norm": 1.108876347541809, |
| "learning_rate": 0.00018549499443826475, |
| "loss": 3.7325, |
| "step": 332 |
| }, |
| { |
| "epoch": 4.441471571906354, |
| "grad_norm": 0.9405660629272461, |
| "learning_rate": 0.00018545050055617356, |
| "loss": 3.8615, |
| "step": 333 |
| }, |
| { |
| "epoch": 4.454849498327759, |
| "grad_norm": 0.9730128645896912, |
| "learning_rate": 0.00018540600667408231, |
| "loss": 4.1794, |
| "step": 334 |
| }, |
| { |
| "epoch": 4.468227424749164, |
| "grad_norm": 0.9341335892677307, |
| "learning_rate": 0.00018536151279199112, |
| "loss": 3.9422, |
| "step": 335 |
| }, |
| { |
| "epoch": 4.481605351170568, |
| "grad_norm": 0.9262625575065613, |
| "learning_rate": 0.0001853170189098999, |
| "loss": 3.9819, |
| "step": 336 |
| }, |
| { |
| "epoch": 4.494983277591973, |
| "grad_norm": 1.0419141054153442, |
| "learning_rate": 0.00018527252502780866, |
| "loss": 3.7481, |
| "step": 337 |
| }, |
| { |
| "epoch": 4.508361204013378, |
| "grad_norm": 0.8986826539039612, |
| "learning_rate": 0.00018522803114571747, |
| "loss": 4.1195, |
| "step": 338 |
| }, |
| { |
| "epoch": 4.521739130434782, |
| "grad_norm": 0.9502431154251099, |
| "learning_rate": 0.00018518353726362625, |
| "loss": 3.8521, |
| "step": 339 |
| }, |
| { |
| "epoch": 4.535117056856187, |
| "grad_norm": 0.8936267495155334, |
| "learning_rate": 0.00018513904338153504, |
| "loss": 3.647, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.548494983277592, |
| "grad_norm": 0.8870158195495605, |
| "learning_rate": 0.00018509454949944382, |
| "loss": 3.7624, |
| "step": 341 |
| }, |
| { |
| "epoch": 4.561872909698996, |
| "grad_norm": 0.9030978679656982, |
| "learning_rate": 0.00018505005561735263, |
| "loss": 3.8018, |
| "step": 342 |
| }, |
| { |
| "epoch": 4.575250836120401, |
| "grad_norm": 0.8690946698188782, |
| "learning_rate": 0.0001850055617352614, |
| "loss": 3.6183, |
| "step": 343 |
| }, |
| { |
| "epoch": 4.588628762541806, |
| "grad_norm": 0.9812071323394775, |
| "learning_rate": 0.0001849610678531702, |
| "loss": 4.1231, |
| "step": 344 |
| }, |
| { |
| "epoch": 4.602006688963211, |
| "grad_norm": 0.9404383301734924, |
| "learning_rate": 0.00018491657397107897, |
| "loss": 3.9645, |
| "step": 345 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 1.0422123670578003, |
| "learning_rate": 0.00018487208008898778, |
| "loss": 3.9031, |
| "step": 346 |
| }, |
| { |
| "epoch": 4.6287625418060205, |
| "grad_norm": 0.9838129281997681, |
| "learning_rate": 0.00018482758620689654, |
| "loss": 3.9985, |
| "step": 347 |
| }, |
| { |
| "epoch": 4.642140468227424, |
| "grad_norm": 0.9232532978057861, |
| "learning_rate": 0.00018478309232480535, |
| "loss": 4.0343, |
| "step": 348 |
| }, |
| { |
| "epoch": 4.65551839464883, |
| "grad_norm": 0.9242956042289734, |
| "learning_rate": 0.00018473859844271413, |
| "loss": 4.0669, |
| "step": 349 |
| }, |
| { |
| "epoch": 4.668896321070234, |
| "grad_norm": 0.919269859790802, |
| "learning_rate": 0.0001846941045606229, |
| "loss": 4.0549, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.682274247491639, |
| "grad_norm": 0.93565833568573, |
| "learning_rate": 0.0001846496106785317, |
| "loss": 4.1306, |
| "step": 351 |
| }, |
| { |
| "epoch": 4.695652173913043, |
| "grad_norm": 0.9001899361610413, |
| "learning_rate": 0.0001846051167964405, |
| "loss": 3.8916, |
| "step": 352 |
| }, |
| { |
| "epoch": 4.709030100334449, |
| "grad_norm": 0.8896821737289429, |
| "learning_rate": 0.0001845606229143493, |
| "loss": 3.8377, |
| "step": 353 |
| }, |
| { |
| "epoch": 4.722408026755852, |
| "grad_norm": 1.0137807130813599, |
| "learning_rate": 0.00018451612903225807, |
| "loss": 3.9923, |
| "step": 354 |
| }, |
| { |
| "epoch": 4.735785953177258, |
| "grad_norm": 1.075823426246643, |
| "learning_rate": 0.00018447163515016685, |
| "loss": 4.0706, |
| "step": 355 |
| }, |
| { |
| "epoch": 4.749163879598662, |
| "grad_norm": 1.0076895952224731, |
| "learning_rate": 0.00018442714126807566, |
| "loss": 4.0759, |
| "step": 356 |
| }, |
| { |
| "epoch": 4.762541806020067, |
| "grad_norm": 0.9387428164482117, |
| "learning_rate": 0.00018438264738598442, |
| "loss": 3.6959, |
| "step": 357 |
| }, |
| { |
| "epoch": 4.775919732441472, |
| "grad_norm": 0.8920648097991943, |
| "learning_rate": 0.00018433815350389323, |
| "loss": 3.9213, |
| "step": 358 |
| }, |
| { |
| "epoch": 4.789297658862877, |
| "grad_norm": 1.0252491235733032, |
| "learning_rate": 0.000184293659621802, |
| "loss": 3.9118, |
| "step": 359 |
| }, |
| { |
| "epoch": 4.802675585284281, |
| "grad_norm": 1.0382707118988037, |
| "learning_rate": 0.0001842491657397108, |
| "loss": 4.0172, |
| "step": 360 |
| }, |
| { |
| "epoch": 4.816053511705686, |
| "grad_norm": 1.07838773727417, |
| "learning_rate": 0.00018420467185761957, |
| "loss": 3.8531, |
| "step": 361 |
| }, |
| { |
| "epoch": 4.829431438127091, |
| "grad_norm": 0.9974546432495117, |
| "learning_rate": 0.00018416017797552838, |
| "loss": 4.0387, |
| "step": 362 |
| }, |
| { |
| "epoch": 4.842809364548495, |
| "grad_norm": 1.024491548538208, |
| "learning_rate": 0.00018411568409343717, |
| "loss": 3.9504, |
| "step": 363 |
| }, |
| { |
| "epoch": 4.8561872909699, |
| "grad_norm": 0.9236369132995605, |
| "learning_rate": 0.00018407119021134595, |
| "loss": 3.7119, |
| "step": 364 |
| }, |
| { |
| "epoch": 4.869565217391305, |
| "grad_norm": 0.935644268989563, |
| "learning_rate": 0.00018402669632925473, |
| "loss": 4.0077, |
| "step": 365 |
| }, |
| { |
| "epoch": 4.882943143812709, |
| "grad_norm": 0.9328681230545044, |
| "learning_rate": 0.00018398220244716354, |
| "loss": 3.9133, |
| "step": 366 |
| }, |
| { |
| "epoch": 4.896321070234114, |
| "grad_norm": 0.9596607089042664, |
| "learning_rate": 0.0001839377085650723, |
| "loss": 3.8003, |
| "step": 367 |
| }, |
| { |
| "epoch": 4.909698996655519, |
| "grad_norm": 0.9878052473068237, |
| "learning_rate": 0.0001838932146829811, |
| "loss": 3.8805, |
| "step": 368 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 1.00381600856781, |
| "learning_rate": 0.0001838487208008899, |
| "loss": 4.0264, |
| "step": 369 |
| }, |
| { |
| "epoch": 4.936454849498328, |
| "grad_norm": 1.024754524230957, |
| "learning_rate": 0.00018380422691879867, |
| "loss": 3.7291, |
| "step": 370 |
| }, |
| { |
| "epoch": 4.949832775919733, |
| "grad_norm": 0.9670823812484741, |
| "learning_rate": 0.00018375973303670745, |
| "loss": 3.9418, |
| "step": 371 |
| }, |
| { |
| "epoch": 4.963210702341137, |
| "grad_norm": 0.9736581444740295, |
| "learning_rate": 0.00018371523915461626, |
| "loss": 3.8813, |
| "step": 372 |
| }, |
| { |
| "epoch": 4.976588628762542, |
| "grad_norm": 0.9752672910690308, |
| "learning_rate": 0.00018367074527252504, |
| "loss": 3.6717, |
| "step": 373 |
| }, |
| { |
| "epoch": 4.989966555183947, |
| "grad_norm": 1.1268304586410522, |
| "learning_rate": 0.00018362625139043383, |
| "loss": 3.9782, |
| "step": 374 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.7933701276779175, |
| "learning_rate": 0.0001835817575083426, |
| "loss": 3.001, |
| "step": 375 |
| }, |
| { |
| "epoch": 5.013377926421405, |
| "grad_norm": 0.8035010099411011, |
| "learning_rate": 0.00018353726362625142, |
| "loss": 3.7943, |
| "step": 376 |
| }, |
| { |
| "epoch": 5.026755852842809, |
| "grad_norm": 0.8016420006752014, |
| "learning_rate": 0.00018349276974416017, |
| "loss": 3.7454, |
| "step": 377 |
| }, |
| { |
| "epoch": 5.040133779264214, |
| "grad_norm": 0.6844643354415894, |
| "learning_rate": 0.00018344827586206896, |
| "loss": 3.699, |
| "step": 378 |
| }, |
| { |
| "epoch": 5.053511705685619, |
| "grad_norm": 0.8649943470954895, |
| "learning_rate": 0.00018340378197997777, |
| "loss": 3.7197, |
| "step": 379 |
| }, |
| { |
| "epoch": 5.066889632107023, |
| "grad_norm": 0.9685015678405762, |
| "learning_rate": 0.00018335928809788655, |
| "loss": 3.6952, |
| "step": 380 |
| }, |
| { |
| "epoch": 5.080267558528428, |
| "grad_norm": 0.8728330135345459, |
| "learning_rate": 0.00018331479421579533, |
| "loss": 3.7164, |
| "step": 381 |
| }, |
| { |
| "epoch": 5.093645484949833, |
| "grad_norm": 0.962504506111145, |
| "learning_rate": 0.0001832703003337041, |
| "loss": 3.6123, |
| "step": 382 |
| }, |
| { |
| "epoch": 5.107023411371237, |
| "grad_norm": 0.9194462895393372, |
| "learning_rate": 0.00018322580645161292, |
| "loss": 3.275, |
| "step": 383 |
| }, |
| { |
| "epoch": 5.120401337792642, |
| "grad_norm": 0.9851329326629639, |
| "learning_rate": 0.00018318131256952168, |
| "loss": 3.6222, |
| "step": 384 |
| }, |
| { |
| "epoch": 5.133779264214047, |
| "grad_norm": 1.0702580213546753, |
| "learning_rate": 0.0001831368186874305, |
| "loss": 3.8728, |
| "step": 385 |
| }, |
| { |
| "epoch": 5.147157190635451, |
| "grad_norm": 1.3237228393554688, |
| "learning_rate": 0.00018309232480533927, |
| "loss": 3.8948, |
| "step": 386 |
| }, |
| { |
| "epoch": 5.160535117056856, |
| "grad_norm": 1.0076218843460083, |
| "learning_rate": 0.00018304783092324805, |
| "loss": 3.8894, |
| "step": 387 |
| }, |
| { |
| "epoch": 5.173913043478261, |
| "grad_norm": 1.084722876548767, |
| "learning_rate": 0.00018300333704115683, |
| "loss": 3.7398, |
| "step": 388 |
| }, |
| { |
| "epoch": 5.187290969899665, |
| "grad_norm": 0.9112711548805237, |
| "learning_rate": 0.00018295884315906564, |
| "loss": 3.5901, |
| "step": 389 |
| }, |
| { |
| "epoch": 5.20066889632107, |
| "grad_norm": 0.9451406002044678, |
| "learning_rate": 0.00018291434927697443, |
| "loss": 3.6313, |
| "step": 390 |
| }, |
| { |
| "epoch": 5.214046822742475, |
| "grad_norm": 0.8901047706604004, |
| "learning_rate": 0.0001828698553948832, |
| "loss": 3.3191, |
| "step": 391 |
| }, |
| { |
| "epoch": 5.2274247491638794, |
| "grad_norm": 0.9838565587997437, |
| "learning_rate": 0.000182825361512792, |
| "loss": 3.882, |
| "step": 392 |
| }, |
| { |
| "epoch": 5.240802675585284, |
| "grad_norm": 0.9839156866073608, |
| "learning_rate": 0.0001827808676307008, |
| "loss": 3.6068, |
| "step": 393 |
| }, |
| { |
| "epoch": 5.254180602006689, |
| "grad_norm": 0.9328583478927612, |
| "learning_rate": 0.00018273637374860956, |
| "loss": 3.6856, |
| "step": 394 |
| }, |
| { |
| "epoch": 5.2675585284280935, |
| "grad_norm": 0.8705796003341675, |
| "learning_rate": 0.00018269187986651837, |
| "loss": 3.7282, |
| "step": 395 |
| }, |
| { |
| "epoch": 5.280936454849498, |
| "grad_norm": 0.9675374031066895, |
| "learning_rate": 0.00018264738598442715, |
| "loss": 3.6588, |
| "step": 396 |
| }, |
| { |
| "epoch": 5.294314381270903, |
| "grad_norm": 1.145280361175537, |
| "learning_rate": 0.00018260289210233593, |
| "loss": 3.8843, |
| "step": 397 |
| }, |
| { |
| "epoch": 5.3076923076923075, |
| "grad_norm": 0.9769694805145264, |
| "learning_rate": 0.0001825583982202447, |
| "loss": 3.7207, |
| "step": 398 |
| }, |
| { |
| "epoch": 5.321070234113712, |
| "grad_norm": 0.9277816414833069, |
| "learning_rate": 0.00018251390433815352, |
| "loss": 3.712, |
| "step": 399 |
| }, |
| { |
| "epoch": 5.334448160535117, |
| "grad_norm": 1.1015180349349976, |
| "learning_rate": 0.0001824694104560623, |
| "loss": 3.7941, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.3478260869565215, |
| "grad_norm": 1.2234200239181519, |
| "learning_rate": 0.0001824249165739711, |
| "loss": 3.6559, |
| "step": 401 |
| }, |
| { |
| "epoch": 5.361204013377926, |
| "grad_norm": 0.9358471035957336, |
| "learning_rate": 0.00018238042269187987, |
| "loss": 3.7665, |
| "step": 402 |
| }, |
| { |
| "epoch": 5.374581939799331, |
| "grad_norm": 0.8287034630775452, |
| "learning_rate": 0.00018233592880978868, |
| "loss": 3.8265, |
| "step": 403 |
| }, |
| { |
| "epoch": 5.3879598662207355, |
| "grad_norm": 1.0219204425811768, |
| "learning_rate": 0.00018229143492769743, |
| "loss": 3.6829, |
| "step": 404 |
| }, |
| { |
| "epoch": 5.40133779264214, |
| "grad_norm": 1.0601041316986084, |
| "learning_rate": 0.00018224694104560624, |
| "loss": 3.5879, |
| "step": 405 |
| }, |
| { |
| "epoch": 5.414715719063545, |
| "grad_norm": 1.2221566438674927, |
| "learning_rate": 0.00018220244716351503, |
| "loss": 3.6023, |
| "step": 406 |
| }, |
| { |
| "epoch": 5.4280936454849495, |
| "grad_norm": 0.9589087963104248, |
| "learning_rate": 0.0001821579532814238, |
| "loss": 3.9109, |
| "step": 407 |
| }, |
| { |
| "epoch": 5.441471571906354, |
| "grad_norm": 1.088295340538025, |
| "learning_rate": 0.0001821134593993326, |
| "loss": 3.5695, |
| "step": 408 |
| }, |
| { |
| "epoch": 5.454849498327759, |
| "grad_norm": 1.1284915208816528, |
| "learning_rate": 0.0001820689655172414, |
| "loss": 3.6004, |
| "step": 409 |
| }, |
| { |
| "epoch": 5.468227424749164, |
| "grad_norm": 1.0108689069747925, |
| "learning_rate": 0.00018202447163515018, |
| "loss": 3.7797, |
| "step": 410 |
| }, |
| { |
| "epoch": 5.481605351170568, |
| "grad_norm": 0.8550918102264404, |
| "learning_rate": 0.00018197997775305896, |
| "loss": 3.5758, |
| "step": 411 |
| }, |
| { |
| "epoch": 5.494983277591973, |
| "grad_norm": 0.8765145540237427, |
| "learning_rate": 0.00018193548387096775, |
| "loss": 3.7625, |
| "step": 412 |
| }, |
| { |
| "epoch": 5.508361204013378, |
| "grad_norm": 1.0253541469573975, |
| "learning_rate": 0.00018189098998887656, |
| "loss": 3.6033, |
| "step": 413 |
| }, |
| { |
| "epoch": 5.521739130434782, |
| "grad_norm": 1.0475622415542603, |
| "learning_rate": 0.0001818464961067853, |
| "loss": 3.813, |
| "step": 414 |
| }, |
| { |
| "epoch": 5.535117056856187, |
| "grad_norm": 1.053133249282837, |
| "learning_rate": 0.00018180200222469412, |
| "loss": 3.4779, |
| "step": 415 |
| }, |
| { |
| "epoch": 5.548494983277592, |
| "grad_norm": 1.0151216983795166, |
| "learning_rate": 0.0001817575083426029, |
| "loss": 3.9038, |
| "step": 416 |
| }, |
| { |
| "epoch": 5.561872909698996, |
| "grad_norm": 1.4666434526443481, |
| "learning_rate": 0.00018171301446051169, |
| "loss": 3.4735, |
| "step": 417 |
| }, |
| { |
| "epoch": 5.575250836120401, |
| "grad_norm": 1.1043344736099243, |
| "learning_rate": 0.00018166852057842047, |
| "loss": 3.7449, |
| "step": 418 |
| }, |
| { |
| "epoch": 5.588628762541806, |
| "grad_norm": 0.900745153427124, |
| "learning_rate": 0.00018162402669632925, |
| "loss": 3.7401, |
| "step": 419 |
| }, |
| { |
| "epoch": 5.602006688963211, |
| "grad_norm": 0.9771101474761963, |
| "learning_rate": 0.00018157953281423806, |
| "loss": 3.8328, |
| "step": 420 |
| }, |
| { |
| "epoch": 5.615384615384615, |
| "grad_norm": 0.9099516272544861, |
| "learning_rate": 0.00018153503893214682, |
| "loss": 3.6245, |
| "step": 421 |
| }, |
| { |
| "epoch": 5.6287625418060205, |
| "grad_norm": 0.9844585657119751, |
| "learning_rate": 0.00018149054505005563, |
| "loss": 3.5776, |
| "step": 422 |
| }, |
| { |
| "epoch": 5.642140468227424, |
| "grad_norm": 1.0481154918670654, |
| "learning_rate": 0.0001814460511679644, |
| "loss": 3.6304, |
| "step": 423 |
| }, |
| { |
| "epoch": 5.65551839464883, |
| "grad_norm": 0.9971081614494324, |
| "learning_rate": 0.0001814015572858732, |
| "loss": 3.7863, |
| "step": 424 |
| }, |
| { |
| "epoch": 5.668896321070234, |
| "grad_norm": 0.9247872829437256, |
| "learning_rate": 0.00018135706340378197, |
| "loss": 3.7319, |
| "step": 425 |
| }, |
| { |
| "epoch": 5.682274247491639, |
| "grad_norm": 0.9895725846290588, |
| "learning_rate": 0.00018131256952169078, |
| "loss": 3.78, |
| "step": 426 |
| }, |
| { |
| "epoch": 5.695652173913043, |
| "grad_norm": 1.0847641229629517, |
| "learning_rate": 0.00018126807563959956, |
| "loss": 3.8662, |
| "step": 427 |
| }, |
| { |
| "epoch": 5.709030100334449, |
| "grad_norm": 0.986259937286377, |
| "learning_rate": 0.00018122358175750835, |
| "loss": 3.5621, |
| "step": 428 |
| }, |
| { |
| "epoch": 5.722408026755852, |
| "grad_norm": 0.9166681170463562, |
| "learning_rate": 0.00018117908787541713, |
| "loss": 3.6153, |
| "step": 429 |
| }, |
| { |
| "epoch": 5.735785953177258, |
| "grad_norm": 1.1331177949905396, |
| "learning_rate": 0.00018113459399332594, |
| "loss": 3.5976, |
| "step": 430 |
| }, |
| { |
| "epoch": 5.749163879598662, |
| "grad_norm": 0.8743540644645691, |
| "learning_rate": 0.0001810901001112347, |
| "loss": 3.2511, |
| "step": 431 |
| }, |
| { |
| "epoch": 5.762541806020067, |
| "grad_norm": 1.0700207948684692, |
| "learning_rate": 0.0001810456062291435, |
| "loss": 3.7634, |
| "step": 432 |
| }, |
| { |
| "epoch": 5.775919732441472, |
| "grad_norm": 0.9412694573402405, |
| "learning_rate": 0.00018100111234705229, |
| "loss": 3.6264, |
| "step": 433 |
| }, |
| { |
| "epoch": 5.789297658862877, |
| "grad_norm": 1.0398496389389038, |
| "learning_rate": 0.00018095661846496107, |
| "loss": 3.744, |
| "step": 434 |
| }, |
| { |
| "epoch": 5.802675585284281, |
| "grad_norm": 0.9605004787445068, |
| "learning_rate": 0.00018091212458286985, |
| "loss": 3.5532, |
| "step": 435 |
| }, |
| { |
| "epoch": 5.816053511705686, |
| "grad_norm": 1.0449095964431763, |
| "learning_rate": 0.00018086763070077866, |
| "loss": 4.0611, |
| "step": 436 |
| }, |
| { |
| "epoch": 5.829431438127091, |
| "grad_norm": 0.9342606663703918, |
| "learning_rate": 0.00018082313681868744, |
| "loss": 3.9957, |
| "step": 437 |
| }, |
| { |
| "epoch": 5.842809364548495, |
| "grad_norm": 0.9687880873680115, |
| "learning_rate": 0.00018077864293659622, |
| "loss": 3.9299, |
| "step": 438 |
| }, |
| { |
| "epoch": 5.8561872909699, |
| "grad_norm": 1.1390576362609863, |
| "learning_rate": 0.000180734149054505, |
| "loss": 3.6552, |
| "step": 439 |
| }, |
| { |
| "epoch": 5.869565217391305, |
| "grad_norm": 0.9280988574028015, |
| "learning_rate": 0.00018068965517241382, |
| "loss": 3.7828, |
| "step": 440 |
| }, |
| { |
| "epoch": 5.882943143812709, |
| "grad_norm": 1.2928193807601929, |
| "learning_rate": 0.00018064516129032257, |
| "loss": 3.6292, |
| "step": 441 |
| }, |
| { |
| "epoch": 5.896321070234114, |
| "grad_norm": 1.0959875583648682, |
| "learning_rate": 0.00018060066740823138, |
| "loss": 3.4293, |
| "step": 442 |
| }, |
| { |
| "epoch": 5.909698996655519, |
| "grad_norm": 1.0713289976119995, |
| "learning_rate": 0.00018055617352614016, |
| "loss": 3.7767, |
| "step": 443 |
| }, |
| { |
| "epoch": 5.923076923076923, |
| "grad_norm": 0.9309440851211548, |
| "learning_rate": 0.00018051167964404895, |
| "loss": 3.5473, |
| "step": 444 |
| }, |
| { |
| "epoch": 5.936454849498328, |
| "grad_norm": 1.0999056100845337, |
| "learning_rate": 0.00018046718576195773, |
| "loss": 3.9694, |
| "step": 445 |
| }, |
| { |
| "epoch": 5.949832775919733, |
| "grad_norm": 1.1073781251907349, |
| "learning_rate": 0.00018042269187986654, |
| "loss": 3.6882, |
| "step": 446 |
| }, |
| { |
| "epoch": 5.963210702341137, |
| "grad_norm": 1.0430257320404053, |
| "learning_rate": 0.00018037819799777532, |
| "loss": 3.4009, |
| "step": 447 |
| }, |
| { |
| "epoch": 5.976588628762542, |
| "grad_norm": 1.1132690906524658, |
| "learning_rate": 0.0001803337041156841, |
| "loss": 3.8832, |
| "step": 448 |
| }, |
| { |
| "epoch": 5.989966555183947, |
| "grad_norm": 1.0147771835327148, |
| "learning_rate": 0.00018028921023359289, |
| "loss": 3.7117, |
| "step": 449 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.458959698677063, |
| "learning_rate": 0.0001802447163515017, |
| "loss": 3.5745, |
| "step": 450 |
| }, |
| { |
| "epoch": 6.013377926421405, |
| "grad_norm": 0.8363592028617859, |
| "learning_rate": 0.00018020022246941045, |
| "loss": 3.5835, |
| "step": 451 |
| }, |
| { |
| "epoch": 6.026755852842809, |
| "grad_norm": 0.8071937561035156, |
| "learning_rate": 0.00018015572858731926, |
| "loss": 3.5923, |
| "step": 452 |
| }, |
| { |
| "epoch": 6.040133779264214, |
| "grad_norm": 0.7746313214302063, |
| "learning_rate": 0.00018011123470522804, |
| "loss": 3.5688, |
| "step": 453 |
| }, |
| { |
| "epoch": 6.053511705685619, |
| "grad_norm": 0.689179539680481, |
| "learning_rate": 0.00018006674082313682, |
| "loss": 3.412, |
| "step": 454 |
| }, |
| { |
| "epoch": 6.066889632107023, |
| "grad_norm": 0.8438050746917725, |
| "learning_rate": 0.0001800222469410456, |
| "loss": 3.403, |
| "step": 455 |
| }, |
| { |
| "epoch": 6.080267558528428, |
| "grad_norm": 0.7670062780380249, |
| "learning_rate": 0.00017997775305895442, |
| "loss": 3.5029, |
| "step": 456 |
| }, |
| { |
| "epoch": 6.093645484949833, |
| "grad_norm": 0.8185870051383972, |
| "learning_rate": 0.0001799332591768632, |
| "loss": 3.4584, |
| "step": 457 |
| }, |
| { |
| "epoch": 6.107023411371237, |
| "grad_norm": 0.9618543386459351, |
| "learning_rate": 0.00017988876529477198, |
| "loss": 3.6538, |
| "step": 458 |
| }, |
| { |
| "epoch": 6.120401337792642, |
| "grad_norm": 0.959724485874176, |
| "learning_rate": 0.00017984427141268076, |
| "loss": 3.5284, |
| "step": 459 |
| }, |
| { |
| "epoch": 6.133779264214047, |
| "grad_norm": 0.8044765591621399, |
| "learning_rate": 0.00017979977753058955, |
| "loss": 3.2198, |
| "step": 460 |
| }, |
| { |
| "epoch": 6.147157190635451, |
| "grad_norm": 0.8287092447280884, |
| "learning_rate": 0.00017975528364849833, |
| "loss": 3.4977, |
| "step": 461 |
| }, |
| { |
| "epoch": 6.160535117056856, |
| "grad_norm": 0.8855329155921936, |
| "learning_rate": 0.0001797107897664071, |
| "loss": 3.5008, |
| "step": 462 |
| }, |
| { |
| "epoch": 6.173913043478261, |
| "grad_norm": 0.8839483857154846, |
| "learning_rate": 0.00017966629588431592, |
| "loss": 3.6135, |
| "step": 463 |
| }, |
| { |
| "epoch": 6.187290969899665, |
| "grad_norm": 0.963446319103241, |
| "learning_rate": 0.0001796218020022247, |
| "loss": 3.6156, |
| "step": 464 |
| }, |
| { |
| "epoch": 6.20066889632107, |
| "grad_norm": 0.896743655204773, |
| "learning_rate": 0.00017957730812013348, |
| "loss": 3.6623, |
| "step": 465 |
| }, |
| { |
| "epoch": 6.214046822742475, |
| "grad_norm": 0.9268617033958435, |
| "learning_rate": 0.00017953281423804227, |
| "loss": 3.4343, |
| "step": 466 |
| }, |
| { |
| "epoch": 6.2274247491638794, |
| "grad_norm": 0.8335449695587158, |
| "learning_rate": 0.00017948832035595108, |
| "loss": 3.5716, |
| "step": 467 |
| }, |
| { |
| "epoch": 6.240802675585284, |
| "grad_norm": 0.7771849036216736, |
| "learning_rate": 0.00017944382647385983, |
| "loss": 3.5191, |
| "step": 468 |
| }, |
| { |
| "epoch": 6.254180602006689, |
| "grad_norm": 0.9157488346099854, |
| "learning_rate": 0.00017939933259176864, |
| "loss": 3.5583, |
| "step": 469 |
| }, |
| { |
| "epoch": 6.2675585284280935, |
| "grad_norm": 0.9348477721214294, |
| "learning_rate": 0.00017935483870967742, |
| "loss": 3.3137, |
| "step": 470 |
| }, |
| { |
| "epoch": 6.280936454849498, |
| "grad_norm": 0.8791135549545288, |
| "learning_rate": 0.0001793103448275862, |
| "loss": 3.5111, |
| "step": 471 |
| }, |
| { |
| "epoch": 6.294314381270903, |
| "grad_norm": 0.9963672757148743, |
| "learning_rate": 0.000179265850945495, |
| "loss": 3.7518, |
| "step": 472 |
| }, |
| { |
| "epoch": 6.3076923076923075, |
| "grad_norm": 0.9291539192199707, |
| "learning_rate": 0.0001792213570634038, |
| "loss": 3.4524, |
| "step": 473 |
| }, |
| { |
| "epoch": 6.321070234113712, |
| "grad_norm": 0.9349279403686523, |
| "learning_rate": 0.00017917686318131258, |
| "loss": 3.4753, |
| "step": 474 |
| }, |
| { |
| "epoch": 6.334448160535117, |
| "grad_norm": 0.8984476327896118, |
| "learning_rate": 0.00017913236929922136, |
| "loss": 3.7325, |
| "step": 475 |
| }, |
| { |
| "epoch": 6.3478260869565215, |
| "grad_norm": 0.8452139496803284, |
| "learning_rate": 0.00017908787541713015, |
| "loss": 3.8021, |
| "step": 476 |
| }, |
| { |
| "epoch": 6.361204013377926, |
| "grad_norm": 0.9418376088142395, |
| "learning_rate": 0.00017904338153503895, |
| "loss": 3.7426, |
| "step": 477 |
| }, |
| { |
| "epoch": 6.374581939799331, |
| "grad_norm": 1.0661097764968872, |
| "learning_rate": 0.0001789988876529477, |
| "loss": 3.7556, |
| "step": 478 |
| }, |
| { |
| "epoch": 6.3879598662207355, |
| "grad_norm": 0.9645984768867493, |
| "learning_rate": 0.00017895439377085652, |
| "loss": 3.3353, |
| "step": 479 |
| }, |
| { |
| "epoch": 6.40133779264214, |
| "grad_norm": 0.9243470430374146, |
| "learning_rate": 0.0001789098998887653, |
| "loss": 3.5729, |
| "step": 480 |
| }, |
| { |
| "epoch": 6.414715719063545, |
| "grad_norm": 0.885061502456665, |
| "learning_rate": 0.00017886540600667408, |
| "loss": 3.5699, |
| "step": 481 |
| }, |
| { |
| "epoch": 6.4280936454849495, |
| "grad_norm": 0.9025402069091797, |
| "learning_rate": 0.00017882091212458287, |
| "loss": 3.4532, |
| "step": 482 |
| }, |
| { |
| "epoch": 6.441471571906354, |
| "grad_norm": 0.9760842323303223, |
| "learning_rate": 0.00017877641824249168, |
| "loss": 3.7222, |
| "step": 483 |
| }, |
| { |
| "epoch": 6.454849498327759, |
| "grad_norm": 1.2709609270095825, |
| "learning_rate": 0.00017873192436040046, |
| "loss": 3.756, |
| "step": 484 |
| }, |
| { |
| "epoch": 6.468227424749164, |
| "grad_norm": 0.904513955116272, |
| "learning_rate": 0.00017868743047830924, |
| "loss": 3.657, |
| "step": 485 |
| }, |
| { |
| "epoch": 6.481605351170568, |
| "grad_norm": 1.158915400505066, |
| "learning_rate": 0.00017864293659621802, |
| "loss": 3.5897, |
| "step": 486 |
| }, |
| { |
| "epoch": 6.494983277591973, |
| "grad_norm": 0.9457879066467285, |
| "learning_rate": 0.00017859844271412683, |
| "loss": 3.4394, |
| "step": 487 |
| }, |
| { |
| "epoch": 6.508361204013378, |
| "grad_norm": 1.04762601852417, |
| "learning_rate": 0.0001785539488320356, |
| "loss": 3.39, |
| "step": 488 |
| }, |
| { |
| "epoch": 6.521739130434782, |
| "grad_norm": 0.9370948076248169, |
| "learning_rate": 0.0001785094549499444, |
| "loss": 3.4816, |
| "step": 489 |
| }, |
| { |
| "epoch": 6.535117056856187, |
| "grad_norm": 1.1307988166809082, |
| "learning_rate": 0.00017846496106785318, |
| "loss": 3.5079, |
| "step": 490 |
| }, |
| { |
| "epoch": 6.548494983277592, |
| "grad_norm": 1.0025054216384888, |
| "learning_rate": 0.00017842046718576196, |
| "loss": 3.3447, |
| "step": 491 |
| }, |
| { |
| "epoch": 6.561872909698996, |
| "grad_norm": 0.9893412590026855, |
| "learning_rate": 0.00017837597330367074, |
| "loss": 3.4698, |
| "step": 492 |
| }, |
| { |
| "epoch": 6.575250836120401, |
| "grad_norm": 1.0476347208023071, |
| "learning_rate": 0.00017833147942157955, |
| "loss": 3.4955, |
| "step": 493 |
| }, |
| { |
| "epoch": 6.588628762541806, |
| "grad_norm": 1.0508891344070435, |
| "learning_rate": 0.00017828698553948834, |
| "loss": 3.6568, |
| "step": 494 |
| }, |
| { |
| "epoch": 6.602006688963211, |
| "grad_norm": 1.0397465229034424, |
| "learning_rate": 0.00017824249165739712, |
| "loss": 3.5087, |
| "step": 495 |
| }, |
| { |
| "epoch": 6.615384615384615, |
| "grad_norm": 0.9884181022644043, |
| "learning_rate": 0.0001781979977753059, |
| "loss": 3.6778, |
| "step": 496 |
| }, |
| { |
| "epoch": 6.6287625418060205, |
| "grad_norm": 1.1187562942504883, |
| "learning_rate": 0.0001781535038932147, |
| "loss": 3.4345, |
| "step": 497 |
| }, |
| { |
| "epoch": 6.642140468227424, |
| "grad_norm": 1.1133880615234375, |
| "learning_rate": 0.00017810901001112347, |
| "loss": 3.4959, |
| "step": 498 |
| }, |
| { |
| "epoch": 6.65551839464883, |
| "grad_norm": 0.9368589520454407, |
| "learning_rate": 0.00017806451612903228, |
| "loss": 3.628, |
| "step": 499 |
| }, |
| { |
| "epoch": 6.668896321070234, |
| "grad_norm": 1.0427212715148926, |
| "learning_rate": 0.00017802002224694106, |
| "loss": 3.6073, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.682274247491639, |
| "grad_norm": 0.9564261436462402, |
| "learning_rate": 0.00017797552836484984, |
| "loss": 3.4406, |
| "step": 501 |
| }, |
| { |
| "epoch": 6.695652173913043, |
| "grad_norm": 0.9754629135131836, |
| "learning_rate": 0.00017793103448275862, |
| "loss": 3.7025, |
| "step": 502 |
| }, |
| { |
| "epoch": 6.709030100334449, |
| "grad_norm": 0.9669683575630188, |
| "learning_rate": 0.0001778865406006674, |
| "loss": 3.5872, |
| "step": 503 |
| }, |
| { |
| "epoch": 6.722408026755852, |
| "grad_norm": 0.959619402885437, |
| "learning_rate": 0.00017784204671857621, |
| "loss": 3.5124, |
| "step": 504 |
| }, |
| { |
| "epoch": 6.735785953177258, |
| "grad_norm": 0.981737494468689, |
| "learning_rate": 0.00017779755283648497, |
| "loss": 3.4038, |
| "step": 505 |
| }, |
| { |
| "epoch": 6.749163879598662, |
| "grad_norm": 0.9372640252113342, |
| "learning_rate": 0.00017775305895439378, |
| "loss": 3.6725, |
| "step": 506 |
| }, |
| { |
| "epoch": 6.762541806020067, |
| "grad_norm": 0.947066605091095, |
| "learning_rate": 0.00017770856507230256, |
| "loss": 3.807, |
| "step": 507 |
| }, |
| { |
| "epoch": 6.775919732441472, |
| "grad_norm": 0.7659755349159241, |
| "learning_rate": 0.00017766407119021134, |
| "loss": 3.0411, |
| "step": 508 |
| }, |
| { |
| "epoch": 6.789297658862877, |
| "grad_norm": 1.0432168245315552, |
| "learning_rate": 0.00017761957730812013, |
| "loss": 3.6259, |
| "step": 509 |
| }, |
| { |
| "epoch": 6.802675585284281, |
| "grad_norm": 1.0104693174362183, |
| "learning_rate": 0.00017757508342602894, |
| "loss": 3.5971, |
| "step": 510 |
| }, |
| { |
| "epoch": 6.816053511705686, |
| "grad_norm": 0.9007440805435181, |
| "learning_rate": 0.00017753058954393772, |
| "loss": 3.5894, |
| "step": 511 |
| }, |
| { |
| "epoch": 6.829431438127091, |
| "grad_norm": 0.8829946517944336, |
| "learning_rate": 0.0001774860956618465, |
| "loss": 3.6094, |
| "step": 512 |
| }, |
| { |
| "epoch": 6.842809364548495, |
| "grad_norm": 0.9823127388954163, |
| "learning_rate": 0.00017744160177975528, |
| "loss": 3.6568, |
| "step": 513 |
| }, |
| { |
| "epoch": 6.8561872909699, |
| "grad_norm": 1.0247899293899536, |
| "learning_rate": 0.0001773971078976641, |
| "loss": 3.259, |
| "step": 514 |
| }, |
| { |
| "epoch": 6.869565217391305, |
| "grad_norm": 0.9435336589813232, |
| "learning_rate": 0.00017735261401557285, |
| "loss": 3.4978, |
| "step": 515 |
| }, |
| { |
| "epoch": 6.882943143812709, |
| "grad_norm": 1.0135424137115479, |
| "learning_rate": 0.00017730812013348166, |
| "loss": 3.4765, |
| "step": 516 |
| }, |
| { |
| "epoch": 6.896321070234114, |
| "grad_norm": 1.1327738761901855, |
| "learning_rate": 0.00017726362625139044, |
| "loss": 3.6927, |
| "step": 517 |
| }, |
| { |
| "epoch": 6.909698996655519, |
| "grad_norm": 0.9335159659385681, |
| "learning_rate": 0.00017721913236929922, |
| "loss": 3.7278, |
| "step": 518 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "grad_norm": 0.8229056000709534, |
| "learning_rate": 0.000177174638487208, |
| "loss": 3.549, |
| "step": 519 |
| }, |
| { |
| "epoch": 6.936454849498328, |
| "grad_norm": 0.8996124267578125, |
| "learning_rate": 0.00017713014460511681, |
| "loss": 3.5093, |
| "step": 520 |
| }, |
| { |
| "epoch": 6.949832775919733, |
| "grad_norm": 0.9984102845191956, |
| "learning_rate": 0.0001770856507230256, |
| "loss": 3.4772, |
| "step": 521 |
| }, |
| { |
| "epoch": 6.963210702341137, |
| "grad_norm": 0.9136049747467041, |
| "learning_rate": 0.00017704115684093438, |
| "loss": 3.6487, |
| "step": 522 |
| }, |
| { |
| "epoch": 6.976588628762542, |
| "grad_norm": 0.9707899689674377, |
| "learning_rate": 0.00017699666295884316, |
| "loss": 3.7471, |
| "step": 523 |
| }, |
| { |
| "epoch": 6.989966555183947, |
| "grad_norm": 0.919865071773529, |
| "learning_rate": 0.00017695216907675197, |
| "loss": 3.8013, |
| "step": 524 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.125541090965271, |
| "learning_rate": 0.00017690767519466073, |
| "loss": 3.4869, |
| "step": 525 |
| }, |
| { |
| "epoch": 7.013377926421405, |
| "grad_norm": 0.8512810468673706, |
| "learning_rate": 0.00017686318131256954, |
| "loss": 3.3334, |
| "step": 526 |
| }, |
| { |
| "epoch": 7.026755852842809, |
| "grad_norm": 0.8450623750686646, |
| "learning_rate": 0.00017681868743047832, |
| "loss": 3.5769, |
| "step": 527 |
| }, |
| { |
| "epoch": 7.040133779264214, |
| "grad_norm": 0.8526298403739929, |
| "learning_rate": 0.0001767741935483871, |
| "loss": 3.4546, |
| "step": 528 |
| }, |
| { |
| "epoch": 7.053511705685619, |
| "grad_norm": 0.75905442237854, |
| "learning_rate": 0.00017672969966629588, |
| "loss": 3.4228, |
| "step": 529 |
| }, |
| { |
| "epoch": 7.066889632107023, |
| "grad_norm": 0.8442811965942383, |
| "learning_rate": 0.0001766852057842047, |
| "loss": 3.5766, |
| "step": 530 |
| }, |
| { |
| "epoch": 7.080267558528428, |
| "grad_norm": 0.9584814310073853, |
| "learning_rate": 0.00017664071190211347, |
| "loss": 3.5312, |
| "step": 531 |
| }, |
| { |
| "epoch": 7.093645484949833, |
| "grad_norm": 0.9741052985191345, |
| "learning_rate": 0.00017659621802002226, |
| "loss": 3.4877, |
| "step": 532 |
| }, |
| { |
| "epoch": 7.107023411371237, |
| "grad_norm": 0.8638135194778442, |
| "learning_rate": 0.00017655172413793104, |
| "loss": 3.5701, |
| "step": 533 |
| }, |
| { |
| "epoch": 7.120401337792642, |
| "grad_norm": 1.0128440856933594, |
| "learning_rate": 0.00017650723025583985, |
| "loss": 3.3629, |
| "step": 534 |
| }, |
| { |
| "epoch": 7.133779264214047, |
| "grad_norm": 0.9763593673706055, |
| "learning_rate": 0.0001764627363737486, |
| "loss": 3.5651, |
| "step": 535 |
| }, |
| { |
| "epoch": 7.147157190635451, |
| "grad_norm": 0.8706293702125549, |
| "learning_rate": 0.00017641824249165741, |
| "loss": 3.454, |
| "step": 536 |
| }, |
| { |
| "epoch": 7.160535117056856, |
| "grad_norm": 0.9227468967437744, |
| "learning_rate": 0.0001763737486095662, |
| "loss": 3.5528, |
| "step": 537 |
| }, |
| { |
| "epoch": 7.173913043478261, |
| "grad_norm": 0.7493206262588501, |
| "learning_rate": 0.00017632925472747498, |
| "loss": 3.2662, |
| "step": 538 |
| }, |
| { |
| "epoch": 7.187290969899665, |
| "grad_norm": 0.8414123058319092, |
| "learning_rate": 0.00017628476084538376, |
| "loss": 3.3864, |
| "step": 539 |
| }, |
| { |
| "epoch": 7.20066889632107, |
| "grad_norm": 0.8352764248847961, |
| "learning_rate": 0.00017624026696329257, |
| "loss": 3.2407, |
| "step": 540 |
| }, |
| { |
| "epoch": 7.214046822742475, |
| "grad_norm": 0.7413480281829834, |
| "learning_rate": 0.00017619577308120135, |
| "loss": 3.3989, |
| "step": 541 |
| }, |
| { |
| "epoch": 7.2274247491638794, |
| "grad_norm": 0.7661281228065491, |
| "learning_rate": 0.00017615127919911014, |
| "loss": 3.3792, |
| "step": 542 |
| }, |
| { |
| "epoch": 7.240802675585284, |
| "grad_norm": 0.86900395154953, |
| "learning_rate": 0.00017610678531701892, |
| "loss": 3.2191, |
| "step": 543 |
| }, |
| { |
| "epoch": 7.254180602006689, |
| "grad_norm": 0.8536344170570374, |
| "learning_rate": 0.0001760622914349277, |
| "loss": 3.3366, |
| "step": 544 |
| }, |
| { |
| "epoch": 7.2675585284280935, |
| "grad_norm": 0.8729544878005981, |
| "learning_rate": 0.00017601779755283648, |
| "loss": 3.4976, |
| "step": 545 |
| }, |
| { |
| "epoch": 7.280936454849498, |
| "grad_norm": 0.8263023495674133, |
| "learning_rate": 0.00017597330367074526, |
| "loss": 3.4102, |
| "step": 546 |
| }, |
| { |
| "epoch": 7.294314381270903, |
| "grad_norm": 0.748373806476593, |
| "learning_rate": 0.00017592880978865407, |
| "loss": 3.332, |
| "step": 547 |
| }, |
| { |
| "epoch": 7.3076923076923075, |
| "grad_norm": 0.7606791853904724, |
| "learning_rate": 0.00017588431590656286, |
| "loss": 3.361, |
| "step": 548 |
| }, |
| { |
| "epoch": 7.321070234113712, |
| "grad_norm": 0.9155070781707764, |
| "learning_rate": 0.00017583982202447164, |
| "loss": 3.657, |
| "step": 549 |
| }, |
| { |
| "epoch": 7.334448160535117, |
| "grad_norm": 0.7440597414970398, |
| "learning_rate": 0.00017579532814238042, |
| "loss": 3.335, |
| "step": 550 |
| }, |
| { |
| "epoch": 7.3478260869565215, |
| "grad_norm": 0.8781002759933472, |
| "learning_rate": 0.00017575083426028923, |
| "loss": 3.5579, |
| "step": 551 |
| }, |
| { |
| "epoch": 7.361204013377926, |
| "grad_norm": 0.7886701822280884, |
| "learning_rate": 0.00017570634037819799, |
| "loss": 3.4636, |
| "step": 552 |
| }, |
| { |
| "epoch": 7.374581939799331, |
| "grad_norm": 0.8931376934051514, |
| "learning_rate": 0.0001756618464961068, |
| "loss": 3.4375, |
| "step": 553 |
| }, |
| { |
| "epoch": 7.3879598662207355, |
| "grad_norm": 0.7599623799324036, |
| "learning_rate": 0.00017561735261401558, |
| "loss": 3.6551, |
| "step": 554 |
| }, |
| { |
| "epoch": 7.40133779264214, |
| "grad_norm": 0.7692762613296509, |
| "learning_rate": 0.00017557285873192436, |
| "loss": 3.6373, |
| "step": 555 |
| }, |
| { |
| "epoch": 7.414715719063545, |
| "grad_norm": 0.8861828446388245, |
| "learning_rate": 0.00017552836484983314, |
| "loss": 3.4791, |
| "step": 556 |
| }, |
| { |
| "epoch": 7.4280936454849495, |
| "grad_norm": 0.9560372829437256, |
| "learning_rate": 0.00017548387096774195, |
| "loss": 3.7291, |
| "step": 557 |
| }, |
| { |
| "epoch": 7.441471571906354, |
| "grad_norm": 0.8745344281196594, |
| "learning_rate": 0.00017543937708565073, |
| "loss": 3.3071, |
| "step": 558 |
| }, |
| { |
| "epoch": 7.454849498327759, |
| "grad_norm": 0.8178285360336304, |
| "learning_rate": 0.00017539488320355952, |
| "loss": 3.4738, |
| "step": 559 |
| }, |
| { |
| "epoch": 7.468227424749164, |
| "grad_norm": 0.8611259460449219, |
| "learning_rate": 0.0001753503893214683, |
| "loss": 3.25, |
| "step": 560 |
| }, |
| { |
| "epoch": 7.481605351170568, |
| "grad_norm": 0.8623505234718323, |
| "learning_rate": 0.0001753058954393771, |
| "loss": 3.3701, |
| "step": 561 |
| }, |
| { |
| "epoch": 7.494983277591973, |
| "grad_norm": 0.76930171251297, |
| "learning_rate": 0.00017526140155728586, |
| "loss": 3.332, |
| "step": 562 |
| }, |
| { |
| "epoch": 7.508361204013378, |
| "grad_norm": 0.8986758589744568, |
| "learning_rate": 0.00017521690767519467, |
| "loss": 3.3927, |
| "step": 563 |
| }, |
| { |
| "epoch": 7.521739130434782, |
| "grad_norm": 0.9844257831573486, |
| "learning_rate": 0.00017517241379310346, |
| "loss": 3.5664, |
| "step": 564 |
| }, |
| { |
| "epoch": 7.535117056856187, |
| "grad_norm": 0.983921229839325, |
| "learning_rate": 0.00017512791991101224, |
| "loss": 3.3888, |
| "step": 565 |
| }, |
| { |
| "epoch": 7.548494983277592, |
| "grad_norm": 0.8052308559417725, |
| "learning_rate": 0.00017508342602892102, |
| "loss": 3.4809, |
| "step": 566 |
| }, |
| { |
| "epoch": 7.561872909698996, |
| "grad_norm": 0.7996425032615662, |
| "learning_rate": 0.00017503893214682983, |
| "loss": 3.4793, |
| "step": 567 |
| }, |
| { |
| "epoch": 7.575250836120401, |
| "grad_norm": 0.8453391194343567, |
| "learning_rate": 0.0001749944382647386, |
| "loss": 3.3199, |
| "step": 568 |
| }, |
| { |
| "epoch": 7.588628762541806, |
| "grad_norm": 0.8720147013664246, |
| "learning_rate": 0.0001749499443826474, |
| "loss": 3.5612, |
| "step": 569 |
| }, |
| { |
| "epoch": 7.602006688963211, |
| "grad_norm": 0.9093672633171082, |
| "learning_rate": 0.00017490545050055618, |
| "loss": 3.0509, |
| "step": 570 |
| }, |
| { |
| "epoch": 7.615384615384615, |
| "grad_norm": 0.8936579823493958, |
| "learning_rate": 0.000174860956618465, |
| "loss": 3.4408, |
| "step": 571 |
| }, |
| { |
| "epoch": 7.6287625418060205, |
| "grad_norm": 0.7683162689208984, |
| "learning_rate": 0.00017481646273637374, |
| "loss": 3.3536, |
| "step": 572 |
| }, |
| { |
| "epoch": 7.642140468227424, |
| "grad_norm": 0.7943581342697144, |
| "learning_rate": 0.00017477196885428255, |
| "loss": 3.4542, |
| "step": 573 |
| }, |
| { |
| "epoch": 7.65551839464883, |
| "grad_norm": 0.8183353543281555, |
| "learning_rate": 0.00017472747497219133, |
| "loss": 3.603, |
| "step": 574 |
| }, |
| { |
| "epoch": 7.668896321070234, |
| "grad_norm": 0.7816463708877563, |
| "learning_rate": 0.00017468298109010012, |
| "loss": 3.7388, |
| "step": 575 |
| }, |
| { |
| "epoch": 7.682274247491639, |
| "grad_norm": 0.8167930245399475, |
| "learning_rate": 0.0001746384872080089, |
| "loss": 3.7743, |
| "step": 576 |
| }, |
| { |
| "epoch": 7.695652173913043, |
| "grad_norm": 0.832392156124115, |
| "learning_rate": 0.0001745939933259177, |
| "loss": 3.7488, |
| "step": 577 |
| }, |
| { |
| "epoch": 7.709030100334449, |
| "grad_norm": 0.9362333416938782, |
| "learning_rate": 0.0001745494994438265, |
| "loss": 3.5722, |
| "step": 578 |
| }, |
| { |
| "epoch": 7.722408026755852, |
| "grad_norm": 1.0247248411178589, |
| "learning_rate": 0.00017450500556173527, |
| "loss": 3.2048, |
| "step": 579 |
| }, |
| { |
| "epoch": 7.735785953177258, |
| "grad_norm": 0.8833767175674438, |
| "learning_rate": 0.00017446051167964406, |
| "loss": 3.389, |
| "step": 580 |
| }, |
| { |
| "epoch": 7.749163879598662, |
| "grad_norm": 0.8344758749008179, |
| "learning_rate": 0.00017441601779755287, |
| "loss": 3.5264, |
| "step": 581 |
| }, |
| { |
| "epoch": 7.762541806020067, |
| "grad_norm": 0.9771448373794556, |
| "learning_rate": 0.00017437152391546162, |
| "loss": 3.041, |
| "step": 582 |
| }, |
| { |
| "epoch": 7.775919732441472, |
| "grad_norm": 0.8279567956924438, |
| "learning_rate": 0.00017432703003337043, |
| "loss": 3.6273, |
| "step": 583 |
| }, |
| { |
| "epoch": 7.789297658862877, |
| "grad_norm": 0.957206130027771, |
| "learning_rate": 0.0001742825361512792, |
| "loss": 3.3831, |
| "step": 584 |
| }, |
| { |
| "epoch": 7.802675585284281, |
| "grad_norm": 0.860619843006134, |
| "learning_rate": 0.000174238042269188, |
| "loss": 3.4566, |
| "step": 585 |
| }, |
| { |
| "epoch": 7.816053511705686, |
| "grad_norm": 0.8725448846817017, |
| "learning_rate": 0.00017419354838709678, |
| "loss": 3.6594, |
| "step": 586 |
| }, |
| { |
| "epoch": 7.829431438127091, |
| "grad_norm": 0.8343111276626587, |
| "learning_rate": 0.00017414905450500556, |
| "loss": 3.3423, |
| "step": 587 |
| }, |
| { |
| "epoch": 7.842809364548495, |
| "grad_norm": 0.9043267965316772, |
| "learning_rate": 0.00017410456062291437, |
| "loss": 3.221, |
| "step": 588 |
| }, |
| { |
| "epoch": 7.8561872909699, |
| "grad_norm": 0.9563114643096924, |
| "learning_rate": 0.00017406006674082312, |
| "loss": 3.5143, |
| "step": 589 |
| }, |
| { |
| "epoch": 7.869565217391305, |
| "grad_norm": 0.9726302027702332, |
| "learning_rate": 0.00017401557285873193, |
| "loss": 3.4373, |
| "step": 590 |
| }, |
| { |
| "epoch": 7.882943143812709, |
| "grad_norm": 0.9203178882598877, |
| "learning_rate": 0.00017397107897664072, |
| "loss": 3.6014, |
| "step": 591 |
| }, |
| { |
| "epoch": 7.896321070234114, |
| "grad_norm": 0.9120233654975891, |
| "learning_rate": 0.0001739265850945495, |
| "loss": 3.1429, |
| "step": 592 |
| }, |
| { |
| "epoch": 7.909698996655519, |
| "grad_norm": 0.7576518058776855, |
| "learning_rate": 0.00017388209121245828, |
| "loss": 3.2065, |
| "step": 593 |
| }, |
| { |
| "epoch": 7.923076923076923, |
| "grad_norm": 0.9629240036010742, |
| "learning_rate": 0.0001738375973303671, |
| "loss": 3.4788, |
| "step": 594 |
| }, |
| { |
| "epoch": 7.936454849498328, |
| "grad_norm": 0.8390881419181824, |
| "learning_rate": 0.00017379310344827587, |
| "loss": 3.2857, |
| "step": 595 |
| }, |
| { |
| "epoch": 7.949832775919733, |
| "grad_norm": 0.8708979487419128, |
| "learning_rate": 0.00017374860956618466, |
| "loss": 3.3321, |
| "step": 596 |
| }, |
| { |
| "epoch": 7.963210702341137, |
| "grad_norm": 0.7076835632324219, |
| "learning_rate": 0.00017370411568409344, |
| "loss": 3.5905, |
| "step": 597 |
| }, |
| { |
| "epoch": 7.976588628762542, |
| "grad_norm": 1.016526222229004, |
| "learning_rate": 0.00017365962180200225, |
| "loss": 3.4053, |
| "step": 598 |
| }, |
| { |
| "epoch": 7.989966555183947, |
| "grad_norm": 0.7592278718948364, |
| "learning_rate": 0.000173615127919911, |
| "loss": 3.3968, |
| "step": 599 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 1.0106462240219116, |
| "learning_rate": 0.0001735706340378198, |
| "loss": 3.3722, |
| "step": 600 |
| }, |
| { |
| "epoch": 8.013377926421406, |
| "grad_norm": 0.740808367729187, |
| "learning_rate": 0.0001735261401557286, |
| "loss": 3.394, |
| "step": 601 |
| }, |
| { |
| "epoch": 8.02675585284281, |
| "grad_norm": 0.6732498407363892, |
| "learning_rate": 0.00017348164627363738, |
| "loss": 3.297, |
| "step": 602 |
| }, |
| { |
| "epoch": 8.040133779264215, |
| "grad_norm": 0.8319197297096252, |
| "learning_rate": 0.00017343715239154616, |
| "loss": 3.2898, |
| "step": 603 |
| }, |
| { |
| "epoch": 8.053511705685619, |
| "grad_norm": 0.7834349870681763, |
| "learning_rate": 0.00017339265850945497, |
| "loss": 3.2341, |
| "step": 604 |
| }, |
| { |
| "epoch": 8.066889632107024, |
| "grad_norm": 0.705737292766571, |
| "learning_rate": 0.00017334816462736375, |
| "loss": 3.3429, |
| "step": 605 |
| }, |
| { |
| "epoch": 8.080267558528428, |
| "grad_norm": 0.8270958065986633, |
| "learning_rate": 0.00017330367074527253, |
| "loss": 3.0458, |
| "step": 606 |
| }, |
| { |
| "epoch": 8.093645484949834, |
| "grad_norm": 0.7254801392555237, |
| "learning_rate": 0.00017325917686318132, |
| "loss": 3.5143, |
| "step": 607 |
| }, |
| { |
| "epoch": 8.107023411371237, |
| "grad_norm": 0.8450751900672913, |
| "learning_rate": 0.00017321468298109013, |
| "loss": 3.1507, |
| "step": 608 |
| }, |
| { |
| "epoch": 8.120401337792643, |
| "grad_norm": 0.7936837673187256, |
| "learning_rate": 0.00017317018909899888, |
| "loss": 3.3979, |
| "step": 609 |
| }, |
| { |
| "epoch": 8.133779264214047, |
| "grad_norm": 0.6496401429176331, |
| "learning_rate": 0.0001731256952169077, |
| "loss": 3.3613, |
| "step": 610 |
| }, |
| { |
| "epoch": 8.147157190635452, |
| "grad_norm": 0.8721235990524292, |
| "learning_rate": 0.00017308120133481647, |
| "loss": 3.4299, |
| "step": 611 |
| }, |
| { |
| "epoch": 8.160535117056856, |
| "grad_norm": 0.7671874761581421, |
| "learning_rate": 0.00017303670745272525, |
| "loss": 3.3333, |
| "step": 612 |
| }, |
| { |
| "epoch": 8.173913043478262, |
| "grad_norm": 0.6427144408226013, |
| "learning_rate": 0.00017299221357063404, |
| "loss": 3.3304, |
| "step": 613 |
| }, |
| { |
| "epoch": 8.187290969899665, |
| "grad_norm": 0.7999966144561768, |
| "learning_rate": 0.00017294771968854285, |
| "loss": 3.4016, |
| "step": 614 |
| }, |
| { |
| "epoch": 8.200668896321071, |
| "grad_norm": 0.8216206431388855, |
| "learning_rate": 0.00017290322580645163, |
| "loss": 3.1724, |
| "step": 615 |
| }, |
| { |
| "epoch": 8.214046822742475, |
| "grad_norm": 0.7364024519920349, |
| "learning_rate": 0.0001728587319243604, |
| "loss": 3.34, |
| "step": 616 |
| }, |
| { |
| "epoch": 8.22742474916388, |
| "grad_norm": 0.7688239812850952, |
| "learning_rate": 0.0001728142380422692, |
| "loss": 3.2114, |
| "step": 617 |
| }, |
| { |
| "epoch": 8.240802675585284, |
| "grad_norm": 0.8786870837211609, |
| "learning_rate": 0.000172769744160178, |
| "loss": 3.378, |
| "step": 618 |
| }, |
| { |
| "epoch": 8.25418060200669, |
| "grad_norm": 0.9048855900764465, |
| "learning_rate": 0.00017272525027808676, |
| "loss": 3.1801, |
| "step": 619 |
| }, |
| { |
| "epoch": 8.267558528428093, |
| "grad_norm": 0.657189130783081, |
| "learning_rate": 0.00017268075639599557, |
| "loss": 3.1389, |
| "step": 620 |
| }, |
| { |
| "epoch": 8.280936454849499, |
| "grad_norm": 0.8015987873077393, |
| "learning_rate": 0.00017263626251390435, |
| "loss": 3.4621, |
| "step": 621 |
| }, |
| { |
| "epoch": 8.294314381270903, |
| "grad_norm": 0.8232793807983398, |
| "learning_rate": 0.00017259176863181313, |
| "loss": 3.1763, |
| "step": 622 |
| }, |
| { |
| "epoch": 8.307692307692308, |
| "grad_norm": 0.7447130680084229, |
| "learning_rate": 0.00017254727474972192, |
| "loss": 3.1266, |
| "step": 623 |
| }, |
| { |
| "epoch": 8.321070234113712, |
| "grad_norm": 0.7649840116500854, |
| "learning_rate": 0.00017250278086763072, |
| "loss": 3.1959, |
| "step": 624 |
| }, |
| { |
| "epoch": 8.334448160535118, |
| "grad_norm": 0.7119699120521545, |
| "learning_rate": 0.0001724582869855395, |
| "loss": 3.5626, |
| "step": 625 |
| }, |
| { |
| "epoch": 8.347826086956522, |
| "grad_norm": 0.8238518834114075, |
| "learning_rate": 0.00017241379310344826, |
| "loss": 3.1873, |
| "step": 626 |
| }, |
| { |
| "epoch": 8.361204013377927, |
| "grad_norm": 0.8248497843742371, |
| "learning_rate": 0.00017236929922135707, |
| "loss": 3.5686, |
| "step": 627 |
| }, |
| { |
| "epoch": 8.37458193979933, |
| "grad_norm": 0.8704475164413452, |
| "learning_rate": 0.00017232480533926585, |
| "loss": 3.3195, |
| "step": 628 |
| }, |
| { |
| "epoch": 8.387959866220736, |
| "grad_norm": 0.8160929083824158, |
| "learning_rate": 0.00017228031145717464, |
| "loss": 3.3808, |
| "step": 629 |
| }, |
| { |
| "epoch": 8.40133779264214, |
| "grad_norm": 0.8537085652351379, |
| "learning_rate": 0.00017223581757508342, |
| "loss": 3.3638, |
| "step": 630 |
| }, |
| { |
| "epoch": 8.414715719063546, |
| "grad_norm": 0.876519501209259, |
| "learning_rate": 0.00017219132369299223, |
| "loss": 3.2019, |
| "step": 631 |
| }, |
| { |
| "epoch": 8.42809364548495, |
| "grad_norm": 0.6573703289031982, |
| "learning_rate": 0.000172146829810901, |
| "loss": 3.4998, |
| "step": 632 |
| }, |
| { |
| "epoch": 8.441471571906355, |
| "grad_norm": 0.8822937607765198, |
| "learning_rate": 0.0001721023359288098, |
| "loss": 3.4281, |
| "step": 633 |
| }, |
| { |
| "epoch": 8.454849498327759, |
| "grad_norm": 0.764872670173645, |
| "learning_rate": 0.00017205784204671858, |
| "loss": 3.3693, |
| "step": 634 |
| }, |
| { |
| "epoch": 8.468227424749164, |
| "grad_norm": 0.7492384910583496, |
| "learning_rate": 0.00017201334816462739, |
| "loss": 3.5672, |
| "step": 635 |
| }, |
| { |
| "epoch": 8.481605351170568, |
| "grad_norm": 0.8037416934967041, |
| "learning_rate": 0.00017196885428253614, |
| "loss": 3.3804, |
| "step": 636 |
| }, |
| { |
| "epoch": 8.494983277591974, |
| "grad_norm": 0.8380945324897766, |
| "learning_rate": 0.00017192436040044495, |
| "loss": 3.2272, |
| "step": 637 |
| }, |
| { |
| "epoch": 8.508361204013378, |
| "grad_norm": 0.8467932939529419, |
| "learning_rate": 0.00017187986651835373, |
| "loss": 3.2649, |
| "step": 638 |
| }, |
| { |
| "epoch": 8.521739130434783, |
| "grad_norm": 0.751542866230011, |
| "learning_rate": 0.00017183537263626252, |
| "loss": 3.4135, |
| "step": 639 |
| }, |
| { |
| "epoch": 8.535117056856187, |
| "grad_norm": 0.7618190050125122, |
| "learning_rate": 0.0001717908787541713, |
| "loss": 3.483, |
| "step": 640 |
| }, |
| { |
| "epoch": 8.548494983277592, |
| "grad_norm": 0.9661890864372253, |
| "learning_rate": 0.0001717463848720801, |
| "loss": 3.2201, |
| "step": 641 |
| }, |
| { |
| "epoch": 8.561872909698996, |
| "grad_norm": 0.8655393719673157, |
| "learning_rate": 0.0001717018909899889, |
| "loss": 3.4142, |
| "step": 642 |
| }, |
| { |
| "epoch": 8.575250836120402, |
| "grad_norm": 0.796047031879425, |
| "learning_rate": 0.00017165739710789767, |
| "loss": 3.3558, |
| "step": 643 |
| }, |
| { |
| "epoch": 8.588628762541806, |
| "grad_norm": 1.0098161697387695, |
| "learning_rate": 0.00017161290322580645, |
| "loss": 3.553, |
| "step": 644 |
| }, |
| { |
| "epoch": 8.602006688963211, |
| "grad_norm": 1.1880302429199219, |
| "learning_rate": 0.00017156840934371526, |
| "loss": 3.3581, |
| "step": 645 |
| }, |
| { |
| "epoch": 8.615384615384615, |
| "grad_norm": 0.9361609220504761, |
| "learning_rate": 0.00017152391546162402, |
| "loss": 3.4543, |
| "step": 646 |
| }, |
| { |
| "epoch": 8.62876254180602, |
| "grad_norm": 0.8794479966163635, |
| "learning_rate": 0.00017147942157953283, |
| "loss": 3.3654, |
| "step": 647 |
| }, |
| { |
| "epoch": 8.642140468227424, |
| "grad_norm": 0.9263080954551697, |
| "learning_rate": 0.0001714349276974416, |
| "loss": 3.4376, |
| "step": 648 |
| }, |
| { |
| "epoch": 8.65551839464883, |
| "grad_norm": 1.0015815496444702, |
| "learning_rate": 0.0001713904338153504, |
| "loss": 3.3533, |
| "step": 649 |
| }, |
| { |
| "epoch": 8.668896321070234, |
| "grad_norm": 0.8525484204292297, |
| "learning_rate": 0.00017134593993325918, |
| "loss": 3.3897, |
| "step": 650 |
| }, |
| { |
| "epoch": 8.68227424749164, |
| "grad_norm": 0.7196484804153442, |
| "learning_rate": 0.00017130144605116799, |
| "loss": 3.2428, |
| "step": 651 |
| }, |
| { |
| "epoch": 8.695652173913043, |
| "grad_norm": 0.8779593706130981, |
| "learning_rate": 0.00017125695216907677, |
| "loss": 3.5471, |
| "step": 652 |
| }, |
| { |
| "epoch": 8.709030100334449, |
| "grad_norm": 0.9256909489631653, |
| "learning_rate": 0.00017121245828698555, |
| "loss": 3.1776, |
| "step": 653 |
| }, |
| { |
| "epoch": 8.722408026755852, |
| "grad_norm": 0.7774620652198792, |
| "learning_rate": 0.00017116796440489433, |
| "loss": 3.4836, |
| "step": 654 |
| }, |
| { |
| "epoch": 8.735785953177258, |
| "grad_norm": 0.8112596273422241, |
| "learning_rate": 0.00017112347052280314, |
| "loss": 3.5194, |
| "step": 655 |
| }, |
| { |
| "epoch": 8.749163879598662, |
| "grad_norm": 0.7350602746009827, |
| "learning_rate": 0.0001710789766407119, |
| "loss": 3.2248, |
| "step": 656 |
| }, |
| { |
| "epoch": 8.762541806020067, |
| "grad_norm": 0.8231781125068665, |
| "learning_rate": 0.0001710344827586207, |
| "loss": 3.4659, |
| "step": 657 |
| }, |
| { |
| "epoch": 8.775919732441471, |
| "grad_norm": 0.8921564221382141, |
| "learning_rate": 0.0001709899888765295, |
| "loss": 3.2712, |
| "step": 658 |
| }, |
| { |
| "epoch": 8.789297658862877, |
| "grad_norm": 0.8921830058097839, |
| "learning_rate": 0.00017094549499443827, |
| "loss": 3.5071, |
| "step": 659 |
| }, |
| { |
| "epoch": 8.80267558528428, |
| "grad_norm": 0.7809077501296997, |
| "learning_rate": 0.00017090100111234705, |
| "loss": 3.6639, |
| "step": 660 |
| }, |
| { |
| "epoch": 8.816053511705686, |
| "grad_norm": 0.9431234002113342, |
| "learning_rate": 0.00017085650723025586, |
| "loss": 3.2795, |
| "step": 661 |
| }, |
| { |
| "epoch": 8.82943143812709, |
| "grad_norm": 0.9707314968109131, |
| "learning_rate": 0.00017081201334816465, |
| "loss": 3.3395, |
| "step": 662 |
| }, |
| { |
| "epoch": 8.842809364548495, |
| "grad_norm": 0.7547470331192017, |
| "learning_rate": 0.00017076751946607343, |
| "loss": 3.5316, |
| "step": 663 |
| }, |
| { |
| "epoch": 8.856187290969899, |
| "grad_norm": 0.8989250659942627, |
| "learning_rate": 0.0001707230255839822, |
| "loss": 3.4029, |
| "step": 664 |
| }, |
| { |
| "epoch": 8.869565217391305, |
| "grad_norm": 1.0237400531768799, |
| "learning_rate": 0.00017067853170189102, |
| "loss": 3.5014, |
| "step": 665 |
| }, |
| { |
| "epoch": 8.882943143812708, |
| "grad_norm": 0.7289263010025024, |
| "learning_rate": 0.00017063403781979978, |
| "loss": 3.4211, |
| "step": 666 |
| }, |
| { |
| "epoch": 8.896321070234114, |
| "grad_norm": 0.7978695034980774, |
| "learning_rate": 0.00017058954393770856, |
| "loss": 3.5421, |
| "step": 667 |
| }, |
| { |
| "epoch": 8.909698996655518, |
| "grad_norm": 0.7401835918426514, |
| "learning_rate": 0.00017054505005561737, |
| "loss": 3.3419, |
| "step": 668 |
| }, |
| { |
| "epoch": 8.923076923076923, |
| "grad_norm": 0.8952983617782593, |
| "learning_rate": 0.00017050055617352615, |
| "loss": 3.1322, |
| "step": 669 |
| }, |
| { |
| "epoch": 8.936454849498327, |
| "grad_norm": 0.6922047734260559, |
| "learning_rate": 0.00017045606229143493, |
| "loss": 3.5872, |
| "step": 670 |
| }, |
| { |
| "epoch": 8.949832775919733, |
| "grad_norm": 0.8618977665901184, |
| "learning_rate": 0.00017041156840934371, |
| "loss": 3.1366, |
| "step": 671 |
| }, |
| { |
| "epoch": 8.963210702341136, |
| "grad_norm": 0.7933799624443054, |
| "learning_rate": 0.00017036707452725252, |
| "loss": 3.3108, |
| "step": 672 |
| }, |
| { |
| "epoch": 8.976588628762542, |
| "grad_norm": 0.718401312828064, |
| "learning_rate": 0.00017032258064516128, |
| "loss": 3.3771, |
| "step": 673 |
| }, |
| { |
| "epoch": 8.989966555183946, |
| "grad_norm": 0.8096804618835449, |
| "learning_rate": 0.0001702780867630701, |
| "loss": 3.3225, |
| "step": 674 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 1.0055694580078125, |
| "learning_rate": 0.00017023359288097887, |
| "loss": 3.5545, |
| "step": 675 |
| }, |
| { |
| "epoch": 9.013377926421406, |
| "grad_norm": 0.710986852645874, |
| "learning_rate": 0.00017018909899888765, |
| "loss": 3.3333, |
| "step": 676 |
| }, |
| { |
| "epoch": 9.02675585284281, |
| "grad_norm": 0.672132134437561, |
| "learning_rate": 0.00017014460511679644, |
| "loss": 2.9995, |
| "step": 677 |
| }, |
| { |
| "epoch": 9.040133779264215, |
| "grad_norm": 0.6752933263778687, |
| "learning_rate": 0.00017010011123470525, |
| "loss": 3.3571, |
| "step": 678 |
| }, |
| { |
| "epoch": 9.053511705685619, |
| "grad_norm": 0.6553521156311035, |
| "learning_rate": 0.00017005561735261403, |
| "loss": 3.3407, |
| "step": 679 |
| }, |
| { |
| "epoch": 9.066889632107024, |
| "grad_norm": 0.7492311596870422, |
| "learning_rate": 0.0001700111234705228, |
| "loss": 3.325, |
| "step": 680 |
| }, |
| { |
| "epoch": 9.080267558528428, |
| "grad_norm": 0.736139714717865, |
| "learning_rate": 0.0001699666295884316, |
| "loss": 3.2626, |
| "step": 681 |
| }, |
| { |
| "epoch": 9.093645484949834, |
| "grad_norm": 0.7131486535072327, |
| "learning_rate": 0.0001699221357063404, |
| "loss": 3.2612, |
| "step": 682 |
| }, |
| { |
| "epoch": 9.107023411371237, |
| "grad_norm": 0.7037603855133057, |
| "learning_rate": 0.00016987764182424916, |
| "loss": 3.2418, |
| "step": 683 |
| }, |
| { |
| "epoch": 9.120401337792643, |
| "grad_norm": 0.685518205165863, |
| "learning_rate": 0.00016983314794215797, |
| "loss": 3.4854, |
| "step": 684 |
| }, |
| { |
| "epoch": 9.133779264214047, |
| "grad_norm": 0.6050254106521606, |
| "learning_rate": 0.00016978865406006675, |
| "loss": 3.2312, |
| "step": 685 |
| }, |
| { |
| "epoch": 9.147157190635452, |
| "grad_norm": 0.6932830810546875, |
| "learning_rate": 0.00016974416017797553, |
| "loss": 3.4634, |
| "step": 686 |
| }, |
| { |
| "epoch": 9.160535117056856, |
| "grad_norm": 0.7055158615112305, |
| "learning_rate": 0.00016969966629588431, |
| "loss": 3.1408, |
| "step": 687 |
| }, |
| { |
| "epoch": 9.173913043478262, |
| "grad_norm": 0.6887643337249756, |
| "learning_rate": 0.00016965517241379312, |
| "loss": 3.0697, |
| "step": 688 |
| }, |
| { |
| "epoch": 9.187290969899665, |
| "grad_norm": 0.7201237082481384, |
| "learning_rate": 0.0001696106785317019, |
| "loss": 3.303, |
| "step": 689 |
| }, |
| { |
| "epoch": 9.200668896321071, |
| "grad_norm": 0.6617894768714905, |
| "learning_rate": 0.0001695661846496107, |
| "loss": 3.4846, |
| "step": 690 |
| }, |
| { |
| "epoch": 9.214046822742475, |
| "grad_norm": 0.8979818224906921, |
| "learning_rate": 0.00016952169076751947, |
| "loss": 3.1898, |
| "step": 691 |
| }, |
| { |
| "epoch": 9.22742474916388, |
| "grad_norm": 0.9507981538772583, |
| "learning_rate": 0.00016947719688542828, |
| "loss": 3.4748, |
| "step": 692 |
| }, |
| { |
| "epoch": 9.240802675585284, |
| "grad_norm": 0.7935391068458557, |
| "learning_rate": 0.00016943270300333704, |
| "loss": 3.3661, |
| "step": 693 |
| }, |
| { |
| "epoch": 9.25418060200669, |
| "grad_norm": 0.7437114715576172, |
| "learning_rate": 0.00016938820912124584, |
| "loss": 3.4407, |
| "step": 694 |
| }, |
| { |
| "epoch": 9.267558528428093, |
| "grad_norm": 0.680610179901123, |
| "learning_rate": 0.00016934371523915463, |
| "loss": 3.3135, |
| "step": 695 |
| }, |
| { |
| "epoch": 9.280936454849499, |
| "grad_norm": 0.846716582775116, |
| "learning_rate": 0.0001692992213570634, |
| "loss": 3.3923, |
| "step": 696 |
| }, |
| { |
| "epoch": 9.294314381270903, |
| "grad_norm": 0.9567786455154419, |
| "learning_rate": 0.0001692547274749722, |
| "loss": 3.1405, |
| "step": 697 |
| }, |
| { |
| "epoch": 9.307692307692308, |
| "grad_norm": 0.7509344816207886, |
| "learning_rate": 0.000169210233592881, |
| "loss": 3.4031, |
| "step": 698 |
| }, |
| { |
| "epoch": 9.321070234113712, |
| "grad_norm": 0.8118243217468262, |
| "learning_rate": 0.00016916573971078978, |
| "loss": 3.317, |
| "step": 699 |
| }, |
| { |
| "epoch": 9.334448160535118, |
| "grad_norm": 0.7445617318153381, |
| "learning_rate": 0.00016912124582869857, |
| "loss": 3.3989, |
| "step": 700 |
| }, |
| { |
| "epoch": 9.347826086956522, |
| "grad_norm": 0.7520869970321655, |
| "learning_rate": 0.00016907675194660735, |
| "loss": 3.08, |
| "step": 701 |
| }, |
| { |
| "epoch": 9.361204013377927, |
| "grad_norm": 0.7466426491737366, |
| "learning_rate": 0.00016903225806451616, |
| "loss": 3.3338, |
| "step": 702 |
| }, |
| { |
| "epoch": 9.37458193979933, |
| "grad_norm": 0.7595514059066772, |
| "learning_rate": 0.0001689877641824249, |
| "loss": 3.08, |
| "step": 703 |
| }, |
| { |
| "epoch": 9.387959866220736, |
| "grad_norm": 0.713771641254425, |
| "learning_rate": 0.00016894327030033372, |
| "loss": 3.236, |
| "step": 704 |
| }, |
| { |
| "epoch": 9.40133779264214, |
| "grad_norm": 0.670863687992096, |
| "learning_rate": 0.0001688987764182425, |
| "loss": 3.4491, |
| "step": 705 |
| }, |
| { |
| "epoch": 9.414715719063546, |
| "grad_norm": 0.8842789530754089, |
| "learning_rate": 0.0001688542825361513, |
| "loss": 3.3444, |
| "step": 706 |
| }, |
| { |
| "epoch": 9.42809364548495, |
| "grad_norm": 0.8298172950744629, |
| "learning_rate": 0.00016880978865406007, |
| "loss": 3.3818, |
| "step": 707 |
| }, |
| { |
| "epoch": 9.441471571906355, |
| "grad_norm": 0.7407504320144653, |
| "learning_rate": 0.00016876529477196885, |
| "loss": 3.2294, |
| "step": 708 |
| }, |
| { |
| "epoch": 9.454849498327759, |
| "grad_norm": 0.6642070412635803, |
| "learning_rate": 0.00016872080088987766, |
| "loss": 3.3009, |
| "step": 709 |
| }, |
| { |
| "epoch": 9.468227424749164, |
| "grad_norm": 0.7627503275871277, |
| "learning_rate": 0.00016867630700778642, |
| "loss": 3.3486, |
| "step": 710 |
| }, |
| { |
| "epoch": 9.481605351170568, |
| "grad_norm": 0.7307603359222412, |
| "learning_rate": 0.00016863181312569523, |
| "loss": 3.0257, |
| "step": 711 |
| }, |
| { |
| "epoch": 9.494983277591974, |
| "grad_norm": 0.7932866215705872, |
| "learning_rate": 0.000168587319243604, |
| "loss": 3.4191, |
| "step": 712 |
| }, |
| { |
| "epoch": 9.508361204013378, |
| "grad_norm": 0.7457575798034668, |
| "learning_rate": 0.0001685428253615128, |
| "loss": 3.2283, |
| "step": 713 |
| }, |
| { |
| "epoch": 9.521739130434783, |
| "grad_norm": 0.6718200445175171, |
| "learning_rate": 0.00016849833147942157, |
| "loss": 3.4569, |
| "step": 714 |
| }, |
| { |
| "epoch": 9.535117056856187, |
| "grad_norm": 0.8189072608947754, |
| "learning_rate": 0.00016845383759733038, |
| "loss": 3.2585, |
| "step": 715 |
| }, |
| { |
| "epoch": 9.548494983277592, |
| "grad_norm": 0.6895336508750916, |
| "learning_rate": 0.00016840934371523917, |
| "loss": 3.2417, |
| "step": 716 |
| }, |
| { |
| "epoch": 9.561872909698996, |
| "grad_norm": 0.723173975944519, |
| "learning_rate": 0.00016836484983314795, |
| "loss": 3.3243, |
| "step": 717 |
| }, |
| { |
| "epoch": 9.575250836120402, |
| "grad_norm": 0.8354344964027405, |
| "learning_rate": 0.00016832035595105673, |
| "loss": 3.3585, |
| "step": 718 |
| }, |
| { |
| "epoch": 9.588628762541806, |
| "grad_norm": 0.6736294031143188, |
| "learning_rate": 0.00016827586206896554, |
| "loss": 3.188, |
| "step": 719 |
| }, |
| { |
| "epoch": 9.602006688963211, |
| "grad_norm": 0.7790263295173645, |
| "learning_rate": 0.0001682313681868743, |
| "loss": 3.3171, |
| "step": 720 |
| }, |
| { |
| "epoch": 9.615384615384615, |
| "grad_norm": 0.6426937580108643, |
| "learning_rate": 0.0001681868743047831, |
| "loss": 3.0854, |
| "step": 721 |
| }, |
| { |
| "epoch": 9.62876254180602, |
| "grad_norm": 0.7029106020927429, |
| "learning_rate": 0.0001681423804226919, |
| "loss": 3.3629, |
| "step": 722 |
| }, |
| { |
| "epoch": 9.642140468227424, |
| "grad_norm": 0.8353022933006287, |
| "learning_rate": 0.00016809788654060067, |
| "loss": 3.3715, |
| "step": 723 |
| }, |
| { |
| "epoch": 9.65551839464883, |
| "grad_norm": 0.8578335642814636, |
| "learning_rate": 0.00016805339265850945, |
| "loss": 3.3554, |
| "step": 724 |
| }, |
| { |
| "epoch": 9.668896321070234, |
| "grad_norm": 0.6998556852340698, |
| "learning_rate": 0.00016800889877641826, |
| "loss": 3.3043, |
| "step": 725 |
| }, |
| { |
| "epoch": 9.68227424749164, |
| "grad_norm": 0.7134855389595032, |
| "learning_rate": 0.00016796440489432704, |
| "loss": 3.5856, |
| "step": 726 |
| }, |
| { |
| "epoch": 9.695652173913043, |
| "grad_norm": 0.6636050939559937, |
| "learning_rate": 0.00016791991101223583, |
| "loss": 3.2156, |
| "step": 727 |
| }, |
| { |
| "epoch": 9.709030100334449, |
| "grad_norm": 0.7757130861282349, |
| "learning_rate": 0.0001678754171301446, |
| "loss": 3.4974, |
| "step": 728 |
| }, |
| { |
| "epoch": 9.722408026755852, |
| "grad_norm": 0.770648181438446, |
| "learning_rate": 0.00016783092324805342, |
| "loss": 3.3251, |
| "step": 729 |
| }, |
| { |
| "epoch": 9.735785953177258, |
| "grad_norm": 0.7728201746940613, |
| "learning_rate": 0.00016778642936596217, |
| "loss": 3.2666, |
| "step": 730 |
| }, |
| { |
| "epoch": 9.749163879598662, |
| "grad_norm": 0.8277239203453064, |
| "learning_rate": 0.00016774193548387098, |
| "loss": 3.3867, |
| "step": 731 |
| }, |
| { |
| "epoch": 9.762541806020067, |
| "grad_norm": 0.6534886360168457, |
| "learning_rate": 0.00016769744160177977, |
| "loss": 3.175, |
| "step": 732 |
| }, |
| { |
| "epoch": 9.775919732441471, |
| "grad_norm": 0.8508428335189819, |
| "learning_rate": 0.00016765294771968855, |
| "loss": 3.2084, |
| "step": 733 |
| }, |
| { |
| "epoch": 9.789297658862877, |
| "grad_norm": 0.7656721472740173, |
| "learning_rate": 0.00016760845383759733, |
| "loss": 3.1426, |
| "step": 734 |
| }, |
| { |
| "epoch": 9.80267558528428, |
| "grad_norm": 0.9495553970336914, |
| "learning_rate": 0.00016756395995550614, |
| "loss": 3.1623, |
| "step": 735 |
| }, |
| { |
| "epoch": 9.816053511705686, |
| "grad_norm": 0.7998641729354858, |
| "learning_rate": 0.00016751946607341492, |
| "loss": 3.3893, |
| "step": 736 |
| }, |
| { |
| "epoch": 9.82943143812709, |
| "grad_norm": 0.8124551177024841, |
| "learning_rate": 0.0001674749721913237, |
| "loss": 3.2012, |
| "step": 737 |
| }, |
| { |
| "epoch": 9.842809364548495, |
| "grad_norm": 0.6332049369812012, |
| "learning_rate": 0.0001674304783092325, |
| "loss": 3.3384, |
| "step": 738 |
| }, |
| { |
| "epoch": 9.856187290969899, |
| "grad_norm": 0.7114555835723877, |
| "learning_rate": 0.0001673859844271413, |
| "loss": 3.0802, |
| "step": 739 |
| }, |
| { |
| "epoch": 9.869565217391305, |
| "grad_norm": 0.7175182700157166, |
| "learning_rate": 0.00016734149054505005, |
| "loss": 3.2572, |
| "step": 740 |
| }, |
| { |
| "epoch": 9.882943143812708, |
| "grad_norm": 0.7724816799163818, |
| "learning_rate": 0.00016729699666295886, |
| "loss": 3.1078, |
| "step": 741 |
| }, |
| { |
| "epoch": 9.896321070234114, |
| "grad_norm": 0.7834901213645935, |
| "learning_rate": 0.00016725250278086764, |
| "loss": 3.2513, |
| "step": 742 |
| }, |
| { |
| "epoch": 9.909698996655518, |
| "grad_norm": 0.663495659828186, |
| "learning_rate": 0.00016720800889877643, |
| "loss": 3.327, |
| "step": 743 |
| }, |
| { |
| "epoch": 9.923076923076923, |
| "grad_norm": 0.7828975319862366, |
| "learning_rate": 0.0001671635150166852, |
| "loss": 3.609, |
| "step": 744 |
| }, |
| { |
| "epoch": 9.936454849498327, |
| "grad_norm": 0.6747825145721436, |
| "learning_rate": 0.00016711902113459402, |
| "loss": 3.4479, |
| "step": 745 |
| }, |
| { |
| "epoch": 9.949832775919733, |
| "grad_norm": 0.7816379070281982, |
| "learning_rate": 0.0001670745272525028, |
| "loss": 3.2369, |
| "step": 746 |
| }, |
| { |
| "epoch": 9.963210702341136, |
| "grad_norm": 0.7011098265647888, |
| "learning_rate": 0.00016703003337041158, |
| "loss": 2.9103, |
| "step": 747 |
| }, |
| { |
| "epoch": 9.976588628762542, |
| "grad_norm": 0.7165176868438721, |
| "learning_rate": 0.00016698553948832036, |
| "loss": 3.1669, |
| "step": 748 |
| }, |
| { |
| "epoch": 9.989966555183946, |
| "grad_norm": 0.766315758228302, |
| "learning_rate": 0.00016694104560622915, |
| "loss": 3.1138, |
| "step": 749 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.876315176486969, |
| "learning_rate": 0.00016689655172413793, |
| "loss": 3.4851, |
| "step": 750 |
| }, |
| { |
| "epoch": 10.013377926421406, |
| "grad_norm": 0.807686984539032, |
| "learning_rate": 0.0001668520578420467, |
| "loss": 3.193, |
| "step": 751 |
| }, |
| { |
| "epoch": 10.02675585284281, |
| "grad_norm": 0.7085704803466797, |
| "learning_rate": 0.00016680756395995552, |
| "loss": 3.4797, |
| "step": 752 |
| }, |
| { |
| "epoch": 10.040133779264215, |
| "grad_norm": 0.7119605541229248, |
| "learning_rate": 0.0001667630700778643, |
| "loss": 3.1713, |
| "step": 753 |
| }, |
| { |
| "epoch": 10.053511705685619, |
| "grad_norm": 0.6569423675537109, |
| "learning_rate": 0.00016671857619577309, |
| "loss": 3.1661, |
| "step": 754 |
| }, |
| { |
| "epoch": 10.066889632107024, |
| "grad_norm": 0.8173550367355347, |
| "learning_rate": 0.00016667408231368187, |
| "loss": 2.8467, |
| "step": 755 |
| }, |
| { |
| "epoch": 10.080267558528428, |
| "grad_norm": 0.7261365056037903, |
| "learning_rate": 0.00016662958843159068, |
| "loss": 3.3679, |
| "step": 756 |
| }, |
| { |
| "epoch": 10.093645484949834, |
| "grad_norm": 0.7997227311134338, |
| "learning_rate": 0.00016658509454949943, |
| "loss": 3.0985, |
| "step": 757 |
| }, |
| { |
| "epoch": 10.107023411371237, |
| "grad_norm": 0.653391420841217, |
| "learning_rate": 0.00016654060066740824, |
| "loss": 3.2156, |
| "step": 758 |
| }, |
| { |
| "epoch": 10.120401337792643, |
| "grad_norm": 0.6799002289772034, |
| "learning_rate": 0.00016649610678531703, |
| "loss": 3.3302, |
| "step": 759 |
| }, |
| { |
| "epoch": 10.133779264214047, |
| "grad_norm": 0.6444498896598816, |
| "learning_rate": 0.0001664516129032258, |
| "loss": 3.2813, |
| "step": 760 |
| }, |
| { |
| "epoch": 10.147157190635452, |
| "grad_norm": 1.064769983291626, |
| "learning_rate": 0.0001664071190211346, |
| "loss": 3.1852, |
| "step": 761 |
| }, |
| { |
| "epoch": 10.160535117056856, |
| "grad_norm": 0.6534339189529419, |
| "learning_rate": 0.0001663626251390434, |
| "loss": 3.1563, |
| "step": 762 |
| }, |
| { |
| "epoch": 10.173913043478262, |
| "grad_norm": 0.6909127235412598, |
| "learning_rate": 0.00016631813125695218, |
| "loss": 3.2728, |
| "step": 763 |
| }, |
| { |
| "epoch": 10.187290969899665, |
| "grad_norm": 0.6549767851829529, |
| "learning_rate": 0.00016627363737486096, |
| "loss": 3.0491, |
| "step": 764 |
| }, |
| { |
| "epoch": 10.200668896321071, |
| "grad_norm": 0.678054928779602, |
| "learning_rate": 0.00016622914349276975, |
| "loss": 3.4807, |
| "step": 765 |
| }, |
| { |
| "epoch": 10.214046822742475, |
| "grad_norm": 0.613358199596405, |
| "learning_rate": 0.00016618464961067856, |
| "loss": 3.1746, |
| "step": 766 |
| }, |
| { |
| "epoch": 10.22742474916388, |
| "grad_norm": 0.6624737977981567, |
| "learning_rate": 0.0001661401557285873, |
| "loss": 2.8528, |
| "step": 767 |
| }, |
| { |
| "epoch": 10.240802675585284, |
| "grad_norm": 0.65067458152771, |
| "learning_rate": 0.00016609566184649612, |
| "loss": 3.1843, |
| "step": 768 |
| }, |
| { |
| "epoch": 10.25418060200669, |
| "grad_norm": 0.6192435622215271, |
| "learning_rate": 0.0001660511679644049, |
| "loss": 3.3162, |
| "step": 769 |
| }, |
| { |
| "epoch": 10.267558528428093, |
| "grad_norm": 0.6456341743469238, |
| "learning_rate": 0.00016600667408231369, |
| "loss": 3.2302, |
| "step": 770 |
| }, |
| { |
| "epoch": 10.280936454849499, |
| "grad_norm": 2.357724189758301, |
| "learning_rate": 0.00016596218020022247, |
| "loss": 3.2741, |
| "step": 771 |
| }, |
| { |
| "epoch": 10.294314381270903, |
| "grad_norm": 0.6833475828170776, |
| "learning_rate": 0.00016591768631813128, |
| "loss": 3.1516, |
| "step": 772 |
| }, |
| { |
| "epoch": 10.307692307692308, |
| "grad_norm": 0.5557199716567993, |
| "learning_rate": 0.00016587319243604006, |
| "loss": 3.281, |
| "step": 773 |
| }, |
| { |
| "epoch": 10.321070234113712, |
| "grad_norm": 0.6617905497550964, |
| "learning_rate": 0.00016582869855394884, |
| "loss": 3.375, |
| "step": 774 |
| }, |
| { |
| "epoch": 10.334448160535118, |
| "grad_norm": 0.5671921372413635, |
| "learning_rate": 0.00016578420467185762, |
| "loss": 3.4335, |
| "step": 775 |
| }, |
| { |
| "epoch": 10.347826086956522, |
| "grad_norm": 0.8487278819084167, |
| "learning_rate": 0.00016573971078976643, |
| "loss": 3.143, |
| "step": 776 |
| }, |
| { |
| "epoch": 10.361204013377927, |
| "grad_norm": 0.6489982604980469, |
| "learning_rate": 0.0001656952169076752, |
| "loss": 3.3258, |
| "step": 777 |
| }, |
| { |
| "epoch": 10.37458193979933, |
| "grad_norm": 0.8773537278175354, |
| "learning_rate": 0.000165650723025584, |
| "loss": 3.0466, |
| "step": 778 |
| }, |
| { |
| "epoch": 10.387959866220736, |
| "grad_norm": 0.5961865782737732, |
| "learning_rate": 0.00016560622914349278, |
| "loss": 3.3417, |
| "step": 779 |
| }, |
| { |
| "epoch": 10.40133779264214, |
| "grad_norm": 0.6149600148200989, |
| "learning_rate": 0.00016556173526140156, |
| "loss": 3.0622, |
| "step": 780 |
| }, |
| { |
| "epoch": 10.414715719063546, |
| "grad_norm": 0.7591158151626587, |
| "learning_rate": 0.00016551724137931035, |
| "loss": 3.2078, |
| "step": 781 |
| }, |
| { |
| "epoch": 10.42809364548495, |
| "grad_norm": 0.7915151119232178, |
| "learning_rate": 0.00016547274749721916, |
| "loss": 3.3082, |
| "step": 782 |
| }, |
| { |
| "epoch": 10.441471571906355, |
| "grad_norm": 0.8709903359413147, |
| "learning_rate": 0.00016542825361512794, |
| "loss": 3.3073, |
| "step": 783 |
| }, |
| { |
| "epoch": 10.454849498327759, |
| "grad_norm": 0.6593959331512451, |
| "learning_rate": 0.00016538375973303672, |
| "loss": 3.1574, |
| "step": 784 |
| }, |
| { |
| "epoch": 10.468227424749164, |
| "grad_norm": 0.8101013898849487, |
| "learning_rate": 0.0001653392658509455, |
| "loss": 3.3631, |
| "step": 785 |
| }, |
| { |
| "epoch": 10.481605351170568, |
| "grad_norm": 0.8200273513793945, |
| "learning_rate": 0.0001652947719688543, |
| "loss": 3.0447, |
| "step": 786 |
| }, |
| { |
| "epoch": 10.494983277591974, |
| "grad_norm": 0.7304090857505798, |
| "learning_rate": 0.00016525027808676307, |
| "loss": 3.3382, |
| "step": 787 |
| }, |
| { |
| "epoch": 10.508361204013378, |
| "grad_norm": 0.7059088349342346, |
| "learning_rate": 0.00016520578420467188, |
| "loss": 3.085, |
| "step": 788 |
| }, |
| { |
| "epoch": 10.521739130434783, |
| "grad_norm": 0.6664522886276245, |
| "learning_rate": 0.00016516129032258066, |
| "loss": 3.3198, |
| "step": 789 |
| }, |
| { |
| "epoch": 10.535117056856187, |
| "grad_norm": 0.9230799078941345, |
| "learning_rate": 0.00016511679644048944, |
| "loss": 3.2502, |
| "step": 790 |
| }, |
| { |
| "epoch": 10.548494983277592, |
| "grad_norm": 0.6974027752876282, |
| "learning_rate": 0.00016507230255839822, |
| "loss": 3.2432, |
| "step": 791 |
| }, |
| { |
| "epoch": 10.561872909698996, |
| "grad_norm": 0.7186788320541382, |
| "learning_rate": 0.000165027808676307, |
| "loss": 3.2273, |
| "step": 792 |
| }, |
| { |
| "epoch": 10.575250836120402, |
| "grad_norm": 0.6168047189712524, |
| "learning_rate": 0.00016498331479421582, |
| "loss": 3.2319, |
| "step": 793 |
| }, |
| { |
| "epoch": 10.588628762541806, |
| "grad_norm": 0.6219142079353333, |
| "learning_rate": 0.00016493882091212457, |
| "loss": 2.9733, |
| "step": 794 |
| }, |
| { |
| "epoch": 10.602006688963211, |
| "grad_norm": 0.573359489440918, |
| "learning_rate": 0.00016489432703003338, |
| "loss": 3.214, |
| "step": 795 |
| }, |
| { |
| "epoch": 10.615384615384615, |
| "grad_norm": 0.678263783454895, |
| "learning_rate": 0.00016484983314794216, |
| "loss": 3.2331, |
| "step": 796 |
| }, |
| { |
| "epoch": 10.62876254180602, |
| "grad_norm": 0.6593761444091797, |
| "learning_rate": 0.00016480533926585095, |
| "loss": 3.3605, |
| "step": 797 |
| }, |
| { |
| "epoch": 10.642140468227424, |
| "grad_norm": 0.8732627034187317, |
| "learning_rate": 0.00016476084538375973, |
| "loss": 3.3531, |
| "step": 798 |
| }, |
| { |
| "epoch": 10.65551839464883, |
| "grad_norm": 0.7198925614356995, |
| "learning_rate": 0.00016471635150166854, |
| "loss": 3.2927, |
| "step": 799 |
| }, |
| { |
| "epoch": 10.668896321070234, |
| "grad_norm": 0.7275107502937317, |
| "learning_rate": 0.00016467185761957732, |
| "loss": 3.3695, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.68227424749164, |
| "grad_norm": 0.7077828049659729, |
| "learning_rate": 0.0001646273637374861, |
| "loss": 2.846, |
| "step": 801 |
| }, |
| { |
| "epoch": 10.695652173913043, |
| "grad_norm": 0.7579251527786255, |
| "learning_rate": 0.00016458286985539488, |
| "loss": 3.1917, |
| "step": 802 |
| }, |
| { |
| "epoch": 10.709030100334449, |
| "grad_norm": 0.7607265114784241, |
| "learning_rate": 0.0001645383759733037, |
| "loss": 3.2919, |
| "step": 803 |
| }, |
| { |
| "epoch": 10.722408026755852, |
| "grad_norm": 0.7122685313224792, |
| "learning_rate": 0.00016449388209121245, |
| "loss": 3.7108, |
| "step": 804 |
| }, |
| { |
| "epoch": 10.735785953177258, |
| "grad_norm": 0.7256726622581482, |
| "learning_rate": 0.00016444938820912126, |
| "loss": 3.3209, |
| "step": 805 |
| }, |
| { |
| "epoch": 10.749163879598662, |
| "grad_norm": 0.7903631925582886, |
| "learning_rate": 0.00016440489432703004, |
| "loss": 3.398, |
| "step": 806 |
| }, |
| { |
| "epoch": 10.762541806020067, |
| "grad_norm": 1.78204345703125, |
| "learning_rate": 0.00016436040044493882, |
| "loss": 3.1118, |
| "step": 807 |
| }, |
| { |
| "epoch": 10.775919732441471, |
| "grad_norm": 0.7647016644477844, |
| "learning_rate": 0.0001643159065628476, |
| "loss": 3.1889, |
| "step": 808 |
| }, |
| { |
| "epoch": 10.789297658862877, |
| "grad_norm": 0.8039811253547668, |
| "learning_rate": 0.00016427141268075642, |
| "loss": 3.0104, |
| "step": 809 |
| }, |
| { |
| "epoch": 10.80267558528428, |
| "grad_norm": 0.6011155843734741, |
| "learning_rate": 0.0001642269187986652, |
| "loss": 3.0988, |
| "step": 810 |
| }, |
| { |
| "epoch": 10.816053511705686, |
| "grad_norm": 0.8137276768684387, |
| "learning_rate": 0.00016418242491657398, |
| "loss": 3.4308, |
| "step": 811 |
| }, |
| { |
| "epoch": 10.82943143812709, |
| "grad_norm": 0.6501771807670593, |
| "learning_rate": 0.00016413793103448276, |
| "loss": 3.1405, |
| "step": 812 |
| }, |
| { |
| "epoch": 10.842809364548495, |
| "grad_norm": 0.678032636642456, |
| "learning_rate": 0.00016409343715239157, |
| "loss": 3.2243, |
| "step": 813 |
| }, |
| { |
| "epoch": 10.856187290969899, |
| "grad_norm": 0.6830305457115173, |
| "learning_rate": 0.00016404894327030033, |
| "loss": 3.2685, |
| "step": 814 |
| }, |
| { |
| "epoch": 10.869565217391305, |
| "grad_norm": 0.7482068538665771, |
| "learning_rate": 0.00016400444938820914, |
| "loss": 3.3363, |
| "step": 815 |
| }, |
| { |
| "epoch": 10.882943143812708, |
| "grad_norm": 0.6592227816581726, |
| "learning_rate": 0.00016395995550611792, |
| "loss": 3.2914, |
| "step": 816 |
| }, |
| { |
| "epoch": 10.896321070234114, |
| "grad_norm": 0.7520759105682373, |
| "learning_rate": 0.0001639154616240267, |
| "loss": 3.1371, |
| "step": 817 |
| }, |
| { |
| "epoch": 10.909698996655518, |
| "grad_norm": 0.6802201271057129, |
| "learning_rate": 0.00016387096774193548, |
| "loss": 3.2925, |
| "step": 818 |
| }, |
| { |
| "epoch": 10.923076923076923, |
| "grad_norm": 0.7528939247131348, |
| "learning_rate": 0.0001638264738598443, |
| "loss": 3.2147, |
| "step": 819 |
| }, |
| { |
| "epoch": 10.936454849498327, |
| "grad_norm": 0.7070727348327637, |
| "learning_rate": 0.00016378197997775308, |
| "loss": 3.2649, |
| "step": 820 |
| }, |
| { |
| "epoch": 10.949832775919733, |
| "grad_norm": 0.6121620535850525, |
| "learning_rate": 0.00016373748609566186, |
| "loss": 3.3999, |
| "step": 821 |
| }, |
| { |
| "epoch": 10.963210702341136, |
| "grad_norm": 0.7355679273605347, |
| "learning_rate": 0.00016369299221357064, |
| "loss": 3.2561, |
| "step": 822 |
| }, |
| { |
| "epoch": 10.976588628762542, |
| "grad_norm": 0.7294445037841797, |
| "learning_rate": 0.00016364849833147945, |
| "loss": 2.7019, |
| "step": 823 |
| }, |
| { |
| "epoch": 10.989966555183946, |
| "grad_norm": 0.8628729581832886, |
| "learning_rate": 0.0001636040044493882, |
| "loss": 3.3655, |
| "step": 824 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.8784325122833252, |
| "learning_rate": 0.00016355951056729702, |
| "loss": 3.504, |
| "step": 825 |
| }, |
| { |
| "epoch": 11.013377926421406, |
| "grad_norm": 0.6880869269371033, |
| "learning_rate": 0.0001635150166852058, |
| "loss": 3.1878, |
| "step": 826 |
| }, |
| { |
| "epoch": 11.02675585284281, |
| "grad_norm": 0.5625393390655518, |
| "learning_rate": 0.00016347052280311458, |
| "loss": 3.2757, |
| "step": 827 |
| }, |
| { |
| "epoch": 11.040133779264215, |
| "grad_norm": 0.5854038596153259, |
| "learning_rate": 0.00016342602892102336, |
| "loss": 3.0994, |
| "step": 828 |
| }, |
| { |
| "epoch": 11.053511705685619, |
| "grad_norm": 0.6682130098342896, |
| "learning_rate": 0.00016338153503893217, |
| "loss": 3.1935, |
| "step": 829 |
| }, |
| { |
| "epoch": 11.066889632107024, |
| "grad_norm": 0.6216278076171875, |
| "learning_rate": 0.00016333704115684095, |
| "loss": 3.1273, |
| "step": 830 |
| }, |
| { |
| "epoch": 11.080267558528428, |
| "grad_norm": 0.61285001039505, |
| "learning_rate": 0.0001632925472747497, |
| "loss": 3.2377, |
| "step": 831 |
| }, |
| { |
| "epoch": 11.093645484949834, |
| "grad_norm": 0.6559188365936279, |
| "learning_rate": 0.00016324805339265852, |
| "loss": 3.1084, |
| "step": 832 |
| }, |
| { |
| "epoch": 11.107023411371237, |
| "grad_norm": 0.6322848200798035, |
| "learning_rate": 0.0001632035595105673, |
| "loss": 3.3178, |
| "step": 833 |
| }, |
| { |
| "epoch": 11.120401337792643, |
| "grad_norm": 0.6306194067001343, |
| "learning_rate": 0.00016315906562847608, |
| "loss": 3.2012, |
| "step": 834 |
| }, |
| { |
| "epoch": 11.133779264214047, |
| "grad_norm": 0.6923161149024963, |
| "learning_rate": 0.00016311457174638487, |
| "loss": 2.9328, |
| "step": 835 |
| }, |
| { |
| "epoch": 11.147157190635452, |
| "grad_norm": 0.6900002360343933, |
| "learning_rate": 0.00016307007786429368, |
| "loss": 3.1436, |
| "step": 836 |
| }, |
| { |
| "epoch": 11.160535117056856, |
| "grad_norm": 0.817669153213501, |
| "learning_rate": 0.00016302558398220246, |
| "loss": 3.0935, |
| "step": 837 |
| }, |
| { |
| "epoch": 11.173913043478262, |
| "grad_norm": 0.7544119954109192, |
| "learning_rate": 0.00016298109010011124, |
| "loss": 3.0646, |
| "step": 838 |
| }, |
| { |
| "epoch": 11.187290969899665, |
| "grad_norm": 0.7996231913566589, |
| "learning_rate": 0.00016293659621802002, |
| "loss": 3.1975, |
| "step": 839 |
| }, |
| { |
| "epoch": 11.200668896321071, |
| "grad_norm": 0.6186792850494385, |
| "learning_rate": 0.00016289210233592883, |
| "loss": 3.31, |
| "step": 840 |
| }, |
| { |
| "epoch": 11.214046822742475, |
| "grad_norm": 0.6926666498184204, |
| "learning_rate": 0.0001628476084538376, |
| "loss": 3.0765, |
| "step": 841 |
| }, |
| { |
| "epoch": 11.22742474916388, |
| "grad_norm": 0.7475588917732239, |
| "learning_rate": 0.0001628031145717464, |
| "loss": 3.1743, |
| "step": 842 |
| }, |
| { |
| "epoch": 11.240802675585284, |
| "grad_norm": 0.5520989298820496, |
| "learning_rate": 0.00016275862068965518, |
| "loss": 3.4243, |
| "step": 843 |
| }, |
| { |
| "epoch": 11.25418060200669, |
| "grad_norm": 0.6556730270385742, |
| "learning_rate": 0.00016271412680756396, |
| "loss": 3.3293, |
| "step": 844 |
| }, |
| { |
| "epoch": 11.267558528428093, |
| "grad_norm": 0.6509199738502502, |
| "learning_rate": 0.00016266963292547274, |
| "loss": 2.999, |
| "step": 845 |
| }, |
| { |
| "epoch": 11.280936454849499, |
| "grad_norm": 0.6254273653030396, |
| "learning_rate": 0.00016262513904338155, |
| "loss": 3.1869, |
| "step": 846 |
| }, |
| { |
| "epoch": 11.294314381270903, |
| "grad_norm": 0.7454530000686646, |
| "learning_rate": 0.00016258064516129034, |
| "loss": 3.3694, |
| "step": 847 |
| }, |
| { |
| "epoch": 11.307692307692308, |
| "grad_norm": 0.7563592791557312, |
| "learning_rate": 0.00016253615127919912, |
| "loss": 3.0057, |
| "step": 848 |
| }, |
| { |
| "epoch": 11.321070234113712, |
| "grad_norm": 0.6986783742904663, |
| "learning_rate": 0.0001624916573971079, |
| "loss": 2.9893, |
| "step": 849 |
| }, |
| { |
| "epoch": 11.334448160535118, |
| "grad_norm": 0.7260631322860718, |
| "learning_rate": 0.0001624471635150167, |
| "loss": 3.1733, |
| "step": 850 |
| }, |
| { |
| "epoch": 11.347826086956522, |
| "grad_norm": 0.7522863745689392, |
| "learning_rate": 0.00016240266963292547, |
| "loss": 2.9829, |
| "step": 851 |
| }, |
| { |
| "epoch": 11.361204013377927, |
| "grad_norm": 0.7290140986442566, |
| "learning_rate": 0.00016235817575083428, |
| "loss": 3.1887, |
| "step": 852 |
| }, |
| { |
| "epoch": 11.37458193979933, |
| "grad_norm": 0.6470169425010681, |
| "learning_rate": 0.00016231368186874306, |
| "loss": 3.2537, |
| "step": 853 |
| }, |
| { |
| "epoch": 11.387959866220736, |
| "grad_norm": 0.863742470741272, |
| "learning_rate": 0.00016226918798665184, |
| "loss": 3.3443, |
| "step": 854 |
| }, |
| { |
| "epoch": 11.40133779264214, |
| "grad_norm": 0.7363939881324768, |
| "learning_rate": 0.00016222469410456062, |
| "loss": 3.3653, |
| "step": 855 |
| }, |
| { |
| "epoch": 11.414715719063546, |
| "grad_norm": 0.6548926830291748, |
| "learning_rate": 0.00016218020022246943, |
| "loss": 3.0373, |
| "step": 856 |
| }, |
| { |
| "epoch": 11.42809364548495, |
| "grad_norm": 0.8087872862815857, |
| "learning_rate": 0.00016213570634037821, |
| "loss": 3.0118, |
| "step": 857 |
| }, |
| { |
| "epoch": 11.441471571906355, |
| "grad_norm": 0.677811324596405, |
| "learning_rate": 0.000162091212458287, |
| "loss": 3.0339, |
| "step": 858 |
| }, |
| { |
| "epoch": 11.454849498327759, |
| "grad_norm": 0.6907945275306702, |
| "learning_rate": 0.00016204671857619578, |
| "loss": 2.9496, |
| "step": 859 |
| }, |
| { |
| "epoch": 11.468227424749164, |
| "grad_norm": 0.6940027475357056, |
| "learning_rate": 0.0001620022246941046, |
| "loss": 3.2825, |
| "step": 860 |
| }, |
| { |
| "epoch": 11.481605351170568, |
| "grad_norm": 0.7132136225700378, |
| "learning_rate": 0.00016195773081201334, |
| "loss": 3.1271, |
| "step": 861 |
| }, |
| { |
| "epoch": 11.494983277591974, |
| "grad_norm": 0.5997372269630432, |
| "learning_rate": 0.00016191323692992215, |
| "loss": 3.1292, |
| "step": 862 |
| }, |
| { |
| "epoch": 11.508361204013378, |
| "grad_norm": 0.6468494534492493, |
| "learning_rate": 0.00016186874304783094, |
| "loss": 3.32, |
| "step": 863 |
| }, |
| { |
| "epoch": 11.521739130434783, |
| "grad_norm": 0.5792532563209534, |
| "learning_rate": 0.00016182424916573972, |
| "loss": 3.3657, |
| "step": 864 |
| }, |
| { |
| "epoch": 11.535117056856187, |
| "grad_norm": 0.8242068290710449, |
| "learning_rate": 0.0001617797552836485, |
| "loss": 3.0094, |
| "step": 865 |
| }, |
| { |
| "epoch": 11.548494983277592, |
| "grad_norm": 0.9260333776473999, |
| "learning_rate": 0.0001617352614015573, |
| "loss": 3.1123, |
| "step": 866 |
| }, |
| { |
| "epoch": 11.561872909698996, |
| "grad_norm": 0.6337956786155701, |
| "learning_rate": 0.0001616907675194661, |
| "loss": 3.1761, |
| "step": 867 |
| }, |
| { |
| "epoch": 11.575250836120402, |
| "grad_norm": 0.6010364294052124, |
| "learning_rate": 0.00016164627363737487, |
| "loss": 3.2987, |
| "step": 868 |
| }, |
| { |
| "epoch": 11.588628762541806, |
| "grad_norm": 0.7492111921310425, |
| "learning_rate": 0.00016160177975528366, |
| "loss": 2.9085, |
| "step": 869 |
| }, |
| { |
| "epoch": 11.602006688963211, |
| "grad_norm": 0.6329553127288818, |
| "learning_rate": 0.00016155728587319247, |
| "loss": 3.1841, |
| "step": 870 |
| }, |
| { |
| "epoch": 11.615384615384615, |
| "grad_norm": 0.768527626991272, |
| "learning_rate": 0.00016151279199110122, |
| "loss": 3.0061, |
| "step": 871 |
| }, |
| { |
| "epoch": 11.62876254180602, |
| "grad_norm": 0.6333640813827515, |
| "learning_rate": 0.00016146829810901003, |
| "loss": 3.3046, |
| "step": 872 |
| }, |
| { |
| "epoch": 11.642140468227424, |
| "grad_norm": 0.7457571625709534, |
| "learning_rate": 0.00016142380422691881, |
| "loss": 3.2728, |
| "step": 873 |
| }, |
| { |
| "epoch": 11.65551839464883, |
| "grad_norm": 0.6389586925506592, |
| "learning_rate": 0.0001613793103448276, |
| "loss": 3.419, |
| "step": 874 |
| }, |
| { |
| "epoch": 11.668896321070234, |
| "grad_norm": 0.8885436058044434, |
| "learning_rate": 0.00016133481646273638, |
| "loss": 3.0938, |
| "step": 875 |
| }, |
| { |
| "epoch": 11.68227424749164, |
| "grad_norm": 0.7936431765556335, |
| "learning_rate": 0.00016129032258064516, |
| "loss": 3.305, |
| "step": 876 |
| }, |
| { |
| "epoch": 11.695652173913043, |
| "grad_norm": 0.6133994460105896, |
| "learning_rate": 0.00016124582869855397, |
| "loss": 3.3474, |
| "step": 877 |
| }, |
| { |
| "epoch": 11.709030100334449, |
| "grad_norm": 0.6638192534446716, |
| "learning_rate": 0.00016120133481646273, |
| "loss": 3.1418, |
| "step": 878 |
| }, |
| { |
| "epoch": 11.722408026755852, |
| "grad_norm": 0.6820496320724487, |
| "learning_rate": 0.00016115684093437154, |
| "loss": 3.1421, |
| "step": 879 |
| }, |
| { |
| "epoch": 11.735785953177258, |
| "grad_norm": 0.6057732105255127, |
| "learning_rate": 0.00016111234705228032, |
| "loss": 3.091, |
| "step": 880 |
| }, |
| { |
| "epoch": 11.749163879598662, |
| "grad_norm": 0.6267048716545105, |
| "learning_rate": 0.0001610678531701891, |
| "loss": 3.1289, |
| "step": 881 |
| }, |
| { |
| "epoch": 11.762541806020067, |
| "grad_norm": 0.6822847723960876, |
| "learning_rate": 0.00016102335928809788, |
| "loss": 3.157, |
| "step": 882 |
| }, |
| { |
| "epoch": 11.775919732441471, |
| "grad_norm": 0.6809714436531067, |
| "learning_rate": 0.0001609788654060067, |
| "loss": 3.0806, |
| "step": 883 |
| }, |
| { |
| "epoch": 11.789297658862877, |
| "grad_norm": 0.5546092391014099, |
| "learning_rate": 0.00016093437152391547, |
| "loss": 3.1853, |
| "step": 884 |
| }, |
| { |
| "epoch": 11.80267558528428, |
| "grad_norm": 0.7375029921531677, |
| "learning_rate": 0.00016088987764182426, |
| "loss": 3.1287, |
| "step": 885 |
| }, |
| { |
| "epoch": 11.816053511705686, |
| "grad_norm": 0.6246840953826904, |
| "learning_rate": 0.00016084538375973304, |
| "loss": 3.1331, |
| "step": 886 |
| }, |
| { |
| "epoch": 11.82943143812709, |
| "grad_norm": 0.6088026762008667, |
| "learning_rate": 0.00016080088987764185, |
| "loss": 3.3781, |
| "step": 887 |
| }, |
| { |
| "epoch": 11.842809364548495, |
| "grad_norm": 0.7996237874031067, |
| "learning_rate": 0.0001607563959955506, |
| "loss": 3.0161, |
| "step": 888 |
| }, |
| { |
| "epoch": 11.856187290969899, |
| "grad_norm": 0.6221441626548767, |
| "learning_rate": 0.0001607119021134594, |
| "loss": 3.1491, |
| "step": 889 |
| }, |
| { |
| "epoch": 11.869565217391305, |
| "grad_norm": 0.6276041269302368, |
| "learning_rate": 0.0001606674082313682, |
| "loss": 3.2575, |
| "step": 890 |
| }, |
| { |
| "epoch": 11.882943143812708, |
| "grad_norm": 0.6394500136375427, |
| "learning_rate": 0.00016062291434927698, |
| "loss": 3.3437, |
| "step": 891 |
| }, |
| { |
| "epoch": 11.896321070234114, |
| "grad_norm": 0.7674509286880493, |
| "learning_rate": 0.00016057842046718576, |
| "loss": 3.1995, |
| "step": 892 |
| }, |
| { |
| "epoch": 11.909698996655518, |
| "grad_norm": 0.7502215504646301, |
| "learning_rate": 0.00016053392658509457, |
| "loss": 3.3129, |
| "step": 893 |
| }, |
| { |
| "epoch": 11.923076923076923, |
| "grad_norm": 0.6078189611434937, |
| "learning_rate": 0.00016048943270300335, |
| "loss": 3.1623, |
| "step": 894 |
| }, |
| { |
| "epoch": 11.936454849498327, |
| "grad_norm": 0.6113708019256592, |
| "learning_rate": 0.00016044493882091213, |
| "loss": 3.6063, |
| "step": 895 |
| }, |
| { |
| "epoch": 11.949832775919733, |
| "grad_norm": 0.6606878638267517, |
| "learning_rate": 0.00016040044493882092, |
| "loss": 3.3216, |
| "step": 896 |
| }, |
| { |
| "epoch": 11.963210702341136, |
| "grad_norm": 0.7055956125259399, |
| "learning_rate": 0.00016035595105672973, |
| "loss": 3.2006, |
| "step": 897 |
| }, |
| { |
| "epoch": 11.976588628762542, |
| "grad_norm": 0.7424116730690002, |
| "learning_rate": 0.00016031145717463848, |
| "loss": 3.1298, |
| "step": 898 |
| }, |
| { |
| "epoch": 11.989966555183946, |
| "grad_norm": 0.6675695180892944, |
| "learning_rate": 0.0001602669632925473, |
| "loss": 3.1116, |
| "step": 899 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.9356181621551514, |
| "learning_rate": 0.00016022246941045607, |
| "loss": 3.2461, |
| "step": 900 |
| }, |
| { |
| "epoch": 12.013377926421406, |
| "grad_norm": 0.8539507985115051, |
| "learning_rate": 0.00016017797552836486, |
| "loss": 3.0671, |
| "step": 901 |
| }, |
| { |
| "epoch": 12.02675585284281, |
| "grad_norm": 0.573266327381134, |
| "learning_rate": 0.00016013348164627364, |
| "loss": 3.0789, |
| "step": 902 |
| }, |
| { |
| "epoch": 12.040133779264215, |
| "grad_norm": 0.5849746465682983, |
| "learning_rate": 0.00016008898776418245, |
| "loss": 3.1623, |
| "step": 903 |
| }, |
| { |
| "epoch": 12.053511705685619, |
| "grad_norm": 0.6523334980010986, |
| "learning_rate": 0.00016004449388209123, |
| "loss": 3.0243, |
| "step": 904 |
| }, |
| { |
| "epoch": 12.066889632107024, |
| "grad_norm": 0.6428223848342896, |
| "learning_rate": 0.00016, |
| "loss": 3.1241, |
| "step": 905 |
| }, |
| { |
| "epoch": 12.080267558528428, |
| "grad_norm": 0.5881937742233276, |
| "learning_rate": 0.0001599555061179088, |
| "loss": 3.1744, |
| "step": 906 |
| }, |
| { |
| "epoch": 12.093645484949834, |
| "grad_norm": 0.7523583173751831, |
| "learning_rate": 0.0001599110122358176, |
| "loss": 3.0821, |
| "step": 907 |
| }, |
| { |
| "epoch": 12.107023411371237, |
| "grad_norm": 0.6120390295982361, |
| "learning_rate": 0.00015986651835372636, |
| "loss": 3.16, |
| "step": 908 |
| }, |
| { |
| "epoch": 12.120401337792643, |
| "grad_norm": 0.6486253142356873, |
| "learning_rate": 0.00015982202447163517, |
| "loss": 3.1402, |
| "step": 909 |
| }, |
| { |
| "epoch": 12.133779264214047, |
| "grad_norm": 0.6322839260101318, |
| "learning_rate": 0.00015977753058954395, |
| "loss": 3.1325, |
| "step": 910 |
| }, |
| { |
| "epoch": 12.147157190635452, |
| "grad_norm": 0.5858875513076782, |
| "learning_rate": 0.00015973303670745273, |
| "loss": 3.1705, |
| "step": 911 |
| }, |
| { |
| "epoch": 12.160535117056856, |
| "grad_norm": 0.6495780348777771, |
| "learning_rate": 0.00015968854282536152, |
| "loss": 3.1977, |
| "step": 912 |
| }, |
| { |
| "epoch": 12.173913043478262, |
| "grad_norm": 0.6483474969863892, |
| "learning_rate": 0.00015964404894327033, |
| "loss": 2.9101, |
| "step": 913 |
| }, |
| { |
| "epoch": 12.187290969899665, |
| "grad_norm": 0.6021110415458679, |
| "learning_rate": 0.0001595995550611791, |
| "loss": 3.2106, |
| "step": 914 |
| }, |
| { |
| "epoch": 12.200668896321071, |
| "grad_norm": 0.5101630687713623, |
| "learning_rate": 0.00015955506117908786, |
| "loss": 3.1316, |
| "step": 915 |
| }, |
| { |
| "epoch": 12.214046822742475, |
| "grad_norm": 0.6226193904876709, |
| "learning_rate": 0.00015951056729699667, |
| "loss": 3.0897, |
| "step": 916 |
| }, |
| { |
| "epoch": 12.22742474916388, |
| "grad_norm": 0.6268473267555237, |
| "learning_rate": 0.00015946607341490546, |
| "loss": 3.2036, |
| "step": 917 |
| }, |
| { |
| "epoch": 12.240802675585284, |
| "grad_norm": 0.7825391292572021, |
| "learning_rate": 0.00015942157953281424, |
| "loss": 3.0082, |
| "step": 918 |
| }, |
| { |
| "epoch": 12.25418060200669, |
| "grad_norm": 0.7881148457527161, |
| "learning_rate": 0.00015937708565072302, |
| "loss": 3.0779, |
| "step": 919 |
| }, |
| { |
| "epoch": 12.267558528428093, |
| "grad_norm": 0.6725586652755737, |
| "learning_rate": 0.00015933259176863183, |
| "loss": 3.04, |
| "step": 920 |
| }, |
| { |
| "epoch": 12.280936454849499, |
| "grad_norm": 0.5831689238548279, |
| "learning_rate": 0.0001592880978865406, |
| "loss": 3.3319, |
| "step": 921 |
| }, |
| { |
| "epoch": 12.294314381270903, |
| "grad_norm": 0.6057907342910767, |
| "learning_rate": 0.0001592436040044494, |
| "loss": 3.0869, |
| "step": 922 |
| }, |
| { |
| "epoch": 12.307692307692308, |
| "grad_norm": 0.771857500076294, |
| "learning_rate": 0.00015919911012235818, |
| "loss": 3.1472, |
| "step": 923 |
| }, |
| { |
| "epoch": 12.321070234113712, |
| "grad_norm": 0.7447528839111328, |
| "learning_rate": 0.000159154616240267, |
| "loss": 3.174, |
| "step": 924 |
| }, |
| { |
| "epoch": 12.334448160535118, |
| "grad_norm": 0.5772632956504822, |
| "learning_rate": 0.00015911012235817574, |
| "loss": 3.1767, |
| "step": 925 |
| }, |
| { |
| "epoch": 12.347826086956522, |
| "grad_norm": 0.6952618956565857, |
| "learning_rate": 0.00015906562847608455, |
| "loss": 3.1597, |
| "step": 926 |
| }, |
| { |
| "epoch": 12.361204013377927, |
| "grad_norm": 0.600922703742981, |
| "learning_rate": 0.00015902113459399333, |
| "loss": 3.3612, |
| "step": 927 |
| }, |
| { |
| "epoch": 12.37458193979933, |
| "grad_norm": 0.7571472525596619, |
| "learning_rate": 0.00015897664071190212, |
| "loss": 2.9405, |
| "step": 928 |
| }, |
| { |
| "epoch": 12.387959866220736, |
| "grad_norm": 0.9343985915184021, |
| "learning_rate": 0.0001589321468298109, |
| "loss": 3.2886, |
| "step": 929 |
| }, |
| { |
| "epoch": 12.40133779264214, |
| "grad_norm": 0.7046729922294617, |
| "learning_rate": 0.0001588876529477197, |
| "loss": 3.4421, |
| "step": 930 |
| }, |
| { |
| "epoch": 12.414715719063546, |
| "grad_norm": 0.7591777443885803, |
| "learning_rate": 0.0001588431590656285, |
| "loss": 3.005, |
| "step": 931 |
| }, |
| { |
| "epoch": 12.42809364548495, |
| "grad_norm": 0.6508903503417969, |
| "learning_rate": 0.00015879866518353727, |
| "loss": 2.8554, |
| "step": 932 |
| }, |
| { |
| "epoch": 12.441471571906355, |
| "grad_norm": 0.6557784676551819, |
| "learning_rate": 0.00015875417130144606, |
| "loss": 3.3268, |
| "step": 933 |
| }, |
| { |
| "epoch": 12.454849498327759, |
| "grad_norm": 0.6941578984260559, |
| "learning_rate": 0.00015870967741935487, |
| "loss": 3.2088, |
| "step": 934 |
| }, |
| { |
| "epoch": 12.468227424749164, |
| "grad_norm": 0.6824263334274292, |
| "learning_rate": 0.00015866518353726362, |
| "loss": 3.0897, |
| "step": 935 |
| }, |
| { |
| "epoch": 12.481605351170568, |
| "grad_norm": 0.7324599027633667, |
| "learning_rate": 0.00015862068965517243, |
| "loss": 3.1443, |
| "step": 936 |
| }, |
| { |
| "epoch": 12.494983277591974, |
| "grad_norm": 0.577022135257721, |
| "learning_rate": 0.0001585761957730812, |
| "loss": 3.0896, |
| "step": 937 |
| }, |
| { |
| "epoch": 12.508361204013378, |
| "grad_norm": 0.6165060997009277, |
| "learning_rate": 0.00015853170189099, |
| "loss": 2.7546, |
| "step": 938 |
| }, |
| { |
| "epoch": 12.521739130434783, |
| "grad_norm": 0.561906635761261, |
| "learning_rate": 0.00015848720800889878, |
| "loss": 3.4192, |
| "step": 939 |
| }, |
| { |
| "epoch": 12.535117056856187, |
| "grad_norm": 0.5894923806190491, |
| "learning_rate": 0.0001584427141268076, |
| "loss": 3.0388, |
| "step": 940 |
| }, |
| { |
| "epoch": 12.548494983277592, |
| "grad_norm": 0.6261674761772156, |
| "learning_rate": 0.00015839822024471637, |
| "loss": 3.0705, |
| "step": 941 |
| }, |
| { |
| "epoch": 12.561872909698996, |
| "grad_norm": 0.695101261138916, |
| "learning_rate": 0.00015835372636262515, |
| "loss": 3.1684, |
| "step": 942 |
| }, |
| { |
| "epoch": 12.575250836120402, |
| "grad_norm": 0.6176817417144775, |
| "learning_rate": 0.00015830923248053393, |
| "loss": 3.0708, |
| "step": 943 |
| }, |
| { |
| "epoch": 12.588628762541806, |
| "grad_norm": 0.6548507213592529, |
| "learning_rate": 0.00015826473859844274, |
| "loss": 3.1569, |
| "step": 944 |
| }, |
| { |
| "epoch": 12.602006688963211, |
| "grad_norm": 0.6046382188796997, |
| "learning_rate": 0.0001582202447163515, |
| "loss": 3.2479, |
| "step": 945 |
| }, |
| { |
| "epoch": 12.615384615384615, |
| "grad_norm": 0.7103912234306335, |
| "learning_rate": 0.0001581757508342603, |
| "loss": 3.127, |
| "step": 946 |
| }, |
| { |
| "epoch": 12.62876254180602, |
| "grad_norm": 0.7131765484809875, |
| "learning_rate": 0.0001581312569521691, |
| "loss": 3.0975, |
| "step": 947 |
| }, |
| { |
| "epoch": 12.642140468227424, |
| "grad_norm": 0.6442859768867493, |
| "learning_rate": 0.00015808676307007787, |
| "loss": 3.2885, |
| "step": 948 |
| }, |
| { |
| "epoch": 12.65551839464883, |
| "grad_norm": 0.6430050134658813, |
| "learning_rate": 0.00015804226918798666, |
| "loss": 3.0397, |
| "step": 949 |
| }, |
| { |
| "epoch": 12.668896321070234, |
| "grad_norm": 0.6894303560256958, |
| "learning_rate": 0.00015799777530589546, |
| "loss": 3.0756, |
| "step": 950 |
| }, |
| { |
| "epoch": 12.68227424749164, |
| "grad_norm": 0.7319600582122803, |
| "learning_rate": 0.00015795328142380425, |
| "loss": 3.0457, |
| "step": 951 |
| }, |
| { |
| "epoch": 12.695652173913043, |
| "grad_norm": 0.6445140838623047, |
| "learning_rate": 0.00015790878754171303, |
| "loss": 2.9828, |
| "step": 952 |
| }, |
| { |
| "epoch": 12.709030100334449, |
| "grad_norm": 0.7522070407867432, |
| "learning_rate": 0.0001578642936596218, |
| "loss": 2.8942, |
| "step": 953 |
| }, |
| { |
| "epoch": 12.722408026755852, |
| "grad_norm": 0.7962691783905029, |
| "learning_rate": 0.00015781979977753062, |
| "loss": 3.0985, |
| "step": 954 |
| }, |
| { |
| "epoch": 12.735785953177258, |
| "grad_norm": 0.6391687393188477, |
| "learning_rate": 0.00015777530589543938, |
| "loss": 3.1752, |
| "step": 955 |
| }, |
| { |
| "epoch": 12.749163879598662, |
| "grad_norm": 0.7632976174354553, |
| "learning_rate": 0.00015773081201334816, |
| "loss": 3.3505, |
| "step": 956 |
| }, |
| { |
| "epoch": 12.762541806020067, |
| "grad_norm": 0.7491022944450378, |
| "learning_rate": 0.00015768631813125697, |
| "loss": 3.0721, |
| "step": 957 |
| }, |
| { |
| "epoch": 12.775919732441471, |
| "grad_norm": 0.6163421273231506, |
| "learning_rate": 0.00015764182424916572, |
| "loss": 3.3242, |
| "step": 958 |
| }, |
| { |
| "epoch": 12.789297658862877, |
| "grad_norm": 0.6831198334693909, |
| "learning_rate": 0.00015759733036707453, |
| "loss": 3.1409, |
| "step": 959 |
| }, |
| { |
| "epoch": 12.80267558528428, |
| "grad_norm": 0.812300980091095, |
| "learning_rate": 0.00015755283648498332, |
| "loss": 2.9606, |
| "step": 960 |
| }, |
| { |
| "epoch": 12.816053511705686, |
| "grad_norm": 0.6904334425926208, |
| "learning_rate": 0.00015750834260289213, |
| "loss": 3.0398, |
| "step": 961 |
| }, |
| { |
| "epoch": 12.82943143812709, |
| "grad_norm": 0.6349720358848572, |
| "learning_rate": 0.00015746384872080088, |
| "loss": 3.1033, |
| "step": 962 |
| }, |
| { |
| "epoch": 12.842809364548495, |
| "grad_norm": 0.6837566494941711, |
| "learning_rate": 0.0001574193548387097, |
| "loss": 3.2353, |
| "step": 963 |
| }, |
| { |
| "epoch": 12.856187290969899, |
| "grad_norm": 0.5852749943733215, |
| "learning_rate": 0.00015737486095661847, |
| "loss": 3.0972, |
| "step": 964 |
| }, |
| { |
| "epoch": 12.869565217391305, |
| "grad_norm": 0.6641372442245483, |
| "learning_rate": 0.00015733036707452725, |
| "loss": 3.2243, |
| "step": 965 |
| }, |
| { |
| "epoch": 12.882943143812708, |
| "grad_norm": 0.6613900065422058, |
| "learning_rate": 0.00015728587319243604, |
| "loss": 3.1263, |
| "step": 966 |
| }, |
| { |
| "epoch": 12.896321070234114, |
| "grad_norm": 0.6126120090484619, |
| "learning_rate": 0.00015724137931034485, |
| "loss": 3.069, |
| "step": 967 |
| }, |
| { |
| "epoch": 12.909698996655518, |
| "grad_norm": 0.6764604449272156, |
| "learning_rate": 0.0001571968854282536, |
| "loss": 3.1397, |
| "step": 968 |
| }, |
| { |
| "epoch": 12.923076923076923, |
| "grad_norm": 0.6447578072547913, |
| "learning_rate": 0.0001571523915461624, |
| "loss": 3.1839, |
| "step": 969 |
| }, |
| { |
| "epoch": 12.936454849498327, |
| "grad_norm": 0.5872016549110413, |
| "learning_rate": 0.0001571078976640712, |
| "loss": 3.3144, |
| "step": 970 |
| }, |
| { |
| "epoch": 12.949832775919733, |
| "grad_norm": 0.626276969909668, |
| "learning_rate": 0.00015706340378198, |
| "loss": 3.1295, |
| "step": 971 |
| }, |
| { |
| "epoch": 12.963210702341136, |
| "grad_norm": 0.6829231381416321, |
| "learning_rate": 0.00015701890989988876, |
| "loss": 3.2261, |
| "step": 972 |
| }, |
| { |
| "epoch": 12.976588628762542, |
| "grad_norm": 0.6197345852851868, |
| "learning_rate": 0.00015697441601779757, |
| "loss": 3.1117, |
| "step": 973 |
| }, |
| { |
| "epoch": 12.989966555183946, |
| "grad_norm": 0.6137062907218933, |
| "learning_rate": 0.00015692992213570635, |
| "loss": 3.1548, |
| "step": 974 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.7483121752738953, |
| "learning_rate": 0.00015688542825361513, |
| "loss": 3.4944, |
| "step": 975 |
| }, |
| { |
| "epoch": 13.013377926421406, |
| "grad_norm": 0.6102525591850281, |
| "learning_rate": 0.00015684093437152392, |
| "loss": 3.1403, |
| "step": 976 |
| }, |
| { |
| "epoch": 13.02675585284281, |
| "grad_norm": 0.7258747220039368, |
| "learning_rate": 0.00015679644048943272, |
| "loss": 2.8474, |
| "step": 977 |
| }, |
| { |
| "epoch": 13.040133779264215, |
| "grad_norm": 0.6891087293624878, |
| "learning_rate": 0.00015675194660734148, |
| "loss": 2.9722, |
| "step": 978 |
| }, |
| { |
| "epoch": 13.053511705685619, |
| "grad_norm": 0.6320910453796387, |
| "learning_rate": 0.0001567074527252503, |
| "loss": 2.9807, |
| "step": 979 |
| }, |
| { |
| "epoch": 13.066889632107024, |
| "grad_norm": 0.8684266209602356, |
| "learning_rate": 0.00015666295884315907, |
| "loss": 2.8448, |
| "step": 980 |
| }, |
| { |
| "epoch": 13.080267558528428, |
| "grad_norm": 0.7126099467277527, |
| "learning_rate": 0.00015661846496106788, |
| "loss": 3.1915, |
| "step": 981 |
| }, |
| { |
| "epoch": 13.093645484949834, |
| "grad_norm": 0.7001529335975647, |
| "learning_rate": 0.00015657397107897664, |
| "loss": 3.2017, |
| "step": 982 |
| }, |
| { |
| "epoch": 13.107023411371237, |
| "grad_norm": 0.7901191711425781, |
| "learning_rate": 0.00015652947719688545, |
| "loss": 3.1273, |
| "step": 983 |
| }, |
| { |
| "epoch": 13.120401337792643, |
| "grad_norm": 0.5769410729408264, |
| "learning_rate": 0.00015648498331479423, |
| "loss": 2.8582, |
| "step": 984 |
| }, |
| { |
| "epoch": 13.133779264214047, |
| "grad_norm": 0.5969700813293457, |
| "learning_rate": 0.000156440489432703, |
| "loss": 3.118, |
| "step": 985 |
| }, |
| { |
| "epoch": 13.147157190635452, |
| "grad_norm": 0.5789377093315125, |
| "learning_rate": 0.0001563959955506118, |
| "loss": 2.8121, |
| "step": 986 |
| }, |
| { |
| "epoch": 13.160535117056856, |
| "grad_norm": 0.7945278882980347, |
| "learning_rate": 0.0001563515016685206, |
| "loss": 2.9212, |
| "step": 987 |
| }, |
| { |
| "epoch": 13.173913043478262, |
| "grad_norm": 0.60884690284729, |
| "learning_rate": 0.00015630700778642936, |
| "loss": 3.1993, |
| "step": 988 |
| }, |
| { |
| "epoch": 13.187290969899665, |
| "grad_norm": 0.616142213344574, |
| "learning_rate": 0.00015626251390433817, |
| "loss": 3.107, |
| "step": 989 |
| }, |
| { |
| "epoch": 13.200668896321071, |
| "grad_norm": 0.6428812146186829, |
| "learning_rate": 0.00015621802002224695, |
| "loss": 3.1401, |
| "step": 990 |
| }, |
| { |
| "epoch": 13.214046822742475, |
| "grad_norm": 0.5723693370819092, |
| "learning_rate": 0.00015617352614015576, |
| "loss": 3.0474, |
| "step": 991 |
| }, |
| { |
| "epoch": 13.22742474916388, |
| "grad_norm": 0.5820907950401306, |
| "learning_rate": 0.00015612903225806451, |
| "loss": 3.2211, |
| "step": 992 |
| }, |
| { |
| "epoch": 13.240802675585284, |
| "grad_norm": 0.555957555770874, |
| "learning_rate": 0.00015608453837597332, |
| "loss": 3.1306, |
| "step": 993 |
| }, |
| { |
| "epoch": 13.25418060200669, |
| "grad_norm": 0.528698205947876, |
| "learning_rate": 0.0001560400444938821, |
| "loss": 3.0989, |
| "step": 994 |
| }, |
| { |
| "epoch": 13.267558528428093, |
| "grad_norm": 0.5959749817848206, |
| "learning_rate": 0.0001559955506117909, |
| "loss": 2.9128, |
| "step": 995 |
| }, |
| { |
| "epoch": 13.280936454849499, |
| "grad_norm": 0.6702240705490112, |
| "learning_rate": 0.00015595105672969967, |
| "loss": 2.9969, |
| "step": 996 |
| }, |
| { |
| "epoch": 13.294314381270903, |
| "grad_norm": 0.5363825559616089, |
| "learning_rate": 0.00015590656284760845, |
| "loss": 3.2027, |
| "step": 997 |
| }, |
| { |
| "epoch": 13.307692307692308, |
| "grad_norm": 0.6402661204338074, |
| "learning_rate": 0.00015586206896551724, |
| "loss": 3.2733, |
| "step": 998 |
| }, |
| { |
| "epoch": 13.321070234113712, |
| "grad_norm": 0.700517475605011, |
| "learning_rate": 0.00015581757508342602, |
| "loss": 3.2629, |
| "step": 999 |
| }, |
| { |
| "epoch": 13.334448160535118, |
| "grad_norm": 0.5500949621200562, |
| "learning_rate": 0.00015577308120133483, |
| "loss": 3.172, |
| "step": 1000 |
| }, |
| { |
| "epoch": 13.347826086956522, |
| "grad_norm": 0.6199147701263428, |
| "learning_rate": 0.0001557285873192436, |
| "loss": 3.0556, |
| "step": 1001 |
| }, |
| { |
| "epoch": 13.361204013377927, |
| "grad_norm": 0.5900529026985168, |
| "learning_rate": 0.0001556840934371524, |
| "loss": 3.1248, |
| "step": 1002 |
| }, |
| { |
| "epoch": 13.37458193979933, |
| "grad_norm": 0.7272413372993469, |
| "learning_rate": 0.00015563959955506118, |
| "loss": 3.1142, |
| "step": 1003 |
| }, |
| { |
| "epoch": 13.387959866220736, |
| "grad_norm": 0.6461951732635498, |
| "learning_rate": 0.00015559510567296998, |
| "loss": 3.1145, |
| "step": 1004 |
| }, |
| { |
| "epoch": 13.40133779264214, |
| "grad_norm": 0.5750373005867004, |
| "learning_rate": 0.00015555061179087874, |
| "loss": 3.2117, |
| "step": 1005 |
| }, |
| { |
| "epoch": 13.414715719063546, |
| "grad_norm": 0.6486302614212036, |
| "learning_rate": 0.00015550611790878755, |
| "loss": 2.9018, |
| "step": 1006 |
| }, |
| { |
| "epoch": 13.42809364548495, |
| "grad_norm": 0.6897476315498352, |
| "learning_rate": 0.00015546162402669633, |
| "loss": 3.2987, |
| "step": 1007 |
| }, |
| { |
| "epoch": 13.441471571906355, |
| "grad_norm": 0.5997576713562012, |
| "learning_rate": 0.00015541713014460511, |
| "loss": 2.974, |
| "step": 1008 |
| }, |
| { |
| "epoch": 13.454849498327759, |
| "grad_norm": 0.6484793424606323, |
| "learning_rate": 0.0001553726362625139, |
| "loss": 2.9827, |
| "step": 1009 |
| }, |
| { |
| "epoch": 13.468227424749164, |
| "grad_norm": 0.562312126159668, |
| "learning_rate": 0.0001553281423804227, |
| "loss": 3.2243, |
| "step": 1010 |
| }, |
| { |
| "epoch": 13.481605351170568, |
| "grad_norm": 0.7257137298583984, |
| "learning_rate": 0.0001552836484983315, |
| "loss": 3.1081, |
| "step": 1011 |
| }, |
| { |
| "epoch": 13.494983277591974, |
| "grad_norm": 0.7201404571533203, |
| "learning_rate": 0.00015523915461624027, |
| "loss": 3.1468, |
| "step": 1012 |
| }, |
| { |
| "epoch": 13.508361204013378, |
| "grad_norm": 0.666539192199707, |
| "learning_rate": 0.00015519466073414905, |
| "loss": 3.0081, |
| "step": 1013 |
| }, |
| { |
| "epoch": 13.521739130434783, |
| "grad_norm": 0.6867642998695374, |
| "learning_rate": 0.00015515016685205786, |
| "loss": 2.7889, |
| "step": 1014 |
| }, |
| { |
| "epoch": 13.535117056856187, |
| "grad_norm": 0.5799785256385803, |
| "learning_rate": 0.00015510567296996662, |
| "loss": 3.2995, |
| "step": 1015 |
| }, |
| { |
| "epoch": 13.548494983277592, |
| "grad_norm": 0.6155371069908142, |
| "learning_rate": 0.00015506117908787543, |
| "loss": 3.2227, |
| "step": 1016 |
| }, |
| { |
| "epoch": 13.561872909698996, |
| "grad_norm": 0.7604040503501892, |
| "learning_rate": 0.0001550166852057842, |
| "loss": 3.0384, |
| "step": 1017 |
| }, |
| { |
| "epoch": 13.575250836120402, |
| "grad_norm": 0.8445917963981628, |
| "learning_rate": 0.000154972191323693, |
| "loss": 2.9566, |
| "step": 1018 |
| }, |
| { |
| "epoch": 13.588628762541806, |
| "grad_norm": 0.7978566288948059, |
| "learning_rate": 0.00015492769744160177, |
| "loss": 3.0175, |
| "step": 1019 |
| }, |
| { |
| "epoch": 13.602006688963211, |
| "grad_norm": 0.5899437069892883, |
| "learning_rate": 0.00015488320355951058, |
| "loss": 3.2418, |
| "step": 1020 |
| }, |
| { |
| "epoch": 13.615384615384615, |
| "grad_norm": 0.7204627990722656, |
| "learning_rate": 0.00015483870967741937, |
| "loss": 3.1568, |
| "step": 1021 |
| }, |
| { |
| "epoch": 13.62876254180602, |
| "grad_norm": 0.6504855155944824, |
| "learning_rate": 0.00015479421579532815, |
| "loss": 2.8952, |
| "step": 1022 |
| }, |
| { |
| "epoch": 13.642140468227424, |
| "grad_norm": 0.8101251125335693, |
| "learning_rate": 0.00015474972191323693, |
| "loss": 3.1975, |
| "step": 1023 |
| }, |
| { |
| "epoch": 13.65551839464883, |
| "grad_norm": 0.6161416172981262, |
| "learning_rate": 0.00015470522803114574, |
| "loss": 3.1565, |
| "step": 1024 |
| }, |
| { |
| "epoch": 13.668896321070234, |
| "grad_norm": 0.6131258606910706, |
| "learning_rate": 0.0001546607341490545, |
| "loss": 3.0382, |
| "step": 1025 |
| }, |
| { |
| "epoch": 13.68227424749164, |
| "grad_norm": 0.8008583784103394, |
| "learning_rate": 0.0001546162402669633, |
| "loss": 2.995, |
| "step": 1026 |
| }, |
| { |
| "epoch": 13.695652173913043, |
| "grad_norm": 0.7101227045059204, |
| "learning_rate": 0.0001545717463848721, |
| "loss": 3.0704, |
| "step": 1027 |
| }, |
| { |
| "epoch": 13.709030100334449, |
| "grad_norm": 0.7988458275794983, |
| "learning_rate": 0.00015452725250278087, |
| "loss": 3.1424, |
| "step": 1028 |
| }, |
| { |
| "epoch": 13.722408026755852, |
| "grad_norm": 0.6013655662536621, |
| "learning_rate": 0.00015448275862068965, |
| "loss": 2.9986, |
| "step": 1029 |
| }, |
| { |
| "epoch": 13.735785953177258, |
| "grad_norm": 0.6368236541748047, |
| "learning_rate": 0.00015443826473859846, |
| "loss": 3.0193, |
| "step": 1030 |
| }, |
| { |
| "epoch": 13.749163879598662, |
| "grad_norm": 0.8222694396972656, |
| "learning_rate": 0.00015439377085650724, |
| "loss": 3.0888, |
| "step": 1031 |
| }, |
| { |
| "epoch": 13.762541806020067, |
| "grad_norm": 0.7270404696464539, |
| "learning_rate": 0.00015434927697441603, |
| "loss": 3.0929, |
| "step": 1032 |
| }, |
| { |
| "epoch": 13.775919732441471, |
| "grad_norm": 0.7292355298995972, |
| "learning_rate": 0.0001543047830923248, |
| "loss": 2.7676, |
| "step": 1033 |
| }, |
| { |
| "epoch": 13.789297658862877, |
| "grad_norm": 0.6662157773971558, |
| "learning_rate": 0.00015426028921023362, |
| "loss": 3.1984, |
| "step": 1034 |
| }, |
| { |
| "epoch": 13.80267558528428, |
| "grad_norm": 0.6350163817405701, |
| "learning_rate": 0.00015421579532814237, |
| "loss": 3.3042, |
| "step": 1035 |
| }, |
| { |
| "epoch": 13.816053511705686, |
| "grad_norm": 0.5999907851219177, |
| "learning_rate": 0.00015417130144605118, |
| "loss": 3.0983, |
| "step": 1036 |
| }, |
| { |
| "epoch": 13.82943143812709, |
| "grad_norm": 0.5942257642745972, |
| "learning_rate": 0.00015412680756395997, |
| "loss": 3.0474, |
| "step": 1037 |
| }, |
| { |
| "epoch": 13.842809364548495, |
| "grad_norm": 0.662589430809021, |
| "learning_rate": 0.00015408231368186875, |
| "loss": 2.9251, |
| "step": 1038 |
| }, |
| { |
| "epoch": 13.856187290969899, |
| "grad_norm": 0.5817089080810547, |
| "learning_rate": 0.00015403781979977753, |
| "loss": 3.0716, |
| "step": 1039 |
| }, |
| { |
| "epoch": 13.869565217391305, |
| "grad_norm": 0.6019257307052612, |
| "learning_rate": 0.0001539933259176863, |
| "loss": 3.1754, |
| "step": 1040 |
| }, |
| { |
| "epoch": 13.882943143812708, |
| "grad_norm": 0.6301860213279724, |
| "learning_rate": 0.00015394883203559512, |
| "loss": 3.2066, |
| "step": 1041 |
| }, |
| { |
| "epoch": 13.896321070234114, |
| "grad_norm": 0.6468888521194458, |
| "learning_rate": 0.00015390433815350388, |
| "loss": 3.3001, |
| "step": 1042 |
| }, |
| { |
| "epoch": 13.909698996655518, |
| "grad_norm": 0.6510801911354065, |
| "learning_rate": 0.0001538598442714127, |
| "loss": 3.2198, |
| "step": 1043 |
| }, |
| { |
| "epoch": 13.923076923076923, |
| "grad_norm": 0.5692014694213867, |
| "learning_rate": 0.00015381535038932147, |
| "loss": 3.3257, |
| "step": 1044 |
| }, |
| { |
| "epoch": 13.936454849498327, |
| "grad_norm": 0.594219982624054, |
| "learning_rate": 0.00015377085650723025, |
| "loss": 2.9918, |
| "step": 1045 |
| }, |
| { |
| "epoch": 13.949832775919733, |
| "grad_norm": 0.6501769423484802, |
| "learning_rate": 0.00015372636262513903, |
| "loss": 2.9653, |
| "step": 1046 |
| }, |
| { |
| "epoch": 13.963210702341136, |
| "grad_norm": 0.6310623288154602, |
| "learning_rate": 0.00015368186874304784, |
| "loss": 3.19, |
| "step": 1047 |
| }, |
| { |
| "epoch": 13.976588628762542, |
| "grad_norm": 0.5795436501502991, |
| "learning_rate": 0.00015363737486095663, |
| "loss": 3.272, |
| "step": 1048 |
| }, |
| { |
| "epoch": 13.989966555183946, |
| "grad_norm": 0.5421392917633057, |
| "learning_rate": 0.0001535928809788654, |
| "loss": 3.2109, |
| "step": 1049 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.833959698677063, |
| "learning_rate": 0.0001535483870967742, |
| "loss": 3.3017, |
| "step": 1050 |
| }, |
| { |
| "epoch": 14.013377926421406, |
| "grad_norm": 0.6856208443641663, |
| "learning_rate": 0.000153503893214683, |
| "loss": 3.0784, |
| "step": 1051 |
| }, |
| { |
| "epoch": 14.02675585284281, |
| "grad_norm": 0.5841811895370483, |
| "learning_rate": 0.00015345939933259176, |
| "loss": 2.848, |
| "step": 1052 |
| }, |
| { |
| "epoch": 14.040133779264215, |
| "grad_norm": 0.557906985282898, |
| "learning_rate": 0.00015341490545050057, |
| "loss": 3.1564, |
| "step": 1053 |
| }, |
| { |
| "epoch": 14.053511705685619, |
| "grad_norm": 0.5468619465827942, |
| "learning_rate": 0.00015337041156840935, |
| "loss": 3.1237, |
| "step": 1054 |
| }, |
| { |
| "epoch": 14.066889632107024, |
| "grad_norm": 0.7213225364685059, |
| "learning_rate": 0.00015332591768631813, |
| "loss": 2.8993, |
| "step": 1055 |
| }, |
| { |
| "epoch": 14.080267558528428, |
| "grad_norm": 0.7413175106048584, |
| "learning_rate": 0.0001532814238042269, |
| "loss": 3.0028, |
| "step": 1056 |
| }, |
| { |
| "epoch": 14.093645484949834, |
| "grad_norm": 0.6072244644165039, |
| "learning_rate": 0.00015323692992213572, |
| "loss": 2.8534, |
| "step": 1057 |
| }, |
| { |
| "epoch": 14.107023411371237, |
| "grad_norm": 0.683262288570404, |
| "learning_rate": 0.0001531924360400445, |
| "loss": 2.8982, |
| "step": 1058 |
| }, |
| { |
| "epoch": 14.120401337792643, |
| "grad_norm": 0.5880157351493835, |
| "learning_rate": 0.0001531479421579533, |
| "loss": 2.9567, |
| "step": 1059 |
| }, |
| { |
| "epoch": 14.133779264214047, |
| "grad_norm": 0.7519298195838928, |
| "learning_rate": 0.00015310344827586207, |
| "loss": 2.9615, |
| "step": 1060 |
| }, |
| { |
| "epoch": 14.147157190635452, |
| "grad_norm": 0.7747945189476013, |
| "learning_rate": 0.00015305895439377088, |
| "loss": 2.7823, |
| "step": 1061 |
| }, |
| { |
| "epoch": 14.160535117056856, |
| "grad_norm": 0.6560395956039429, |
| "learning_rate": 0.00015301446051167963, |
| "loss": 3.1791, |
| "step": 1062 |
| }, |
| { |
| "epoch": 14.173913043478262, |
| "grad_norm": 0.6388076543807983, |
| "learning_rate": 0.00015296996662958844, |
| "loss": 2.9868, |
| "step": 1063 |
| }, |
| { |
| "epoch": 14.187290969899665, |
| "grad_norm": 0.7349525690078735, |
| "learning_rate": 0.00015292547274749723, |
| "loss": 2.9291, |
| "step": 1064 |
| }, |
| { |
| "epoch": 14.200668896321071, |
| "grad_norm": 0.7184433341026306, |
| "learning_rate": 0.000152880978865406, |
| "loss": 2.7889, |
| "step": 1065 |
| }, |
| { |
| "epoch": 14.214046822742475, |
| "grad_norm": 0.6776930093765259, |
| "learning_rate": 0.0001528364849833148, |
| "loss": 3.1572, |
| "step": 1066 |
| }, |
| { |
| "epoch": 14.22742474916388, |
| "grad_norm": 0.818756103515625, |
| "learning_rate": 0.0001527919911012236, |
| "loss": 2.767, |
| "step": 1067 |
| }, |
| { |
| "epoch": 14.240802675585284, |
| "grad_norm": 0.6005066633224487, |
| "learning_rate": 0.00015274749721913238, |
| "loss": 3.1796, |
| "step": 1068 |
| }, |
| { |
| "epoch": 14.25418060200669, |
| "grad_norm": 0.6367926001548767, |
| "learning_rate": 0.00015270300333704117, |
| "loss": 2.9999, |
| "step": 1069 |
| }, |
| { |
| "epoch": 14.267558528428093, |
| "grad_norm": 0.6823679208755493, |
| "learning_rate": 0.00015265850945494995, |
| "loss": 2.7663, |
| "step": 1070 |
| }, |
| { |
| "epoch": 14.280936454849499, |
| "grad_norm": 0.6238808631896973, |
| "learning_rate": 0.00015261401557285876, |
| "loss": 3.1908, |
| "step": 1071 |
| }, |
| { |
| "epoch": 14.294314381270903, |
| "grad_norm": 0.6983721256256104, |
| "learning_rate": 0.0001525695216907675, |
| "loss": 3.0264, |
| "step": 1072 |
| }, |
| { |
| "epoch": 14.307692307692308, |
| "grad_norm": 0.7568501234054565, |
| "learning_rate": 0.00015252502780867632, |
| "loss": 3.0474, |
| "step": 1073 |
| }, |
| { |
| "epoch": 14.321070234113712, |
| "grad_norm": 0.6250051259994507, |
| "learning_rate": 0.0001524805339265851, |
| "loss": 3.114, |
| "step": 1074 |
| }, |
| { |
| "epoch": 14.334448160535118, |
| "grad_norm": 0.5907386541366577, |
| "learning_rate": 0.0001524360400444939, |
| "loss": 3.1823, |
| "step": 1075 |
| }, |
| { |
| "epoch": 14.347826086956522, |
| "grad_norm": 0.6719332337379456, |
| "learning_rate": 0.00015239154616240267, |
| "loss": 2.994, |
| "step": 1076 |
| }, |
| { |
| "epoch": 14.361204013377927, |
| "grad_norm": 0.5911534428596497, |
| "learning_rate": 0.00015234705228031148, |
| "loss": 3.4178, |
| "step": 1077 |
| }, |
| { |
| "epoch": 14.37458193979933, |
| "grad_norm": 0.8071689009666443, |
| "learning_rate": 0.00015230255839822026, |
| "loss": 2.9633, |
| "step": 1078 |
| }, |
| { |
| "epoch": 14.387959866220736, |
| "grad_norm": 0.5957038998603821, |
| "learning_rate": 0.00015225806451612902, |
| "loss": 3.1126, |
| "step": 1079 |
| }, |
| { |
| "epoch": 14.40133779264214, |
| "grad_norm": 0.6604459285736084, |
| "learning_rate": 0.00015221357063403783, |
| "loss": 2.9193, |
| "step": 1080 |
| }, |
| { |
| "epoch": 14.414715719063546, |
| "grad_norm": 0.626081109046936, |
| "learning_rate": 0.0001521690767519466, |
| "loss": 2.9321, |
| "step": 1081 |
| }, |
| { |
| "epoch": 14.42809364548495, |
| "grad_norm": 0.5767174959182739, |
| "learning_rate": 0.0001521245828698554, |
| "loss": 3.1518, |
| "step": 1082 |
| }, |
| { |
| "epoch": 14.441471571906355, |
| "grad_norm": 0.6444874405860901, |
| "learning_rate": 0.00015208008898776417, |
| "loss": 3.1948, |
| "step": 1083 |
| }, |
| { |
| "epoch": 14.454849498327759, |
| "grad_norm": 0.668171763420105, |
| "learning_rate": 0.00015203559510567298, |
| "loss": 3.016, |
| "step": 1084 |
| }, |
| { |
| "epoch": 14.468227424749164, |
| "grad_norm": 0.6998944878578186, |
| "learning_rate": 0.00015199110122358176, |
| "loss": 2.9558, |
| "step": 1085 |
| }, |
| { |
| "epoch": 14.481605351170568, |
| "grad_norm": 0.5896235704421997, |
| "learning_rate": 0.00015194660734149055, |
| "loss": 3.0997, |
| "step": 1086 |
| }, |
| { |
| "epoch": 14.494983277591974, |
| "grad_norm": 0.6724826097488403, |
| "learning_rate": 0.00015190211345939933, |
| "loss": 2.9369, |
| "step": 1087 |
| }, |
| { |
| "epoch": 14.508361204013378, |
| "grad_norm": 0.5710486769676208, |
| "learning_rate": 0.00015185761957730814, |
| "loss": 3.4724, |
| "step": 1088 |
| }, |
| { |
| "epoch": 14.521739130434783, |
| "grad_norm": 0.9997962117195129, |
| "learning_rate": 0.0001518131256952169, |
| "loss": 2.9058, |
| "step": 1089 |
| }, |
| { |
| "epoch": 14.535117056856187, |
| "grad_norm": 0.668074905872345, |
| "learning_rate": 0.0001517686318131257, |
| "loss": 3.2401, |
| "step": 1090 |
| }, |
| { |
| "epoch": 14.548494983277592, |
| "grad_norm": 0.6180433630943298, |
| "learning_rate": 0.00015172413793103449, |
| "loss": 2.8975, |
| "step": 1091 |
| }, |
| { |
| "epoch": 14.561872909698996, |
| "grad_norm": 0.6412661075592041, |
| "learning_rate": 0.00015167964404894327, |
| "loss": 2.9882, |
| "step": 1092 |
| }, |
| { |
| "epoch": 14.575250836120402, |
| "grad_norm": 0.715288519859314, |
| "learning_rate": 0.00015163515016685205, |
| "loss": 3.4004, |
| "step": 1093 |
| }, |
| { |
| "epoch": 14.588628762541806, |
| "grad_norm": 0.689164400100708, |
| "learning_rate": 0.00015159065628476086, |
| "loss": 3.0705, |
| "step": 1094 |
| }, |
| { |
| "epoch": 14.602006688963211, |
| "grad_norm": 0.7713497281074524, |
| "learning_rate": 0.00015154616240266964, |
| "loss": 3.159, |
| "step": 1095 |
| }, |
| { |
| "epoch": 14.615384615384615, |
| "grad_norm": 0.6725841164588928, |
| "learning_rate": 0.00015150166852057843, |
| "loss": 2.8938, |
| "step": 1096 |
| }, |
| { |
| "epoch": 14.62876254180602, |
| "grad_norm": 0.658108651638031, |
| "learning_rate": 0.0001514571746384872, |
| "loss": 2.8747, |
| "step": 1097 |
| }, |
| { |
| "epoch": 14.642140468227424, |
| "grad_norm": 0.5711888074874878, |
| "learning_rate": 0.00015141268075639602, |
| "loss": 2.8989, |
| "step": 1098 |
| }, |
| { |
| "epoch": 14.65551839464883, |
| "grad_norm": 0.6184161305427551, |
| "learning_rate": 0.00015136818687430477, |
| "loss": 3.0904, |
| "step": 1099 |
| }, |
| { |
| "epoch": 14.668896321070234, |
| "grad_norm": 0.5937799215316772, |
| "learning_rate": 0.00015132369299221358, |
| "loss": 3.1637, |
| "step": 1100 |
| }, |
| { |
| "epoch": 14.68227424749164, |
| "grad_norm": 0.591673731803894, |
| "learning_rate": 0.00015127919911012236, |
| "loss": 2.9547, |
| "step": 1101 |
| }, |
| { |
| "epoch": 14.695652173913043, |
| "grad_norm": 0.7317401170730591, |
| "learning_rate": 0.00015123470522803115, |
| "loss": 3.2043, |
| "step": 1102 |
| }, |
| { |
| "epoch": 14.709030100334449, |
| "grad_norm": 0.5784003734588623, |
| "learning_rate": 0.00015119021134593993, |
| "loss": 3.102, |
| "step": 1103 |
| }, |
| { |
| "epoch": 14.722408026755852, |
| "grad_norm": 0.7077385187149048, |
| "learning_rate": 0.00015114571746384874, |
| "loss": 3.011, |
| "step": 1104 |
| }, |
| { |
| "epoch": 14.735785953177258, |
| "grad_norm": 0.6472675204277039, |
| "learning_rate": 0.00015110122358175752, |
| "loss": 3.2075, |
| "step": 1105 |
| }, |
| { |
| "epoch": 14.749163879598662, |
| "grad_norm": 0.6789306998252869, |
| "learning_rate": 0.0001510567296996663, |
| "loss": 2.9458, |
| "step": 1106 |
| }, |
| { |
| "epoch": 14.762541806020067, |
| "grad_norm": 0.6602732539176941, |
| "learning_rate": 0.00015101223581757509, |
| "loss": 2.9941, |
| "step": 1107 |
| }, |
| { |
| "epoch": 14.775919732441471, |
| "grad_norm": 0.7484832406044006, |
| "learning_rate": 0.0001509677419354839, |
| "loss": 3.0358, |
| "step": 1108 |
| }, |
| { |
| "epoch": 14.789297658862877, |
| "grad_norm": 0.704139769077301, |
| "learning_rate": 0.00015092324805339265, |
| "loss": 3.1006, |
| "step": 1109 |
| }, |
| { |
| "epoch": 14.80267558528428, |
| "grad_norm": 0.6545978784561157, |
| "learning_rate": 0.00015087875417130146, |
| "loss": 3.0369, |
| "step": 1110 |
| }, |
| { |
| "epoch": 14.816053511705686, |
| "grad_norm": 0.5718163847923279, |
| "learning_rate": 0.00015083426028921024, |
| "loss": 3.1683, |
| "step": 1111 |
| }, |
| { |
| "epoch": 14.82943143812709, |
| "grad_norm": 0.5773367285728455, |
| "learning_rate": 0.00015078976640711902, |
| "loss": 3.2753, |
| "step": 1112 |
| }, |
| { |
| "epoch": 14.842809364548495, |
| "grad_norm": 0.6617185473442078, |
| "learning_rate": 0.0001507452725250278, |
| "loss": 2.9713, |
| "step": 1113 |
| }, |
| { |
| "epoch": 14.856187290969899, |
| "grad_norm": 0.6748194098472595, |
| "learning_rate": 0.00015070077864293662, |
| "loss": 3.0961, |
| "step": 1114 |
| }, |
| { |
| "epoch": 14.869565217391305, |
| "grad_norm": 0.6942034959793091, |
| "learning_rate": 0.0001506562847608454, |
| "loss": 3.0778, |
| "step": 1115 |
| }, |
| { |
| "epoch": 14.882943143812708, |
| "grad_norm": 1.0203640460968018, |
| "learning_rate": 0.00015061179087875418, |
| "loss": 3.0705, |
| "step": 1116 |
| }, |
| { |
| "epoch": 14.896321070234114, |
| "grad_norm": 0.5746601223945618, |
| "learning_rate": 0.00015056729699666296, |
| "loss": 3.1204, |
| "step": 1117 |
| }, |
| { |
| "epoch": 14.909698996655518, |
| "grad_norm": 0.7374005317687988, |
| "learning_rate": 0.00015052280311457177, |
| "loss": 3.1289, |
| "step": 1118 |
| }, |
| { |
| "epoch": 14.923076923076923, |
| "grad_norm": 0.5524411201477051, |
| "learning_rate": 0.00015047830923248053, |
| "loss": 3.2795, |
| "step": 1119 |
| }, |
| { |
| "epoch": 14.936454849498327, |
| "grad_norm": 0.7024741768836975, |
| "learning_rate": 0.0001504338153503893, |
| "loss": 3.0675, |
| "step": 1120 |
| }, |
| { |
| "epoch": 14.949832775919733, |
| "grad_norm": 0.7431137561798096, |
| "learning_rate": 0.00015038932146829812, |
| "loss": 3.1222, |
| "step": 1121 |
| }, |
| { |
| "epoch": 14.963210702341136, |
| "grad_norm": 0.6568113565444946, |
| "learning_rate": 0.0001503448275862069, |
| "loss": 3.1523, |
| "step": 1122 |
| }, |
| { |
| "epoch": 14.976588628762542, |
| "grad_norm": 0.6193330883979797, |
| "learning_rate": 0.00015030033370411569, |
| "loss": 3.1632, |
| "step": 1123 |
| }, |
| { |
| "epoch": 14.989966555183946, |
| "grad_norm": 0.6371363401412964, |
| "learning_rate": 0.00015025583982202447, |
| "loss": 3.0525, |
| "step": 1124 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.652542233467102, |
| "learning_rate": 0.00015021134593993328, |
| "loss": 3.1846, |
| "step": 1125 |
| }, |
| { |
| "epoch": 15.013377926421406, |
| "grad_norm": 0.6337831616401672, |
| "learning_rate": 0.00015016685205784203, |
| "loss": 2.9292, |
| "step": 1126 |
| }, |
| { |
| "epoch": 15.02675585284281, |
| "grad_norm": 0.85350501537323, |
| "learning_rate": 0.00015012235817575084, |
| "loss": 2.913, |
| "step": 1127 |
| }, |
| { |
| "epoch": 15.040133779264215, |
| "grad_norm": 0.6439313888549805, |
| "learning_rate": 0.00015007786429365962, |
| "loss": 3.0809, |
| "step": 1128 |
| }, |
| { |
| "epoch": 15.053511705685619, |
| "grad_norm": 0.5232247114181519, |
| "learning_rate": 0.0001500333704115684, |
| "loss": 3.1348, |
| "step": 1129 |
| }, |
| { |
| "epoch": 15.066889632107024, |
| "grad_norm": 0.6082741618156433, |
| "learning_rate": 0.0001499888765294772, |
| "loss": 2.9282, |
| "step": 1130 |
| }, |
| { |
| "epoch": 15.080267558528428, |
| "grad_norm": 0.5736444592475891, |
| "learning_rate": 0.000149944382647386, |
| "loss": 2.9891, |
| "step": 1131 |
| }, |
| { |
| "epoch": 15.093645484949834, |
| "grad_norm": 0.7732790112495422, |
| "learning_rate": 0.00014989988876529478, |
| "loss": 3.0461, |
| "step": 1132 |
| }, |
| { |
| "epoch": 15.107023411371237, |
| "grad_norm": 0.618357241153717, |
| "learning_rate": 0.00014985539488320356, |
| "loss": 3.0296, |
| "step": 1133 |
| }, |
| { |
| "epoch": 15.120401337792643, |
| "grad_norm": 0.7245836853981018, |
| "learning_rate": 0.00014981090100111235, |
| "loss": 3.0078, |
| "step": 1134 |
| }, |
| { |
| "epoch": 15.133779264214047, |
| "grad_norm": 0.6738787293434143, |
| "learning_rate": 0.00014976640711902116, |
| "loss": 3.1171, |
| "step": 1135 |
| }, |
| { |
| "epoch": 15.147157190635452, |
| "grad_norm": 0.5802761316299438, |
| "learning_rate": 0.0001497219132369299, |
| "loss": 3.1403, |
| "step": 1136 |
| }, |
| { |
| "epoch": 15.160535117056856, |
| "grad_norm": 0.5941367745399475, |
| "learning_rate": 0.00014967741935483872, |
| "loss": 2.9149, |
| "step": 1137 |
| }, |
| { |
| "epoch": 15.173913043478262, |
| "grad_norm": 0.8884940147399902, |
| "learning_rate": 0.0001496329254727475, |
| "loss": 2.8983, |
| "step": 1138 |
| }, |
| { |
| "epoch": 15.187290969899665, |
| "grad_norm": 0.7229192852973938, |
| "learning_rate": 0.00014958843159065628, |
| "loss": 2.9774, |
| "step": 1139 |
| }, |
| { |
| "epoch": 15.200668896321071, |
| "grad_norm": 0.6714467406272888, |
| "learning_rate": 0.00014954393770856507, |
| "loss": 2.9219, |
| "step": 1140 |
| }, |
| { |
| "epoch": 15.214046822742475, |
| "grad_norm": 0.6785704493522644, |
| "learning_rate": 0.00014949944382647388, |
| "loss": 3.0205, |
| "step": 1141 |
| }, |
| { |
| "epoch": 15.22742474916388, |
| "grad_norm": 0.6349677443504333, |
| "learning_rate": 0.00014945494994438266, |
| "loss": 3.1601, |
| "step": 1142 |
| }, |
| { |
| "epoch": 15.240802675585284, |
| "grad_norm": 0.557123064994812, |
| "learning_rate": 0.00014941045606229144, |
| "loss": 2.6297, |
| "step": 1143 |
| }, |
| { |
| "epoch": 15.25418060200669, |
| "grad_norm": 0.6714944243431091, |
| "learning_rate": 0.00014936596218020022, |
| "loss": 2.7951, |
| "step": 1144 |
| }, |
| { |
| "epoch": 15.267558528428093, |
| "grad_norm": 0.6747463345527649, |
| "learning_rate": 0.00014932146829810903, |
| "loss": 2.7909, |
| "step": 1145 |
| }, |
| { |
| "epoch": 15.280936454849499, |
| "grad_norm": 0.5717387199401855, |
| "learning_rate": 0.0001492769744160178, |
| "loss": 3.2896, |
| "step": 1146 |
| }, |
| { |
| "epoch": 15.294314381270903, |
| "grad_norm": 0.6589123010635376, |
| "learning_rate": 0.0001492324805339266, |
| "loss": 2.8332, |
| "step": 1147 |
| }, |
| { |
| "epoch": 15.307692307692308, |
| "grad_norm": 0.6273646950721741, |
| "learning_rate": 0.00014918798665183538, |
| "loss": 3.0084, |
| "step": 1148 |
| }, |
| { |
| "epoch": 15.321070234113712, |
| "grad_norm": 0.6551377773284912, |
| "learning_rate": 0.00014914349276974416, |
| "loss": 2.8147, |
| "step": 1149 |
| }, |
| { |
| "epoch": 15.334448160535118, |
| "grad_norm": 0.6751659512519836, |
| "learning_rate": 0.00014909899888765295, |
| "loss": 3.1345, |
| "step": 1150 |
| }, |
| { |
| "epoch": 15.347826086956522, |
| "grad_norm": 0.677094042301178, |
| "learning_rate": 0.00014905450500556175, |
| "loss": 3.1958, |
| "step": 1151 |
| }, |
| { |
| "epoch": 15.361204013377927, |
| "grad_norm": 0.6613426804542542, |
| "learning_rate": 0.00014901001112347054, |
| "loss": 3.081, |
| "step": 1152 |
| }, |
| { |
| "epoch": 15.37458193979933, |
| "grad_norm": 0.7645783424377441, |
| "learning_rate": 0.00014896551724137932, |
| "loss": 2.8799, |
| "step": 1153 |
| }, |
| { |
| "epoch": 15.387959866220736, |
| "grad_norm": 0.5698953866958618, |
| "learning_rate": 0.0001489210233592881, |
| "loss": 3.1691, |
| "step": 1154 |
| }, |
| { |
| "epoch": 15.40133779264214, |
| "grad_norm": 0.6581351161003113, |
| "learning_rate": 0.0001488765294771969, |
| "loss": 3.2365, |
| "step": 1155 |
| }, |
| { |
| "epoch": 15.414715719063546, |
| "grad_norm": 0.7809271812438965, |
| "learning_rate": 0.00014883203559510567, |
| "loss": 2.833, |
| "step": 1156 |
| }, |
| { |
| "epoch": 15.42809364548495, |
| "grad_norm": 0.6226280927658081, |
| "learning_rate": 0.00014878754171301448, |
| "loss": 3.1502, |
| "step": 1157 |
| }, |
| { |
| "epoch": 15.441471571906355, |
| "grad_norm": 0.5494824051856995, |
| "learning_rate": 0.00014874304783092326, |
| "loss": 3.2195, |
| "step": 1158 |
| }, |
| { |
| "epoch": 15.454849498327759, |
| "grad_norm": 0.5729116797447205, |
| "learning_rate": 0.00014869855394883204, |
| "loss": 2.9463, |
| "step": 1159 |
| }, |
| { |
| "epoch": 15.468227424749164, |
| "grad_norm": 0.6673750877380371, |
| "learning_rate": 0.00014865406006674082, |
| "loss": 3.1514, |
| "step": 1160 |
| }, |
| { |
| "epoch": 15.481605351170568, |
| "grad_norm": 0.6746686697006226, |
| "learning_rate": 0.0001486095661846496, |
| "loss": 3.0088, |
| "step": 1161 |
| }, |
| { |
| "epoch": 15.494983277591974, |
| "grad_norm": 0.6898564100265503, |
| "learning_rate": 0.00014856507230255842, |
| "loss": 2.8468, |
| "step": 1162 |
| }, |
| { |
| "epoch": 15.508361204013378, |
| "grad_norm": 0.7262438535690308, |
| "learning_rate": 0.00014852057842046717, |
| "loss": 2.8017, |
| "step": 1163 |
| }, |
| { |
| "epoch": 15.521739130434783, |
| "grad_norm": 0.6878666877746582, |
| "learning_rate": 0.00014847608453837598, |
| "loss": 2.9486, |
| "step": 1164 |
| }, |
| { |
| "epoch": 15.535117056856187, |
| "grad_norm": 0.6375080347061157, |
| "learning_rate": 0.00014843159065628476, |
| "loss": 3.0757, |
| "step": 1165 |
| }, |
| { |
| "epoch": 15.548494983277592, |
| "grad_norm": 0.6540268063545227, |
| "learning_rate": 0.00014838709677419355, |
| "loss": 3.1262, |
| "step": 1166 |
| }, |
| { |
| "epoch": 15.561872909698996, |
| "grad_norm": 0.6036689877510071, |
| "learning_rate": 0.00014834260289210233, |
| "loss": 3.0607, |
| "step": 1167 |
| }, |
| { |
| "epoch": 15.575250836120402, |
| "grad_norm": 0.5899893641471863, |
| "learning_rate": 0.00014829810901001114, |
| "loss": 3.1222, |
| "step": 1168 |
| }, |
| { |
| "epoch": 15.588628762541806, |
| "grad_norm": 0.7268028259277344, |
| "learning_rate": 0.00014825361512791992, |
| "loss": 3.1777, |
| "step": 1169 |
| }, |
| { |
| "epoch": 15.602006688963211, |
| "grad_norm": 0.6990141272544861, |
| "learning_rate": 0.0001482091212458287, |
| "loss": 3.2142, |
| "step": 1170 |
| }, |
| { |
| "epoch": 15.615384615384615, |
| "grad_norm": 0.6009657382965088, |
| "learning_rate": 0.00014816462736373748, |
| "loss": 3.141, |
| "step": 1171 |
| }, |
| { |
| "epoch": 15.62876254180602, |
| "grad_norm": 0.6287830471992493, |
| "learning_rate": 0.0001481201334816463, |
| "loss": 3.0621, |
| "step": 1172 |
| }, |
| { |
| "epoch": 15.642140468227424, |
| "grad_norm": 0.6720128655433655, |
| "learning_rate": 0.00014807563959955505, |
| "loss": 3.0353, |
| "step": 1173 |
| }, |
| { |
| "epoch": 15.65551839464883, |
| "grad_norm": 0.6694427132606506, |
| "learning_rate": 0.00014803114571746386, |
| "loss": 3.0171, |
| "step": 1174 |
| }, |
| { |
| "epoch": 15.668896321070234, |
| "grad_norm": 0.5630237460136414, |
| "learning_rate": 0.00014798665183537264, |
| "loss": 3.1444, |
| "step": 1175 |
| }, |
| { |
| "epoch": 15.68227424749164, |
| "grad_norm": 0.7139558792114258, |
| "learning_rate": 0.00014794215795328142, |
| "loss": 3.0012, |
| "step": 1176 |
| }, |
| { |
| "epoch": 15.695652173913043, |
| "grad_norm": 0.6374551057815552, |
| "learning_rate": 0.0001478976640711902, |
| "loss": 2.9123, |
| "step": 1177 |
| }, |
| { |
| "epoch": 15.709030100334449, |
| "grad_norm": 0.5957819223403931, |
| "learning_rate": 0.00014785317018909902, |
| "loss": 3.09, |
| "step": 1178 |
| }, |
| { |
| "epoch": 15.722408026755852, |
| "grad_norm": 0.6083621382713318, |
| "learning_rate": 0.0001478086763070078, |
| "loss": 3.0231, |
| "step": 1179 |
| }, |
| { |
| "epoch": 15.735785953177258, |
| "grad_norm": 0.6169192790985107, |
| "learning_rate": 0.00014776418242491658, |
| "loss": 2.9863, |
| "step": 1180 |
| }, |
| { |
| "epoch": 15.749163879598662, |
| "grad_norm": 0.6058081984519958, |
| "learning_rate": 0.00014771968854282536, |
| "loss": 3.0261, |
| "step": 1181 |
| }, |
| { |
| "epoch": 15.762541806020067, |
| "grad_norm": 0.5816760659217834, |
| "learning_rate": 0.00014767519466073417, |
| "loss": 3.1593, |
| "step": 1182 |
| }, |
| { |
| "epoch": 15.775919732441471, |
| "grad_norm": 0.6246895790100098, |
| "learning_rate": 0.00014763070077864293, |
| "loss": 3.1029, |
| "step": 1183 |
| }, |
| { |
| "epoch": 15.789297658862877, |
| "grad_norm": 0.56280517578125, |
| "learning_rate": 0.00014758620689655174, |
| "loss": 2.9778, |
| "step": 1184 |
| }, |
| { |
| "epoch": 15.80267558528428, |
| "grad_norm": 0.5743212699890137, |
| "learning_rate": 0.00014754171301446052, |
| "loss": 2.8799, |
| "step": 1185 |
| }, |
| { |
| "epoch": 15.816053511705686, |
| "grad_norm": 0.6163922548294067, |
| "learning_rate": 0.0001474972191323693, |
| "loss": 3.0226, |
| "step": 1186 |
| }, |
| { |
| "epoch": 15.82943143812709, |
| "grad_norm": 0.5892409682273865, |
| "learning_rate": 0.00014745272525027808, |
| "loss": 3.1167, |
| "step": 1187 |
| }, |
| { |
| "epoch": 15.842809364548495, |
| "grad_norm": 0.7977785468101501, |
| "learning_rate": 0.0001474082313681869, |
| "loss": 2.8427, |
| "step": 1188 |
| }, |
| { |
| "epoch": 15.856187290969899, |
| "grad_norm": 0.7396023273468018, |
| "learning_rate": 0.00014736373748609568, |
| "loss": 2.7809, |
| "step": 1189 |
| }, |
| { |
| "epoch": 15.869565217391305, |
| "grad_norm": 0.58844393491745, |
| "learning_rate": 0.00014731924360400446, |
| "loss": 3.0624, |
| "step": 1190 |
| }, |
| { |
| "epoch": 15.882943143812708, |
| "grad_norm": 0.6903204321861267, |
| "learning_rate": 0.00014727474972191324, |
| "loss": 3.1246, |
| "step": 1191 |
| }, |
| { |
| "epoch": 15.896321070234114, |
| "grad_norm": 0.5902391672134399, |
| "learning_rate": 0.00014723025583982205, |
| "loss": 3.0505, |
| "step": 1192 |
| }, |
| { |
| "epoch": 15.909698996655518, |
| "grad_norm": 0.575752317905426, |
| "learning_rate": 0.0001471857619577308, |
| "loss": 2.8508, |
| "step": 1193 |
| }, |
| { |
| "epoch": 15.923076923076923, |
| "grad_norm": 0.7248224020004272, |
| "learning_rate": 0.00014714126807563961, |
| "loss": 3.1438, |
| "step": 1194 |
| }, |
| { |
| "epoch": 15.936454849498327, |
| "grad_norm": 0.5669791102409363, |
| "learning_rate": 0.0001470967741935484, |
| "loss": 3.1765, |
| "step": 1195 |
| }, |
| { |
| "epoch": 15.949832775919733, |
| "grad_norm": 0.6656806468963623, |
| "learning_rate": 0.00014705228031145718, |
| "loss": 3.1456, |
| "step": 1196 |
| }, |
| { |
| "epoch": 15.963210702341136, |
| "grad_norm": 0.6073266863822937, |
| "learning_rate": 0.00014700778642936596, |
| "loss": 3.1836, |
| "step": 1197 |
| }, |
| { |
| "epoch": 15.976588628762542, |
| "grad_norm": 0.8209658861160278, |
| "learning_rate": 0.00014696329254727477, |
| "loss": 2.8457, |
| "step": 1198 |
| }, |
| { |
| "epoch": 15.989966555183946, |
| "grad_norm": 0.6495081186294556, |
| "learning_rate": 0.00014691879866518355, |
| "loss": 3.0161, |
| "step": 1199 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.7522635459899902, |
| "learning_rate": 0.00014687430478309234, |
| "loss": 2.8293, |
| "step": 1200 |
| }, |
| { |
| "epoch": 16.013377926421406, |
| "grad_norm": 0.8024417161941528, |
| "learning_rate": 0.00014682981090100112, |
| "loss": 2.909, |
| "step": 1201 |
| }, |
| { |
| "epoch": 16.02675585284281, |
| "grad_norm": 0.7147983908653259, |
| "learning_rate": 0.0001467853170189099, |
| "loss": 2.7664, |
| "step": 1202 |
| }, |
| { |
| "epoch": 16.040133779264213, |
| "grad_norm": 0.602427065372467, |
| "learning_rate": 0.00014674082313681868, |
| "loss": 2.8189, |
| "step": 1203 |
| }, |
| { |
| "epoch": 16.05351170568562, |
| "grad_norm": 0.7274264097213745, |
| "learning_rate": 0.00014669632925472747, |
| "loss": 2.9957, |
| "step": 1204 |
| }, |
| { |
| "epoch": 16.066889632107024, |
| "grad_norm": 0.6042147278785706, |
| "learning_rate": 0.00014665183537263628, |
| "loss": 2.8925, |
| "step": 1205 |
| }, |
| { |
| "epoch": 16.08026755852843, |
| "grad_norm": 0.592339813709259, |
| "learning_rate": 0.00014660734149054506, |
| "loss": 3.1463, |
| "step": 1206 |
| }, |
| { |
| "epoch": 16.093645484949832, |
| "grad_norm": 0.6099987030029297, |
| "learning_rate": 0.00014656284760845384, |
| "loss": 3.2264, |
| "step": 1207 |
| }, |
| { |
| "epoch": 16.107023411371237, |
| "grad_norm": 0.9324999451637268, |
| "learning_rate": 0.00014651835372636262, |
| "loss": 2.916, |
| "step": 1208 |
| }, |
| { |
| "epoch": 16.120401337792643, |
| "grad_norm": 0.6000798344612122, |
| "learning_rate": 0.00014647385984427143, |
| "loss": 2.9242, |
| "step": 1209 |
| }, |
| { |
| "epoch": 16.13377926421405, |
| "grad_norm": 0.609199047088623, |
| "learning_rate": 0.0001464293659621802, |
| "loss": 2.9405, |
| "step": 1210 |
| }, |
| { |
| "epoch": 16.14715719063545, |
| "grad_norm": 0.6789796948432922, |
| "learning_rate": 0.000146384872080089, |
| "loss": 3.003, |
| "step": 1211 |
| }, |
| { |
| "epoch": 16.160535117056856, |
| "grad_norm": 0.6567651629447937, |
| "learning_rate": 0.00014634037819799778, |
| "loss": 2.8733, |
| "step": 1212 |
| }, |
| { |
| "epoch": 16.17391304347826, |
| "grad_norm": 0.5860549211502075, |
| "learning_rate": 0.00014629588431590656, |
| "loss": 3.1429, |
| "step": 1213 |
| }, |
| { |
| "epoch": 16.187290969899667, |
| "grad_norm": 0.6623414158821106, |
| "learning_rate": 0.00014625139043381534, |
| "loss": 3.1741, |
| "step": 1214 |
| }, |
| { |
| "epoch": 16.20066889632107, |
| "grad_norm": 0.8366180062294006, |
| "learning_rate": 0.00014620689655172415, |
| "loss": 2.8627, |
| "step": 1215 |
| }, |
| { |
| "epoch": 16.214046822742475, |
| "grad_norm": 0.616780698299408, |
| "learning_rate": 0.00014616240266963294, |
| "loss": 2.7619, |
| "step": 1216 |
| }, |
| { |
| "epoch": 16.22742474916388, |
| "grad_norm": 0.6345306634902954, |
| "learning_rate": 0.00014611790878754172, |
| "loss": 2.9104, |
| "step": 1217 |
| }, |
| { |
| "epoch": 16.240802675585286, |
| "grad_norm": 0.6326844096183777, |
| "learning_rate": 0.0001460734149054505, |
| "loss": 2.8453, |
| "step": 1218 |
| }, |
| { |
| "epoch": 16.254180602006688, |
| "grad_norm": 0.5441793203353882, |
| "learning_rate": 0.0001460289210233593, |
| "loss": 3.2924, |
| "step": 1219 |
| }, |
| { |
| "epoch": 16.267558528428093, |
| "grad_norm": 0.604637086391449, |
| "learning_rate": 0.00014598442714126807, |
| "loss": 3.0362, |
| "step": 1220 |
| }, |
| { |
| "epoch": 16.2809364548495, |
| "grad_norm": 0.6100621819496155, |
| "learning_rate": 0.00014593993325917687, |
| "loss": 2.917, |
| "step": 1221 |
| }, |
| { |
| "epoch": 16.294314381270905, |
| "grad_norm": 0.6323224902153015, |
| "learning_rate": 0.00014589543937708566, |
| "loss": 3.144, |
| "step": 1222 |
| }, |
| { |
| "epoch": 16.307692307692307, |
| "grad_norm": 0.595485270023346, |
| "learning_rate": 0.00014585094549499444, |
| "loss": 2.8869, |
| "step": 1223 |
| }, |
| { |
| "epoch": 16.321070234113712, |
| "grad_norm": 0.6350538730621338, |
| "learning_rate": 0.00014580645161290322, |
| "loss": 2.7253, |
| "step": 1224 |
| }, |
| { |
| "epoch": 16.334448160535118, |
| "grad_norm": 0.5804395079612732, |
| "learning_rate": 0.00014576195773081203, |
| "loss": 3.1523, |
| "step": 1225 |
| }, |
| { |
| "epoch": 16.347826086956523, |
| "grad_norm": 0.5905717015266418, |
| "learning_rate": 0.00014571746384872081, |
| "loss": 3.1419, |
| "step": 1226 |
| }, |
| { |
| "epoch": 16.361204013377925, |
| "grad_norm": 0.6824894547462463, |
| "learning_rate": 0.0001456729699666296, |
| "loss": 2.8953, |
| "step": 1227 |
| }, |
| { |
| "epoch": 16.37458193979933, |
| "grad_norm": 0.5840978622436523, |
| "learning_rate": 0.00014562847608453838, |
| "loss": 3.1229, |
| "step": 1228 |
| }, |
| { |
| "epoch": 16.387959866220736, |
| "grad_norm": 0.7102469801902771, |
| "learning_rate": 0.0001455839822024472, |
| "loss": 2.6571, |
| "step": 1229 |
| }, |
| { |
| "epoch": 16.401337792642142, |
| "grad_norm": 0.6148349046707153, |
| "learning_rate": 0.00014553948832035594, |
| "loss": 3.0084, |
| "step": 1230 |
| }, |
| { |
| "epoch": 16.414715719063544, |
| "grad_norm": 0.60859215259552, |
| "learning_rate": 0.00014549499443826475, |
| "loss": 3.1348, |
| "step": 1231 |
| }, |
| { |
| "epoch": 16.42809364548495, |
| "grad_norm": 0.6059513688087463, |
| "learning_rate": 0.00014545050055617354, |
| "loss": 2.9678, |
| "step": 1232 |
| }, |
| { |
| "epoch": 16.441471571906355, |
| "grad_norm": 0.6012231707572937, |
| "learning_rate": 0.00014540600667408232, |
| "loss": 2.9279, |
| "step": 1233 |
| }, |
| { |
| "epoch": 16.45484949832776, |
| "grad_norm": 0.6185587644577026, |
| "learning_rate": 0.0001453615127919911, |
| "loss": 2.9922, |
| "step": 1234 |
| }, |
| { |
| "epoch": 16.468227424749163, |
| "grad_norm": 0.5989127159118652, |
| "learning_rate": 0.0001453170189098999, |
| "loss": 3.0736, |
| "step": 1235 |
| }, |
| { |
| "epoch": 16.48160535117057, |
| "grad_norm": 0.673633337020874, |
| "learning_rate": 0.0001452725250278087, |
| "loss": 2.9703, |
| "step": 1236 |
| }, |
| { |
| "epoch": 16.494983277591974, |
| "grad_norm": 0.6815900206565857, |
| "learning_rate": 0.00014522803114571747, |
| "loss": 3.0171, |
| "step": 1237 |
| }, |
| { |
| "epoch": 16.50836120401338, |
| "grad_norm": 0.7687232494354248, |
| "learning_rate": 0.00014518353726362626, |
| "loss": 2.8988, |
| "step": 1238 |
| }, |
| { |
| "epoch": 16.52173913043478, |
| "grad_norm": 0.7992755174636841, |
| "learning_rate": 0.00014513904338153507, |
| "loss": 3.0347, |
| "step": 1239 |
| }, |
| { |
| "epoch": 16.535117056856187, |
| "grad_norm": 0.8673639893531799, |
| "learning_rate": 0.00014509454949944382, |
| "loss": 2.9654, |
| "step": 1240 |
| }, |
| { |
| "epoch": 16.548494983277592, |
| "grad_norm": 0.6200671792030334, |
| "learning_rate": 0.00014505005561735263, |
| "loss": 3.1075, |
| "step": 1241 |
| }, |
| { |
| "epoch": 16.561872909698998, |
| "grad_norm": 0.8055624961853027, |
| "learning_rate": 0.0001450055617352614, |
| "loss": 2.877, |
| "step": 1242 |
| }, |
| { |
| "epoch": 16.5752508361204, |
| "grad_norm": 0.6428245902061462, |
| "learning_rate": 0.0001449610678531702, |
| "loss": 3.0326, |
| "step": 1243 |
| }, |
| { |
| "epoch": 16.588628762541806, |
| "grad_norm": 0.844804584980011, |
| "learning_rate": 0.00014491657397107898, |
| "loss": 2.8093, |
| "step": 1244 |
| }, |
| { |
| "epoch": 16.60200668896321, |
| "grad_norm": 0.5699613690376282, |
| "learning_rate": 0.00014487208008898776, |
| "loss": 3.1914, |
| "step": 1245 |
| }, |
| { |
| "epoch": 16.615384615384617, |
| "grad_norm": 0.6638582348823547, |
| "learning_rate": 0.00014482758620689657, |
| "loss": 2.7484, |
| "step": 1246 |
| }, |
| { |
| "epoch": 16.62876254180602, |
| "grad_norm": 0.7390914559364319, |
| "learning_rate": 0.00014478309232480533, |
| "loss": 3.0355, |
| "step": 1247 |
| }, |
| { |
| "epoch": 16.642140468227424, |
| "grad_norm": 0.6177923083305359, |
| "learning_rate": 0.00014473859844271413, |
| "loss": 3.0067, |
| "step": 1248 |
| }, |
| { |
| "epoch": 16.65551839464883, |
| "grad_norm": 0.6234062314033508, |
| "learning_rate": 0.00014469410456062292, |
| "loss": 2.8153, |
| "step": 1249 |
| }, |
| { |
| "epoch": 16.668896321070235, |
| "grad_norm": 0.8505418300628662, |
| "learning_rate": 0.0001446496106785317, |
| "loss": 2.8292, |
| "step": 1250 |
| }, |
| { |
| "epoch": 16.682274247491637, |
| "grad_norm": 0.8339266180992126, |
| "learning_rate": 0.00014460511679644048, |
| "loss": 2.8072, |
| "step": 1251 |
| }, |
| { |
| "epoch": 16.695652173913043, |
| "grad_norm": 0.5782635807991028, |
| "learning_rate": 0.0001445606229143493, |
| "loss": 3.1079, |
| "step": 1252 |
| }, |
| { |
| "epoch": 16.70903010033445, |
| "grad_norm": 0.687126874923706, |
| "learning_rate": 0.00014451612903225807, |
| "loss": 3.1401, |
| "step": 1253 |
| }, |
| { |
| "epoch": 16.722408026755854, |
| "grad_norm": 0.7313762307167053, |
| "learning_rate": 0.00014447163515016686, |
| "loss": 2.9167, |
| "step": 1254 |
| }, |
| { |
| "epoch": 16.735785953177256, |
| "grad_norm": 0.8815247416496277, |
| "learning_rate": 0.00014442714126807564, |
| "loss": 3.0294, |
| "step": 1255 |
| }, |
| { |
| "epoch": 16.74916387959866, |
| "grad_norm": 0.7636277675628662, |
| "learning_rate": 0.00014438264738598445, |
| "loss": 3.0758, |
| "step": 1256 |
| }, |
| { |
| "epoch": 16.762541806020067, |
| "grad_norm": 0.5961578488349915, |
| "learning_rate": 0.0001443381535038932, |
| "loss": 3.1027, |
| "step": 1257 |
| }, |
| { |
| "epoch": 16.775919732441473, |
| "grad_norm": 0.6840028762817383, |
| "learning_rate": 0.000144293659621802, |
| "loss": 2.9192, |
| "step": 1258 |
| }, |
| { |
| "epoch": 16.789297658862875, |
| "grad_norm": 0.7895340323448181, |
| "learning_rate": 0.0001442491657397108, |
| "loss": 2.8772, |
| "step": 1259 |
| }, |
| { |
| "epoch": 16.80267558528428, |
| "grad_norm": 0.8516091704368591, |
| "learning_rate": 0.00014420467185761958, |
| "loss": 2.9168, |
| "step": 1260 |
| }, |
| { |
| "epoch": 16.816053511705686, |
| "grad_norm": 0.6745076179504395, |
| "learning_rate": 0.00014416017797552836, |
| "loss": 3.1352, |
| "step": 1261 |
| }, |
| { |
| "epoch": 16.82943143812709, |
| "grad_norm": 0.6744667887687683, |
| "learning_rate": 0.00014411568409343717, |
| "loss": 2.8439, |
| "step": 1262 |
| }, |
| { |
| "epoch": 16.842809364548494, |
| "grad_norm": 0.6307089924812317, |
| "learning_rate": 0.00014407119021134595, |
| "loss": 3.2054, |
| "step": 1263 |
| }, |
| { |
| "epoch": 16.8561872909699, |
| "grad_norm": 0.6480753421783447, |
| "learning_rate": 0.00014402669632925473, |
| "loss": 3.039, |
| "step": 1264 |
| }, |
| { |
| "epoch": 16.869565217391305, |
| "grad_norm": 0.6143667697906494, |
| "learning_rate": 0.00014398220244716352, |
| "loss": 2.9475, |
| "step": 1265 |
| }, |
| { |
| "epoch": 16.88294314381271, |
| "grad_norm": 0.6289299130439758, |
| "learning_rate": 0.00014393770856507233, |
| "loss": 3.1437, |
| "step": 1266 |
| }, |
| { |
| "epoch": 16.896321070234112, |
| "grad_norm": 0.6618160009384155, |
| "learning_rate": 0.00014389321468298108, |
| "loss": 2.8641, |
| "step": 1267 |
| }, |
| { |
| "epoch": 16.909698996655518, |
| "grad_norm": 0.6053375601768494, |
| "learning_rate": 0.0001438487208008899, |
| "loss": 2.9129, |
| "step": 1268 |
| }, |
| { |
| "epoch": 16.923076923076923, |
| "grad_norm": 0.5706185102462769, |
| "learning_rate": 0.00014380422691879867, |
| "loss": 3.005, |
| "step": 1269 |
| }, |
| { |
| "epoch": 16.93645484949833, |
| "grad_norm": 0.6779253482818604, |
| "learning_rate": 0.00014375973303670746, |
| "loss": 3.1071, |
| "step": 1270 |
| }, |
| { |
| "epoch": 16.94983277591973, |
| "grad_norm": 0.6679616570472717, |
| "learning_rate": 0.00014371523915461624, |
| "loss": 3.1792, |
| "step": 1271 |
| }, |
| { |
| "epoch": 16.963210702341136, |
| "grad_norm": 0.6018584966659546, |
| "learning_rate": 0.00014367074527252505, |
| "loss": 2.9947, |
| "step": 1272 |
| }, |
| { |
| "epoch": 16.976588628762542, |
| "grad_norm": 0.6106094717979431, |
| "learning_rate": 0.00014362625139043383, |
| "loss": 3.1965, |
| "step": 1273 |
| }, |
| { |
| "epoch": 16.989966555183948, |
| "grad_norm": 0.5486257672309875, |
| "learning_rate": 0.0001435817575083426, |
| "loss": 3.0975, |
| "step": 1274 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.6516265273094177, |
| "learning_rate": 0.0001435372636262514, |
| "loss": 3.1064, |
| "step": 1275 |
| }, |
| { |
| "epoch": 17.013377926421406, |
| "grad_norm": 0.6434454917907715, |
| "learning_rate": 0.0001434927697441602, |
| "loss": 2.8903, |
| "step": 1276 |
| }, |
| { |
| "epoch": 17.02675585284281, |
| "grad_norm": 0.6237186193466187, |
| "learning_rate": 0.00014344827586206896, |
| "loss": 3.135, |
| "step": 1277 |
| }, |
| { |
| "epoch": 17.040133779264213, |
| "grad_norm": 0.5920026302337646, |
| "learning_rate": 0.00014340378197997777, |
| "loss": 2.8612, |
| "step": 1278 |
| }, |
| { |
| "epoch": 17.05351170568562, |
| "grad_norm": 0.6545232534408569, |
| "learning_rate": 0.00014335928809788655, |
| "loss": 2.8934, |
| "step": 1279 |
| }, |
| { |
| "epoch": 17.066889632107024, |
| "grad_norm": 0.7839710116386414, |
| "learning_rate": 0.00014331479421579533, |
| "loss": 2.965, |
| "step": 1280 |
| }, |
| { |
| "epoch": 17.08026755852843, |
| "grad_norm": 0.6448978781700134, |
| "learning_rate": 0.00014327030033370412, |
| "loss": 2.9343, |
| "step": 1281 |
| }, |
| { |
| "epoch": 17.093645484949832, |
| "grad_norm": 0.5713958144187927, |
| "learning_rate": 0.00014322580645161293, |
| "loss": 3.1566, |
| "step": 1282 |
| }, |
| { |
| "epoch": 17.107023411371237, |
| "grad_norm": 0.666409969329834, |
| "learning_rate": 0.0001431813125695217, |
| "loss": 2.7705, |
| "step": 1283 |
| }, |
| { |
| "epoch": 17.120401337792643, |
| "grad_norm": 0.6068354249000549, |
| "learning_rate": 0.0001431368186874305, |
| "loss": 3.088, |
| "step": 1284 |
| }, |
| { |
| "epoch": 17.13377926421405, |
| "grad_norm": 0.8292580246925354, |
| "learning_rate": 0.00014309232480533927, |
| "loss": 2.8939, |
| "step": 1285 |
| }, |
| { |
| "epoch": 17.14715719063545, |
| "grad_norm": 0.6789494156837463, |
| "learning_rate": 0.00014304783092324806, |
| "loss": 3.2317, |
| "step": 1286 |
| }, |
| { |
| "epoch": 17.160535117056856, |
| "grad_norm": 0.6030963063240051, |
| "learning_rate": 0.00014300333704115684, |
| "loss": 2.9248, |
| "step": 1287 |
| }, |
| { |
| "epoch": 17.17391304347826, |
| "grad_norm": 0.7090041041374207, |
| "learning_rate": 0.00014295884315906562, |
| "loss": 2.9097, |
| "step": 1288 |
| }, |
| { |
| "epoch": 17.187290969899667, |
| "grad_norm": 0.5750879645347595, |
| "learning_rate": 0.00014291434927697443, |
| "loss": 2.9585, |
| "step": 1289 |
| }, |
| { |
| "epoch": 17.20066889632107, |
| "grad_norm": 0.6379792094230652, |
| "learning_rate": 0.0001428698553948832, |
| "loss": 2.794, |
| "step": 1290 |
| }, |
| { |
| "epoch": 17.214046822742475, |
| "grad_norm": 0.6736400127410889, |
| "learning_rate": 0.000142825361512792, |
| "loss": 2.8812, |
| "step": 1291 |
| }, |
| { |
| "epoch": 17.22742474916388, |
| "grad_norm": 0.6580933332443237, |
| "learning_rate": 0.00014278086763070078, |
| "loss": 2.8029, |
| "step": 1292 |
| }, |
| { |
| "epoch": 17.240802675585286, |
| "grad_norm": 0.6550527215003967, |
| "learning_rate": 0.00014273637374860959, |
| "loss": 3.1135, |
| "step": 1293 |
| }, |
| { |
| "epoch": 17.254180602006688, |
| "grad_norm": 0.6616796255111694, |
| "learning_rate": 0.00014269187986651834, |
| "loss": 2.8916, |
| "step": 1294 |
| }, |
| { |
| "epoch": 17.267558528428093, |
| "grad_norm": 0.7247623801231384, |
| "learning_rate": 0.00014264738598442715, |
| "loss": 2.8482, |
| "step": 1295 |
| }, |
| { |
| "epoch": 17.2809364548495, |
| "grad_norm": 0.7138639092445374, |
| "learning_rate": 0.00014260289210233593, |
| "loss": 2.9283, |
| "step": 1296 |
| }, |
| { |
| "epoch": 17.294314381270905, |
| "grad_norm": 0.6413894891738892, |
| "learning_rate": 0.00014255839822024472, |
| "loss": 3.0273, |
| "step": 1297 |
| }, |
| { |
| "epoch": 17.307692307692307, |
| "grad_norm": 0.6106882095336914, |
| "learning_rate": 0.0001425139043381535, |
| "loss": 3.062, |
| "step": 1298 |
| }, |
| { |
| "epoch": 17.321070234113712, |
| "grad_norm": 0.6762199997901917, |
| "learning_rate": 0.0001424694104560623, |
| "loss": 2.7864, |
| "step": 1299 |
| }, |
| { |
| "epoch": 17.334448160535118, |
| "grad_norm": 0.65083909034729, |
| "learning_rate": 0.0001424249165739711, |
| "loss": 3.115, |
| "step": 1300 |
| }, |
| { |
| "epoch": 17.347826086956523, |
| "grad_norm": 0.7381249666213989, |
| "learning_rate": 0.00014238042269187987, |
| "loss": 2.6981, |
| "step": 1301 |
| }, |
| { |
| "epoch": 17.361204013377925, |
| "grad_norm": 0.5674475431442261, |
| "learning_rate": 0.00014233592880978865, |
| "loss": 2.8646, |
| "step": 1302 |
| }, |
| { |
| "epoch": 17.37458193979933, |
| "grad_norm": 0.6201330423355103, |
| "learning_rate": 0.00014229143492769746, |
| "loss": 3.207, |
| "step": 1303 |
| }, |
| { |
| "epoch": 17.387959866220736, |
| "grad_norm": 0.7004446983337402, |
| "learning_rate": 0.00014224694104560622, |
| "loss": 2.7623, |
| "step": 1304 |
| }, |
| { |
| "epoch": 17.401337792642142, |
| "grad_norm": 0.7278717756271362, |
| "learning_rate": 0.00014220244716351503, |
| "loss": 2.7967, |
| "step": 1305 |
| }, |
| { |
| "epoch": 17.414715719063544, |
| "grad_norm": 0.6384133696556091, |
| "learning_rate": 0.0001421579532814238, |
| "loss": 2.7542, |
| "step": 1306 |
| }, |
| { |
| "epoch": 17.42809364548495, |
| "grad_norm": 0.6443619132041931, |
| "learning_rate": 0.0001421134593993326, |
| "loss": 2.9376, |
| "step": 1307 |
| }, |
| { |
| "epoch": 17.441471571906355, |
| "grad_norm": 0.6889017224311829, |
| "learning_rate": 0.00014206896551724138, |
| "loss": 2.8868, |
| "step": 1308 |
| }, |
| { |
| "epoch": 17.45484949832776, |
| "grad_norm": 0.624864935874939, |
| "learning_rate": 0.00014202447163515019, |
| "loss": 3.0459, |
| "step": 1309 |
| }, |
| { |
| "epoch": 17.468227424749163, |
| "grad_norm": 0.5964120626449585, |
| "learning_rate": 0.00014197997775305897, |
| "loss": 2.8604, |
| "step": 1310 |
| }, |
| { |
| "epoch": 17.48160535117057, |
| "grad_norm": 0.5584320425987244, |
| "learning_rate": 0.00014193548387096775, |
| "loss": 3.1267, |
| "step": 1311 |
| }, |
| { |
| "epoch": 17.494983277591974, |
| "grad_norm": 0.6566155552864075, |
| "learning_rate": 0.00014189098998887653, |
| "loss": 2.7059, |
| "step": 1312 |
| }, |
| { |
| "epoch": 17.50836120401338, |
| "grad_norm": 0.7166509032249451, |
| "learning_rate": 0.00014184649610678534, |
| "loss": 3.1076, |
| "step": 1313 |
| }, |
| { |
| "epoch": 17.52173913043478, |
| "grad_norm": 0.6076642870903015, |
| "learning_rate": 0.0001418020022246941, |
| "loss": 3.1121, |
| "step": 1314 |
| }, |
| { |
| "epoch": 17.535117056856187, |
| "grad_norm": 0.6293672323226929, |
| "learning_rate": 0.0001417575083426029, |
| "loss": 3.0719, |
| "step": 1315 |
| }, |
| { |
| "epoch": 17.548494983277592, |
| "grad_norm": 0.6696231365203857, |
| "learning_rate": 0.0001417130144605117, |
| "loss": 2.4281, |
| "step": 1316 |
| }, |
| { |
| "epoch": 17.561872909698998, |
| "grad_norm": 0.7928171157836914, |
| "learning_rate": 0.00014166852057842047, |
| "loss": 2.8602, |
| "step": 1317 |
| }, |
| { |
| "epoch": 17.5752508361204, |
| "grad_norm": 0.5897494554519653, |
| "learning_rate": 0.00014162402669632925, |
| "loss": 3.1556, |
| "step": 1318 |
| }, |
| { |
| "epoch": 17.588628762541806, |
| "grad_norm": 0.6028451323509216, |
| "learning_rate": 0.00014157953281423806, |
| "loss": 3.1228, |
| "step": 1319 |
| }, |
| { |
| "epoch": 17.60200668896321, |
| "grad_norm": 0.6237207651138306, |
| "learning_rate": 0.00014153503893214685, |
| "loss": 2.9691, |
| "step": 1320 |
| }, |
| { |
| "epoch": 17.615384615384617, |
| "grad_norm": 0.6401494741439819, |
| "learning_rate": 0.00014149054505005563, |
| "loss": 3.0659, |
| "step": 1321 |
| }, |
| { |
| "epoch": 17.62876254180602, |
| "grad_norm": 0.7098166942596436, |
| "learning_rate": 0.0001414460511679644, |
| "loss": 2.9606, |
| "step": 1322 |
| }, |
| { |
| "epoch": 17.642140468227424, |
| "grad_norm": 0.6416228413581848, |
| "learning_rate": 0.00014140155728587322, |
| "loss": 3.1056, |
| "step": 1323 |
| }, |
| { |
| "epoch": 17.65551839464883, |
| "grad_norm": 0.7303211092948914, |
| "learning_rate": 0.00014135706340378198, |
| "loss": 2.8604, |
| "step": 1324 |
| }, |
| { |
| "epoch": 17.668896321070235, |
| "grad_norm": 0.64544677734375, |
| "learning_rate": 0.00014131256952169079, |
| "loss": 2.6751, |
| "step": 1325 |
| }, |
| { |
| "epoch": 17.682274247491637, |
| "grad_norm": 0.6870211362838745, |
| "learning_rate": 0.00014126807563959957, |
| "loss": 2.9802, |
| "step": 1326 |
| }, |
| { |
| "epoch": 17.695652173913043, |
| "grad_norm": 0.6570687294006348, |
| "learning_rate": 0.00014122358175750835, |
| "loss": 3.0496, |
| "step": 1327 |
| }, |
| { |
| "epoch": 17.70903010033445, |
| "grad_norm": 0.7057302594184875, |
| "learning_rate": 0.00014117908787541713, |
| "loss": 2.845, |
| "step": 1328 |
| }, |
| { |
| "epoch": 17.722408026755854, |
| "grad_norm": 0.8613574504852295, |
| "learning_rate": 0.00014113459399332591, |
| "loss": 3.0314, |
| "step": 1329 |
| }, |
| { |
| "epoch": 17.735785953177256, |
| "grad_norm": 0.7408957481384277, |
| "learning_rate": 0.00014109010011123472, |
| "loss": 2.8651, |
| "step": 1330 |
| }, |
| { |
| "epoch": 17.74916387959866, |
| "grad_norm": 0.6553664803504944, |
| "learning_rate": 0.00014104560622914348, |
| "loss": 2.9647, |
| "step": 1331 |
| }, |
| { |
| "epoch": 17.762541806020067, |
| "grad_norm": 0.5991332530975342, |
| "learning_rate": 0.0001410011123470523, |
| "loss": 2.9796, |
| "step": 1332 |
| }, |
| { |
| "epoch": 17.775919732441473, |
| "grad_norm": 0.6124044060707092, |
| "learning_rate": 0.00014095661846496107, |
| "loss": 3.0682, |
| "step": 1333 |
| }, |
| { |
| "epoch": 17.789297658862875, |
| "grad_norm": 0.5788628458976746, |
| "learning_rate": 0.00014091212458286985, |
| "loss": 3.0357, |
| "step": 1334 |
| }, |
| { |
| "epoch": 17.80267558528428, |
| "grad_norm": 0.6785842776298523, |
| "learning_rate": 0.00014086763070077864, |
| "loss": 3.1128, |
| "step": 1335 |
| }, |
| { |
| "epoch": 17.816053511705686, |
| "grad_norm": 0.5994388461112976, |
| "learning_rate": 0.00014082313681868745, |
| "loss": 2.9087, |
| "step": 1336 |
| }, |
| { |
| "epoch": 17.82943143812709, |
| "grad_norm": 0.6150069236755371, |
| "learning_rate": 0.00014077864293659623, |
| "loss": 2.9488, |
| "step": 1337 |
| }, |
| { |
| "epoch": 17.842809364548494, |
| "grad_norm": 0.6211126446723938, |
| "learning_rate": 0.000140734149054505, |
| "loss": 2.8006, |
| "step": 1338 |
| }, |
| { |
| "epoch": 17.8561872909699, |
| "grad_norm": 0.6093603372573853, |
| "learning_rate": 0.0001406896551724138, |
| "loss": 3.0013, |
| "step": 1339 |
| }, |
| { |
| "epoch": 17.869565217391305, |
| "grad_norm": 0.6861109137535095, |
| "learning_rate": 0.0001406451612903226, |
| "loss": 2.9423, |
| "step": 1340 |
| }, |
| { |
| "epoch": 17.88294314381271, |
| "grad_norm": 0.6148517727851868, |
| "learning_rate": 0.00014060066740823136, |
| "loss": 2.8471, |
| "step": 1341 |
| }, |
| { |
| "epoch": 17.896321070234112, |
| "grad_norm": 0.8285694718360901, |
| "learning_rate": 0.00014055617352614017, |
| "loss": 3.055, |
| "step": 1342 |
| }, |
| { |
| "epoch": 17.909698996655518, |
| "grad_norm": 0.5955973863601685, |
| "learning_rate": 0.00014051167964404895, |
| "loss": 3.0088, |
| "step": 1343 |
| }, |
| { |
| "epoch": 17.923076923076923, |
| "grad_norm": 0.6020825505256653, |
| "learning_rate": 0.00014046718576195773, |
| "loss": 3.1445, |
| "step": 1344 |
| }, |
| { |
| "epoch": 17.93645484949833, |
| "grad_norm": 0.6115384101867676, |
| "learning_rate": 0.00014042269187986651, |
| "loss": 3.0889, |
| "step": 1345 |
| }, |
| { |
| "epoch": 17.94983277591973, |
| "grad_norm": 0.6469634175300598, |
| "learning_rate": 0.00014037819799777532, |
| "loss": 3.1916, |
| "step": 1346 |
| }, |
| { |
| "epoch": 17.963210702341136, |
| "grad_norm": 0.6653386354446411, |
| "learning_rate": 0.0001403337041156841, |
| "loss": 2.8805, |
| "step": 1347 |
| }, |
| { |
| "epoch": 17.976588628762542, |
| "grad_norm": 0.6167243719100952, |
| "learning_rate": 0.0001402892102335929, |
| "loss": 2.9819, |
| "step": 1348 |
| }, |
| { |
| "epoch": 17.989966555183948, |
| "grad_norm": 0.6281883716583252, |
| "learning_rate": 0.00014024471635150167, |
| "loss": 3.1109, |
| "step": 1349 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.6747295260429382, |
| "learning_rate": 0.00014020022246941048, |
| "loss": 2.8772, |
| "step": 1350 |
| }, |
| { |
| "epoch": 18.013377926421406, |
| "grad_norm": 0.5834116339683533, |
| "learning_rate": 0.00014015572858731924, |
| "loss": 2.9399, |
| "step": 1351 |
| }, |
| { |
| "epoch": 18.02675585284281, |
| "grad_norm": 0.620858371257782, |
| "learning_rate": 0.00014011123470522805, |
| "loss": 2.7351, |
| "step": 1352 |
| }, |
| { |
| "epoch": 18.040133779264213, |
| "grad_norm": 0.5407689809799194, |
| "learning_rate": 0.00014006674082313683, |
| "loss": 2.8329, |
| "step": 1353 |
| }, |
| { |
| "epoch": 18.05351170568562, |
| "grad_norm": 0.6045056581497192, |
| "learning_rate": 0.0001400222469410456, |
| "loss": 2.7382, |
| "step": 1354 |
| }, |
| { |
| "epoch": 18.066889632107024, |
| "grad_norm": 0.5433570146560669, |
| "learning_rate": 0.0001399777530589544, |
| "loss": 2.9154, |
| "step": 1355 |
| }, |
| { |
| "epoch": 18.08026755852843, |
| "grad_norm": 0.6174083352088928, |
| "learning_rate": 0.0001399332591768632, |
| "loss": 2.7438, |
| "step": 1356 |
| }, |
| { |
| "epoch": 18.093645484949832, |
| "grad_norm": 0.6720690727233887, |
| "learning_rate": 0.00013988876529477198, |
| "loss": 3.1324, |
| "step": 1357 |
| }, |
| { |
| "epoch": 18.107023411371237, |
| "grad_norm": 0.648423433303833, |
| "learning_rate": 0.00013984427141268077, |
| "loss": 2.7802, |
| "step": 1358 |
| }, |
| { |
| "epoch": 18.120401337792643, |
| "grad_norm": 0.6625978350639343, |
| "learning_rate": 0.00013979977753058955, |
| "loss": 2.9152, |
| "step": 1359 |
| }, |
| { |
| "epoch": 18.13377926421405, |
| "grad_norm": 0.5362007021903992, |
| "learning_rate": 0.00013975528364849836, |
| "loss": 2.9552, |
| "step": 1360 |
| }, |
| { |
| "epoch": 18.14715719063545, |
| "grad_norm": 0.6275555491447449, |
| "learning_rate": 0.00013971078976640711, |
| "loss": 3.0742, |
| "step": 1361 |
| }, |
| { |
| "epoch": 18.160535117056856, |
| "grad_norm": 0.5755884647369385, |
| "learning_rate": 0.00013966629588431592, |
| "loss": 2.8705, |
| "step": 1362 |
| }, |
| { |
| "epoch": 18.17391304347826, |
| "grad_norm": 0.5092719793319702, |
| "learning_rate": 0.0001396218020022247, |
| "loss": 2.9684, |
| "step": 1363 |
| }, |
| { |
| "epoch": 18.187290969899667, |
| "grad_norm": 0.7400075197219849, |
| "learning_rate": 0.0001395773081201335, |
| "loss": 2.9726, |
| "step": 1364 |
| }, |
| { |
| "epoch": 18.20066889632107, |
| "grad_norm": 0.6478124260902405, |
| "learning_rate": 0.00013953281423804227, |
| "loss": 2.7427, |
| "step": 1365 |
| }, |
| { |
| "epoch": 18.214046822742475, |
| "grad_norm": 0.6313418745994568, |
| "learning_rate": 0.00013948832035595108, |
| "loss": 2.7713, |
| "step": 1366 |
| }, |
| { |
| "epoch": 18.22742474916388, |
| "grad_norm": 0.5571421980857849, |
| "learning_rate": 0.00013944382647385986, |
| "loss": 2.9221, |
| "step": 1367 |
| }, |
| { |
| "epoch": 18.240802675585286, |
| "grad_norm": 0.5346395373344421, |
| "learning_rate": 0.00013939933259176862, |
| "loss": 2.9842, |
| "step": 1368 |
| }, |
| { |
| "epoch": 18.254180602006688, |
| "grad_norm": 0.5828048586845398, |
| "learning_rate": 0.00013935483870967743, |
| "loss": 2.8683, |
| "step": 1369 |
| }, |
| { |
| "epoch": 18.267558528428093, |
| "grad_norm": 0.6446037888526917, |
| "learning_rate": 0.0001393103448275862, |
| "loss": 2.8, |
| "step": 1370 |
| }, |
| { |
| "epoch": 18.2809364548495, |
| "grad_norm": 0.612689197063446, |
| "learning_rate": 0.000139265850945495, |
| "loss": 3.0642, |
| "step": 1371 |
| }, |
| { |
| "epoch": 18.294314381270905, |
| "grad_norm": 0.5511941909790039, |
| "learning_rate": 0.00013922135706340377, |
| "loss": 3.0633, |
| "step": 1372 |
| }, |
| { |
| "epoch": 18.307692307692307, |
| "grad_norm": 0.7538149356842041, |
| "learning_rate": 0.00013917686318131258, |
| "loss": 2.7224, |
| "step": 1373 |
| }, |
| { |
| "epoch": 18.321070234113712, |
| "grad_norm": 0.6194874048233032, |
| "learning_rate": 0.00013913236929922137, |
| "loss": 2.9357, |
| "step": 1374 |
| }, |
| { |
| "epoch": 18.334448160535118, |
| "grad_norm": 0.5770833492279053, |
| "learning_rate": 0.00013908787541713015, |
| "loss": 2.8585, |
| "step": 1375 |
| }, |
| { |
| "epoch": 18.347826086956523, |
| "grad_norm": 0.609080970287323, |
| "learning_rate": 0.00013904338153503893, |
| "loss": 2.7763, |
| "step": 1376 |
| }, |
| { |
| "epoch": 18.361204013377925, |
| "grad_norm": 0.5578462481498718, |
| "learning_rate": 0.00013899888765294774, |
| "loss": 3.0324, |
| "step": 1377 |
| }, |
| { |
| "epoch": 18.37458193979933, |
| "grad_norm": 0.5949610471725464, |
| "learning_rate": 0.0001389543937708565, |
| "loss": 2.7332, |
| "step": 1378 |
| }, |
| { |
| "epoch": 18.387959866220736, |
| "grad_norm": 0.6248785257339478, |
| "learning_rate": 0.0001389098998887653, |
| "loss": 2.7355, |
| "step": 1379 |
| }, |
| { |
| "epoch": 18.401337792642142, |
| "grad_norm": 0.606239378452301, |
| "learning_rate": 0.0001388654060066741, |
| "loss": 2.9883, |
| "step": 1380 |
| }, |
| { |
| "epoch": 18.414715719063544, |
| "grad_norm": 0.6222496628761292, |
| "learning_rate": 0.00013882091212458287, |
| "loss": 2.7384, |
| "step": 1381 |
| }, |
| { |
| "epoch": 18.42809364548495, |
| "grad_norm": 0.6253412365913391, |
| "learning_rate": 0.00013877641824249165, |
| "loss": 2.7555, |
| "step": 1382 |
| }, |
| { |
| "epoch": 18.441471571906355, |
| "grad_norm": 0.6204626560211182, |
| "learning_rate": 0.00013873192436040046, |
| "loss": 2.7279, |
| "step": 1383 |
| }, |
| { |
| "epoch": 18.45484949832776, |
| "grad_norm": 0.7254919409751892, |
| "learning_rate": 0.00013868743047830924, |
| "loss": 2.9053, |
| "step": 1384 |
| }, |
| { |
| "epoch": 18.468227424749163, |
| "grad_norm": 0.6207154393196106, |
| "learning_rate": 0.00013864293659621803, |
| "loss": 3.0648, |
| "step": 1385 |
| }, |
| { |
| "epoch": 18.48160535117057, |
| "grad_norm": 0.6959066390991211, |
| "learning_rate": 0.0001385984427141268, |
| "loss": 2.9641, |
| "step": 1386 |
| }, |
| { |
| "epoch": 18.494983277591974, |
| "grad_norm": 0.6345707774162292, |
| "learning_rate": 0.00013855394883203562, |
| "loss": 2.9452, |
| "step": 1387 |
| }, |
| { |
| "epoch": 18.50836120401338, |
| "grad_norm": 0.5806639790534973, |
| "learning_rate": 0.00013850945494994437, |
| "loss": 2.9375, |
| "step": 1388 |
| }, |
| { |
| "epoch": 18.52173913043478, |
| "grad_norm": 0.6498666405677795, |
| "learning_rate": 0.00013846496106785318, |
| "loss": 2.867, |
| "step": 1389 |
| }, |
| { |
| "epoch": 18.535117056856187, |
| "grad_norm": 0.629264771938324, |
| "learning_rate": 0.00013842046718576197, |
| "loss": 2.822, |
| "step": 1390 |
| }, |
| { |
| "epoch": 18.548494983277592, |
| "grad_norm": 0.6734644174575806, |
| "learning_rate": 0.00013837597330367075, |
| "loss": 2.9065, |
| "step": 1391 |
| }, |
| { |
| "epoch": 18.561872909698998, |
| "grad_norm": 0.5705899000167847, |
| "learning_rate": 0.00013833147942157953, |
| "loss": 2.9909, |
| "step": 1392 |
| }, |
| { |
| "epoch": 18.5752508361204, |
| "grad_norm": 0.6786744594573975, |
| "learning_rate": 0.00013828698553948834, |
| "loss": 3.0667, |
| "step": 1393 |
| }, |
| { |
| "epoch": 18.588628762541806, |
| "grad_norm": 0.6044118404388428, |
| "learning_rate": 0.00013824249165739712, |
| "loss": 2.8345, |
| "step": 1394 |
| }, |
| { |
| "epoch": 18.60200668896321, |
| "grad_norm": 0.5928333401679993, |
| "learning_rate": 0.0001381979977753059, |
| "loss": 2.974, |
| "step": 1395 |
| }, |
| { |
| "epoch": 18.615384615384617, |
| "grad_norm": 0.636883556842804, |
| "learning_rate": 0.0001381535038932147, |
| "loss": 3.1034, |
| "step": 1396 |
| }, |
| { |
| "epoch": 18.62876254180602, |
| "grad_norm": 0.6029159426689148, |
| "learning_rate": 0.0001381090100111235, |
| "loss": 3.0303, |
| "step": 1397 |
| }, |
| { |
| "epoch": 18.642140468227424, |
| "grad_norm": 0.6479122638702393, |
| "learning_rate": 0.00013806451612903225, |
| "loss": 3.1782, |
| "step": 1398 |
| }, |
| { |
| "epoch": 18.65551839464883, |
| "grad_norm": 0.6547753810882568, |
| "learning_rate": 0.00013802002224694106, |
| "loss": 3.0594, |
| "step": 1399 |
| }, |
| { |
| "epoch": 18.668896321070235, |
| "grad_norm": 0.6506614089012146, |
| "learning_rate": 0.00013797552836484984, |
| "loss": 3.0955, |
| "step": 1400 |
| }, |
| { |
| "epoch": 18.682274247491637, |
| "grad_norm": 0.6073411107063293, |
| "learning_rate": 0.00013793103448275863, |
| "loss": 2.7452, |
| "step": 1401 |
| }, |
| { |
| "epoch": 18.695652173913043, |
| "grad_norm": 0.6307429075241089, |
| "learning_rate": 0.0001378865406006674, |
| "loss": 2.8329, |
| "step": 1402 |
| }, |
| { |
| "epoch": 18.70903010033445, |
| "grad_norm": 0.7205286026000977, |
| "learning_rate": 0.00013784204671857622, |
| "loss": 2.8228, |
| "step": 1403 |
| }, |
| { |
| "epoch": 18.722408026755854, |
| "grad_norm": 0.6154866814613342, |
| "learning_rate": 0.000137797552836485, |
| "loss": 2.7081, |
| "step": 1404 |
| }, |
| { |
| "epoch": 18.735785953177256, |
| "grad_norm": 0.548799455165863, |
| "learning_rate": 0.00013775305895439378, |
| "loss": 2.9336, |
| "step": 1405 |
| }, |
| { |
| "epoch": 18.74916387959866, |
| "grad_norm": 0.5723371505737305, |
| "learning_rate": 0.00013770856507230257, |
| "loss": 2.9661, |
| "step": 1406 |
| }, |
| { |
| "epoch": 18.762541806020067, |
| "grad_norm": 0.5917731523513794, |
| "learning_rate": 0.00013766407119021137, |
| "loss": 2.8458, |
| "step": 1407 |
| }, |
| { |
| "epoch": 18.775919732441473, |
| "grad_norm": 0.6160814166069031, |
| "learning_rate": 0.00013761957730812013, |
| "loss": 2.8453, |
| "step": 1408 |
| }, |
| { |
| "epoch": 18.789297658862875, |
| "grad_norm": 0.5995500683784485, |
| "learning_rate": 0.0001375750834260289, |
| "loss": 3.1361, |
| "step": 1409 |
| }, |
| { |
| "epoch": 18.80267558528428, |
| "grad_norm": 0.5574924945831299, |
| "learning_rate": 0.00013753058954393772, |
| "loss": 2.9808, |
| "step": 1410 |
| }, |
| { |
| "epoch": 18.816053511705686, |
| "grad_norm": 0.6444510221481323, |
| "learning_rate": 0.0001374860956618465, |
| "loss": 2.9204, |
| "step": 1411 |
| }, |
| { |
| "epoch": 18.82943143812709, |
| "grad_norm": 0.6153264045715332, |
| "learning_rate": 0.0001374416017797553, |
| "loss": 3.036, |
| "step": 1412 |
| }, |
| { |
| "epoch": 18.842809364548494, |
| "grad_norm": 0.5951060652732849, |
| "learning_rate": 0.00013739710789766407, |
| "loss": 3.1012, |
| "step": 1413 |
| }, |
| { |
| "epoch": 18.8561872909699, |
| "grad_norm": 0.5688861012458801, |
| "learning_rate": 0.00013735261401557288, |
| "loss": 3.1568, |
| "step": 1414 |
| }, |
| { |
| "epoch": 18.869565217391305, |
| "grad_norm": 0.6256094574928284, |
| "learning_rate": 0.00013730812013348163, |
| "loss": 2.7721, |
| "step": 1415 |
| }, |
| { |
| "epoch": 18.88294314381271, |
| "grad_norm": 0.646250307559967, |
| "learning_rate": 0.00013726362625139044, |
| "loss": 3.0091, |
| "step": 1416 |
| }, |
| { |
| "epoch": 18.896321070234112, |
| "grad_norm": 0.6805879473686218, |
| "learning_rate": 0.00013721913236929923, |
| "loss": 2.8397, |
| "step": 1417 |
| }, |
| { |
| "epoch": 18.909698996655518, |
| "grad_norm": 0.6164728999137878, |
| "learning_rate": 0.000137174638487208, |
| "loss": 2.8318, |
| "step": 1418 |
| }, |
| { |
| "epoch": 18.923076923076923, |
| "grad_norm": 0.6298549771308899, |
| "learning_rate": 0.0001371301446051168, |
| "loss": 2.8493, |
| "step": 1419 |
| }, |
| { |
| "epoch": 18.93645484949833, |
| "grad_norm": 0.5760109424591064, |
| "learning_rate": 0.0001370856507230256, |
| "loss": 3.1736, |
| "step": 1420 |
| }, |
| { |
| "epoch": 18.94983277591973, |
| "grad_norm": 0.6126035451889038, |
| "learning_rate": 0.00013704115684093438, |
| "loss": 3.1313, |
| "step": 1421 |
| }, |
| { |
| "epoch": 18.963210702341136, |
| "grad_norm": 0.6092283129692078, |
| "learning_rate": 0.00013699666295884316, |
| "loss": 3.0601, |
| "step": 1422 |
| }, |
| { |
| "epoch": 18.976588628762542, |
| "grad_norm": 0.6506980657577515, |
| "learning_rate": 0.00013695216907675195, |
| "loss": 2.7787, |
| "step": 1423 |
| }, |
| { |
| "epoch": 18.989966555183948, |
| "grad_norm": 0.6060482263565063, |
| "learning_rate": 0.00013690767519466076, |
| "loss": 3.062, |
| "step": 1424 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.7881284952163696, |
| "learning_rate": 0.0001368631813125695, |
| "loss": 2.9882, |
| "step": 1425 |
| }, |
| { |
| "epoch": 19.013377926421406, |
| "grad_norm": 0.5459823608398438, |
| "learning_rate": 0.00013681868743047832, |
| "loss": 2.9312, |
| "step": 1426 |
| }, |
| { |
| "epoch": 19.02675585284281, |
| "grad_norm": 0.71108078956604, |
| "learning_rate": 0.0001367741935483871, |
| "loss": 2.6003, |
| "step": 1427 |
| }, |
| { |
| "epoch": 19.040133779264213, |
| "grad_norm": 0.6824572682380676, |
| "learning_rate": 0.00013672969966629589, |
| "loss": 2.8354, |
| "step": 1428 |
| }, |
| { |
| "epoch": 19.05351170568562, |
| "grad_norm": 0.607400119304657, |
| "learning_rate": 0.00013668520578420467, |
| "loss": 2.95, |
| "step": 1429 |
| }, |
| { |
| "epoch": 19.066889632107024, |
| "grad_norm": 0.5925526022911072, |
| "learning_rate": 0.00013664071190211348, |
| "loss": 2.7101, |
| "step": 1430 |
| }, |
| { |
| "epoch": 19.08026755852843, |
| "grad_norm": 0.6208476424217224, |
| "learning_rate": 0.00013659621802002226, |
| "loss": 2.7759, |
| "step": 1431 |
| }, |
| { |
| "epoch": 19.093645484949832, |
| "grad_norm": 0.6047778725624084, |
| "learning_rate": 0.00013655172413793104, |
| "loss": 3.0377, |
| "step": 1432 |
| }, |
| { |
| "epoch": 19.107023411371237, |
| "grad_norm": 0.5979378819465637, |
| "learning_rate": 0.00013650723025583983, |
| "loss": 2.8671, |
| "step": 1433 |
| }, |
| { |
| "epoch": 19.120401337792643, |
| "grad_norm": 0.6338753700256348, |
| "learning_rate": 0.00013646273637374863, |
| "loss": 2.7869, |
| "step": 1434 |
| }, |
| { |
| "epoch": 19.13377926421405, |
| "grad_norm": 0.6263737678527832, |
| "learning_rate": 0.0001364182424916574, |
| "loss": 2.6591, |
| "step": 1435 |
| }, |
| { |
| "epoch": 19.14715719063545, |
| "grad_norm": 0.646990180015564, |
| "learning_rate": 0.0001363737486095662, |
| "loss": 2.896, |
| "step": 1436 |
| }, |
| { |
| "epoch": 19.160535117056856, |
| "grad_norm": 0.5691138505935669, |
| "learning_rate": 0.00013632925472747498, |
| "loss": 2.8774, |
| "step": 1437 |
| }, |
| { |
| "epoch": 19.17391304347826, |
| "grad_norm": 0.5838844180107117, |
| "learning_rate": 0.00013628476084538376, |
| "loss": 2.9722, |
| "step": 1438 |
| }, |
| { |
| "epoch": 19.187290969899667, |
| "grad_norm": 0.6154463887214661, |
| "learning_rate": 0.00013624026696329255, |
| "loss": 3.0143, |
| "step": 1439 |
| }, |
| { |
| "epoch": 19.20066889632107, |
| "grad_norm": 0.6480549573898315, |
| "learning_rate": 0.00013619577308120136, |
| "loss": 2.8118, |
| "step": 1440 |
| }, |
| { |
| "epoch": 19.214046822742475, |
| "grad_norm": 0.7675592303276062, |
| "learning_rate": 0.00013615127919911014, |
| "loss": 2.865, |
| "step": 1441 |
| }, |
| { |
| "epoch": 19.22742474916388, |
| "grad_norm": 0.7231382131576538, |
| "learning_rate": 0.00013610678531701892, |
| "loss": 2.6942, |
| "step": 1442 |
| }, |
| { |
| "epoch": 19.240802675585286, |
| "grad_norm": 0.6359425187110901, |
| "learning_rate": 0.0001360622914349277, |
| "loss": 2.5976, |
| "step": 1443 |
| }, |
| { |
| "epoch": 19.254180602006688, |
| "grad_norm": 0.6486908793449402, |
| "learning_rate": 0.0001360177975528365, |
| "loss": 2.9494, |
| "step": 1444 |
| }, |
| { |
| "epoch": 19.267558528428093, |
| "grad_norm": 0.5930846929550171, |
| "learning_rate": 0.00013597330367074527, |
| "loss": 3.1295, |
| "step": 1445 |
| }, |
| { |
| "epoch": 19.2809364548495, |
| "grad_norm": 0.6988996267318726, |
| "learning_rate": 0.00013592880978865408, |
| "loss": 2.9627, |
| "step": 1446 |
| }, |
| { |
| "epoch": 19.294314381270905, |
| "grad_norm": 0.5971337556838989, |
| "learning_rate": 0.00013588431590656286, |
| "loss": 2.9636, |
| "step": 1447 |
| }, |
| { |
| "epoch": 19.307692307692307, |
| "grad_norm": 0.6479155421257019, |
| "learning_rate": 0.00013583982202447164, |
| "loss": 2.6575, |
| "step": 1448 |
| }, |
| { |
| "epoch": 19.321070234113712, |
| "grad_norm": 0.6771759986877441, |
| "learning_rate": 0.00013579532814238043, |
| "loss": 3.0667, |
| "step": 1449 |
| }, |
| { |
| "epoch": 19.334448160535118, |
| "grad_norm": 0.5785907506942749, |
| "learning_rate": 0.0001357508342602892, |
| "loss": 2.8839, |
| "step": 1450 |
| }, |
| { |
| "epoch": 19.347826086956523, |
| "grad_norm": 0.6315357089042664, |
| "learning_rate": 0.00013570634037819802, |
| "loss": 2.6925, |
| "step": 1451 |
| }, |
| { |
| "epoch": 19.361204013377925, |
| "grad_norm": 0.6047807931900024, |
| "learning_rate": 0.00013566184649610677, |
| "loss": 2.7948, |
| "step": 1452 |
| }, |
| { |
| "epoch": 19.37458193979933, |
| "grad_norm": 0.6079906225204468, |
| "learning_rate": 0.00013561735261401558, |
| "loss": 3.1798, |
| "step": 1453 |
| }, |
| { |
| "epoch": 19.387959866220736, |
| "grad_norm": 0.5820274353027344, |
| "learning_rate": 0.00013557285873192436, |
| "loss": 3.1159, |
| "step": 1454 |
| }, |
| { |
| "epoch": 19.401337792642142, |
| "grad_norm": 0.562022864818573, |
| "learning_rate": 0.00013552836484983315, |
| "loss": 2.9869, |
| "step": 1455 |
| }, |
| { |
| "epoch": 19.414715719063544, |
| "grad_norm": 0.6663182973861694, |
| "learning_rate": 0.00013548387096774193, |
| "loss": 2.8038, |
| "step": 1456 |
| }, |
| { |
| "epoch": 19.42809364548495, |
| "grad_norm": 0.7092719078063965, |
| "learning_rate": 0.00013543937708565074, |
| "loss": 3.0159, |
| "step": 1457 |
| }, |
| { |
| "epoch": 19.441471571906355, |
| "grad_norm": 0.5990714430809021, |
| "learning_rate": 0.00013539488320355952, |
| "loss": 2.8858, |
| "step": 1458 |
| }, |
| { |
| "epoch": 19.45484949832776, |
| "grad_norm": 0.600847065448761, |
| "learning_rate": 0.0001353503893214683, |
| "loss": 2.9352, |
| "step": 1459 |
| }, |
| { |
| "epoch": 19.468227424749163, |
| "grad_norm": 0.6393849849700928, |
| "learning_rate": 0.00013530589543937709, |
| "loss": 2.6911, |
| "step": 1460 |
| }, |
| { |
| "epoch": 19.48160535117057, |
| "grad_norm": 0.6170421838760376, |
| "learning_rate": 0.0001352614015572859, |
| "loss": 2.749, |
| "step": 1461 |
| }, |
| { |
| "epoch": 19.494983277591974, |
| "grad_norm": 0.53690105676651, |
| "learning_rate": 0.00013521690767519465, |
| "loss": 2.9516, |
| "step": 1462 |
| }, |
| { |
| "epoch": 19.50836120401338, |
| "grad_norm": 0.5976501703262329, |
| "learning_rate": 0.00013517241379310346, |
| "loss": 2.9996, |
| "step": 1463 |
| }, |
| { |
| "epoch": 19.52173913043478, |
| "grad_norm": 0.5892135500907898, |
| "learning_rate": 0.00013512791991101224, |
| "loss": 3.2039, |
| "step": 1464 |
| }, |
| { |
| "epoch": 19.535117056856187, |
| "grad_norm": 0.65968918800354, |
| "learning_rate": 0.00013508342602892102, |
| "loss": 2.5897, |
| "step": 1465 |
| }, |
| { |
| "epoch": 19.548494983277592, |
| "grad_norm": 0.591454267501831, |
| "learning_rate": 0.0001350389321468298, |
| "loss": 3.0104, |
| "step": 1466 |
| }, |
| { |
| "epoch": 19.561872909698998, |
| "grad_norm": 0.6272184252738953, |
| "learning_rate": 0.00013499443826473862, |
| "loss": 2.7266, |
| "step": 1467 |
| }, |
| { |
| "epoch": 19.5752508361204, |
| "grad_norm": 0.6142420172691345, |
| "learning_rate": 0.0001349499443826474, |
| "loss": 2.8358, |
| "step": 1468 |
| }, |
| { |
| "epoch": 19.588628762541806, |
| "grad_norm": 0.6268441677093506, |
| "learning_rate": 0.00013490545050055618, |
| "loss": 3.0196, |
| "step": 1469 |
| }, |
| { |
| "epoch": 19.60200668896321, |
| "grad_norm": 0.6512436866760254, |
| "learning_rate": 0.00013486095661846496, |
| "loss": 2.9558, |
| "step": 1470 |
| }, |
| { |
| "epoch": 19.615384615384617, |
| "grad_norm": 0.5983771681785583, |
| "learning_rate": 0.00013481646273637377, |
| "loss": 2.9958, |
| "step": 1471 |
| }, |
| { |
| "epoch": 19.62876254180602, |
| "grad_norm": 0.6994190216064453, |
| "learning_rate": 0.00013477196885428253, |
| "loss": 2.8019, |
| "step": 1472 |
| }, |
| { |
| "epoch": 19.642140468227424, |
| "grad_norm": 0.5878567695617676, |
| "learning_rate": 0.00013472747497219134, |
| "loss": 2.7007, |
| "step": 1473 |
| }, |
| { |
| "epoch": 19.65551839464883, |
| "grad_norm": 0.6140199303627014, |
| "learning_rate": 0.00013468298109010012, |
| "loss": 2.9212, |
| "step": 1474 |
| }, |
| { |
| "epoch": 19.668896321070235, |
| "grad_norm": 0.648714542388916, |
| "learning_rate": 0.0001346384872080089, |
| "loss": 3.0053, |
| "step": 1475 |
| }, |
| { |
| "epoch": 19.682274247491637, |
| "grad_norm": 0.5991750359535217, |
| "learning_rate": 0.00013459399332591769, |
| "loss": 2.9129, |
| "step": 1476 |
| }, |
| { |
| "epoch": 19.695652173913043, |
| "grad_norm": 0.5538223385810852, |
| "learning_rate": 0.0001345494994438265, |
| "loss": 2.9097, |
| "step": 1477 |
| }, |
| { |
| "epoch": 19.70903010033445, |
| "grad_norm": 0.5864409804344177, |
| "learning_rate": 0.00013450500556173528, |
| "loss": 2.9348, |
| "step": 1478 |
| }, |
| { |
| "epoch": 19.722408026755854, |
| "grad_norm": 0.6004533767700195, |
| "learning_rate": 0.00013446051167964406, |
| "loss": 2.8845, |
| "step": 1479 |
| }, |
| { |
| "epoch": 19.735785953177256, |
| "grad_norm": 0.6316581964492798, |
| "learning_rate": 0.00013441601779755284, |
| "loss": 2.8619, |
| "step": 1480 |
| }, |
| { |
| "epoch": 19.74916387959866, |
| "grad_norm": 0.593138575553894, |
| "learning_rate": 0.00013437152391546165, |
| "loss": 3.041, |
| "step": 1481 |
| }, |
| { |
| "epoch": 19.762541806020067, |
| "grad_norm": 0.5826678276062012, |
| "learning_rate": 0.0001343270300333704, |
| "loss": 2.7432, |
| "step": 1482 |
| }, |
| { |
| "epoch": 19.775919732441473, |
| "grad_norm": 0.6341697573661804, |
| "learning_rate": 0.00013428253615127922, |
| "loss": 2.9755, |
| "step": 1483 |
| }, |
| { |
| "epoch": 19.789297658862875, |
| "grad_norm": 0.5894901156425476, |
| "learning_rate": 0.000134238042269188, |
| "loss": 2.9754, |
| "step": 1484 |
| }, |
| { |
| "epoch": 19.80267558528428, |
| "grad_norm": 0.5840655565261841, |
| "learning_rate": 0.00013419354838709678, |
| "loss": 2.9959, |
| "step": 1485 |
| }, |
| { |
| "epoch": 19.816053511705686, |
| "grad_norm": 0.6006319522857666, |
| "learning_rate": 0.00013414905450500556, |
| "loss": 3.0196, |
| "step": 1486 |
| }, |
| { |
| "epoch": 19.82943143812709, |
| "grad_norm": 0.5647453665733337, |
| "learning_rate": 0.00013410456062291437, |
| "loss": 2.9662, |
| "step": 1487 |
| }, |
| { |
| "epoch": 19.842809364548494, |
| "grad_norm": 0.6583006978034973, |
| "learning_rate": 0.00013406006674082316, |
| "loss": 2.8719, |
| "step": 1488 |
| }, |
| { |
| "epoch": 19.8561872909699, |
| "grad_norm": 0.6041131615638733, |
| "learning_rate": 0.00013401557285873194, |
| "loss": 3.0019, |
| "step": 1489 |
| }, |
| { |
| "epoch": 19.869565217391305, |
| "grad_norm": 0.5524600148200989, |
| "learning_rate": 0.00013397107897664072, |
| "loss": 2.7409, |
| "step": 1490 |
| }, |
| { |
| "epoch": 19.88294314381271, |
| "grad_norm": 0.6532869338989258, |
| "learning_rate": 0.0001339265850945495, |
| "loss": 2.9336, |
| "step": 1491 |
| }, |
| { |
| "epoch": 19.896321070234112, |
| "grad_norm": 0.6459875106811523, |
| "learning_rate": 0.00013388209121245828, |
| "loss": 2.9572, |
| "step": 1492 |
| }, |
| { |
| "epoch": 19.909698996655518, |
| "grad_norm": 0.6051417589187622, |
| "learning_rate": 0.00013383759733036707, |
| "loss": 2.864, |
| "step": 1493 |
| }, |
| { |
| "epoch": 19.923076923076923, |
| "grad_norm": 0.6565695405006409, |
| "learning_rate": 0.00013379310344827588, |
| "loss": 2.865, |
| "step": 1494 |
| }, |
| { |
| "epoch": 19.93645484949833, |
| "grad_norm": 0.6118014454841614, |
| "learning_rate": 0.00013374860956618466, |
| "loss": 2.7594, |
| "step": 1495 |
| }, |
| { |
| "epoch": 19.94983277591973, |
| "grad_norm": 0.6801209449768066, |
| "learning_rate": 0.00013370411568409344, |
| "loss": 2.7532, |
| "step": 1496 |
| }, |
| { |
| "epoch": 19.963210702341136, |
| "grad_norm": 0.5785267353057861, |
| "learning_rate": 0.00013365962180200222, |
| "loss": 3.0618, |
| "step": 1497 |
| }, |
| { |
| "epoch": 19.976588628762542, |
| "grad_norm": 0.6344903707504272, |
| "learning_rate": 0.00013361512791991103, |
| "loss": 2.7908, |
| "step": 1498 |
| }, |
| { |
| "epoch": 19.989966555183948, |
| "grad_norm": 0.6073011159896851, |
| "learning_rate": 0.0001335706340378198, |
| "loss": 3.0699, |
| "step": 1499 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.6989748477935791, |
| "learning_rate": 0.0001335261401557286, |
| "loss": 2.7932, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 60, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.6403780569344e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|