| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4004272852128796, | |
| "eval_steps": 500, | |
| "global_step": 1312, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003052037234854265, | |
| "grad_norm": 19.476922880741295, | |
| "learning_rate": 1.0101010101010103e-07, | |
| "loss": 1.1728, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.000610407446970853, | |
| "grad_norm": 29.879020388476594, | |
| "learning_rate": 2.0202020202020205e-07, | |
| "loss": 1.0955, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0009156111704562796, | |
| "grad_norm": 24.931945947136526, | |
| "learning_rate": 3.0303030303030305e-07, | |
| "loss": 0.9541, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.001220814893941706, | |
| "grad_norm": 27.83214939667906, | |
| "learning_rate": 4.040404040404041e-07, | |
| "loss": 1.0735, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0015260186174271325, | |
| "grad_norm": 21.219233961021736, | |
| "learning_rate": 5.05050505050505e-07, | |
| "loss": 1.0455, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0018312223409125592, | |
| "grad_norm": 20.022707446211225, | |
| "learning_rate": 6.060606060606061e-07, | |
| "loss": 0.9675, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0021364260643979855, | |
| "grad_norm": 26.532427830157193, | |
| "learning_rate": 7.070707070707071e-07, | |
| "loss": 1.1393, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.002441629787883412, | |
| "grad_norm": 27.89728780710031, | |
| "learning_rate": 8.080808080808082e-07, | |
| "loss": 1.0952, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0027468335113688385, | |
| "grad_norm": 20.346264005570532, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 0.9626, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.003052037234854265, | |
| "grad_norm": 18.804489508720884, | |
| "learning_rate": 1.01010101010101e-06, | |
| "loss": 1.0255, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.003357240958339692, | |
| "grad_norm": 19.776534785573535, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.7399, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0036624446818251184, | |
| "grad_norm": 21.16130386460154, | |
| "learning_rate": 1.2121212121212122e-06, | |
| "loss": 0.5413, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0039676484053105445, | |
| "grad_norm": 16.482713371526263, | |
| "learning_rate": 1.3131313131313134e-06, | |
| "loss": 0.5773, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.004272852128795971, | |
| "grad_norm": 10.780528168770594, | |
| "learning_rate": 1.4141414141414143e-06, | |
| "loss": 0.6782, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0045780558522813975, | |
| "grad_norm": 7.0900135030469915, | |
| "learning_rate": 1.5151515151515152e-06, | |
| "loss": 0.9153, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.004883259575766824, | |
| "grad_norm": 8.490445320662754, | |
| "learning_rate": 1.6161616161616164e-06, | |
| "loss": 0.4798, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0051884632992522505, | |
| "grad_norm": 6.677142812986669, | |
| "learning_rate": 1.7171717171717173e-06, | |
| "loss": 0.4782, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.005493667022737677, | |
| "grad_norm": 5.9204247946017485, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 0.3191, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0057988707462231035, | |
| "grad_norm": 5.012462343754674, | |
| "learning_rate": 1.9191919191919192e-06, | |
| "loss": 0.4115, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.00610407446970853, | |
| "grad_norm": 3.9095937836899113, | |
| "learning_rate": 2.02020202020202e-06, | |
| "loss": 0.6158, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.006409278193193957, | |
| "grad_norm": 4.438163815129716, | |
| "learning_rate": 2.1212121212121216e-06, | |
| "loss": 0.7388, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.006714481916679384, | |
| "grad_norm": 3.62875198348435, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.2875, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.00701968564016481, | |
| "grad_norm": 4.963543929599541, | |
| "learning_rate": 2.3232323232323234e-06, | |
| "loss": 0.4662, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.007324889363650237, | |
| "grad_norm": 4.274904100558248, | |
| "learning_rate": 2.4242424242424244e-06, | |
| "loss": 0.5171, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.007630093087135663, | |
| "grad_norm": 2.670885047669819, | |
| "learning_rate": 2.5252525252525258e-06, | |
| "loss": 0.4488, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.007935296810621089, | |
| "grad_norm": 2.6864388610994014, | |
| "learning_rate": 2.6262626262626267e-06, | |
| "loss": 0.372, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.008240500534106516, | |
| "grad_norm": 3.804357369452407, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.2646, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.008545704257591942, | |
| "grad_norm": 4.059008227452532, | |
| "learning_rate": 2.8282828282828286e-06, | |
| "loss": 0.5907, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.008850907981077369, | |
| "grad_norm": 4.9062443629918855, | |
| "learning_rate": 2.9292929292929295e-06, | |
| "loss": 0.2972, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.009156111704562795, | |
| "grad_norm": 3.5391495380267064, | |
| "learning_rate": 3.0303030303030305e-06, | |
| "loss": 0.3821, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.009461315428048222, | |
| "grad_norm": 2.5896920322264854, | |
| "learning_rate": 3.131313131313132e-06, | |
| "loss": 0.4164, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.009766519151533648, | |
| "grad_norm": 3.0230775761822937, | |
| "learning_rate": 3.232323232323233e-06, | |
| "loss": 0.4237, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.010071722875019075, | |
| "grad_norm": 2.8417717057519423, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.3353, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.010376926598504501, | |
| "grad_norm": 2.5789157463945878, | |
| "learning_rate": 3.4343434343434347e-06, | |
| "loss": 0.3769, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.010682130321989928, | |
| "grad_norm": 2.5222241581850096, | |
| "learning_rate": 3.5353535353535356e-06, | |
| "loss": 0.519, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.010987334045475354, | |
| "grad_norm": 2.8704682168269127, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 0.2829, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.01129253776896078, | |
| "grad_norm": 3.24684532820184, | |
| "learning_rate": 3.737373737373738e-06, | |
| "loss": 0.3586, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.011597741492446207, | |
| "grad_norm": 5.24792475783676, | |
| "learning_rate": 3.8383838383838385e-06, | |
| "loss": 0.402, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.011902945215931634, | |
| "grad_norm": 3.111184671834165, | |
| "learning_rate": 3.93939393939394e-06, | |
| "loss": 0.466, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.01220814893941706, | |
| "grad_norm": 3.165565566985893, | |
| "learning_rate": 4.04040404040404e-06, | |
| "loss": 0.2678, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.012513352662902488, | |
| "grad_norm": 2.5486933296193257, | |
| "learning_rate": 4.141414141414142e-06, | |
| "loss": 0.5457, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.012818556386387915, | |
| "grad_norm": 3.4373721012250438, | |
| "learning_rate": 4.242424242424243e-06, | |
| "loss": 0.3862, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.013123760109873341, | |
| "grad_norm": 2.863317221380458, | |
| "learning_rate": 4.343434343434344e-06, | |
| "loss": 0.3601, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.013428963833358768, | |
| "grad_norm": 2.1041128573446035, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.3693, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.013734167556844194, | |
| "grad_norm": 2.286990324679626, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.2513, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01403937128032962, | |
| "grad_norm": 8.793466778432636, | |
| "learning_rate": 4.646464646464647e-06, | |
| "loss": 0.4343, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.014344575003815047, | |
| "grad_norm": 1.8648737533834159, | |
| "learning_rate": 4.747474747474748e-06, | |
| "loss": 0.2631, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.014649778727300474, | |
| "grad_norm": 2.3081781364995324, | |
| "learning_rate": 4.848484848484849e-06, | |
| "loss": 0.2755, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0149549824507859, | |
| "grad_norm": 2.284005369243557, | |
| "learning_rate": 4.94949494949495e-06, | |
| "loss": 0.4186, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.015260186174271327, | |
| "grad_norm": 2.6759709423238096, | |
| "learning_rate": 5.0505050505050515e-06, | |
| "loss": 0.6459, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.015565389897756753, | |
| "grad_norm": 2.8773749120652523, | |
| "learning_rate": 5.151515151515152e-06, | |
| "loss": 0.3324, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.015870593621242178, | |
| "grad_norm": 2.8060164424498786, | |
| "learning_rate": 5.252525252525253e-06, | |
| "loss": 0.3608, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.016175797344727606, | |
| "grad_norm": 2.3060494229726793, | |
| "learning_rate": 5.353535353535354e-06, | |
| "loss": 0.3818, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.01648100106821303, | |
| "grad_norm": 2.073464811557714, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 0.2667, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.01678620479169846, | |
| "grad_norm": 2.3474749655399245, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.35, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.017091408515183884, | |
| "grad_norm": 3.6988890036672086, | |
| "learning_rate": 5.656565656565657e-06, | |
| "loss": 0.284, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.017396612238669312, | |
| "grad_norm": 2.313501192849839, | |
| "learning_rate": 5.7575757575757586e-06, | |
| "loss": 0.3308, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.017701815962154737, | |
| "grad_norm": 2.411936098122121, | |
| "learning_rate": 5.858585858585859e-06, | |
| "loss": 0.3982, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.018007019685640165, | |
| "grad_norm": 2.724660127775508, | |
| "learning_rate": 5.95959595959596e-06, | |
| "loss": 0.3587, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.01831222340912559, | |
| "grad_norm": 3.130895013540925, | |
| "learning_rate": 6.060606060606061e-06, | |
| "loss": 0.3427, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01861742713261102, | |
| "grad_norm": 3.4261489723004614, | |
| "learning_rate": 6.1616161616161615e-06, | |
| "loss": 0.4578, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.018922630856096443, | |
| "grad_norm": 2.413871881063889, | |
| "learning_rate": 6.262626262626264e-06, | |
| "loss": 0.2067, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.01922783457958187, | |
| "grad_norm": 2.0941348505038366, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 0.27, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.019533038303067296, | |
| "grad_norm": 2.2153240133926153, | |
| "learning_rate": 6.464646464646466e-06, | |
| "loss": 0.3298, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.019838242026552724, | |
| "grad_norm": 2.422022070572305, | |
| "learning_rate": 6.565656565656566e-06, | |
| "loss": 0.4894, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02014344575003815, | |
| "grad_norm": 2.45442660843552, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.3684, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.020448649473523577, | |
| "grad_norm": 3.5398238081108304, | |
| "learning_rate": 6.767676767676769e-06, | |
| "loss": 0.4233, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.020753853197009002, | |
| "grad_norm": 2.530397719080883, | |
| "learning_rate": 6.868686868686869e-06, | |
| "loss": 0.2676, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02105905692049443, | |
| "grad_norm": 2.259346305696615, | |
| "learning_rate": 6.969696969696971e-06, | |
| "loss": 0.4409, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.021364260643979855, | |
| "grad_norm": 2.3339543424453764, | |
| "learning_rate": 7.070707070707071e-06, | |
| "loss": 0.3882, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.021669464367465283, | |
| "grad_norm": 2.348843038116063, | |
| "learning_rate": 7.171717171717172e-06, | |
| "loss": 0.3904, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.021974668090950708, | |
| "grad_norm": 2.7011363922899965, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 0.3586, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.022279871814436136, | |
| "grad_norm": 2.6923381814173486, | |
| "learning_rate": 7.373737373737374e-06, | |
| "loss": 0.4331, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.02258507553792156, | |
| "grad_norm": 2.0435337430530924, | |
| "learning_rate": 7.474747474747476e-06, | |
| "loss": 0.2739, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.02289027926140699, | |
| "grad_norm": 2.257183264462076, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 0.4554, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.023195482984892414, | |
| "grad_norm": 2.5384248372961626, | |
| "learning_rate": 7.676767676767677e-06, | |
| "loss": 0.4934, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.023500686708377842, | |
| "grad_norm": 2.1578730127908488, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.3519, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.023805890431863267, | |
| "grad_norm": 2.1316764516757476, | |
| "learning_rate": 7.87878787878788e-06, | |
| "loss": 0.3268, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.024111094155348695, | |
| "grad_norm": 2.095996278024237, | |
| "learning_rate": 7.97979797979798e-06, | |
| "loss": 0.3318, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.02441629787883412, | |
| "grad_norm": 1.9985574049541877, | |
| "learning_rate": 8.08080808080808e-06, | |
| "loss": 0.1852, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02472150160231955, | |
| "grad_norm": 1.7092921737326583, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 0.2412, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.025026705325804977, | |
| "grad_norm": 1.9609482601524066, | |
| "learning_rate": 8.282828282828283e-06, | |
| "loss": 0.3349, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0253319090492904, | |
| "grad_norm": 2.5619254980161412, | |
| "learning_rate": 8.383838383838384e-06, | |
| "loss": 0.3327, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.02563711277277583, | |
| "grad_norm": 2.1734116421771827, | |
| "learning_rate": 8.484848484848486e-06, | |
| "loss": 0.5005, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.025942316496261254, | |
| "grad_norm": 2.4612836321871785, | |
| "learning_rate": 8.585858585858587e-06, | |
| "loss": 0.5919, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.026247520219746683, | |
| "grad_norm": 2.050264187978962, | |
| "learning_rate": 8.686868686868687e-06, | |
| "loss": 0.2654, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.026552723943232107, | |
| "grad_norm": 1.7466792206761999, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 0.2875, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.026857927666717536, | |
| "grad_norm": 1.9114055019911376, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.3317, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.02716313139020296, | |
| "grad_norm": 2.136028617695754, | |
| "learning_rate": 8.98989898989899e-06, | |
| "loss": 0.4322, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.02746833511368839, | |
| "grad_norm": 2.0559196693817303, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.3372, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.027773538837173813, | |
| "grad_norm": 1.6053810559753854, | |
| "learning_rate": 9.191919191919193e-06, | |
| "loss": 0.2833, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.02807874256065924, | |
| "grad_norm": 1.9190338968500587, | |
| "learning_rate": 9.292929292929294e-06, | |
| "loss": 0.2358, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.028383946284144666, | |
| "grad_norm": 1.7424429804531956, | |
| "learning_rate": 9.393939393939396e-06, | |
| "loss": 0.2805, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.028689150007630095, | |
| "grad_norm": 1.5616301594921251, | |
| "learning_rate": 9.494949494949497e-06, | |
| "loss": 0.326, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.02899435373111552, | |
| "grad_norm": 2.6517363851490297, | |
| "learning_rate": 9.595959595959597e-06, | |
| "loss": 0.5839, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.029299557454600948, | |
| "grad_norm": 1.9068377479857994, | |
| "learning_rate": 9.696969696969698e-06, | |
| "loss": 0.4213, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.029604761178086372, | |
| "grad_norm": 2.147263972819766, | |
| "learning_rate": 9.797979797979798e-06, | |
| "loss": 0.3776, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.0299099649015718, | |
| "grad_norm": 2.3466004395170685, | |
| "learning_rate": 9.8989898989899e-06, | |
| "loss": 0.4828, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.030215168625057225, | |
| "grad_norm": 1.9328188798162316, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3816, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.030520372348542654, | |
| "grad_norm": 2.120656679761712, | |
| "learning_rate": 9.999997555414177e-06, | |
| "loss": 0.287, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03082557607202808, | |
| "grad_norm": 1.8272767014289886, | |
| "learning_rate": 9.999990221659095e-06, | |
| "loss": 0.2529, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.031130779795513507, | |
| "grad_norm": 2.108876035097533, | |
| "learning_rate": 9.999977998741925e-06, | |
| "loss": 0.4, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.031435983518998935, | |
| "grad_norm": 2.611227326027621, | |
| "learning_rate": 9.999960886674623e-06, | |
| "loss": 0.5577, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.031741187242484356, | |
| "grad_norm": 2.012760226088087, | |
| "learning_rate": 9.999938885473916e-06, | |
| "loss": 0.2397, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.032046390965969784, | |
| "grad_norm": 3.4069313977643088, | |
| "learning_rate": 9.999911995161323e-06, | |
| "loss": 0.3074, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03235159468945521, | |
| "grad_norm": 1.5281487804348939, | |
| "learning_rate": 9.999880215763133e-06, | |
| "loss": 0.306, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.03265679841294064, | |
| "grad_norm": 1.5733903167529437, | |
| "learning_rate": 9.999843547310427e-06, | |
| "loss": 0.3123, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.03296200213642606, | |
| "grad_norm": 2.2084260837102776, | |
| "learning_rate": 9.999801989839055e-06, | |
| "loss": 0.2686, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03326720585991149, | |
| "grad_norm": 2.0235527329790477, | |
| "learning_rate": 9.999755543389658e-06, | |
| "loss": 0.362, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.03357240958339692, | |
| "grad_norm": 1.4126246608311444, | |
| "learning_rate": 9.999704208007647e-06, | |
| "loss": 0.1868, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03387761330688235, | |
| "grad_norm": 1.9363750145032863, | |
| "learning_rate": 9.999647983743227e-06, | |
| "loss": 0.4674, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.03418281703036777, | |
| "grad_norm": 2.306492812857686, | |
| "learning_rate": 9.999586870651372e-06, | |
| "loss": 0.7454, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.034488020753853196, | |
| "grad_norm": 1.9927578577114744, | |
| "learning_rate": 9.999520868791839e-06, | |
| "loss": 0.2964, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.034793224477338625, | |
| "grad_norm": 2.897230200199283, | |
| "learning_rate": 9.99944997822917e-06, | |
| "loss": 0.3507, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.03509842820082405, | |
| "grad_norm": 1.7040567211820554, | |
| "learning_rate": 9.999374199032682e-06, | |
| "loss": 0.358, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.035403631924309474, | |
| "grad_norm": 1.7684725864001616, | |
| "learning_rate": 9.999293531276475e-06, | |
| "loss": 0.469, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0357088356477949, | |
| "grad_norm": 2.151331613378997, | |
| "learning_rate": 9.999207975039429e-06, | |
| "loss": 0.4007, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.03601403937128033, | |
| "grad_norm": 2.1827006415812678, | |
| "learning_rate": 9.999117530405205e-06, | |
| "loss": 0.373, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.03631924309476576, | |
| "grad_norm": 2.0424756244526283, | |
| "learning_rate": 9.99902219746224e-06, | |
| "loss": 0.4664, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.03662444681825118, | |
| "grad_norm": 2.4438750213097014, | |
| "learning_rate": 9.998921976303757e-06, | |
| "loss": 0.5884, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03692965054173661, | |
| "grad_norm": 1.6168805259489245, | |
| "learning_rate": 9.998816867027753e-06, | |
| "loss": 0.3874, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.03723485426522204, | |
| "grad_norm": 2.4836564854380914, | |
| "learning_rate": 9.99870686973701e-06, | |
| "loss": 0.3865, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.037540057988707465, | |
| "grad_norm": 2.187549263535683, | |
| "learning_rate": 9.998591984539085e-06, | |
| "loss": 0.4419, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.037845261712192886, | |
| "grad_norm": 2.3145724108896366, | |
| "learning_rate": 9.998472211546317e-06, | |
| "loss": 0.5048, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.038150465435678314, | |
| "grad_norm": 2.6043824271784377, | |
| "learning_rate": 9.998347550875825e-06, | |
| "loss": 0.4323, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03845566915916374, | |
| "grad_norm": 1.7266964407358079, | |
| "learning_rate": 9.998218002649507e-06, | |
| "loss": 0.3093, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.03876087288264917, | |
| "grad_norm": 2.3091863655820397, | |
| "learning_rate": 9.99808356699404e-06, | |
| "loss": 0.5394, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.03906607660613459, | |
| "grad_norm": 2.178584103245907, | |
| "learning_rate": 9.997944244040877e-06, | |
| "loss": 0.562, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.03937128032962002, | |
| "grad_norm": 1.4762803065381216, | |
| "learning_rate": 9.997800033926252e-06, | |
| "loss": 0.3012, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.03967648405310545, | |
| "grad_norm": 1.6768704233807339, | |
| "learning_rate": 9.997650936791183e-06, | |
| "loss": 0.3314, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03998168777659088, | |
| "grad_norm": 1.8423584681568375, | |
| "learning_rate": 9.997496952781461e-06, | |
| "loss": 0.5373, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.0402868915000763, | |
| "grad_norm": 1.4926628434179245, | |
| "learning_rate": 9.997338082047656e-06, | |
| "loss": 0.1992, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.040592095223561726, | |
| "grad_norm": 1.6323074947028773, | |
| "learning_rate": 9.997174324745117e-06, | |
| "loss": 0.4872, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.040897298947047155, | |
| "grad_norm": 2.159688005520465, | |
| "learning_rate": 9.997005681033973e-06, | |
| "loss": 0.5076, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.04120250267053258, | |
| "grad_norm": 2.207163038792008, | |
| "learning_rate": 9.996832151079127e-06, | |
| "loss": 0.2677, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.041507706394018004, | |
| "grad_norm": 1.3990677420334965, | |
| "learning_rate": 9.996653735050265e-06, | |
| "loss": 0.2526, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.04181291011750343, | |
| "grad_norm": 1.7368886105229604, | |
| "learning_rate": 9.996470433121847e-06, | |
| "loss": 0.2874, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.04211811384098886, | |
| "grad_norm": 1.8138446424045762, | |
| "learning_rate": 9.996282245473113e-06, | |
| "loss": 0.2986, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.04242331756447429, | |
| "grad_norm": 1.8564789601928355, | |
| "learning_rate": 9.996089172288078e-06, | |
| "loss": 0.3954, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.04272852128795971, | |
| "grad_norm": 1.9085920361180522, | |
| "learning_rate": 9.995891213755536e-06, | |
| "loss": 0.2739, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04303372501144514, | |
| "grad_norm": 1.8924678931794556, | |
| "learning_rate": 9.99568837006906e-06, | |
| "loss": 0.2766, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.04333892873493057, | |
| "grad_norm": 1.8418836037208652, | |
| "learning_rate": 9.995480641426992e-06, | |
| "loss": 0.488, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.043644132458415995, | |
| "grad_norm": 1.6305125707231247, | |
| "learning_rate": 9.99526802803246e-06, | |
| "loss": 0.3045, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.043949336181901416, | |
| "grad_norm": 2.143051665423358, | |
| "learning_rate": 9.995050530093366e-06, | |
| "loss": 0.3567, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.044254539905386844, | |
| "grad_norm": 1.994194545633334, | |
| "learning_rate": 9.994828147822387e-06, | |
| "loss": 0.3655, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.04455974362887227, | |
| "grad_norm": 1.8553346605537173, | |
| "learning_rate": 9.994600881436972e-06, | |
| "loss": 0.3249, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.0448649473523577, | |
| "grad_norm": 2.1613773805709857, | |
| "learning_rate": 9.994368731159351e-06, | |
| "loss": 0.4863, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.04517015107584312, | |
| "grad_norm": 2.199571706523493, | |
| "learning_rate": 9.99413169721653e-06, | |
| "loss": 0.465, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.04547535479932855, | |
| "grad_norm": 1.681707967900651, | |
| "learning_rate": 9.99388977984029e-06, | |
| "loss": 0.3472, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.04578055852281398, | |
| "grad_norm": 1.6586587053140593, | |
| "learning_rate": 9.993642979267184e-06, | |
| "loss": 0.3626, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04608576224629941, | |
| "grad_norm": 2.12592721793332, | |
| "learning_rate": 9.993391295738542e-06, | |
| "loss": 0.3218, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.04639096596978483, | |
| "grad_norm": 1.6765944279655143, | |
| "learning_rate": 9.99313472950047e-06, | |
| "loss": 0.3402, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.046696169693270256, | |
| "grad_norm": 1.6019038139070678, | |
| "learning_rate": 9.992873280803848e-06, | |
| "loss": 0.4554, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.047001373416755685, | |
| "grad_norm": 1.6429860881882794, | |
| "learning_rate": 9.99260694990433e-06, | |
| "loss": 0.4086, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.04730657714024111, | |
| "grad_norm": 1.98592334325083, | |
| "learning_rate": 9.992335737062338e-06, | |
| "loss": 0.5733, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.047611780863726534, | |
| "grad_norm": 1.5624846648417388, | |
| "learning_rate": 9.992059642543076e-06, | |
| "loss": 0.2524, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.04791698458721196, | |
| "grad_norm": 1.4438198320418865, | |
| "learning_rate": 9.991778666616523e-06, | |
| "loss": 0.1756, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.04822218831069739, | |
| "grad_norm": 1.6284817295660008, | |
| "learning_rate": 9.991492809557424e-06, | |
| "loss": 0.4144, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.04852739203418282, | |
| "grad_norm": 1.2236340789910145, | |
| "learning_rate": 9.991202071645298e-06, | |
| "loss": 0.1664, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.04883259575766824, | |
| "grad_norm": 1.4874398163232816, | |
| "learning_rate": 9.99090645316444e-06, | |
| "loss": 0.3323, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04913779948115367, | |
| "grad_norm": 2.5394515927833403, | |
| "learning_rate": 9.990605954403917e-06, | |
| "loss": 0.27, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.0494430032046391, | |
| "grad_norm": 1.7966332314422868, | |
| "learning_rate": 9.990300575657565e-06, | |
| "loss": 0.4453, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.049748206928124525, | |
| "grad_norm": 1.825976682624809, | |
| "learning_rate": 9.989990317223995e-06, | |
| "loss": 0.2646, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.05005341065160995, | |
| "grad_norm": 1.6554541925183588, | |
| "learning_rate": 9.989675179406588e-06, | |
| "loss": 0.445, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.050358614375095374, | |
| "grad_norm": 1.6711133844293076, | |
| "learning_rate": 9.989355162513496e-06, | |
| "loss": 0.3685, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.0506638180985808, | |
| "grad_norm": 1.8033315345252203, | |
| "learning_rate": 9.989030266857644e-06, | |
| "loss": 0.2566, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.05096902182206623, | |
| "grad_norm": 1.6879852444966537, | |
| "learning_rate": 9.988700492756726e-06, | |
| "loss": 0.4086, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.05127422554555166, | |
| "grad_norm": 1.6855038740169574, | |
| "learning_rate": 9.988365840533204e-06, | |
| "loss": 0.3081, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.05157942926903708, | |
| "grad_norm": 2.245121010490438, | |
| "learning_rate": 9.988026310514316e-06, | |
| "loss": 0.5646, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.05188463299252251, | |
| "grad_norm": 1.531117336209479, | |
| "learning_rate": 9.987681903032065e-06, | |
| "loss": 0.3598, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05218983671600794, | |
| "grad_norm": 1.4368727600956301, | |
| "learning_rate": 9.987332618423221e-06, | |
| "loss": 0.3864, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.052495040439493365, | |
| "grad_norm": 2.039026486601271, | |
| "learning_rate": 9.98697845702933e-06, | |
| "loss": 0.2728, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.052800244162978786, | |
| "grad_norm": 1.5481974795842472, | |
| "learning_rate": 9.986619419196704e-06, | |
| "loss": 0.2376, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.053105447886464215, | |
| "grad_norm": 1.583025735121783, | |
| "learning_rate": 9.986255505276418e-06, | |
| "loss": 0.3941, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.05341065160994964, | |
| "grad_norm": 2.025610033619695, | |
| "learning_rate": 9.985886715624326e-06, | |
| "loss": 0.432, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.05371585533343507, | |
| "grad_norm": 1.9370365819159912, | |
| "learning_rate": 9.985513050601037e-06, | |
| "loss": 0.3311, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.05402105905692049, | |
| "grad_norm": 1.534591376747653, | |
| "learning_rate": 9.985134510571936e-06, | |
| "loss": 0.3804, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.05432626278040592, | |
| "grad_norm": 1.5627980520171343, | |
| "learning_rate": 9.984751095907175e-06, | |
| "loss": 0.3991, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.05463146650389135, | |
| "grad_norm": 1.858760828475349, | |
| "learning_rate": 9.984362806981665e-06, | |
| "loss": 0.4124, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.05493667022737678, | |
| "grad_norm": 1.4922057145689682, | |
| "learning_rate": 9.983969644175092e-06, | |
| "loss": 0.2571, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0552418739508622, | |
| "grad_norm": 1.4358215484460224, | |
| "learning_rate": 9.983571607871903e-06, | |
| "loss": 0.3351, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.05554707767434763, | |
| "grad_norm": 1.7105120125454414, | |
| "learning_rate": 9.983168698461312e-06, | |
| "loss": 0.4374, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.055852281397833055, | |
| "grad_norm": 1.4100459259074987, | |
| "learning_rate": 9.982760916337296e-06, | |
| "loss": 0.3958, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.05615748512131848, | |
| "grad_norm": 1.667173817085955, | |
| "learning_rate": 9.982348261898598e-06, | |
| "loss": 0.2867, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.056462688844803904, | |
| "grad_norm": 1.8278737995984025, | |
| "learning_rate": 9.981930735548731e-06, | |
| "loss": 0.3738, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.05676789256828933, | |
| "grad_norm": 1.806852289121097, | |
| "learning_rate": 9.98150833769596e-06, | |
| "loss": 0.5608, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.05707309629177476, | |
| "grad_norm": 1.6986308867720055, | |
| "learning_rate": 9.981081068753324e-06, | |
| "loss": 0.4253, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.05737830001526019, | |
| "grad_norm": 1.6392088091109513, | |
| "learning_rate": 9.98064892913862e-06, | |
| "loss": 0.2444, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.05768350373874561, | |
| "grad_norm": 1.7762995408711126, | |
| "learning_rate": 9.980211919274407e-06, | |
| "loss": 0.3866, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.05798870746223104, | |
| "grad_norm": 1.7144647062044762, | |
| "learning_rate": 9.979770039588013e-06, | |
| "loss": 0.4504, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05829391118571647, | |
| "grad_norm": 1.9069269572943617, | |
| "learning_rate": 9.979323290511517e-06, | |
| "loss": 0.4972, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.058599114909201895, | |
| "grad_norm": 1.831943664409223, | |
| "learning_rate": 9.978871672481774e-06, | |
| "loss": 0.3884, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.058904318632687316, | |
| "grad_norm": 1.60483584957947, | |
| "learning_rate": 9.978415185940383e-06, | |
| "loss": 0.3366, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.059209522356172745, | |
| "grad_norm": 2.041633475935638, | |
| "learning_rate": 9.977953831333718e-06, | |
| "loss": 0.4928, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.05951472607965817, | |
| "grad_norm": 2.1574861604284243, | |
| "learning_rate": 9.977487609112904e-06, | |
| "loss": 0.7092, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.0598199298031436, | |
| "grad_norm": 1.5382345073334531, | |
| "learning_rate": 9.97701651973383e-06, | |
| "loss": 0.2236, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.06012513352662902, | |
| "grad_norm": 2.1479787995768014, | |
| "learning_rate": 9.976540563657143e-06, | |
| "loss": 0.5182, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.06043033725011445, | |
| "grad_norm": 1.8579437774142544, | |
| "learning_rate": 9.976059741348252e-06, | |
| "loss": 0.3093, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.06073554097359988, | |
| "grad_norm": 1.5409701380525285, | |
| "learning_rate": 9.975574053277317e-06, | |
| "loss": 0.2877, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.06104074469708531, | |
| "grad_norm": 1.5474598097011698, | |
| "learning_rate": 9.975083499919264e-06, | |
| "loss": 0.2981, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06134594842057073, | |
| "grad_norm": 1.9202152932180157, | |
| "learning_rate": 9.974588081753773e-06, | |
| "loss": 0.5369, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.06165115214405616, | |
| "grad_norm": 1.4598442515817716, | |
| "learning_rate": 9.974087799265279e-06, | |
| "loss": 0.3696, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.061956355867541585, | |
| "grad_norm": 1.48078814360119, | |
| "learning_rate": 9.973582652942975e-06, | |
| "loss": 0.284, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.06226155959102701, | |
| "grad_norm": 2.100326004155181, | |
| "learning_rate": 9.973072643280813e-06, | |
| "loss": 0.5681, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.06256676331451244, | |
| "grad_norm": 1.976128330719915, | |
| "learning_rate": 9.972557770777496e-06, | |
| "loss": 0.3655, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.06287196703799787, | |
| "grad_norm": 1.2103730393566896, | |
| "learning_rate": 9.972038035936483e-06, | |
| "loss": 0.2471, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.06317717076148328, | |
| "grad_norm": 1.670449906238349, | |
| "learning_rate": 9.971513439265992e-06, | |
| "loss": 0.2184, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.06348237448496871, | |
| "grad_norm": 1.5020544764497652, | |
| "learning_rate": 9.970983981278989e-06, | |
| "loss": 0.3196, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.06378757820845414, | |
| "grad_norm": 1.7833251911345853, | |
| "learning_rate": 9.970449662493195e-06, | |
| "loss": 0.4122, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.06409278193193957, | |
| "grad_norm": 1.4149595334362772, | |
| "learning_rate": 9.96991048343109e-06, | |
| "loss": 0.2947, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.064397985655425, | |
| "grad_norm": 1.5991867680932033, | |
| "learning_rate": 9.969366444619898e-06, | |
| "loss": 0.1902, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.06470318937891043, | |
| "grad_norm": 1.4132064841734169, | |
| "learning_rate": 9.968817546591601e-06, | |
| "loss": 0.3389, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.06500839310239585, | |
| "grad_norm": 1.7671902900221814, | |
| "learning_rate": 9.968263789882926e-06, | |
| "loss": 0.4294, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.06531359682588128, | |
| "grad_norm": 1.5709821497329826, | |
| "learning_rate": 9.96770517503536e-06, | |
| "loss": 0.2765, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.0656188005493667, | |
| "grad_norm": 1.5211731343844295, | |
| "learning_rate": 9.967141702595134e-06, | |
| "loss": 0.387, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.06592400427285212, | |
| "grad_norm": 1.5499265222668686, | |
| "learning_rate": 9.96657337311323e-06, | |
| "loss": 0.4535, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.06622920799633755, | |
| "grad_norm": 1.4736546539447488, | |
| "learning_rate": 9.966000187145383e-06, | |
| "loss": 0.3834, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.06653441171982298, | |
| "grad_norm": 1.3306288958233108, | |
| "learning_rate": 9.965422145252072e-06, | |
| "loss": 0.3172, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.06683961544330841, | |
| "grad_norm": 1.5745937005003143, | |
| "learning_rate": 9.964839247998524e-06, | |
| "loss": 0.2725, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.06714481916679384, | |
| "grad_norm": 1.7546511557153388, | |
| "learning_rate": 9.96425149595472e-06, | |
| "loss": 0.3577, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06745002289027927, | |
| "grad_norm": 2.0422588449754286, | |
| "learning_rate": 9.96365888969538e-06, | |
| "loss": 0.4976, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.0677552266137647, | |
| "grad_norm": 1.4661824124133862, | |
| "learning_rate": 9.963061429799979e-06, | |
| "loss": 0.3672, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.06806043033725011, | |
| "grad_norm": 2.0959067552369666, | |
| "learning_rate": 9.96245911685273e-06, | |
| "loss": 0.5381, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.06836563406073554, | |
| "grad_norm": 1.3296813372997014, | |
| "learning_rate": 9.961851951442599e-06, | |
| "loss": 0.2799, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.06867083778422096, | |
| "grad_norm": 1.7385807765114274, | |
| "learning_rate": 9.96123993416329e-06, | |
| "loss": 0.5183, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.06897604150770639, | |
| "grad_norm": 1.5190119701865645, | |
| "learning_rate": 9.960623065613254e-06, | |
| "loss": 0.4608, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.06928124523119182, | |
| "grad_norm": 1.4393894383331207, | |
| "learning_rate": 9.96000134639569e-06, | |
| "loss": 0.3455, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.06958644895467725, | |
| "grad_norm": 1.7132863682619555, | |
| "learning_rate": 9.959374777118533e-06, | |
| "loss": 0.316, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.06989165267816268, | |
| "grad_norm": 1.3227120889592454, | |
| "learning_rate": 9.958743358394464e-06, | |
| "loss": 0.2467, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.0701968564016481, | |
| "grad_norm": 1.5331153407144422, | |
| "learning_rate": 9.95810709084091e-06, | |
| "loss": 0.3138, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07050206012513352, | |
| "grad_norm": 1.7990748995190806, | |
| "learning_rate": 9.957465975080031e-06, | |
| "loss": 0.4747, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.07080726384861895, | |
| "grad_norm": 1.1638981235859056, | |
| "learning_rate": 9.956820011738736e-06, | |
| "loss": 0.2265, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.07111246757210438, | |
| "grad_norm": 1.5739388418179414, | |
| "learning_rate": 9.956169201448665e-06, | |
| "loss": 0.5066, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.0714176712955898, | |
| "grad_norm": 1.6803933013620869, | |
| "learning_rate": 9.955513544846205e-06, | |
| "loss": 0.4415, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.07172287501907523, | |
| "grad_norm": 1.4014872110785643, | |
| "learning_rate": 9.954853042572479e-06, | |
| "loss": 0.3271, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.07202807874256066, | |
| "grad_norm": 1.5310222689941932, | |
| "learning_rate": 9.954187695273352e-06, | |
| "loss": 0.3289, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.07233328246604609, | |
| "grad_norm": 2.166268226472017, | |
| "learning_rate": 9.953517503599419e-06, | |
| "loss": 0.622, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.07263848618953152, | |
| "grad_norm": 2.258081862277545, | |
| "learning_rate": 9.952842468206019e-06, | |
| "loss": 0.5071, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.07294368991301693, | |
| "grad_norm": 1.7322119894263104, | |
| "learning_rate": 9.952162589753224e-06, | |
| "loss": 0.5097, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.07324889363650236, | |
| "grad_norm": 1.9966284228033864, | |
| "learning_rate": 9.951477868905843e-06, | |
| "loss": 0.2263, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07355409735998779, | |
| "grad_norm": 1.6793267860774614, | |
| "learning_rate": 9.95078830633342e-06, | |
| "loss": 0.2065, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.07385930108347322, | |
| "grad_norm": 2.122564153881175, | |
| "learning_rate": 9.95009390271023e-06, | |
| "loss": 0.2665, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.07416450480695864, | |
| "grad_norm": 1.5852282963187305, | |
| "learning_rate": 9.949394658715289e-06, | |
| "loss": 0.4453, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.07446970853044407, | |
| "grad_norm": 1.7534712016120517, | |
| "learning_rate": 9.948690575032338e-06, | |
| "loss": 0.3628, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.0747749122539295, | |
| "grad_norm": 1.351810586905304, | |
| "learning_rate": 9.947981652349854e-06, | |
| "loss": 0.3984, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.07508011597741493, | |
| "grad_norm": 1.8377506474408298, | |
| "learning_rate": 9.947267891361051e-06, | |
| "loss": 0.3677, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.07538531970090036, | |
| "grad_norm": 1.4655632998364951, | |
| "learning_rate": 9.946549292763865e-06, | |
| "loss": 0.3516, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.07569052342438577, | |
| "grad_norm": 3.240838121636416, | |
| "learning_rate": 9.945825857260967e-06, | |
| "loss": 0.2627, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.0759957271478712, | |
| "grad_norm": 1.4085823215183912, | |
| "learning_rate": 9.945097585559757e-06, | |
| "loss": 0.2716, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.07630093087135663, | |
| "grad_norm": 1.6361471921651585, | |
| "learning_rate": 9.944364478372364e-06, | |
| "loss": 0.3595, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07660613459484206, | |
| "grad_norm": 1.0912978886499554, | |
| "learning_rate": 9.943626536415647e-06, | |
| "loss": 0.1968, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.07691133831832749, | |
| "grad_norm": 1.9515717700893849, | |
| "learning_rate": 9.942883760411188e-06, | |
| "loss": 0.374, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.07721654204181291, | |
| "grad_norm": 1.5560755068838334, | |
| "learning_rate": 9.942136151085302e-06, | |
| "loss": 0.44, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.07752174576529834, | |
| "grad_norm": 1.4843235207715992, | |
| "learning_rate": 9.941383709169024e-06, | |
| "loss": 0.3175, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.07782694948878377, | |
| "grad_norm": 1.5210960196158274, | |
| "learning_rate": 9.94062643539812e-06, | |
| "loss": 0.3722, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.07813215321226918, | |
| "grad_norm": 1.6656094376801425, | |
| "learning_rate": 9.939864330513079e-06, | |
| "loss": 0.3511, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.07843735693575461, | |
| "grad_norm": 1.2732857455769802, | |
| "learning_rate": 9.939097395259108e-06, | |
| "loss": 0.2619, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.07874256065924004, | |
| "grad_norm": 1.8947301386622588, | |
| "learning_rate": 9.938325630386149e-06, | |
| "loss": 0.3933, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.07904776438272547, | |
| "grad_norm": 1.5625416559388712, | |
| "learning_rate": 9.937549036648857e-06, | |
| "loss": 0.4491, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.0793529681062109, | |
| "grad_norm": 1.5125179888703784, | |
| "learning_rate": 9.936767614806612e-06, | |
| "loss": 0.3674, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07965817182969633, | |
| "grad_norm": 1.5026525250547669, | |
| "learning_rate": 9.935981365623516e-06, | |
| "loss": 0.4103, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.07996337555318175, | |
| "grad_norm": 2.3948536293362115, | |
| "learning_rate": 9.93519028986839e-06, | |
| "loss": 0.4009, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.08026857927666718, | |
| "grad_norm": 2.416554371647352, | |
| "learning_rate": 9.934394388314775e-06, | |
| "loss": 0.4265, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.0805737830001526, | |
| "grad_norm": 1.560923734953618, | |
| "learning_rate": 9.933593661740933e-06, | |
| "loss": 0.303, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.08087898672363802, | |
| "grad_norm": 1.6053945705234087, | |
| "learning_rate": 9.932788110929837e-06, | |
| "loss": 0.3295, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.08118419044712345, | |
| "grad_norm": 1.7775437462596928, | |
| "learning_rate": 9.931977736669185e-06, | |
| "loss": 0.2197, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.08148939417060888, | |
| "grad_norm": 1.701318325041301, | |
| "learning_rate": 9.931162539751392e-06, | |
| "loss": 0.3581, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.08179459789409431, | |
| "grad_norm": 1.5974548511363529, | |
| "learning_rate": 9.93034252097358e-06, | |
| "loss": 0.3432, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.08209980161757974, | |
| "grad_norm": 1.8669593065073864, | |
| "learning_rate": 9.929517681137594e-06, | |
| "loss": 0.4133, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.08240500534106517, | |
| "grad_norm": 1.4895827642408586, | |
| "learning_rate": 9.928688021049991e-06, | |
| "loss": 0.3111, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.0827102090645506, | |
| "grad_norm": 1.4317804244871846, | |
| "learning_rate": 9.927853541522041e-06, | |
| "loss": 0.2915, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.08301541278803601, | |
| "grad_norm": 1.252478145781798, | |
| "learning_rate": 9.927014243369727e-06, | |
| "loss": 0.2794, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.08332061651152144, | |
| "grad_norm": 1.6973954865497314, | |
| "learning_rate": 9.926170127413743e-06, | |
| "loss": 0.6183, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.08362582023500686, | |
| "grad_norm": 1.4723277244112698, | |
| "learning_rate": 9.925321194479494e-06, | |
| "loss": 0.2815, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.08393102395849229, | |
| "grad_norm": 1.7075555550514414, | |
| "learning_rate": 9.924467445397097e-06, | |
| "loss": 0.4178, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.08423622768197772, | |
| "grad_norm": 1.5354808046910606, | |
| "learning_rate": 9.923608881001377e-06, | |
| "loss": 0.2355, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.08454143140546315, | |
| "grad_norm": 1.1795750747565834, | |
| "learning_rate": 9.922745502131865e-06, | |
| "loss": 0.3404, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.08484663512894858, | |
| "grad_norm": 1.427067758888222, | |
| "learning_rate": 9.921877309632805e-06, | |
| "loss": 0.3141, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.085151838852434, | |
| "grad_norm": 1.3691564278772157, | |
| "learning_rate": 9.921004304353147e-06, | |
| "loss": 0.287, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.08545704257591942, | |
| "grad_norm": 1.9220775714586407, | |
| "learning_rate": 9.920126487146544e-06, | |
| "loss": 0.6617, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08576224629940485, | |
| "grad_norm": 1.6761030408371134, | |
| "learning_rate": 9.919243858871355e-06, | |
| "loss": 0.466, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.08606745002289028, | |
| "grad_norm": 1.6120747264173168, | |
| "learning_rate": 9.918356420390645e-06, | |
| "loss": 0.5351, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.0863726537463757, | |
| "grad_norm": 1.5236961732014556, | |
| "learning_rate": 9.91746417257218e-06, | |
| "loss": 0.33, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.08667785746986113, | |
| "grad_norm": 1.6328635321860312, | |
| "learning_rate": 9.916567116288434e-06, | |
| "loss": 0.4301, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.08698306119334656, | |
| "grad_norm": 1.4120804188821041, | |
| "learning_rate": 9.915665252416577e-06, | |
| "loss": 0.3025, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.08728826491683199, | |
| "grad_norm": 1.8410843798908767, | |
| "learning_rate": 9.914758581838482e-06, | |
| "loss": 0.5415, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.08759346864031742, | |
| "grad_norm": 1.1807475096034001, | |
| "learning_rate": 9.913847105440725e-06, | |
| "loss": 0.3184, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.08789867236380283, | |
| "grad_norm": 1.52681276111022, | |
| "learning_rate": 9.912930824114577e-06, | |
| "loss": 0.4266, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.08820387608728826, | |
| "grad_norm": 1.4904538614169496, | |
| "learning_rate": 9.91200973875601e-06, | |
| "loss": 0.3404, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.08850907981077369, | |
| "grad_norm": 1.7385111110311349, | |
| "learning_rate": 9.911083850265692e-06, | |
| "loss": 0.3371, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08881428353425912, | |
| "grad_norm": 1.6013762575114376, | |
| "learning_rate": 9.91015315954899e-06, | |
| "loss": 0.4475, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.08911948725774455, | |
| "grad_norm": 1.5474202900018152, | |
| "learning_rate": 9.909217667515964e-06, | |
| "loss": 0.4162, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.08942469098122997, | |
| "grad_norm": 1.875769203080621, | |
| "learning_rate": 9.908277375081371e-06, | |
| "loss": 0.4446, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.0897298947047154, | |
| "grad_norm": 1.4914731218024286, | |
| "learning_rate": 9.907332283164663e-06, | |
| "loss": 0.4274, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.09003509842820083, | |
| "grad_norm": 1.6551811079983538, | |
| "learning_rate": 9.90638239268998e-06, | |
| "loss": 0.4883, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.09034030215168624, | |
| "grad_norm": 1.645510927644492, | |
| "learning_rate": 9.905427704586158e-06, | |
| "loss": 0.4885, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.09064550587517167, | |
| "grad_norm": 1.6759165462483547, | |
| "learning_rate": 9.904468219786727e-06, | |
| "loss": 0.3878, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.0909507095986571, | |
| "grad_norm": 1.596800484010474, | |
| "learning_rate": 9.903503939229901e-06, | |
| "loss": 0.2725, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.09125591332214253, | |
| "grad_norm": 1.4035704196730787, | |
| "learning_rate": 9.902534863858588e-06, | |
| "loss": 0.2147, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.09156111704562796, | |
| "grad_norm": 1.7460761357385464, | |
| "learning_rate": 9.90156099462038e-06, | |
| "loss": 0.3495, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09186632076911339, | |
| "grad_norm": 1.3373562156184522, | |
| "learning_rate": 9.900582332467566e-06, | |
| "loss": 0.342, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.09217152449259881, | |
| "grad_norm": 1.1466755748188362, | |
| "learning_rate": 9.89959887835711e-06, | |
| "loss": 0.1737, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.09247672821608424, | |
| "grad_norm": 1.8078659273922337, | |
| "learning_rate": 9.898610633250669e-06, | |
| "loss": 0.3111, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.09278193193956966, | |
| "grad_norm": 1.5400638324339648, | |
| "learning_rate": 9.897617598114584e-06, | |
| "loss": 0.4746, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.09308713566305508, | |
| "grad_norm": 1.558728128630052, | |
| "learning_rate": 9.896619773919878e-06, | |
| "loss": 0.3085, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.09339233938654051, | |
| "grad_norm": 4.094736926672729, | |
| "learning_rate": 9.895617161642257e-06, | |
| "loss": 0.4664, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.09369754311002594, | |
| "grad_norm": 1.63116898024897, | |
| "learning_rate": 9.89460976226211e-06, | |
| "loss": 0.3878, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.09400274683351137, | |
| "grad_norm": 1.7238364123731507, | |
| "learning_rate": 9.893597576764508e-06, | |
| "loss": 0.2989, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.0943079505569968, | |
| "grad_norm": 1.2496662648050174, | |
| "learning_rate": 9.8925806061392e-06, | |
| "loss": 0.3054, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.09461315428048223, | |
| "grad_norm": 0.8807197003313585, | |
| "learning_rate": 9.891558851380614e-06, | |
| "loss": 0.1904, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09491835800396765, | |
| "grad_norm": 1.5076918479598347, | |
| "learning_rate": 9.890532313487858e-06, | |
| "loss": 0.2679, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.09522356172745307, | |
| "grad_norm": 1.8465691043660122, | |
| "learning_rate": 9.889500993464716e-06, | |
| "loss": 0.5002, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.0955287654509385, | |
| "grad_norm": 1.9183643810942494, | |
| "learning_rate": 9.888464892319647e-06, | |
| "loss": 0.4869, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.09583396917442392, | |
| "grad_norm": 1.6515373264151805, | |
| "learning_rate": 9.887424011065788e-06, | |
| "loss": 0.4507, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.09613917289790935, | |
| "grad_norm": 1.6223391241834122, | |
| "learning_rate": 9.886378350720945e-06, | |
| "loss": 0.3445, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.09644437662139478, | |
| "grad_norm": 1.4416645097808285, | |
| "learning_rate": 9.885327912307604e-06, | |
| "loss": 0.2808, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.09674958034488021, | |
| "grad_norm": 1.4777192121308136, | |
| "learning_rate": 9.88427269685292e-06, | |
| "loss": 0.4335, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.09705478406836564, | |
| "grad_norm": 1.6934694740555867, | |
| "learning_rate": 9.883212705388715e-06, | |
| "loss": 0.4299, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.09735998779185107, | |
| "grad_norm": 1.9031284601590377, | |
| "learning_rate": 9.882147938951489e-06, | |
| "loss": 0.5364, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.09766519151533648, | |
| "grad_norm": 1.990035566558448, | |
| "learning_rate": 9.881078398582406e-06, | |
| "loss": 0.6476, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09797039523882191, | |
| "grad_norm": 1.4458600630840748, | |
| "learning_rate": 9.8800040853273e-06, | |
| "loss": 0.268, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.09827559896230734, | |
| "grad_norm": 1.473557254783057, | |
| "learning_rate": 9.878925000236667e-06, | |
| "loss": 0.3889, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.09858080268579276, | |
| "grad_norm": 1.429462352597184, | |
| "learning_rate": 9.877841144365681e-06, | |
| "loss": 0.3348, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.0988860064092782, | |
| "grad_norm": 1.9126483909533352, | |
| "learning_rate": 9.876752518774167e-06, | |
| "loss": 0.5004, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.09919121013276362, | |
| "grad_norm": 1.528278815830415, | |
| "learning_rate": 9.875659124526622e-06, | |
| "loss": 0.1931, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.09949641385624905, | |
| "grad_norm": 1.6064809314060318, | |
| "learning_rate": 9.874560962692207e-06, | |
| "loss": 0.2627, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.09980161757973448, | |
| "grad_norm": 1.8583002911468363, | |
| "learning_rate": 9.873458034344741e-06, | |
| "loss": 0.4795, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.1001068213032199, | |
| "grad_norm": 2.180040993961252, | |
| "learning_rate": 9.872350340562704e-06, | |
| "loss": 0.3502, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.10041202502670532, | |
| "grad_norm": 2.2760944374886334, | |
| "learning_rate": 9.871237882429237e-06, | |
| "loss": 0.5504, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.10071722875019075, | |
| "grad_norm": 1.599604903553732, | |
| "learning_rate": 9.87012066103214e-06, | |
| "loss": 0.4642, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.10102243247367618, | |
| "grad_norm": 1.165592902920284, | |
| "learning_rate": 9.868998677463874e-06, | |
| "loss": 0.2118, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.1013276361971616, | |
| "grad_norm": 1.562687671834839, | |
| "learning_rate": 9.867871932821549e-06, | |
| "loss": 0.3389, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.10163283992064703, | |
| "grad_norm": 1.5690938291630006, | |
| "learning_rate": 9.866740428206935e-06, | |
| "loss": 0.3084, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.10193804364413246, | |
| "grad_norm": 1.6392469885959746, | |
| "learning_rate": 9.865604164726456e-06, | |
| "loss": 0.3935, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.10224324736761789, | |
| "grad_norm": 1.4807871775535164, | |
| "learning_rate": 9.864463143491192e-06, | |
| "loss": 0.4445, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.10254845109110332, | |
| "grad_norm": 1.1926217304533164, | |
| "learning_rate": 9.86331736561687e-06, | |
| "loss": 0.2623, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.10285365481458873, | |
| "grad_norm": 1.5461262133304665, | |
| "learning_rate": 9.862166832223871e-06, | |
| "loss": 0.4161, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.10315885853807416, | |
| "grad_norm": 1.5135662608063911, | |
| "learning_rate": 9.861011544437226e-06, | |
| "loss": 0.2864, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.10346406226155959, | |
| "grad_norm": 1.2723458882899108, | |
| "learning_rate": 9.85985150338662e-06, | |
| "loss": 0.3208, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.10376926598504502, | |
| "grad_norm": 1.5555768560283307, | |
| "learning_rate": 9.858686710206373e-06, | |
| "loss": 0.3341, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.10407446970853045, | |
| "grad_norm": 1.7562240544367693, | |
| "learning_rate": 9.857517166035466e-06, | |
| "loss": 0.4637, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.10437967343201587, | |
| "grad_norm": 3.9759170817857044, | |
| "learning_rate": 9.856342872017515e-06, | |
| "loss": 0.6559, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.1046848771555013, | |
| "grad_norm": 1.6864581247305628, | |
| "learning_rate": 9.855163829300789e-06, | |
| "loss": 0.4889, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.10499008087898673, | |
| "grad_norm": 1.5660173118774432, | |
| "learning_rate": 9.853980039038193e-06, | |
| "loss": 0.2512, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.10529528460247214, | |
| "grad_norm": 1.747919617181712, | |
| "learning_rate": 9.85279150238728e-06, | |
| "loss": 0.4796, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.10560048832595757, | |
| "grad_norm": 2.0064830609007496, | |
| "learning_rate": 9.85159822051024e-06, | |
| "loss": 0.6876, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.105905692049443, | |
| "grad_norm": 3.232705832740987, | |
| "learning_rate": 9.850400194573908e-06, | |
| "loss": 0.5043, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.10621089577292843, | |
| "grad_norm": 1.6750365694523466, | |
| "learning_rate": 9.849197425749753e-06, | |
| "loss": 0.4426, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.10651609949641386, | |
| "grad_norm": 1.2407353388150502, | |
| "learning_rate": 9.847989915213883e-06, | |
| "loss": 0.2728, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.10682130321989929, | |
| "grad_norm": 1.145770915094197, | |
| "learning_rate": 9.846777664147046e-06, | |
| "loss": 0.1828, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10712650694338471, | |
| "grad_norm": 1.0259526919339048, | |
| "learning_rate": 9.845560673734617e-06, | |
| "loss": 0.1815, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.10743171066687014, | |
| "grad_norm": 1.5927868344920564, | |
| "learning_rate": 9.844338945166619e-06, | |
| "loss": 0.4563, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.10773691439035556, | |
| "grad_norm": 1.862511817843432, | |
| "learning_rate": 9.843112479637692e-06, | |
| "loss": 0.4873, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.10804211811384098, | |
| "grad_norm": 1.3346380863318066, | |
| "learning_rate": 9.841881278347122e-06, | |
| "loss": 0.344, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.10834732183732641, | |
| "grad_norm": 1.2558059799685006, | |
| "learning_rate": 9.840645342498817e-06, | |
| "loss": 0.3489, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.10865252556081184, | |
| "grad_norm": 1.734496310003891, | |
| "learning_rate": 9.839404673301318e-06, | |
| "loss": 0.4161, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.10895772928429727, | |
| "grad_norm": 1.5538063811233147, | |
| "learning_rate": 9.838159271967795e-06, | |
| "loss": 0.4971, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.1092629330077827, | |
| "grad_norm": 1.51967394631939, | |
| "learning_rate": 9.836909139716044e-06, | |
| "loss": 0.3422, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.10956813673126813, | |
| "grad_norm": 1.8153898943745974, | |
| "learning_rate": 9.835654277768487e-06, | |
| "loss": 0.5826, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.10987334045475355, | |
| "grad_norm": 1.3956832615282058, | |
| "learning_rate": 9.834394687352168e-06, | |
| "loss": 0.3444, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11017854417823897, | |
| "grad_norm": 1.7028097158357791, | |
| "learning_rate": 9.833130369698764e-06, | |
| "loss": 0.4653, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.1104837479017244, | |
| "grad_norm": 1.984086744142622, | |
| "learning_rate": 9.831861326044564e-06, | |
| "loss": 0.7268, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.11078895162520982, | |
| "grad_norm": 1.6010962575202456, | |
| "learning_rate": 9.830587557630481e-06, | |
| "loss": 0.4979, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.11109415534869525, | |
| "grad_norm": 1.163967768763629, | |
| "learning_rate": 9.829309065702054e-06, | |
| "loss": 0.2721, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.11139935907218068, | |
| "grad_norm": 1.5116526665647354, | |
| "learning_rate": 9.828025851509433e-06, | |
| "loss": 0.4504, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.11170456279566611, | |
| "grad_norm": 1.3126613426949747, | |
| "learning_rate": 9.82673791630739e-06, | |
| "loss": 0.2915, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.11200976651915154, | |
| "grad_norm": 1.2919896962667308, | |
| "learning_rate": 9.825445261355313e-06, | |
| "loss": 0.2862, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.11231497024263697, | |
| "grad_norm": 1.7465438774619029, | |
| "learning_rate": 9.824147887917201e-06, | |
| "loss": 0.5347, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.11262017396612238, | |
| "grad_norm": 1.5226131458552274, | |
| "learning_rate": 9.822845797261676e-06, | |
| "loss": 0.4764, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.11292537768960781, | |
| "grad_norm": 1.157991129368488, | |
| "learning_rate": 9.82153899066196e-06, | |
| "loss": 0.2002, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11323058141309324, | |
| "grad_norm": 0.9301400886155634, | |
| "learning_rate": 9.8202274693959e-06, | |
| "loss": 0.1684, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.11353578513657867, | |
| "grad_norm": 1.502487900950231, | |
| "learning_rate": 9.818911234745942e-06, | |
| "loss": 0.3093, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1138409888600641, | |
| "grad_norm": 1.9784385010881411, | |
| "learning_rate": 9.817590287999149e-06, | |
| "loss": 0.4803, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.11414619258354952, | |
| "grad_norm": 0.9195545540370847, | |
| "learning_rate": 9.816264630447186e-06, | |
| "loss": 0.2159, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.11445139630703495, | |
| "grad_norm": 1.9332444000616873, | |
| "learning_rate": 9.814934263386324e-06, | |
| "loss": 0.704, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.11475660003052038, | |
| "grad_norm": 1.2982271338494382, | |
| "learning_rate": 9.813599188117447e-06, | |
| "loss": 0.2579, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.11506180375400579, | |
| "grad_norm": 1.4400207145843447, | |
| "learning_rate": 9.812259405946033e-06, | |
| "loss": 0.2581, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.11536700747749122, | |
| "grad_norm": 1.6214016909138647, | |
| "learning_rate": 9.810914918182168e-06, | |
| "loss": 0.3348, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.11567221120097665, | |
| "grad_norm": 1.3037284021243603, | |
| "learning_rate": 9.80956572614054e-06, | |
| "loss": 0.2009, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.11597741492446208, | |
| "grad_norm": 1.4597999101205241, | |
| "learning_rate": 9.808211831140434e-06, | |
| "loss": 0.4417, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1162826186479475, | |
| "grad_norm": 1.3133108268461724, | |
| "learning_rate": 9.806853234505736e-06, | |
| "loss": 0.3305, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.11658782237143293, | |
| "grad_norm": 1.455145466929522, | |
| "learning_rate": 9.805489937564926e-06, | |
| "loss": 0.4611, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.11689302609491836, | |
| "grad_norm": 1.3560358010182432, | |
| "learning_rate": 9.804121941651085e-06, | |
| "loss": 0.2624, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.11719822981840379, | |
| "grad_norm": 1.5730489544680661, | |
| "learning_rate": 9.802749248101885e-06, | |
| "loss": 0.5959, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.1175034335418892, | |
| "grad_norm": 1.6080011966995038, | |
| "learning_rate": 9.801371858259594e-06, | |
| "loss": 0.3077, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.11780863726537463, | |
| "grad_norm": 1.3452073998773173, | |
| "learning_rate": 9.799989773471071e-06, | |
| "loss": 0.3877, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.11811384098886006, | |
| "grad_norm": 1.521531541360139, | |
| "learning_rate": 9.798602995087764e-06, | |
| "loss": 0.2978, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.11841904471234549, | |
| "grad_norm": 1.705542362457564, | |
| "learning_rate": 9.797211524465715e-06, | |
| "loss": 0.4298, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.11872424843583092, | |
| "grad_norm": 2.110762528312395, | |
| "learning_rate": 9.79581536296555e-06, | |
| "loss": 0.4766, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.11902945215931635, | |
| "grad_norm": 1.8442245131411212, | |
| "learning_rate": 9.794414511952484e-06, | |
| "loss": 0.2988, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.11933465588280177, | |
| "grad_norm": 1.2950053698748747, | |
| "learning_rate": 9.793008972796318e-06, | |
| "loss": 0.2907, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.1196398596062872, | |
| "grad_norm": 1.1692504994324242, | |
| "learning_rate": 9.791598746871438e-06, | |
| "loss": 0.2584, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.11994506332977262, | |
| "grad_norm": 1.7847769770487698, | |
| "learning_rate": 9.790183835556806e-06, | |
| "loss": 0.4874, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.12025026705325804, | |
| "grad_norm": 1.2562325547558533, | |
| "learning_rate": 9.788764240235976e-06, | |
| "loss": 0.3739, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.12055547077674347, | |
| "grad_norm": 1.340465345857484, | |
| "learning_rate": 9.787339962297076e-06, | |
| "loss": 0.1911, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.1208606745002289, | |
| "grad_norm": 1.5667496682899116, | |
| "learning_rate": 9.785911003132811e-06, | |
| "loss": 0.4243, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.12116587822371433, | |
| "grad_norm": 1.81118055527733, | |
| "learning_rate": 9.78447736414047e-06, | |
| "loss": 0.5246, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.12147108194719976, | |
| "grad_norm": 1.4053388398772566, | |
| "learning_rate": 9.783039046721912e-06, | |
| "loss": 0.2964, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.12177628567068519, | |
| "grad_norm": 1.5253960954907508, | |
| "learning_rate": 9.781596052283573e-06, | |
| "loss": 0.3939, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.12208148939417061, | |
| "grad_norm": 1.4250712259795966, | |
| "learning_rate": 9.780148382236465e-06, | |
| "loss": 0.2338, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12238669311765603, | |
| "grad_norm": 1.6445234212439845, | |
| "learning_rate": 9.778696037996167e-06, | |
| "loss": 0.6218, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.12269189684114146, | |
| "grad_norm": 1.625655408880631, | |
| "learning_rate": 9.777239020982834e-06, | |
| "loss": 0.4565, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.12299710056462689, | |
| "grad_norm": 1.2734470484088918, | |
| "learning_rate": 9.775777332621184e-06, | |
| "loss": 0.3673, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.12330230428811231, | |
| "grad_norm": 1.5775928166525761, | |
| "learning_rate": 9.774310974340506e-06, | |
| "loss": 0.3673, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.12360750801159774, | |
| "grad_norm": 1.2252627733176171, | |
| "learning_rate": 9.772839947574658e-06, | |
| "loss": 0.4064, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.12391271173508317, | |
| "grad_norm": 1.7189364245488037, | |
| "learning_rate": 9.77136425376206e-06, | |
| "loss": 0.4633, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.1242179154585686, | |
| "grad_norm": 1.7558994988767551, | |
| "learning_rate": 9.769883894345693e-06, | |
| "loss": 0.5655, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.12452311918205403, | |
| "grad_norm": 1.4802133948901528, | |
| "learning_rate": 9.768398870773108e-06, | |
| "loss": 0.4175, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.12482832290553945, | |
| "grad_norm": 1.3302751078782091, | |
| "learning_rate": 9.766909184496408e-06, | |
| "loss": 0.3468, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.12513352662902488, | |
| "grad_norm": 1.704721594870751, | |
| "learning_rate": 9.765414836972262e-06, | |
| "loss": 0.4898, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1254387303525103, | |
| "grad_norm": 1.2131375907547226, | |
| "learning_rate": 9.763915829661891e-06, | |
| "loss": 0.3886, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.12574393407599574, | |
| "grad_norm": 1.3401723742285034, | |
| "learning_rate": 9.76241216403108e-06, | |
| "loss": 0.4102, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.12604913779948115, | |
| "grad_norm": 1.6513004120286505, | |
| "learning_rate": 9.760903841550164e-06, | |
| "loss": 0.433, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.12635434152296657, | |
| "grad_norm": 2.2390646637016327, | |
| "learning_rate": 9.75939086369403e-06, | |
| "loss": 0.4213, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.126659545246452, | |
| "grad_norm": 1.585281461881457, | |
| "learning_rate": 9.757873231942122e-06, | |
| "loss": 0.2145, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.12696474896993742, | |
| "grad_norm": 1.616278056338578, | |
| "learning_rate": 9.756350947778431e-06, | |
| "loss": 0.3786, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.12726995269342287, | |
| "grad_norm": 1.3896314522086528, | |
| "learning_rate": 9.754824012691499e-06, | |
| "loss": 0.3886, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.12757515641690828, | |
| "grad_norm": 1.5388381717065547, | |
| "learning_rate": 9.753292428174416e-06, | |
| "loss": 0.4901, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.12788036014039372, | |
| "grad_norm": 1.2099556172325527, | |
| "learning_rate": 9.75175619572482e-06, | |
| "loss": 0.2379, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.12818556386387914, | |
| "grad_norm": 2.578673148403812, | |
| "learning_rate": 9.750215316844886e-06, | |
| "loss": 0.3093, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12849076758736458, | |
| "grad_norm": 1.3557950817043143, | |
| "learning_rate": 9.748669793041345e-06, | |
| "loss": 0.2901, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.12879597131085, | |
| "grad_norm": 1.8058846206435177, | |
| "learning_rate": 9.747119625825459e-06, | |
| "loss": 0.5222, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1291011750343354, | |
| "grad_norm": 1.449090093366816, | |
| "learning_rate": 9.745564816713034e-06, | |
| "loss": 0.3738, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.12940637875782085, | |
| "grad_norm": 1.3181509799847857, | |
| "learning_rate": 9.74400536722442e-06, | |
| "loss": 0.2624, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.12971158248130626, | |
| "grad_norm": 2.1599699357377675, | |
| "learning_rate": 9.742441278884496e-06, | |
| "loss": 0.4838, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1300167862047917, | |
| "grad_norm": 1.6258096878519581, | |
| "learning_rate": 9.740872553222685e-06, | |
| "loss": 0.4999, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.13032198992827712, | |
| "grad_norm": 1.5046387312958875, | |
| "learning_rate": 9.739299191772937e-06, | |
| "loss": 0.3095, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.13062719365176256, | |
| "grad_norm": 3.2565177704876653, | |
| "learning_rate": 9.737721196073742e-06, | |
| "loss": 0.4886, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.13093239737524798, | |
| "grad_norm": 5.072257540244327, | |
| "learning_rate": 9.736138567668117e-06, | |
| "loss": 0.2433, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.1312376010987334, | |
| "grad_norm": 1.5984597272623309, | |
| "learning_rate": 9.734551308103607e-06, | |
| "loss": 0.3274, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.13154280482221883, | |
| "grad_norm": 1.9766310728941525, | |
| "learning_rate": 9.732959418932297e-06, | |
| "loss": 0.5601, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.13184800854570425, | |
| "grad_norm": 1.8500535183975242, | |
| "learning_rate": 9.731362901710783e-06, | |
| "loss": 0.8055, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1321532122691897, | |
| "grad_norm": 1.5082490121163843, | |
| "learning_rate": 9.7297617580002e-06, | |
| "loss": 0.3715, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.1324584159926751, | |
| "grad_norm": 1.3484742708817405, | |
| "learning_rate": 9.728155989366198e-06, | |
| "loss": 0.2316, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.13276361971616055, | |
| "grad_norm": 1.4064281774599339, | |
| "learning_rate": 9.726545597378953e-06, | |
| "loss": 0.2529, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.13306882343964596, | |
| "grad_norm": 1.3619428913058238, | |
| "learning_rate": 9.724930583613164e-06, | |
| "loss": 0.2675, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.1333740271631314, | |
| "grad_norm": 1.8072201350967787, | |
| "learning_rate": 9.723310949648044e-06, | |
| "loss": 0.4832, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.13367923088661682, | |
| "grad_norm": 1.376837369774298, | |
| "learning_rate": 9.721686697067328e-06, | |
| "loss": 0.3855, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.13398443461010223, | |
| "grad_norm": 1.577084491012061, | |
| "learning_rate": 9.720057827459264e-06, | |
| "loss": 0.3307, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.13428963833358767, | |
| "grad_norm": 1.7335930513549882, | |
| "learning_rate": 9.718424342416619e-06, | |
| "loss": 0.3794, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1345948420570731, | |
| "grad_norm": 1.3979759520207362, | |
| "learning_rate": 9.716786243536672e-06, | |
| "loss": 0.17, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.13490004578055853, | |
| "grad_norm": 1.5166020341173607, | |
| "learning_rate": 9.715143532421208e-06, | |
| "loss": 0.2782, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.13520524950404395, | |
| "grad_norm": 1.5547284451560954, | |
| "learning_rate": 9.71349621067653e-06, | |
| "loss": 0.2398, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.1355104532275294, | |
| "grad_norm": 1.4706416022210265, | |
| "learning_rate": 9.711844279913443e-06, | |
| "loss": 0.3486, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.1358156569510148, | |
| "grad_norm": 1.7726813901729983, | |
| "learning_rate": 9.710187741747264e-06, | |
| "loss": 0.4938, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.13612086067450022, | |
| "grad_norm": 1.3682846898358798, | |
| "learning_rate": 9.708526597797812e-06, | |
| "loss": 0.3399, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.13642606439798566, | |
| "grad_norm": 1.173206742476641, | |
| "learning_rate": 9.70686084968941e-06, | |
| "loss": 0.209, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.13673126812147107, | |
| "grad_norm": 1.5006667880365117, | |
| "learning_rate": 9.705190499050885e-06, | |
| "loss": 0.3918, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.13703647184495651, | |
| "grad_norm": 1.4081281623516984, | |
| "learning_rate": 9.70351554751556e-06, | |
| "loss": 0.3478, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.13734167556844193, | |
| "grad_norm": 1.3521857616183288, | |
| "learning_rate": 9.701835996721267e-06, | |
| "loss": 0.2527, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13764687929192737, | |
| "grad_norm": 1.609701105059192, | |
| "learning_rate": 9.70015184831032e-06, | |
| "loss": 0.4019, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.13795208301541279, | |
| "grad_norm": 1.5247656094886828, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 0.3393, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.13825728673889823, | |
| "grad_norm": 1.7521263555747508, | |
| "learning_rate": 9.696769765230244e-06, | |
| "loss": 0.4711, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.13856249046238364, | |
| "grad_norm": 1.1750952661731386, | |
| "learning_rate": 9.695071833868233e-06, | |
| "loss": 0.3209, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.13886769418586906, | |
| "grad_norm": 1.1097718724387464, | |
| "learning_rate": 9.693369311503801e-06, | |
| "loss": 0.2077, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.1391728979093545, | |
| "grad_norm": 1.3477077308441543, | |
| "learning_rate": 9.691662199801735e-06, | |
| "loss": 0.3387, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.1394781016328399, | |
| "grad_norm": 1.5926509134118427, | |
| "learning_rate": 9.689950500431306e-06, | |
| "loss": 0.4737, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.13978330535632535, | |
| "grad_norm": 1.238821976588628, | |
| "learning_rate": 9.688234215066274e-06, | |
| "loss": 0.2941, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.14008850907981077, | |
| "grad_norm": 1.283103486116252, | |
| "learning_rate": 9.68651334538488e-06, | |
| "loss": 0.2298, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.1403937128032962, | |
| "grad_norm": 1.3964480095243228, | |
| "learning_rate": 9.684787893069852e-06, | |
| "loss": 0.2755, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14069891652678163, | |
| "grad_norm": 4.5768637107742185, | |
| "learning_rate": 9.683057859808394e-06, | |
| "loss": 0.3969, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.14100412025026704, | |
| "grad_norm": 1.238974584705195, | |
| "learning_rate": 9.681323247292193e-06, | |
| "loss": 0.302, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.14130932397375248, | |
| "grad_norm": 1.484367852444757, | |
| "learning_rate": 9.679584057217412e-06, | |
| "loss": 0.3771, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.1416145276972379, | |
| "grad_norm": 1.8892302609803566, | |
| "learning_rate": 9.677840291284693e-06, | |
| "loss": 0.4296, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.14191973142072334, | |
| "grad_norm": 1.9264735588835349, | |
| "learning_rate": 9.676091951199147e-06, | |
| "loss": 0.1789, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.14222493514420875, | |
| "grad_norm": 1.6779527799580054, | |
| "learning_rate": 9.674339038670362e-06, | |
| "loss": 0.3394, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.1425301388676942, | |
| "grad_norm": 1.4269803792009585, | |
| "learning_rate": 9.672581555412396e-06, | |
| "loss": 0.3436, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.1428353425911796, | |
| "grad_norm": 1.6997561953129157, | |
| "learning_rate": 9.67081950314378e-06, | |
| "loss": 0.4244, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.14314054631466505, | |
| "grad_norm": 1.5847658235861504, | |
| "learning_rate": 9.669052883587503e-06, | |
| "loss": 0.4061, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.14344575003815047, | |
| "grad_norm": 1.5000132719088555, | |
| "learning_rate": 9.667281698471032e-06, | |
| "loss": 0.2349, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14375095376163588, | |
| "grad_norm": 1.4700398564287758, | |
| "learning_rate": 9.665505949526288e-06, | |
| "loss": 0.4265, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.14405615748512132, | |
| "grad_norm": 1.1929108671224367, | |
| "learning_rate": 9.663725638489662e-06, | |
| "loss": 0.3107, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.14436136120860674, | |
| "grad_norm": 1.1577689014622543, | |
| "learning_rate": 9.661940767102001e-06, | |
| "loss": 0.1722, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.14466656493209218, | |
| "grad_norm": 2.093772204836488, | |
| "learning_rate": 9.660151337108617e-06, | |
| "loss": 0.4278, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.1449717686555776, | |
| "grad_norm": 1.6654665758895533, | |
| "learning_rate": 9.658357350259274e-06, | |
| "loss": 0.3394, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.14527697237906304, | |
| "grad_norm": 1.2202451289258887, | |
| "learning_rate": 9.656558808308193e-06, | |
| "loss": 0.3433, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.14558217610254845, | |
| "grad_norm": 1.5418473816537024, | |
| "learning_rate": 9.654755713014052e-06, | |
| "loss": 0.4099, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.14588737982603386, | |
| "grad_norm": 1.867172943441559, | |
| "learning_rate": 9.652948066139978e-06, | |
| "loss": 0.4445, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.1461925835495193, | |
| "grad_norm": 1.2274320399473075, | |
| "learning_rate": 9.651135869453552e-06, | |
| "loss": 0.3687, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.14649778727300472, | |
| "grad_norm": 1.3632570280967384, | |
| "learning_rate": 9.649319124726799e-06, | |
| "loss": 0.3554, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14680299099649016, | |
| "grad_norm": 1.9368328668689925, | |
| "learning_rate": 9.647497833736197e-06, | |
| "loss": 0.433, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.14710819471997558, | |
| "grad_norm": 1.7750440538339176, | |
| "learning_rate": 9.645671998262668e-06, | |
| "loss": 0.4275, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.14741339844346102, | |
| "grad_norm": 1.7620981827052555, | |
| "learning_rate": 9.643841620091572e-06, | |
| "loss": 0.4327, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.14771860216694643, | |
| "grad_norm": 1.705808013569081, | |
| "learning_rate": 9.642006701012719e-06, | |
| "loss": 0.2914, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.14802380589043188, | |
| "grad_norm": 1.4490666476731855, | |
| "learning_rate": 9.640167242820356e-06, | |
| "loss": 0.4171, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1483290096139173, | |
| "grad_norm": 1.4017898188106575, | |
| "learning_rate": 9.638323247313167e-06, | |
| "loss": 0.3609, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.1486342133374027, | |
| "grad_norm": 1.273585773097182, | |
| "learning_rate": 9.636474716294275e-06, | |
| "loss": 0.2781, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.14893941706088815, | |
| "grad_norm": 1.4426438722313946, | |
| "learning_rate": 9.634621651571235e-06, | |
| "loss": 0.3446, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.14924462078437356, | |
| "grad_norm": 1.3396568501630033, | |
| "learning_rate": 9.632764054956042e-06, | |
| "loss": 0.3209, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.149549824507859, | |
| "grad_norm": 1.869987463373602, | |
| "learning_rate": 9.630901928265113e-06, | |
| "loss": 0.6676, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14985502823134442, | |
| "grad_norm": 1.3995020559881168, | |
| "learning_rate": 9.6290352733193e-06, | |
| "loss": 0.443, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.15016023195482986, | |
| "grad_norm": 1.5646801044524754, | |
| "learning_rate": 9.627164091943886e-06, | |
| "loss": 0.4808, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.15046543567831527, | |
| "grad_norm": 1.4064811516565765, | |
| "learning_rate": 9.625288385968572e-06, | |
| "loss": 0.3281, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.15077063940180072, | |
| "grad_norm": 1.3603824959747954, | |
| "learning_rate": 9.623408157227493e-06, | |
| "loss": 0.4203, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.15107584312528613, | |
| "grad_norm": 1.4648922518012057, | |
| "learning_rate": 9.621523407559193e-06, | |
| "loss": 0.4691, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.15138104684877154, | |
| "grad_norm": 1.2898880598011702, | |
| "learning_rate": 9.619634138806653e-06, | |
| "loss": 0.22, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.151686250572257, | |
| "grad_norm": 1.629312149082348, | |
| "learning_rate": 9.61774035281726e-06, | |
| "loss": 0.3852, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.1519914542957424, | |
| "grad_norm": 1.4209940770003642, | |
| "learning_rate": 9.615842051442825e-06, | |
| "loss": 0.3434, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.15229665801922784, | |
| "grad_norm": 1.5981471419786573, | |
| "learning_rate": 9.613939236539571e-06, | |
| "loss": 0.4405, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.15260186174271326, | |
| "grad_norm": 1.6345273007096384, | |
| "learning_rate": 9.612031909968138e-06, | |
| "loss": 0.4621, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1529070654661987, | |
| "grad_norm": 1.7946527422515466, | |
| "learning_rate": 9.610120073593574e-06, | |
| "loss": 0.4215, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.1532122691896841, | |
| "grad_norm": 1.9696007964079152, | |
| "learning_rate": 9.608203729285337e-06, | |
| "loss": 0.2416, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.15351747291316953, | |
| "grad_norm": 1.186483550480542, | |
| "learning_rate": 9.606282878917296e-06, | |
| "loss": 0.1656, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.15382267663665497, | |
| "grad_norm": 1.3709781521921298, | |
| "learning_rate": 9.604357524367723e-06, | |
| "loss": 0.3374, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.15412788036014038, | |
| "grad_norm": 1.4744363645402312, | |
| "learning_rate": 9.602427667519297e-06, | |
| "loss": 0.3472, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.15443308408362583, | |
| "grad_norm": 1.7032963377834875, | |
| "learning_rate": 9.600493310259098e-06, | |
| "loss": 0.4352, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.15473828780711124, | |
| "grad_norm": 1.487020684601837, | |
| "learning_rate": 9.598554454478608e-06, | |
| "loss": 0.2661, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.15504349153059668, | |
| "grad_norm": 1.2499312381905126, | |
| "learning_rate": 9.596611102073703e-06, | |
| "loss": 0.2785, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.1553486952540821, | |
| "grad_norm": 1.529878897767237, | |
| "learning_rate": 9.594663254944664e-06, | |
| "loss": 0.3768, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.15565389897756754, | |
| "grad_norm": 1.5214931502474798, | |
| "learning_rate": 9.592710914996162e-06, | |
| "loss": 0.5126, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.15595910270105295, | |
| "grad_norm": 1.3836939529329817, | |
| "learning_rate": 9.590754084137259e-06, | |
| "loss": 0.3011, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.15626430642453837, | |
| "grad_norm": 1.4833094737816435, | |
| "learning_rate": 9.588792764281413e-06, | |
| "loss": 0.4761, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.1565695101480238, | |
| "grad_norm": 1.2346664760598378, | |
| "learning_rate": 9.586826957346473e-06, | |
| "loss": 0.2454, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.15687471387150922, | |
| "grad_norm": 1.6476636760719772, | |
| "learning_rate": 9.584856665254667e-06, | |
| "loss": 0.2867, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.15717991759499467, | |
| "grad_norm": 1.5637210652382973, | |
| "learning_rate": 9.58288188993262e-06, | |
| "loss": 0.2899, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.15748512131848008, | |
| "grad_norm": 1.3205463270086828, | |
| "learning_rate": 9.580902633311331e-06, | |
| "loss": 0.3756, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.15779032504196552, | |
| "grad_norm": 1.3975127161911243, | |
| "learning_rate": 9.578918897326186e-06, | |
| "loss": 0.5111, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.15809552876545094, | |
| "grad_norm": 1.4459383872321914, | |
| "learning_rate": 9.57693068391695e-06, | |
| "loss": 0.4283, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.15840073248893635, | |
| "grad_norm": 1.7061785001760192, | |
| "learning_rate": 9.574937995027767e-06, | |
| "loss": 0.3702, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.1587059362124218, | |
| "grad_norm": 1.822247438656905, | |
| "learning_rate": 9.572940832607157e-06, | |
| "loss": 0.3406, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1590111399359072, | |
| "grad_norm": 2.44932543751886, | |
| "learning_rate": 9.570939198608013e-06, | |
| "loss": 0.3118, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.15931634365939265, | |
| "grad_norm": 1.6119202421775476, | |
| "learning_rate": 9.5689330949876e-06, | |
| "loss": 0.3903, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.15962154738287807, | |
| "grad_norm": 1.4848330945324375, | |
| "learning_rate": 9.56692252370756e-06, | |
| "loss": 0.2336, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.1599267511063635, | |
| "grad_norm": 1.410632362194396, | |
| "learning_rate": 9.564907486733893e-06, | |
| "loss": 0.2749, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.16023195482984892, | |
| "grad_norm": 1.596052638125191, | |
| "learning_rate": 9.562887986036975e-06, | |
| "loss": 0.4752, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.16053715855333436, | |
| "grad_norm": 1.7668740909494465, | |
| "learning_rate": 9.560864023591541e-06, | |
| "loss": 0.4457, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.16084236227681978, | |
| "grad_norm": 1.4009268145182425, | |
| "learning_rate": 9.558835601376692e-06, | |
| "loss": 0.2615, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.1611475660003052, | |
| "grad_norm": 1.7299333351168085, | |
| "learning_rate": 9.55680272137589e-06, | |
| "loss": 0.5216, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.16145276972379063, | |
| "grad_norm": 1.398003196407042, | |
| "learning_rate": 9.554765385576951e-06, | |
| "loss": 0.2917, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.16175797344727605, | |
| "grad_norm": 1.4037115710357768, | |
| "learning_rate": 9.552723595972055e-06, | |
| "loss": 0.2794, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1620631771707615, | |
| "grad_norm": 1.4104804936912443, | |
| "learning_rate": 9.550677354557734e-06, | |
| "loss": 0.3294, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.1623683808942469, | |
| "grad_norm": 1.3043707731550427, | |
| "learning_rate": 9.548626663334872e-06, | |
| "loss": 0.3542, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.16267358461773235, | |
| "grad_norm": 1.4523817232860987, | |
| "learning_rate": 9.546571524308707e-06, | |
| "loss": 0.4394, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.16297878834121776, | |
| "grad_norm": 1.2378417959119585, | |
| "learning_rate": 9.544511939488823e-06, | |
| "loss": 0.2859, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.16328399206470318, | |
| "grad_norm": 1.1623856416463947, | |
| "learning_rate": 9.542447910889152e-06, | |
| "loss": 0.2682, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.16358919578818862, | |
| "grad_norm": 1.503292443987416, | |
| "learning_rate": 9.540379440527974e-06, | |
| "loss": 0.4513, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.16389439951167403, | |
| "grad_norm": 1.278183220840744, | |
| "learning_rate": 9.538306530427908e-06, | |
| "loss": 0.2486, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.16419960323515947, | |
| "grad_norm": 1.477438530587252, | |
| "learning_rate": 9.536229182615919e-06, | |
| "loss": 0.4748, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.1645048069586449, | |
| "grad_norm": 1.161000468008389, | |
| "learning_rate": 9.534147399123308e-06, | |
| "loss": 0.3166, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.16481001068213033, | |
| "grad_norm": 1.3151690275104762, | |
| "learning_rate": 9.532061181985713e-06, | |
| "loss": 0.3547, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.16511521440561575, | |
| "grad_norm": 1.750297725419665, | |
| "learning_rate": 9.529970533243112e-06, | |
| "loss": 0.4156, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.1654204181291012, | |
| "grad_norm": 1.3465531852012238, | |
| "learning_rate": 9.52787545493981e-06, | |
| "loss": 0.3366, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.1657256218525866, | |
| "grad_norm": 1.5440141830188223, | |
| "learning_rate": 9.525775949124447e-06, | |
| "loss": 0.3376, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.16603082557607202, | |
| "grad_norm": 1.5415130315298482, | |
| "learning_rate": 9.523672017849995e-06, | |
| "loss": 0.541, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.16633602929955746, | |
| "grad_norm": 1.7579856956776627, | |
| "learning_rate": 9.521563663173746e-06, | |
| "loss": 0.4806, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.16664123302304287, | |
| "grad_norm": 1.7818866729621587, | |
| "learning_rate": 9.519450887157324e-06, | |
| "loss": 0.5464, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.16694643674652831, | |
| "grad_norm": 1.6064088023016758, | |
| "learning_rate": 9.517333691866672e-06, | |
| "loss": 0.43, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.16725164047001373, | |
| "grad_norm": 1.4778043177300115, | |
| "learning_rate": 9.515212079372059e-06, | |
| "loss": 0.4399, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.16755684419349917, | |
| "grad_norm": 2.027608100840915, | |
| "learning_rate": 9.513086051748069e-06, | |
| "loss": 0.4069, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.16786204791698459, | |
| "grad_norm": 1.5850802204517391, | |
| "learning_rate": 9.510955611073605e-06, | |
| "loss": 0.3827, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16816725164047, | |
| "grad_norm": 1.293889481401633, | |
| "learning_rate": 9.508820759431883e-06, | |
| "loss": 0.2572, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.16847245536395544, | |
| "grad_norm": 1.842230801320139, | |
| "learning_rate": 9.506681498910437e-06, | |
| "loss": 0.5275, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.16877765908744086, | |
| "grad_norm": 1.1873629641518748, | |
| "learning_rate": 9.50453783160111e-06, | |
| "loss": 0.3282, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.1690828628109263, | |
| "grad_norm": 1.6093453710106354, | |
| "learning_rate": 9.50238975960005e-06, | |
| "loss": 0.5784, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.1693880665344117, | |
| "grad_norm": 1.8900657646403543, | |
| "learning_rate": 9.500237285007719e-06, | |
| "loss": 0.5224, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.16969327025789716, | |
| "grad_norm": 1.4803576264157936, | |
| "learning_rate": 9.498080409928878e-06, | |
| "loss": 0.3726, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.16999847398138257, | |
| "grad_norm": 1.7675886459458987, | |
| "learning_rate": 9.495919136472595e-06, | |
| "loss": 0.4656, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.170303677704868, | |
| "grad_norm": 1.7910944298366285, | |
| "learning_rate": 9.493753466752236e-06, | |
| "loss": 0.8076, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.17060888142835343, | |
| "grad_norm": 1.4678825901279975, | |
| "learning_rate": 9.49158340288547e-06, | |
| "loss": 0.3575, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.17091408515183884, | |
| "grad_norm": 1.411596350121475, | |
| "learning_rate": 9.489408946994256e-06, | |
| "loss": 0.3316, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17121928887532428, | |
| "grad_norm": 1.2961064094904746, | |
| "learning_rate": 9.487230101204855e-06, | |
| "loss": 0.3634, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.1715244925988097, | |
| "grad_norm": 1.2525457208629842, | |
| "learning_rate": 9.485046867647816e-06, | |
| "loss": 0.368, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.17182969632229514, | |
| "grad_norm": 1.5857071732762902, | |
| "learning_rate": 9.48285924845798e-06, | |
| "loss": 0.4546, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.17213490004578055, | |
| "grad_norm": 1.4510716523054648, | |
| "learning_rate": 9.480667245774474e-06, | |
| "loss": 0.2739, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.172440103769266, | |
| "grad_norm": 1.7890268513821783, | |
| "learning_rate": 9.478470861740716e-06, | |
| "loss": 0.4085, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.1727453074927514, | |
| "grad_norm": 1.2143129127325427, | |
| "learning_rate": 9.476270098504405e-06, | |
| "loss": 0.2669, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.17305051121623685, | |
| "grad_norm": 1.4725539275626134, | |
| "learning_rate": 9.474064958217524e-06, | |
| "loss": 0.3474, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.17335571493972227, | |
| "grad_norm": 1.6648347880329453, | |
| "learning_rate": 9.471855443036333e-06, | |
| "loss": 0.3059, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.17366091866320768, | |
| "grad_norm": 1.4826208089202084, | |
| "learning_rate": 9.469641555121372e-06, | |
| "loss": 0.3309, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.17396612238669312, | |
| "grad_norm": 1.2087588985869038, | |
| "learning_rate": 9.467423296637458e-06, | |
| "loss": 0.2765, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17427132611017854, | |
| "grad_norm": 1.5271503296745377, | |
| "learning_rate": 9.465200669753678e-06, | |
| "loss": 0.4388, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.17457652983366398, | |
| "grad_norm": 1.720167996940521, | |
| "learning_rate": 9.462973676643395e-06, | |
| "loss": 0.4693, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.1748817335571494, | |
| "grad_norm": 1.4666930059033054, | |
| "learning_rate": 9.46074231948424e-06, | |
| "loss": 0.354, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.17518693728063484, | |
| "grad_norm": 1.8597430337784902, | |
| "learning_rate": 9.458506600458106e-06, | |
| "loss": 0.3892, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.17549214100412025, | |
| "grad_norm": 0.9696512337091734, | |
| "learning_rate": 9.456266521751162e-06, | |
| "loss": 0.2294, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.17579734472760566, | |
| "grad_norm": 1.781498807963985, | |
| "learning_rate": 9.454022085553829e-06, | |
| "loss": 0.4873, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.1761025484510911, | |
| "grad_norm": 1.3337716733106453, | |
| "learning_rate": 9.451773294060797e-06, | |
| "loss": 0.3031, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.17640775217457652, | |
| "grad_norm": 1.7055986970891146, | |
| "learning_rate": 9.449520149471008e-06, | |
| "loss": 0.6405, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.17671295589806196, | |
| "grad_norm": 1.5188407211523098, | |
| "learning_rate": 9.447262653987668e-06, | |
| "loss": 0.3739, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.17701815962154738, | |
| "grad_norm": 0.9308569789252137, | |
| "learning_rate": 9.445000809818231e-06, | |
| "loss": 0.2505, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.17732336334503282, | |
| "grad_norm": 1.5000700139554115, | |
| "learning_rate": 9.442734619174408e-06, | |
| "loss": 0.4158, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.17762856706851823, | |
| "grad_norm": 1.3548714588751447, | |
| "learning_rate": 9.440464084272157e-06, | |
| "loss": 0.3911, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.17793377079200368, | |
| "grad_norm": 1.3241166157002833, | |
| "learning_rate": 9.438189207331684e-06, | |
| "loss": 0.371, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.1782389745154891, | |
| "grad_norm": 1.251287417238196, | |
| "learning_rate": 9.435909990577442e-06, | |
| "loss": 0.4543, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.1785441782389745, | |
| "grad_norm": 1.6243898273186124, | |
| "learning_rate": 9.433626436238128e-06, | |
| "loss": 0.3955, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.17884938196245995, | |
| "grad_norm": 1.5933520250274826, | |
| "learning_rate": 9.43133854654668e-06, | |
| "loss": 0.4232, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.17915458568594536, | |
| "grad_norm": 1.271604048902552, | |
| "learning_rate": 9.429046323740275e-06, | |
| "loss": 0.186, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.1794597894094308, | |
| "grad_norm": 1.7480757897399084, | |
| "learning_rate": 9.426749770060325e-06, | |
| "loss": 0.3198, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.17976499313291622, | |
| "grad_norm": 1.6582908292655634, | |
| "learning_rate": 9.424448887752485e-06, | |
| "loss": 0.4497, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.18007019685640166, | |
| "grad_norm": 1.514808198729056, | |
| "learning_rate": 9.42214367906663e-06, | |
| "loss": 0.3135, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.18037540057988707, | |
| "grad_norm": 1.33735933544563, | |
| "learning_rate": 9.419834146256875e-06, | |
| "loss": 0.1512, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.1806806043033725, | |
| "grad_norm": 1.7983955915325747, | |
| "learning_rate": 9.417520291581562e-06, | |
| "loss": 0.397, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.18098580802685793, | |
| "grad_norm": 1.8357965942853254, | |
| "learning_rate": 9.415202117303253e-06, | |
| "loss": 0.3479, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.18129101175034334, | |
| "grad_norm": 2.1098502294891084, | |
| "learning_rate": 9.412879625688742e-06, | |
| "loss": 0.6081, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.1815962154738288, | |
| "grad_norm": 1.7002717361934219, | |
| "learning_rate": 9.410552819009041e-06, | |
| "loss": 0.2335, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.1819014191973142, | |
| "grad_norm": 1.6858392243118179, | |
| "learning_rate": 9.408221699539381e-06, | |
| "loss": 0.3502, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.18220662292079964, | |
| "grad_norm": 1.9779389304442994, | |
| "learning_rate": 9.40588626955921e-06, | |
| "loss": 0.5023, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.18251182664428506, | |
| "grad_norm": 1.984831269441273, | |
| "learning_rate": 9.403546531352192e-06, | |
| "loss": 0.1808, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.1828170303677705, | |
| "grad_norm": 1.7825668553552305, | |
| "learning_rate": 9.401202487206205e-06, | |
| "loss": 0.2451, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.18312223409125591, | |
| "grad_norm": 1.7395479880147604, | |
| "learning_rate": 9.398854139413332e-06, | |
| "loss": 0.4586, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18342743781474133, | |
| "grad_norm": 1.7910877075998561, | |
| "learning_rate": 9.396501490269871e-06, | |
| "loss": 0.4334, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.18373264153822677, | |
| "grad_norm": 1.671153260357796, | |
| "learning_rate": 9.394144542076321e-06, | |
| "loss": 0.3457, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.18403784526171219, | |
| "grad_norm": 2.2927735747628057, | |
| "learning_rate": 9.391783297137392e-06, | |
| "loss": 0.5006, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.18434304898519763, | |
| "grad_norm": 1.4375155904173251, | |
| "learning_rate": 9.389417757761983e-06, | |
| "loss": 0.3412, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.18464825270868304, | |
| "grad_norm": 1.0876679937459988, | |
| "learning_rate": 9.387047926263205e-06, | |
| "loss": 0.2323, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.18495345643216848, | |
| "grad_norm": 1.5691166969698962, | |
| "learning_rate": 9.384673804958357e-06, | |
| "loss": 0.3929, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.1852586601556539, | |
| "grad_norm": 1.2508041656129554, | |
| "learning_rate": 9.38229539616894e-06, | |
| "loss": 0.2123, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.1855638638791393, | |
| "grad_norm": 1.6014009719897135, | |
| "learning_rate": 9.379912702220641e-06, | |
| "loss": 0.234, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.18586906760262475, | |
| "grad_norm": 1.4804611004553776, | |
| "learning_rate": 9.377525725443341e-06, | |
| "loss": 0.3951, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.18617427132611017, | |
| "grad_norm": 1.4589508747304376, | |
| "learning_rate": 9.375134468171108e-06, | |
| "loss": 0.2887, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.1864794750495956, | |
| "grad_norm": 1.9199770728371568, | |
| "learning_rate": 9.372738932742193e-06, | |
| "loss": 0.5627, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.18678467877308103, | |
| "grad_norm": 1.5903295576095668, | |
| "learning_rate": 9.370339121499039e-06, | |
| "loss": 0.4379, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.18708988249656647, | |
| "grad_norm": 1.6986262549100166, | |
| "learning_rate": 9.367935036788257e-06, | |
| "loss": 0.4873, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.18739508622005188, | |
| "grad_norm": 1.2194659257752518, | |
| "learning_rate": 9.365526680960645e-06, | |
| "loss": 0.3571, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.18770028994353732, | |
| "grad_norm": 1.3049138038493902, | |
| "learning_rate": 9.363114056371178e-06, | |
| "loss": 0.3114, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.18800549366702274, | |
| "grad_norm": 1.468685879220778, | |
| "learning_rate": 9.360697165379004e-06, | |
| "loss": 0.5043, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.18831069739050815, | |
| "grad_norm": 2.131454426646245, | |
| "learning_rate": 9.35827601034744e-06, | |
| "loss": 0.5165, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.1886159011139936, | |
| "grad_norm": 1.5563065422847613, | |
| "learning_rate": 9.355850593643974e-06, | |
| "loss": 0.4707, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.188921104837479, | |
| "grad_norm": 1.3992788671446874, | |
| "learning_rate": 9.353420917640264e-06, | |
| "loss": 0.3905, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.18922630856096445, | |
| "grad_norm": 1.2635210704880713, | |
| "learning_rate": 9.350986984712128e-06, | |
| "loss": 0.2399, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.18953151228444987, | |
| "grad_norm": 1.4071894580574145, | |
| "learning_rate": 9.348548797239551e-06, | |
| "loss": 0.3689, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.1898367160079353, | |
| "grad_norm": 1.8460324916935194, | |
| "learning_rate": 9.346106357606675e-06, | |
| "loss": 0.3337, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.19014191973142072, | |
| "grad_norm": 1.2199756132877848, | |
| "learning_rate": 9.343659668201803e-06, | |
| "loss": 0.3707, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.19044712345490614, | |
| "grad_norm": 1.3352934416971625, | |
| "learning_rate": 9.34120873141739e-06, | |
| "loss": 0.3332, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.19075232717839158, | |
| "grad_norm": 1.5847999740161538, | |
| "learning_rate": 9.33875354965005e-06, | |
| "loss": 0.4658, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.191057530901877, | |
| "grad_norm": 1.59267718540602, | |
| "learning_rate": 9.336294125300538e-06, | |
| "loss": 0.5138, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.19136273462536244, | |
| "grad_norm": 1.0558131110089597, | |
| "learning_rate": 9.333830460773767e-06, | |
| "loss": 0.2512, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.19166793834884785, | |
| "grad_norm": 1.6912848096424418, | |
| "learning_rate": 9.331362558478793e-06, | |
| "loss": 0.3129, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.1919731420723333, | |
| "grad_norm": 1.7565199783626735, | |
| "learning_rate": 9.328890420828817e-06, | |
| "loss": 0.2625, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.1922783457958187, | |
| "grad_norm": 1.6287483120468187, | |
| "learning_rate": 9.326414050241176e-06, | |
| "loss": 0.4631, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.19258354951930415, | |
| "grad_norm": 1.5343127080699748, | |
| "learning_rate": 9.323933449137353e-06, | |
| "loss": 0.35, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.19288875324278956, | |
| "grad_norm": 1.2901421851525343, | |
| "learning_rate": 9.321448619942963e-06, | |
| "loss": 0.3191, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.19319395696627498, | |
| "grad_norm": 1.3651871352420322, | |
| "learning_rate": 9.318959565087761e-06, | |
| "loss": 0.3063, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.19349916068976042, | |
| "grad_norm": 1.4900191765389657, | |
| "learning_rate": 9.316466287005625e-06, | |
| "loss": 0.3621, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.19380436441324583, | |
| "grad_norm": 1.836926327897149, | |
| "learning_rate": 9.313968788134572e-06, | |
| "loss": 0.6273, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.19410956813673128, | |
| "grad_norm": 1.6283757285797815, | |
| "learning_rate": 9.311467070916743e-06, | |
| "loss": 0.3191, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.1944147718602167, | |
| "grad_norm": 1.7047955780313857, | |
| "learning_rate": 9.308961137798398e-06, | |
| "loss": 0.5581, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.19471997558370213, | |
| "grad_norm": 1.4826549000090183, | |
| "learning_rate": 9.306450991229927e-06, | |
| "loss": 0.3157, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.19502517930718755, | |
| "grad_norm": 1.435361017145943, | |
| "learning_rate": 9.30393663366584e-06, | |
| "loss": 0.3084, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.19533038303067296, | |
| "grad_norm": 1.402358583674702, | |
| "learning_rate": 9.301418067564758e-06, | |
| "loss": 0.3351, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.1956355867541584, | |
| "grad_norm": 1.2933654176691274, | |
| "learning_rate": 9.298895295389423e-06, | |
| "loss": 0.2585, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.19594079047764382, | |
| "grad_norm": 1.5346090103364156, | |
| "learning_rate": 9.29636831960669e-06, | |
| "loss": 0.33, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.19624599420112926, | |
| "grad_norm": 1.3251725534327445, | |
| "learning_rate": 9.293837142687522e-06, | |
| "loss": 0.2104, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.19655119792461467, | |
| "grad_norm": 1.583285740923444, | |
| "learning_rate": 9.291301767106986e-06, | |
| "loss": 0.4326, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.19685640164810012, | |
| "grad_norm": 1.0905371753559963, | |
| "learning_rate": 9.288762195344266e-06, | |
| "loss": 0.274, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.19716160537158553, | |
| "grad_norm": 1.7263838041187525, | |
| "learning_rate": 9.28621842988264e-06, | |
| "loss": 0.5011, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.19746680909507097, | |
| "grad_norm": 1.4838510492072716, | |
| "learning_rate": 9.283670473209488e-06, | |
| "loss": 0.1956, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.1977720128185564, | |
| "grad_norm": 1.2036114489558822, | |
| "learning_rate": 9.28111832781629e-06, | |
| "loss": 0.2346, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.1980772165420418, | |
| "grad_norm": 1.3853687861302435, | |
| "learning_rate": 9.278561996198622e-06, | |
| "loss": 0.2313, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.19838242026552724, | |
| "grad_norm": 1.2708592907281826, | |
| "learning_rate": 9.276001480856152e-06, | |
| "loss": 0.3717, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.19868762398901266, | |
| "grad_norm": 1.6407943638167721, | |
| "learning_rate": 9.273436784292641e-06, | |
| "loss": 0.5533, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.1989928277124981, | |
| "grad_norm": 1.9614104046455092, | |
| "learning_rate": 9.270867909015936e-06, | |
| "loss": 0.4552, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.1992980314359835, | |
| "grad_norm": 1.6038724290911757, | |
| "learning_rate": 9.268294857537973e-06, | |
| "loss": 0.477, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.19960323515946896, | |
| "grad_norm": 1.9838192379267598, | |
| "learning_rate": 9.26571763237477e-06, | |
| "loss": 0.7118, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.19990843888295437, | |
| "grad_norm": 1.4066170955047037, | |
| "learning_rate": 9.263136236046422e-06, | |
| "loss": 0.4222, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.2002136426064398, | |
| "grad_norm": 1.800111592330467, | |
| "learning_rate": 9.260550671077113e-06, | |
| "loss": 0.4969, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.20051884632992523, | |
| "grad_norm": 1.450700768131887, | |
| "learning_rate": 9.257960939995093e-06, | |
| "loss": 0.3938, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.20082405005341064, | |
| "grad_norm": 1.6208519445035616, | |
| "learning_rate": 9.255367045332693e-06, | |
| "loss": 0.5284, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.20112925377689608, | |
| "grad_norm": 1.4936718972189802, | |
| "learning_rate": 9.25276898962631e-06, | |
| "loss": 0.3314, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.2014344575003815, | |
| "grad_norm": 1.8465064182825872, | |
| "learning_rate": 9.250166775416412e-06, | |
| "loss": 0.5858, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.20173966122386694, | |
| "grad_norm": 1.7205660848788409, | |
| "learning_rate": 9.247560405247535e-06, | |
| "loss": 0.4412, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.20204486494735235, | |
| "grad_norm": 1.3534767838887214, | |
| "learning_rate": 9.244949881668276e-06, | |
| "loss": 0.2469, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2023500686708378, | |
| "grad_norm": 1.283164106498675, | |
| "learning_rate": 9.242335207231297e-06, | |
| "loss": 0.2035, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.2026552723943232, | |
| "grad_norm": 1.6915673582245812, | |
| "learning_rate": 9.239716384493313e-06, | |
| "loss": 0.2989, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.20296047611780862, | |
| "grad_norm": 2.2410065734371956, | |
| "learning_rate": 9.2370934160151e-06, | |
| "loss": 0.4781, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.20326567984129407, | |
| "grad_norm": 1.3632387520798106, | |
| "learning_rate": 9.234466304361487e-06, | |
| "loss": 0.2167, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.20357088356477948, | |
| "grad_norm": 1.249315694419475, | |
| "learning_rate": 9.231835052101352e-06, | |
| "loss": 0.323, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.20387608728826492, | |
| "grad_norm": 1.2758684884196045, | |
| "learning_rate": 9.229199661807628e-06, | |
| "loss": 0.2939, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.20418129101175034, | |
| "grad_norm": 1.6065093517583355, | |
| "learning_rate": 9.226560136057286e-06, | |
| "loss": 0.5757, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.20448649473523578, | |
| "grad_norm": 1.4713850530122647, | |
| "learning_rate": 9.223916477431348e-06, | |
| "loss": 0.1891, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2047916984587212, | |
| "grad_norm": 1.2115833160052654, | |
| "learning_rate": 9.221268688514872e-06, | |
| "loss": 0.2403, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.20509690218220664, | |
| "grad_norm": 1.2023485328623802, | |
| "learning_rate": 9.218616771896958e-06, | |
| "loss": 0.2996, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.20540210590569205, | |
| "grad_norm": 1.489677195048235, | |
| "learning_rate": 9.21596073017074e-06, | |
| "loss": 0.4833, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.20570730962917746, | |
| "grad_norm": 1.3322943821875144, | |
| "learning_rate": 9.213300565933387e-06, | |
| "loss": 0.4981, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.2060125133526629, | |
| "grad_norm": 1.657242243821286, | |
| "learning_rate": 9.210636281786101e-06, | |
| "loss": 0.4101, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.20631771707614832, | |
| "grad_norm": 1.5710052935767878, | |
| "learning_rate": 9.207967880334107e-06, | |
| "loss": 0.5269, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.20662292079963376, | |
| "grad_norm": 1.7349735636167414, | |
| "learning_rate": 9.205295364186664e-06, | |
| "loss": 0.4501, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.20692812452311918, | |
| "grad_norm": 1.2131120415162016, | |
| "learning_rate": 9.202618735957044e-06, | |
| "loss": 0.2405, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.20723332824660462, | |
| "grad_norm": 1.873473817856673, | |
| "learning_rate": 9.199937998262553e-06, | |
| "loss": 0.534, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.20753853197009003, | |
| "grad_norm": 1.5561433809212002, | |
| "learning_rate": 9.197253153724504e-06, | |
| "loss": 0.3787, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.20784373569357545, | |
| "grad_norm": 1.656918804520659, | |
| "learning_rate": 9.194564204968231e-06, | |
| "loss": 0.5373, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.2081489394170609, | |
| "grad_norm": 1.2717305075559726, | |
| "learning_rate": 9.19187115462308e-06, | |
| "loss": 0.3069, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.2084541431405463, | |
| "grad_norm": 1.6153063314109386, | |
| "learning_rate": 9.189174005322408e-06, | |
| "loss": 0.5305, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.20875934686403175, | |
| "grad_norm": 1.336223638576054, | |
| "learning_rate": 9.18647275970358e-06, | |
| "loss": 0.3889, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.20906455058751716, | |
| "grad_norm": 1.8056451147641948, | |
| "learning_rate": 9.18376742040797e-06, | |
| "loss": 0.5624, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.2093697543110026, | |
| "grad_norm": 1.4375442829774812, | |
| "learning_rate": 9.181057990080945e-06, | |
| "loss": 0.4177, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.20967495803448802, | |
| "grad_norm": 1.4711699333864836, | |
| "learning_rate": 9.178344471371886e-06, | |
| "loss": 0.3406, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.20998016175797346, | |
| "grad_norm": 1.3838222287012096, | |
| "learning_rate": 9.17562686693416e-06, | |
| "loss": 0.3066, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.21028536548145887, | |
| "grad_norm": 1.4022212067938096, | |
| "learning_rate": 9.172905179425136e-06, | |
| "loss": 0.2756, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.2105905692049443, | |
| "grad_norm": 1.3536281346270633, | |
| "learning_rate": 9.170179411506175e-06, | |
| "loss": 0.4636, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.21089577292842973, | |
| "grad_norm": 1.8927154333736904, | |
| "learning_rate": 9.167449565842622e-06, | |
| "loss": 0.4806, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.21120097665191515, | |
| "grad_norm": 1.3316444887041978, | |
| "learning_rate": 9.164715645103818e-06, | |
| "loss": 0.3236, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.2115061803754006, | |
| "grad_norm": 1.5414957862521723, | |
| "learning_rate": 9.161977651963082e-06, | |
| "loss": 0.3174, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.211811384098886, | |
| "grad_norm": 1.3526532837600678, | |
| "learning_rate": 9.15923558909772e-06, | |
| "loss": 0.3286, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.21211658782237144, | |
| "grad_norm": 1.6741032947314405, | |
| "learning_rate": 9.156489459189011e-06, | |
| "loss": 0.5021, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.21242179154585686, | |
| "grad_norm": 1.3755135010833581, | |
| "learning_rate": 9.153739264922221e-06, | |
| "loss": 0.3659, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.21272699526934227, | |
| "grad_norm": 1.1828934010664012, | |
| "learning_rate": 9.150985008986579e-06, | |
| "loss": 0.273, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.21303219899282771, | |
| "grad_norm": 1.6368043280832338, | |
| "learning_rate": 9.148226694075295e-06, | |
| "loss": 0.3457, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.21333740271631313, | |
| "grad_norm": 1.7649363297171154, | |
| "learning_rate": 9.14546432288554e-06, | |
| "loss": 0.504, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.21364260643979857, | |
| "grad_norm": 1.5869344950607664, | |
| "learning_rate": 9.14269789811846e-06, | |
| "loss": 0.2746, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21394781016328399, | |
| "grad_norm": 2.1981528156810266, | |
| "learning_rate": 9.139927422479157e-06, | |
| "loss": 0.3494, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.21425301388676943, | |
| "grad_norm": 1.5364449020919386, | |
| "learning_rate": 9.137152898676698e-06, | |
| "loss": 0.259, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.21455821761025484, | |
| "grad_norm": 1.5976641591815994, | |
| "learning_rate": 9.134374329424107e-06, | |
| "loss": 0.3714, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.21486342133374028, | |
| "grad_norm": 1.2686502539601563, | |
| "learning_rate": 9.131591717438366e-06, | |
| "loss": 0.3223, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.2151686250572257, | |
| "grad_norm": 1.246057539745606, | |
| "learning_rate": 9.128805065440405e-06, | |
| "loss": 0.2376, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.2154738287807111, | |
| "grad_norm": 1.7361721316316239, | |
| "learning_rate": 9.126014376155113e-06, | |
| "loss": 0.4981, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.21577903250419656, | |
| "grad_norm": 1.5341841942267511, | |
| "learning_rate": 9.12321965231132e-06, | |
| "loss": 0.3948, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.21608423622768197, | |
| "grad_norm": 1.564805123380453, | |
| "learning_rate": 9.1204208966418e-06, | |
| "loss": 0.3948, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2163894399511674, | |
| "grad_norm": 1.3738642249897421, | |
| "learning_rate": 9.117618111883276e-06, | |
| "loss": 0.3615, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.21669464367465283, | |
| "grad_norm": 1.2994164991334436, | |
| "learning_rate": 9.114811300776405e-06, | |
| "loss": 0.2186, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21699984739813827, | |
| "grad_norm": 1.7972182236315297, | |
| "learning_rate": 9.112000466065785e-06, | |
| "loss": 0.5655, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.21730505112162368, | |
| "grad_norm": 1.278135561724537, | |
| "learning_rate": 9.109185610499943e-06, | |
| "loss": 0.3853, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2176102548451091, | |
| "grad_norm": 1.213829377522313, | |
| "learning_rate": 9.106366736831347e-06, | |
| "loss": 0.3143, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.21791545856859454, | |
| "grad_norm": 1.5786296679622351, | |
| "learning_rate": 9.103543847816384e-06, | |
| "loss": 0.3748, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.21822066229207995, | |
| "grad_norm": 1.1980380231780912, | |
| "learning_rate": 9.100716946215374e-06, | |
| "loss": 0.3257, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.2185258660155654, | |
| "grad_norm": 1.5748488156857616, | |
| "learning_rate": 9.097886034792557e-06, | |
| "loss": 0.3859, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2188310697390508, | |
| "grad_norm": 1.4962734980956909, | |
| "learning_rate": 9.095051116316095e-06, | |
| "loss": 0.4612, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.21913627346253625, | |
| "grad_norm": 1.4269067789650183, | |
| "learning_rate": 9.092212193558072e-06, | |
| "loss": 0.3177, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.21944147718602167, | |
| "grad_norm": 1.2842950689313655, | |
| "learning_rate": 9.089369269294483e-06, | |
| "loss": 0.238, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.2197466809095071, | |
| "grad_norm": 1.3869072584515276, | |
| "learning_rate": 9.086522346305233e-06, | |
| "loss": 0.3534, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.22005188463299252, | |
| "grad_norm": 1.5861416389361607, | |
| "learning_rate": 9.083671427374144e-06, | |
| "loss": 0.404, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.22035708835647794, | |
| "grad_norm": 1.3575612826767118, | |
| "learning_rate": 9.080816515288944e-06, | |
| "loss": 0.2564, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.22066229207996338, | |
| "grad_norm": 1.6224055486381046, | |
| "learning_rate": 9.077957612841262e-06, | |
| "loss": 0.5335, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.2209674958034488, | |
| "grad_norm": 1.1424184312646575, | |
| "learning_rate": 9.07509472282663e-06, | |
| "loss": 0.2248, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.22127269952693424, | |
| "grad_norm": 0.9653403668195691, | |
| "learning_rate": 9.07222784804448e-06, | |
| "loss": 0.1492, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.22157790325041965, | |
| "grad_norm": 1.7736448595259362, | |
| "learning_rate": 9.069356991298145e-06, | |
| "loss": 0.3365, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.2218831069739051, | |
| "grad_norm": 1.5382460544138177, | |
| "learning_rate": 9.066482155394843e-06, | |
| "loss": 0.3641, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.2221883106973905, | |
| "grad_norm": 1.5490656282179018, | |
| "learning_rate": 9.063603343145689e-06, | |
| "loss": 0.451, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.22249351442087595, | |
| "grad_norm": 1.6477503496903847, | |
| "learning_rate": 9.060720557365683e-06, | |
| "loss": 0.3152, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.22279871814436136, | |
| "grad_norm": 1.3334962278440752, | |
| "learning_rate": 9.057833800873712e-06, | |
| "loss": 0.3514, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22310392186784678, | |
| "grad_norm": 1.3285303282716252, | |
| "learning_rate": 9.054943076492548e-06, | |
| "loss": 0.2835, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.22340912559133222, | |
| "grad_norm": 1.275406047266298, | |
| "learning_rate": 9.05204838704884e-06, | |
| "loss": 0.2324, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.22371432931481763, | |
| "grad_norm": 1.6512597679062968, | |
| "learning_rate": 9.04914973537311e-06, | |
| "loss": 0.6219, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.22401953303830308, | |
| "grad_norm": 1.2059290028925398, | |
| "learning_rate": 9.046247124299766e-06, | |
| "loss": 0.3296, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2243247367617885, | |
| "grad_norm": 1.5740026131575533, | |
| "learning_rate": 9.043340556667076e-06, | |
| "loss": 0.4929, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.22462994048527393, | |
| "grad_norm": 1.525534179809715, | |
| "learning_rate": 9.040430035317184e-06, | |
| "loss": 0.3855, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.22493514420875935, | |
| "grad_norm": 1.5007721185282292, | |
| "learning_rate": 9.037515563096096e-06, | |
| "loss": 0.2867, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.22524034793224476, | |
| "grad_norm": 1.7439351574891226, | |
| "learning_rate": 9.034597142853685e-06, | |
| "loss": 0.5206, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.2255455516557302, | |
| "grad_norm": 1.3832916895025305, | |
| "learning_rate": 9.031674777443679e-06, | |
| "loss": 0.3525, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.22585075537921562, | |
| "grad_norm": 1.689345092847421, | |
| "learning_rate": 9.028748469723671e-06, | |
| "loss": 0.605, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.22615595910270106, | |
| "grad_norm": 1.5414939302466633, | |
| "learning_rate": 9.025818222555105e-06, | |
| "loss": 0.4616, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.22646116282618647, | |
| "grad_norm": 1.572475405138349, | |
| "learning_rate": 9.022884038803275e-06, | |
| "loss": 0.4125, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.22676636654967192, | |
| "grad_norm": 1.6244915386812042, | |
| "learning_rate": 9.019945921337328e-06, | |
| "loss": 0.3892, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.22707157027315733, | |
| "grad_norm": 1.109983250603302, | |
| "learning_rate": 9.017003873030254e-06, | |
| "loss": 0.2775, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.22737677399664277, | |
| "grad_norm": 1.2908690769048217, | |
| "learning_rate": 9.014057896758892e-06, | |
| "loss": 0.2799, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.2276819777201282, | |
| "grad_norm": 1.3869598608885343, | |
| "learning_rate": 9.011107995403917e-06, | |
| "loss": 0.3109, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.2279871814436136, | |
| "grad_norm": 1.9493518241147973, | |
| "learning_rate": 9.008154171849843e-06, | |
| "loss": 0.3222, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.22829238516709904, | |
| "grad_norm": 1.1260119760970688, | |
| "learning_rate": 9.005196428985024e-06, | |
| "loss": 0.2015, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.22859758889058446, | |
| "grad_norm": 1.3395279047737458, | |
| "learning_rate": 9.002234769701637e-06, | |
| "loss": 0.3174, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.2289027926140699, | |
| "grad_norm": 1.127299131698768, | |
| "learning_rate": 8.999269196895698e-06, | |
| "loss": 0.3164, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.22920799633755531, | |
| "grad_norm": 1.1374371018678644, | |
| "learning_rate": 8.996299713467044e-06, | |
| "loss": 0.3252, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.22951320006104076, | |
| "grad_norm": 1.4696910359597422, | |
| "learning_rate": 8.993326322319339e-06, | |
| "loss": 0.4006, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.22981840378452617, | |
| "grad_norm": 1.862503625238974, | |
| "learning_rate": 8.990349026360065e-06, | |
| "loss": 0.5648, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.23012360750801159, | |
| "grad_norm": 1.768109354521003, | |
| "learning_rate": 8.987367828500528e-06, | |
| "loss": 0.4002, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.23042881123149703, | |
| "grad_norm": 1.6108609348753753, | |
| "learning_rate": 8.984382731655842e-06, | |
| "loss": 0.3703, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.23073401495498244, | |
| "grad_norm": 2.0879815191569153, | |
| "learning_rate": 8.98139373874494e-06, | |
| "loss": 0.4966, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.23103921867846788, | |
| "grad_norm": 1.627248748628783, | |
| "learning_rate": 8.978400852690557e-06, | |
| "loss": 0.4972, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.2313444224019533, | |
| "grad_norm": 1.6245326323749618, | |
| "learning_rate": 8.975404076419244e-06, | |
| "loss": 0.2992, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.23164962612543874, | |
| "grad_norm": 1.295122180020727, | |
| "learning_rate": 8.972403412861354e-06, | |
| "loss": 0.1872, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.23195482984892415, | |
| "grad_norm": 1.5772423116895944, | |
| "learning_rate": 8.969398864951034e-06, | |
| "loss": 0.3683, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2322600335724096, | |
| "grad_norm": 1.2952086072241094, | |
| "learning_rate": 8.966390435626233e-06, | |
| "loss": 0.3407, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.232565237295895, | |
| "grad_norm": 1.350730242849987, | |
| "learning_rate": 8.963378127828703e-06, | |
| "loss": 0.3012, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.23287044101938043, | |
| "grad_norm": 1.757344365832517, | |
| "learning_rate": 8.960361944503976e-06, | |
| "loss": 0.4483, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.23317564474286587, | |
| "grad_norm": 1.4971687299475023, | |
| "learning_rate": 8.957341888601382e-06, | |
| "loss": 0.3751, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.23348084846635128, | |
| "grad_norm": 0.9570785510771707, | |
| "learning_rate": 8.954317963074035e-06, | |
| "loss": 0.1987, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.23378605218983672, | |
| "grad_norm": 1.8380667817910563, | |
| "learning_rate": 8.951290170878834e-06, | |
| "loss": 0.4731, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.23409125591332214, | |
| "grad_norm": 1.112944434897937, | |
| "learning_rate": 8.948258514976456e-06, | |
| "loss": 0.2407, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.23439645963680758, | |
| "grad_norm": 1.309366755129546, | |
| "learning_rate": 8.94522299833136e-06, | |
| "loss": 0.319, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.234701663360293, | |
| "grad_norm": 1.1451808921483004, | |
| "learning_rate": 8.94218362391178e-06, | |
| "loss": 0.2171, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.2350068670837784, | |
| "grad_norm": 1.428457846554008, | |
| "learning_rate": 8.939140394689716e-06, | |
| "loss": 0.294, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.23531207080726385, | |
| "grad_norm": 1.6548235526629411, | |
| "learning_rate": 8.936093313640947e-06, | |
| "loss": 0.4041, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.23561727453074927, | |
| "grad_norm": 1.4221218858946143, | |
| "learning_rate": 8.93304238374501e-06, | |
| "loss": 0.2095, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.2359224782542347, | |
| "grad_norm": 1.5383523764762757, | |
| "learning_rate": 8.929987607985212e-06, | |
| "loss": 0.478, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.23622768197772012, | |
| "grad_norm": 1.5292649150056814, | |
| "learning_rate": 8.926928989348612e-06, | |
| "loss": 0.4167, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.23653288570120556, | |
| "grad_norm": 1.6476214228386405, | |
| "learning_rate": 8.923866530826037e-06, | |
| "loss": 0.4843, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.23683808942469098, | |
| "grad_norm": 1.5924467106022067, | |
| "learning_rate": 8.920800235412067e-06, | |
| "loss": 0.407, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.23714329314817642, | |
| "grad_norm": 1.5594856941518516, | |
| "learning_rate": 8.917730106105022e-06, | |
| "loss": 0.3124, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.23744849687166183, | |
| "grad_norm": 1.386797749592837, | |
| "learning_rate": 8.914656145906988e-06, | |
| "loss": 0.3016, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.23775370059514725, | |
| "grad_norm": 1.3692351348081182, | |
| "learning_rate": 8.911578357823784e-06, | |
| "loss": 0.3401, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.2380589043186327, | |
| "grad_norm": 1.389706254561915, | |
| "learning_rate": 8.90849674486498e-06, | |
| "loss": 0.3724, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2383641080421181, | |
| "grad_norm": 1.05409970328404, | |
| "learning_rate": 8.90541131004388e-06, | |
| "loss": 0.2848, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.23866931176560355, | |
| "grad_norm": 1.5742105715707222, | |
| "learning_rate": 8.90232205637753e-06, | |
| "loss": 0.6582, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.23897451548908896, | |
| "grad_norm": 1.4335601553241863, | |
| "learning_rate": 8.899228986886709e-06, | |
| "loss": 0.4478, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.2392797192125744, | |
| "grad_norm": 1.692276024021936, | |
| "learning_rate": 8.896132104595925e-06, | |
| "loss": 0.2317, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.23958492293605982, | |
| "grad_norm": 1.4666941437892842, | |
| "learning_rate": 8.893031412533415e-06, | |
| "loss": 0.4121, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.23989012665954523, | |
| "grad_norm": 1.7867397905919786, | |
| "learning_rate": 8.889926913731144e-06, | |
| "loss": 0.3591, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.24019533038303068, | |
| "grad_norm": 1.5710332933779532, | |
| "learning_rate": 8.886818611224798e-06, | |
| "loss": 0.3421, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.2405005341065161, | |
| "grad_norm": 1.0993405381050079, | |
| "learning_rate": 8.883706508053778e-06, | |
| "loss": 0.1907, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.24080573783000153, | |
| "grad_norm": 1.5126727738496022, | |
| "learning_rate": 8.88059060726121e-06, | |
| "loss": 0.5117, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.24111094155348695, | |
| "grad_norm": 1.0050194650159943, | |
| "learning_rate": 8.877470911893927e-06, | |
| "loss": 0.2927, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2414161452769724, | |
| "grad_norm": 1.549032075208785, | |
| "learning_rate": 8.874347425002474e-06, | |
| "loss": 0.4823, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.2417213490004578, | |
| "grad_norm": 1.4199913878918462, | |
| "learning_rate": 8.871220149641101e-06, | |
| "loss": 0.372, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.24202655272394324, | |
| "grad_norm": 1.2204976389548388, | |
| "learning_rate": 8.868089088867772e-06, | |
| "loss": 0.4162, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.24233175644742866, | |
| "grad_norm": 1.2773847194249226, | |
| "learning_rate": 8.86495424574414e-06, | |
| "loss": 0.3559, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.24263696017091407, | |
| "grad_norm": 1.5146172702910354, | |
| "learning_rate": 8.861815623335564e-06, | |
| "loss": 0.4853, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.24294216389439952, | |
| "grad_norm": 1.2421815500176878, | |
| "learning_rate": 8.858673224711097e-06, | |
| "loss": 0.2322, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.24324736761788493, | |
| "grad_norm": 1.573303107817239, | |
| "learning_rate": 8.855527052943482e-06, | |
| "loss": 0.4221, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.24355257134137037, | |
| "grad_norm": 1.3348685555629807, | |
| "learning_rate": 8.852377111109158e-06, | |
| "loss": 0.4016, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.2438577750648558, | |
| "grad_norm": 1.4724771810683823, | |
| "learning_rate": 8.849223402288244e-06, | |
| "loss": 0.3673, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.24416297878834123, | |
| "grad_norm": 1.4577243169053915, | |
| "learning_rate": 8.846065929564542e-06, | |
| "loss": 0.3661, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24446818251182664, | |
| "grad_norm": 1.2504498625919398, | |
| "learning_rate": 8.842904696025542e-06, | |
| "loss": 0.3142, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.24477338623531206, | |
| "grad_norm": 1.4501852015903842, | |
| "learning_rate": 8.839739704762404e-06, | |
| "loss": 0.4218, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.2450785899587975, | |
| "grad_norm": 1.814308645288012, | |
| "learning_rate": 8.836570958869966e-06, | |
| "loss": 0.2846, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.2453837936822829, | |
| "grad_norm": 1.4130518552602014, | |
| "learning_rate": 8.833398461446736e-06, | |
| "loss": 0.3661, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.24568899740576836, | |
| "grad_norm": 1.207043545854772, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.3083, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.24599420112925377, | |
| "grad_norm": 1.3695879979407652, | |
| "learning_rate": 8.827042224420273e-06, | |
| "loss": 0.3913, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.2462994048527392, | |
| "grad_norm": 1.509429933800135, | |
| "learning_rate": 8.823858491032388e-06, | |
| "loss": 0.4189, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.24660460857622463, | |
| "grad_norm": 1.5440694625132374, | |
| "learning_rate": 8.820671018544398e-06, | |
| "loss": 0.4917, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.24690981229971007, | |
| "grad_norm": 1.2648979736165928, | |
| "learning_rate": 8.817479810073124e-06, | |
| "loss": 0.2272, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.24721501602319548, | |
| "grad_norm": 1.5676001662475934, | |
| "learning_rate": 8.814284868739038e-06, | |
| "loss": 0.3194, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2475202197466809, | |
| "grad_norm": 1.5083433807769027, | |
| "learning_rate": 8.811086197666266e-06, | |
| "loss": 0.4213, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.24782542347016634, | |
| "grad_norm": 1.428992764464654, | |
| "learning_rate": 8.807883799982574e-06, | |
| "loss": 0.3399, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.24813062719365175, | |
| "grad_norm": 1.0166441279138219, | |
| "learning_rate": 8.80467767881938e-06, | |
| "loss": 0.2195, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.2484358309171372, | |
| "grad_norm": 1.2422483417317973, | |
| "learning_rate": 8.801467837311738e-06, | |
| "loss": 0.2906, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2487410346406226, | |
| "grad_norm": 1.419560487453445, | |
| "learning_rate": 8.798254278598345e-06, | |
| "loss": 0.4495, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.24904623836410805, | |
| "grad_norm": 1.16665115800268, | |
| "learning_rate": 8.795037005821521e-06, | |
| "loss": 0.2503, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.24935144208759347, | |
| "grad_norm": 1.4481353411677762, | |
| "learning_rate": 8.791816022127233e-06, | |
| "loss": 0.3363, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.2496566458110789, | |
| "grad_norm": 1.338264713753052, | |
| "learning_rate": 8.788591330665065e-06, | |
| "loss": 0.2325, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.24996184953456432, | |
| "grad_norm": 1.3941866237825566, | |
| "learning_rate": 8.785362934588234e-06, | |
| "loss": 0.4524, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.25026705325804977, | |
| "grad_norm": 1.508098753010285, | |
| "learning_rate": 8.782130837053575e-06, | |
| "loss": 0.3796, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2505722569815352, | |
| "grad_norm": 1.3573838043362958, | |
| "learning_rate": 8.778895041221544e-06, | |
| "loss": 0.2623, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.2508774607050206, | |
| "grad_norm": 1.3709683700574562, | |
| "learning_rate": 8.775655550256214e-06, | |
| "loss": 0.3998, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.251182664428506, | |
| "grad_norm": 1.5964366482889705, | |
| "learning_rate": 8.772412367325269e-06, | |
| "loss": 0.455, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.2514878681519915, | |
| "grad_norm": 1.6611709904306184, | |
| "learning_rate": 8.769165495600007e-06, | |
| "loss": 0.5094, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2517930718754769, | |
| "grad_norm": 1.1400080018013656, | |
| "learning_rate": 8.76591493825533e-06, | |
| "loss": 0.3997, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.2520982755989623, | |
| "grad_norm": 1.862472646627605, | |
| "learning_rate": 8.76266069846974e-06, | |
| "loss": 0.3198, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.2524034793224477, | |
| "grad_norm": 1.6623689872345544, | |
| "learning_rate": 8.75940277942535e-06, | |
| "loss": 0.5344, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.25270868304593314, | |
| "grad_norm": 1.4234557534815584, | |
| "learning_rate": 8.756141184307865e-06, | |
| "loss": 0.3674, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2530138867694186, | |
| "grad_norm": 1.7406127381363719, | |
| "learning_rate": 8.75287591630658e-06, | |
| "loss": 0.4348, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.253319090492904, | |
| "grad_norm": 1.4903918741773559, | |
| "learning_rate": 8.749606978614391e-06, | |
| "loss": 0.2545, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.25362429421638943, | |
| "grad_norm": 1.8076324214942547, | |
| "learning_rate": 8.746334374427774e-06, | |
| "loss": 0.4559, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.25392949793987485, | |
| "grad_norm": 1.3532948940123553, | |
| "learning_rate": 8.743058106946798e-06, | |
| "loss": 0.2642, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.2542347016633603, | |
| "grad_norm": 1.6339304623976283, | |
| "learning_rate": 8.739778179375105e-06, | |
| "loss": 0.3866, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.25453990538684573, | |
| "grad_norm": 1.2532848113555968, | |
| "learning_rate": 8.736494594919922e-06, | |
| "loss": 0.2563, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.25484510911033115, | |
| "grad_norm": 1.062909552234949, | |
| "learning_rate": 8.733207356792053e-06, | |
| "loss": 0.2942, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.25515031283381656, | |
| "grad_norm": 1.3812270046063335, | |
| "learning_rate": 8.72991646820587e-06, | |
| "loss": 0.4591, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.255455516557302, | |
| "grad_norm": 1.401063935434944, | |
| "learning_rate": 8.726621932379319e-06, | |
| "loss": 0.3184, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.25576072028078745, | |
| "grad_norm": 1.394771394969279, | |
| "learning_rate": 8.723323752533907e-06, | |
| "loss": 0.2373, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.25606592400427286, | |
| "grad_norm": 1.4771850726658662, | |
| "learning_rate": 8.72002193189471e-06, | |
| "loss": 0.384, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.2563711277277583, | |
| "grad_norm": 1.278173381384045, | |
| "learning_rate": 8.716716473690361e-06, | |
| "loss": 0.2333, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2566763314512437, | |
| "grad_norm": 1.5231335458997086, | |
| "learning_rate": 8.713407381153052e-06, | |
| "loss": 0.4792, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.25698153517472916, | |
| "grad_norm": 1.2138032535770222, | |
| "learning_rate": 8.710094657518524e-06, | |
| "loss": 0.1745, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.2572867388982146, | |
| "grad_norm": 1.6203262902374684, | |
| "learning_rate": 8.706778306026073e-06, | |
| "loss": 0.392, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.2575919426217, | |
| "grad_norm": 1.5129887571452891, | |
| "learning_rate": 8.703458329918541e-06, | |
| "loss": 0.3458, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.2578971463451854, | |
| "grad_norm": 1.579903535191777, | |
| "learning_rate": 8.700134732442318e-06, | |
| "loss": 0.4183, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.2582023500686708, | |
| "grad_norm": 1.6503110245689208, | |
| "learning_rate": 8.69680751684733e-06, | |
| "loss": 0.5822, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2585075537921563, | |
| "grad_norm": 1.4889049274535986, | |
| "learning_rate": 8.693476686387038e-06, | |
| "loss": 0.4368, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.2588127575156417, | |
| "grad_norm": 1.1464358161897683, | |
| "learning_rate": 8.69014224431845e-06, | |
| "loss": 0.2515, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.2591179612391271, | |
| "grad_norm": 1.759904896160663, | |
| "learning_rate": 8.686804193902092e-06, | |
| "loss": 0.3729, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.25942316496261253, | |
| "grad_norm": 1.6474376376075388, | |
| "learning_rate": 8.683462538402026e-06, | |
| "loss": 0.2528, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.25972836868609794, | |
| "grad_norm": 1.6153335633088148, | |
| "learning_rate": 8.680117281085839e-06, | |
| "loss": 0.4734, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.2600335724095834, | |
| "grad_norm": 1.7594579836313058, | |
| "learning_rate": 8.676768425224636e-06, | |
| "loss": 0.3666, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.2603387761330688, | |
| "grad_norm": 2.7725868317444973, | |
| "learning_rate": 8.673415974093046e-06, | |
| "loss": 0.3024, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.26064397985655424, | |
| "grad_norm": 1.3139609425748013, | |
| "learning_rate": 8.67005993096921e-06, | |
| "loss": 0.2145, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.26094918358003966, | |
| "grad_norm": 1.5222687762088085, | |
| "learning_rate": 8.66670029913478e-06, | |
| "loss": 0.306, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.2612543873035251, | |
| "grad_norm": 1.225174136338348, | |
| "learning_rate": 8.663337081874921e-06, | |
| "loss": 0.1945, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.26155959102701054, | |
| "grad_norm": 1.5844026100416482, | |
| "learning_rate": 8.659970282478303e-06, | |
| "loss": 0.5302, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.26186479475049596, | |
| "grad_norm": 1.4779095030916518, | |
| "learning_rate": 8.656599904237097e-06, | |
| "loss": 0.492, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.26216999847398137, | |
| "grad_norm": 1.6248924513219443, | |
| "learning_rate": 8.653225950446973e-06, | |
| "loss": 0.5415, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.2624752021974668, | |
| "grad_norm": 1.3916992153237053, | |
| "learning_rate": 8.649848424407103e-06, | |
| "loss": 0.2716, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.26278040592095225, | |
| "grad_norm": 1.5314790982505428, | |
| "learning_rate": 8.646467329420145e-06, | |
| "loss": 0.486, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.26308560964443767, | |
| "grad_norm": 1.474685202158443, | |
| "learning_rate": 8.64308266879225e-06, | |
| "loss": 0.4944, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.2633908133679231, | |
| "grad_norm": 2.089424447750544, | |
| "learning_rate": 8.639694445833056e-06, | |
| "loss": 0.5607, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.2636960170914085, | |
| "grad_norm": 1.6425002507132653, | |
| "learning_rate": 8.636302663855682e-06, | |
| "loss": 0.3179, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.26400122081489397, | |
| "grad_norm": 1.2523110959560677, | |
| "learning_rate": 8.632907326176732e-06, | |
| "loss": 0.3304, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.2643064245383794, | |
| "grad_norm": 1.9713067301422675, | |
| "learning_rate": 8.629508436116281e-06, | |
| "loss": 0.3482, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.2646116282618648, | |
| "grad_norm": 1.574438794377994, | |
| "learning_rate": 8.626105996997881e-06, | |
| "loss": 0.3681, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.2649168319853502, | |
| "grad_norm": 1.975886896641686, | |
| "learning_rate": 8.622700012148554e-06, | |
| "loss": 0.3096, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.2652220357088356, | |
| "grad_norm": 1.205037763961463, | |
| "learning_rate": 8.619290484898791e-06, | |
| "loss": 0.2542, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.2655272394323211, | |
| "grad_norm": 1.410569402556539, | |
| "learning_rate": 8.615877418582541e-06, | |
| "loss": 0.3157, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2658324431558065, | |
| "grad_norm": 1.4086097839086094, | |
| "learning_rate": 8.61246081653722e-06, | |
| "loss": 0.2501, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.2661376468792919, | |
| "grad_norm": 1.8258092836473052, | |
| "learning_rate": 8.609040682103697e-06, | |
| "loss": 0.5621, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.26644285060277734, | |
| "grad_norm": 1.5606382340865774, | |
| "learning_rate": 8.6056170186263e-06, | |
| "loss": 0.4155, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.2667480543262628, | |
| "grad_norm": 1.4738547712233099, | |
| "learning_rate": 8.6021898294528e-06, | |
| "loss": 0.3736, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.2670532580497482, | |
| "grad_norm": 1.2521166999589783, | |
| "learning_rate": 8.598759117934424e-06, | |
| "loss": 0.2707, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.26735846177323364, | |
| "grad_norm": 1.854567909402273, | |
| "learning_rate": 8.595324887425839e-06, | |
| "loss": 0.6055, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.26766366549671905, | |
| "grad_norm": 1.3156991751956812, | |
| "learning_rate": 8.59188714128515e-06, | |
| "loss": 0.1861, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.26796886922020446, | |
| "grad_norm": 1.2377078296039872, | |
| "learning_rate": 8.58844588287391e-06, | |
| "loss": 0.3001, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.26827407294368993, | |
| "grad_norm": 1.2660582471980335, | |
| "learning_rate": 8.585001115557093e-06, | |
| "loss": 0.3137, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.26857927666717535, | |
| "grad_norm": 1.6544970466723443, | |
| "learning_rate": 8.581552842703114e-06, | |
| "loss": 0.518, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.26888448039066076, | |
| "grad_norm": 1.7821640881601462, | |
| "learning_rate": 8.57810106768381e-06, | |
| "loss": 0.3717, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.2691896841141462, | |
| "grad_norm": 1.4045088267172914, | |
| "learning_rate": 8.57464579387445e-06, | |
| "loss": 0.507, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.2694948878376316, | |
| "grad_norm": 1.2562243938208713, | |
| "learning_rate": 8.571187024653715e-06, | |
| "loss": 0.2897, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.26980009156111706, | |
| "grad_norm": 1.647258608185494, | |
| "learning_rate": 8.567724763403709e-06, | |
| "loss": 0.4629, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.2701052952846025, | |
| "grad_norm": 1.4132863649325282, | |
| "learning_rate": 8.564259013509952e-06, | |
| "loss": 0.2711, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.2704104990080879, | |
| "grad_norm": 1.0546579427462797, | |
| "learning_rate": 8.560789778361372e-06, | |
| "loss": 0.1637, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.2707157027315733, | |
| "grad_norm": 1.8546260807501882, | |
| "learning_rate": 8.557317061350303e-06, | |
| "loss": 0.3391, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.2710209064550588, | |
| "grad_norm": 1.23880229781815, | |
| "learning_rate": 8.553840865872492e-06, | |
| "loss": 0.1623, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.2713261101785442, | |
| "grad_norm": 1.5924230549187588, | |
| "learning_rate": 8.550361195327079e-06, | |
| "loss": 0.3142, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.2716313139020296, | |
| "grad_norm": 1.3037915837064973, | |
| "learning_rate": 8.546878053116609e-06, | |
| "loss": 0.2978, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.271936517625515, | |
| "grad_norm": 1.2055323902273756, | |
| "learning_rate": 8.543391442647013e-06, | |
| "loss": 0.2708, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.27224172134900043, | |
| "grad_norm": 1.1232141519442895, | |
| "learning_rate": 8.539901367327622e-06, | |
| "loss": 0.2381, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.2725469250724859, | |
| "grad_norm": 1.6705142749495014, | |
| "learning_rate": 8.53640783057115e-06, | |
| "loss": 0.3586, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.2728521287959713, | |
| "grad_norm": 1.1042663181369634, | |
| "learning_rate": 8.532910835793697e-06, | |
| "loss": 0.142, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.27315733251945673, | |
| "grad_norm": 1.4442013980293982, | |
| "learning_rate": 8.529410386414747e-06, | |
| "loss": 0.3902, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.27346253624294214, | |
| "grad_norm": 1.1510191464990058, | |
| "learning_rate": 8.525906485857156e-06, | |
| "loss": 0.2468, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.2737677399664276, | |
| "grad_norm": 1.5572115857341773, | |
| "learning_rate": 8.522399137547162e-06, | |
| "loss": 0.3039, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.27407294368991303, | |
| "grad_norm": 1.7209680290555225, | |
| "learning_rate": 8.518888344914365e-06, | |
| "loss": 0.5024, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.27437814741339844, | |
| "grad_norm": 1.3970675444552212, | |
| "learning_rate": 8.515374111391745e-06, | |
| "loss": 0.3327, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.27468335113688386, | |
| "grad_norm": 1.5862907429686486, | |
| "learning_rate": 8.511856440415635e-06, | |
| "loss": 0.4007, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.27498855486036927, | |
| "grad_norm": 1.5069995329007524, | |
| "learning_rate": 8.508335335425739e-06, | |
| "loss": 0.415, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.27529375858385474, | |
| "grad_norm": 1.3954829540134184, | |
| "learning_rate": 8.50481079986511e-06, | |
| "loss": 0.2788, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.27559896230734016, | |
| "grad_norm": 1.8025662725066627, | |
| "learning_rate": 8.501282837180165e-06, | |
| "loss": 0.3723, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.27590416603082557, | |
| "grad_norm": 1.520821384052033, | |
| "learning_rate": 8.49775145082066e-06, | |
| "loss": 0.2988, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.276209369754311, | |
| "grad_norm": 1.604241860679225, | |
| "learning_rate": 8.494216644239712e-06, | |
| "loss": 0.2951, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.27651457347779645, | |
| "grad_norm": 1.3350559025036193, | |
| "learning_rate": 8.490678420893773e-06, | |
| "loss": 0.2941, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.27681977720128187, | |
| "grad_norm": 1.3989463178848165, | |
| "learning_rate": 8.487136784242641e-06, | |
| "loss": 0.3683, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.2771249809247673, | |
| "grad_norm": 1.6081399715718512, | |
| "learning_rate": 8.483591737749448e-06, | |
| "loss": 0.4503, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.2774301846482527, | |
| "grad_norm": 1.6045658886374856, | |
| "learning_rate": 8.480043284880666e-06, | |
| "loss": 0.3672, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.2777353883717381, | |
| "grad_norm": 1.5365693477891538, | |
| "learning_rate": 8.476491429106088e-06, | |
| "loss": 0.5018, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2780405920952236, | |
| "grad_norm": 1.306165577473847, | |
| "learning_rate": 8.472936173898846e-06, | |
| "loss": 0.3336, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.278345795818709, | |
| "grad_norm": 1.2410262335833262, | |
| "learning_rate": 8.469377522735387e-06, | |
| "loss": 0.3175, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.2786509995421944, | |
| "grad_norm": 0.9618771928411501, | |
| "learning_rate": 8.465815479095485e-06, | |
| "loss": 0.2373, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.2789562032656798, | |
| "grad_norm": 2.139593836663555, | |
| "learning_rate": 8.462250046462226e-06, | |
| "loss": 0.4121, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.2792614069891653, | |
| "grad_norm": 1.3283828036351077, | |
| "learning_rate": 8.458681228322014e-06, | |
| "loss": 0.38, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.2795666107126507, | |
| "grad_norm": 1.4749702062668928, | |
| "learning_rate": 8.455109028164561e-06, | |
| "loss": 0.4707, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.2798718144361361, | |
| "grad_norm": 1.2064530552545678, | |
| "learning_rate": 8.45153344948289e-06, | |
| "loss": 0.3217, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.28017701815962154, | |
| "grad_norm": 1.6256035637532993, | |
| "learning_rate": 8.44795449577332e-06, | |
| "loss": 0.4455, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.28048222188310695, | |
| "grad_norm": 1.3626337563191635, | |
| "learning_rate": 8.444372170535478e-06, | |
| "loss": 0.3391, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.2807874256065924, | |
| "grad_norm": 1.2635973004795618, | |
| "learning_rate": 8.440786477272282e-06, | |
| "loss": 0.2515, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.28109262933007784, | |
| "grad_norm": 1.1911478731743688, | |
| "learning_rate": 8.437197419489948e-06, | |
| "loss": 0.2891, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.28139783305356325, | |
| "grad_norm": 1.3587095012788262, | |
| "learning_rate": 8.43360500069798e-06, | |
| "loss": 0.416, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.28170303677704867, | |
| "grad_norm": 1.3537329737331374, | |
| "learning_rate": 8.430009224409167e-06, | |
| "loss": 0.2851, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.2820082405005341, | |
| "grad_norm": 1.0905607319343713, | |
| "learning_rate": 8.426410094139582e-06, | |
| "loss": 0.219, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.28231344422401955, | |
| "grad_norm": 1.2957621936139718, | |
| "learning_rate": 8.422807613408583e-06, | |
| "loss": 0.2757, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.28261864794750496, | |
| "grad_norm": 1.5464703579715704, | |
| "learning_rate": 8.419201785738794e-06, | |
| "loss": 0.488, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.2829238516709904, | |
| "grad_norm": 1.5027192422666622, | |
| "learning_rate": 8.415592614656119e-06, | |
| "loss": 0.298, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.2832290553944758, | |
| "grad_norm": 1.5041494197684437, | |
| "learning_rate": 8.411980103689727e-06, | |
| "loss": 0.2037, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.28353425911796126, | |
| "grad_norm": 1.1718395482104975, | |
| "learning_rate": 8.40836425637206e-06, | |
| "loss": 0.1936, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.2838394628414467, | |
| "grad_norm": 1.4034578790843537, | |
| "learning_rate": 8.404745076238816e-06, | |
| "loss": 0.3402, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2841446665649321, | |
| "grad_norm": 1.471548215059981, | |
| "learning_rate": 8.40112256682895e-06, | |
| "loss": 0.3633, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.2844498702884175, | |
| "grad_norm": 2.534571034931892, | |
| "learning_rate": 8.39749673168468e-06, | |
| "loss": 0.3617, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.2847550740119029, | |
| "grad_norm": 1.5477637618039504, | |
| "learning_rate": 8.39386757435147e-06, | |
| "loss": 0.4332, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.2850602777353884, | |
| "grad_norm": 1.058523928884, | |
| "learning_rate": 8.390235098378037e-06, | |
| "loss": 0.2018, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.2853654814588738, | |
| "grad_norm": 1.051963849726592, | |
| "learning_rate": 8.38659930731634e-06, | |
| "loss": 0.2024, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.2856706851823592, | |
| "grad_norm": 1.4307030578090003, | |
| "learning_rate": 8.382960204721575e-06, | |
| "loss": 0.3186, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.28597588890584463, | |
| "grad_norm": 1.2191946734542334, | |
| "learning_rate": 8.37931779415219e-06, | |
| "loss": 0.2907, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.2862810926293301, | |
| "grad_norm": 1.4629392730406188, | |
| "learning_rate": 8.375672079169854e-06, | |
| "loss": 0.3628, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.2865862963528155, | |
| "grad_norm": 1.2671740435957073, | |
| "learning_rate": 8.372023063339472e-06, | |
| "loss": 0.3895, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.28689150007630093, | |
| "grad_norm": 1.2843029586747219, | |
| "learning_rate": 8.36837075022918e-06, | |
| "loss": 0.2448, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.28719670379978635, | |
| "grad_norm": 1.225493384282322, | |
| "learning_rate": 8.36471514341033e-06, | |
| "loss": 0.3751, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.28750190752327176, | |
| "grad_norm": 1.118820597376438, | |
| "learning_rate": 8.361056246457509e-06, | |
| "loss": 0.2959, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.28780711124675723, | |
| "grad_norm": 1.5826287542337327, | |
| "learning_rate": 8.357394062948501e-06, | |
| "loss": 0.1865, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.28811231497024264, | |
| "grad_norm": 1.7322615384777234, | |
| "learning_rate": 8.353728596464324e-06, | |
| "loss": 0.3319, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.28841751869372806, | |
| "grad_norm": 1.1819946263218575, | |
| "learning_rate": 8.350059850589189e-06, | |
| "loss": 0.258, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.2887227224172135, | |
| "grad_norm": 1.2768421179691603, | |
| "learning_rate": 8.34638782891053e-06, | |
| "loss": 0.3505, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.28902792614069894, | |
| "grad_norm": 1.6765785010321739, | |
| "learning_rate": 8.342712535018968e-06, | |
| "loss": 0.3988, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.28933312986418436, | |
| "grad_norm": 1.6460901695759294, | |
| "learning_rate": 8.339033972508335e-06, | |
| "loss": 0.5964, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.28963833358766977, | |
| "grad_norm": 1.3645220250147991, | |
| "learning_rate": 8.335352144975657e-06, | |
| "loss": 0.3997, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.2899435373111552, | |
| "grad_norm": 1.565124139219312, | |
| "learning_rate": 8.331667056021151e-06, | |
| "loss": 0.2581, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2902487410346406, | |
| "grad_norm": 1.2163034972670224, | |
| "learning_rate": 8.32797870924822e-06, | |
| "loss": 0.3549, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.29055394475812607, | |
| "grad_norm": 1.9963649856615253, | |
| "learning_rate": 8.324287108263462e-06, | |
| "loss": 0.4499, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.2908591484816115, | |
| "grad_norm": 1.5738812787701257, | |
| "learning_rate": 8.320592256676646e-06, | |
| "loss": 0.4303, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.2911643522050969, | |
| "grad_norm": 1.51492812662889, | |
| "learning_rate": 8.316894158100728e-06, | |
| "loss": 0.3071, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.2914695559285823, | |
| "grad_norm": 1.5701047086118967, | |
| "learning_rate": 8.313192816151832e-06, | |
| "loss": 0.3527, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.29177475965206773, | |
| "grad_norm": 1.6660195604391899, | |
| "learning_rate": 8.309488234449261e-06, | |
| "loss": 0.4016, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.2920799633755532, | |
| "grad_norm": 1.5308116363222675, | |
| "learning_rate": 8.30578041661548e-06, | |
| "loss": 0.3991, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.2923851670990386, | |
| "grad_norm": 1.5118616317530167, | |
| "learning_rate": 8.302069366276121e-06, | |
| "loss": 0.4968, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.292690370822524, | |
| "grad_norm": 1.6934474407382258, | |
| "learning_rate": 8.298355087059978e-06, | |
| "loss": 0.5735, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.29299557454600944, | |
| "grad_norm": 1.4097627634984986, | |
| "learning_rate": 8.294637582598998e-06, | |
| "loss": 0.3816, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2933007782694949, | |
| "grad_norm": 1.4214156530194402, | |
| "learning_rate": 8.290916856528288e-06, | |
| "loss": 0.3706, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.2936059819929803, | |
| "grad_norm": 1.3762680622131698, | |
| "learning_rate": 8.287192912486098e-06, | |
| "loss": 0.2482, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.29391118571646574, | |
| "grad_norm": 1.6238119477399238, | |
| "learning_rate": 8.283465754113832e-06, | |
| "loss": 0.483, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.29421638943995115, | |
| "grad_norm": 1.580806305043778, | |
| "learning_rate": 8.279735385056028e-06, | |
| "loss": 0.3277, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.29452159316343657, | |
| "grad_norm": 1.9306479469286622, | |
| "learning_rate": 8.276001808960374e-06, | |
| "loss": 0.448, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.29482679688692204, | |
| "grad_norm": 1.5497884575246978, | |
| "learning_rate": 8.272265029477686e-06, | |
| "loss": 0.3917, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.29513200061040745, | |
| "grad_norm": 1.5393309383304865, | |
| "learning_rate": 8.26852505026192e-06, | |
| "loss": 0.3033, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.29543720433389287, | |
| "grad_norm": 1.6980365016594954, | |
| "learning_rate": 8.26478187497015e-06, | |
| "loss": 0.2664, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.2957424080573783, | |
| "grad_norm": 1.363460408765191, | |
| "learning_rate": 8.261035507262582e-06, | |
| "loss": 0.254, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.29604761178086375, | |
| "grad_norm": 1.6405225449314822, | |
| "learning_rate": 8.257285950802547e-06, | |
| "loss": 0.4717, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.29635281550434917, | |
| "grad_norm": 1.4402490121062985, | |
| "learning_rate": 8.253533209256486e-06, | |
| "loss": 0.2763, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.2966580192278346, | |
| "grad_norm": 1.8581821237579783, | |
| "learning_rate": 8.24977728629396e-06, | |
| "loss": 0.5867, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.29696322295132, | |
| "grad_norm": 1.6644196462856506, | |
| "learning_rate": 8.246018185587642e-06, | |
| "loss": 0.6931, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.2972684266748054, | |
| "grad_norm": 1.7226101616124239, | |
| "learning_rate": 8.242255910813308e-06, | |
| "loss": 0.4707, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.2975736303982909, | |
| "grad_norm": 1.559017343729282, | |
| "learning_rate": 8.238490465649837e-06, | |
| "loss": 0.3913, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.2978788341217763, | |
| "grad_norm": 2.355733929284259, | |
| "learning_rate": 8.234721853779212e-06, | |
| "loss": 0.3725, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.2981840378452617, | |
| "grad_norm": 1.800766090445105, | |
| "learning_rate": 8.230950078886512e-06, | |
| "loss": 0.3071, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.2984892415687471, | |
| "grad_norm": 2.5023327434487097, | |
| "learning_rate": 8.227175144659908e-06, | |
| "loss": 0.5576, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.2987944452922326, | |
| "grad_norm": 1.657243682467184, | |
| "learning_rate": 8.22339705479066e-06, | |
| "loss": 0.3635, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.299099649015718, | |
| "grad_norm": 1.4497278352698486, | |
| "learning_rate": 8.219615812973111e-06, | |
| "loss": 0.3942, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2994048527392034, | |
| "grad_norm": 1.4676463261815744, | |
| "learning_rate": 8.215831422904694e-06, | |
| "loss": 0.3701, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.29971005646268883, | |
| "grad_norm": 1.4714044973041538, | |
| "learning_rate": 8.212043888285913e-06, | |
| "loss": 0.3646, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.30001526018617425, | |
| "grad_norm": 1.8962188440528207, | |
| "learning_rate": 8.208253212820349e-06, | |
| "loss": 0.459, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.3003204639096597, | |
| "grad_norm": 1.5089138139920484, | |
| "learning_rate": 8.204459400214657e-06, | |
| "loss": 0.3636, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.30062566763314513, | |
| "grad_norm": 1.5444120585905727, | |
| "learning_rate": 8.200662454178553e-06, | |
| "loss": 0.3727, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.30093087135663055, | |
| "grad_norm": 1.3153314475890103, | |
| "learning_rate": 8.196862378424826e-06, | |
| "loss": 0.2808, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.30123607508011596, | |
| "grad_norm": 1.645327268934486, | |
| "learning_rate": 8.193059176669317e-06, | |
| "loss": 0.4536, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.30154127880360143, | |
| "grad_norm": 1.343818307959044, | |
| "learning_rate": 8.189252852630927e-06, | |
| "loss": 0.3137, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.30184648252708685, | |
| "grad_norm": 1.079353461920414, | |
| "learning_rate": 8.185443410031613e-06, | |
| "loss": 0.1587, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.30215168625057226, | |
| "grad_norm": 1.3923715489039352, | |
| "learning_rate": 8.181630852596378e-06, | |
| "loss": 0.5496, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3024568899740577, | |
| "grad_norm": 1.3696798223323383, | |
| "learning_rate": 8.17781518405327e-06, | |
| "loss": 0.2037, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.3027620936975431, | |
| "grad_norm": 1.6749786375590874, | |
| "learning_rate": 8.173996408133382e-06, | |
| "loss": 0.523, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.30306729742102856, | |
| "grad_norm": 1.129331462466354, | |
| "learning_rate": 8.170174528570845e-06, | |
| "loss": 0.2696, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.303372501144514, | |
| "grad_norm": 1.4335553322615482, | |
| "learning_rate": 8.16634954910282e-06, | |
| "loss": 0.3102, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.3036777048679994, | |
| "grad_norm": 1.9054489954275935, | |
| "learning_rate": 8.162521473469509e-06, | |
| "loss": 0.4552, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.3039829085914848, | |
| "grad_norm": 1.8579421733648953, | |
| "learning_rate": 8.158690305414132e-06, | |
| "loss": 0.3536, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.3042881123149702, | |
| "grad_norm": 2.2709514225577223, | |
| "learning_rate": 8.154856048682938e-06, | |
| "loss": 0.4632, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.3045933160384557, | |
| "grad_norm": 1.5737905369537974, | |
| "learning_rate": 8.151018707025194e-06, | |
| "loss": 0.2503, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.3048985197619411, | |
| "grad_norm": 1.298904393760468, | |
| "learning_rate": 8.147178284193185e-06, | |
| "loss": 0.3482, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.3052037234854265, | |
| "grad_norm": 2.3561598742827132, | |
| "learning_rate": 8.143334783942207e-06, | |
| "loss": 0.3419, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.30550892720891193, | |
| "grad_norm": 1.1397332664109803, | |
| "learning_rate": 8.13948821003057e-06, | |
| "loss": 0.264, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.3058141309323974, | |
| "grad_norm": 1.664228189305695, | |
| "learning_rate": 8.135638566219581e-06, | |
| "loss": 0.6021, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.3061193346558828, | |
| "grad_norm": 1.2828873662440865, | |
| "learning_rate": 8.131785856273558e-06, | |
| "loss": 0.3529, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.3064245383793682, | |
| "grad_norm": 1.54799572968202, | |
| "learning_rate": 8.127930083959813e-06, | |
| "loss": 0.41, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.30672974210285364, | |
| "grad_norm": 1.6015124057285708, | |
| "learning_rate": 8.124071253048648e-06, | |
| "loss": 0.5602, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.30703494582633906, | |
| "grad_norm": 1.5019375684946967, | |
| "learning_rate": 8.120209367313364e-06, | |
| "loss": 0.5064, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.3073401495498245, | |
| "grad_norm": 1.4841104372065577, | |
| "learning_rate": 8.116344430530248e-06, | |
| "loss": 0.2734, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.30764535327330994, | |
| "grad_norm": 1.4979575079530576, | |
| "learning_rate": 8.112476446478561e-06, | |
| "loss": 0.4476, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.30795055699679535, | |
| "grad_norm": 1.48012915266827, | |
| "learning_rate": 8.108605418940555e-06, | |
| "loss": 0.3478, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.30825576072028077, | |
| "grad_norm": 1.7441608560206838, | |
| "learning_rate": 8.104731351701456e-06, | |
| "loss": 0.4198, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.30856096444376624, | |
| "grad_norm": 1.6603083282826114, | |
| "learning_rate": 8.100854248549453e-06, | |
| "loss": 0.2977, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.30886616816725165, | |
| "grad_norm": 1.4905130468269074, | |
| "learning_rate": 8.096974113275716e-06, | |
| "loss": 0.4249, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.30917137189073707, | |
| "grad_norm": 1.3913148176340926, | |
| "learning_rate": 8.093090949674373e-06, | |
| "loss": 0.3232, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.3094765756142225, | |
| "grad_norm": 1.896351174136801, | |
| "learning_rate": 8.089204761542515e-06, | |
| "loss": 0.6316, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.3097817793377079, | |
| "grad_norm": 1.3098254883257765, | |
| "learning_rate": 8.08531555268019e-06, | |
| "loss": 0.3526, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.31008698306119337, | |
| "grad_norm": 1.2414711860204384, | |
| "learning_rate": 8.081423326890399e-06, | |
| "loss": 0.2527, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.3103921867846788, | |
| "grad_norm": 1.651694240025547, | |
| "learning_rate": 8.077528087979095e-06, | |
| "loss": 0.4595, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.3106973905081642, | |
| "grad_norm": 1.2490722074076572, | |
| "learning_rate": 8.073629839755177e-06, | |
| "loss": 0.3143, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.3110025942316496, | |
| "grad_norm": 1.4922248875592679, | |
| "learning_rate": 8.069728586030483e-06, | |
| "loss": 0.4735, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.3113077979551351, | |
| "grad_norm": 1.3134160251209652, | |
| "learning_rate": 8.065824330619796e-06, | |
| "loss": 0.3399, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3116130016786205, | |
| "grad_norm": 1.3819445456159205, | |
| "learning_rate": 8.061917077340828e-06, | |
| "loss": 0.4378, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.3119182054021059, | |
| "grad_norm": 1.3067268833489771, | |
| "learning_rate": 8.05800683001423e-06, | |
| "loss": 0.3642, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.3122234091255913, | |
| "grad_norm": 1.2386206994963245, | |
| "learning_rate": 8.054093592463572e-06, | |
| "loss": 0.3116, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.31252861284907674, | |
| "grad_norm": 1.5821932486019623, | |
| "learning_rate": 8.050177368515353e-06, | |
| "loss": 0.3499, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.3128338165725622, | |
| "grad_norm": 1.2768808628569455, | |
| "learning_rate": 8.046258161998993e-06, | |
| "loss": 0.1659, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.3131390202960476, | |
| "grad_norm": 1.2575220756033998, | |
| "learning_rate": 8.042335976746822e-06, | |
| "loss": 0.2405, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.31344422401953304, | |
| "grad_norm": 1.8245135116139561, | |
| "learning_rate": 8.038410816594093e-06, | |
| "loss": 0.4366, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.31374942774301845, | |
| "grad_norm": 1.4466165793811705, | |
| "learning_rate": 8.034482685378959e-06, | |
| "loss": 0.4082, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.31405463146650386, | |
| "grad_norm": 1.5442738868035615, | |
| "learning_rate": 8.030551586942484e-06, | |
| "loss": 0.4539, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.31435983518998933, | |
| "grad_norm": 1.2964103969559602, | |
| "learning_rate": 8.026617525128628e-06, | |
| "loss": 0.3801, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.31466503891347475, | |
| "grad_norm": 1.872123164314918, | |
| "learning_rate": 8.022680503784252e-06, | |
| "loss": 0.3698, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.31497024263696016, | |
| "grad_norm": 1.3752871993705764, | |
| "learning_rate": 8.018740526759115e-06, | |
| "loss": 0.3825, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.3152754463604456, | |
| "grad_norm": 0.9331829492009497, | |
| "learning_rate": 8.014797597905856e-06, | |
| "loss": 0.2683, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.31558065008393105, | |
| "grad_norm": 1.5771299945709583, | |
| "learning_rate": 8.01085172108001e-06, | |
| "loss": 0.5906, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.31588585380741646, | |
| "grad_norm": 1.239300714325851, | |
| "learning_rate": 8.006902900139989e-06, | |
| "loss": 0.3648, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.3161910575309019, | |
| "grad_norm": 1.058310326503882, | |
| "learning_rate": 8.002951138947085e-06, | |
| "loss": 0.228, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.3164962612543873, | |
| "grad_norm": 1.1824475268280543, | |
| "learning_rate": 7.99899644136547e-06, | |
| "loss": 0.2105, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.3168014649778727, | |
| "grad_norm": 1.5959459771356375, | |
| "learning_rate": 7.995038811262176e-06, | |
| "loss": 0.4822, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.3171066687013582, | |
| "grad_norm": 1.0890532877681574, | |
| "learning_rate": 7.991078252507117e-06, | |
| "loss": 0.301, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.3174118724248436, | |
| "grad_norm": 1.5858524092975068, | |
| "learning_rate": 7.987114768973059e-06, | |
| "loss": 0.3329, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.317717076148329, | |
| "grad_norm": 1.2871444766825402, | |
| "learning_rate": 7.983148364535633e-06, | |
| "loss": 0.3842, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.3180222798718144, | |
| "grad_norm": 1.6762774250879164, | |
| "learning_rate": 7.979179043073327e-06, | |
| "loss": 0.6428, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.3183274835952999, | |
| "grad_norm": 1.1260557519041783, | |
| "learning_rate": 7.975206808467477e-06, | |
| "loss": 0.2912, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.3186326873187853, | |
| "grad_norm": 1.4814440250076806, | |
| "learning_rate": 7.971231664602273e-06, | |
| "loss": 0.428, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.3189378910422707, | |
| "grad_norm": 1.5470069289542376, | |
| "learning_rate": 7.967253615364746e-06, | |
| "loss": 0.326, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.31924309476575613, | |
| "grad_norm": 1.6492330791767074, | |
| "learning_rate": 7.963272664644765e-06, | |
| "loss": 0.4851, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.31954829848924154, | |
| "grad_norm": 1.1387453660755529, | |
| "learning_rate": 7.95928881633505e-06, | |
| "loss": 0.1819, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.319853502212727, | |
| "grad_norm": 1.528342152399863, | |
| "learning_rate": 7.955302074331136e-06, | |
| "loss": 0.2465, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.32015870593621243, | |
| "grad_norm": 1.3953796712292619, | |
| "learning_rate": 7.9513124425314e-06, | |
| "loss": 0.3372, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.32046390965969784, | |
| "grad_norm": 1.5878443787469112, | |
| "learning_rate": 7.94731992483704e-06, | |
| "loss": 0.3593, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32076911338318326, | |
| "grad_norm": 1.5355544022885335, | |
| "learning_rate": 7.943324525152077e-06, | |
| "loss": 0.3395, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.3210743171066687, | |
| "grad_norm": 1.2530592675223244, | |
| "learning_rate": 7.93932624738335e-06, | |
| "loss": 0.2164, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.32137952083015414, | |
| "grad_norm": 4.526521517561823, | |
| "learning_rate": 7.935325095440511e-06, | |
| "loss": 0.2893, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.32168472455363956, | |
| "grad_norm": 1.4025910565877562, | |
| "learning_rate": 7.931321073236024e-06, | |
| "loss": 0.3703, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.32198992827712497, | |
| "grad_norm": 1.6381535397997637, | |
| "learning_rate": 7.927314184685164e-06, | |
| "loss": 0.3823, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.3222951320006104, | |
| "grad_norm": 1.6423873881343087, | |
| "learning_rate": 7.923304433705999e-06, | |
| "loss": 0.3951, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.32260033572409585, | |
| "grad_norm": 1.8735606709031154, | |
| "learning_rate": 7.919291824219402e-06, | |
| "loss": 0.2828, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.32290553944758127, | |
| "grad_norm": 1.3931902680953308, | |
| "learning_rate": 7.91527636014904e-06, | |
| "loss": 0.2629, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.3232107431710667, | |
| "grad_norm": 1.3616683731784345, | |
| "learning_rate": 7.911258045421374e-06, | |
| "loss": 0.3049, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.3235159468945521, | |
| "grad_norm": 1.8477138002235685, | |
| "learning_rate": 7.90723688396565e-06, | |
| "loss": 0.5091, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.32382115061803757, | |
| "grad_norm": 2.1715762804280048, | |
| "learning_rate": 7.903212879713894e-06, | |
| "loss": 0.3296, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.324126354341523, | |
| "grad_norm": 1.2259722218688094, | |
| "learning_rate": 7.89918603660092e-06, | |
| "loss": 0.2084, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.3244315580650084, | |
| "grad_norm": 1.5260897510578109, | |
| "learning_rate": 7.895156358564308e-06, | |
| "loss": 0.3144, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.3247367617884938, | |
| "grad_norm": 1.2101539291645254, | |
| "learning_rate": 7.891123849544421e-06, | |
| "loss": 0.2139, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.3250419655119792, | |
| "grad_norm": 1.4587532074802312, | |
| "learning_rate": 7.887088513484383e-06, | |
| "loss": 0.3954, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.3253471692354647, | |
| "grad_norm": 1.6684881111981207, | |
| "learning_rate": 7.883050354330085e-06, | |
| "loss": 0.3655, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.3256523729589501, | |
| "grad_norm": 1.307546988348603, | |
| "learning_rate": 7.879009376030173e-06, | |
| "loss": 0.2016, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.3259575766824355, | |
| "grad_norm": 1.382846724092064, | |
| "learning_rate": 7.87496558253606e-06, | |
| "loss": 0.3516, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.32626278040592094, | |
| "grad_norm": 1.420119155129035, | |
| "learning_rate": 7.870918977801902e-06, | |
| "loss": 0.4247, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.32656798412940635, | |
| "grad_norm": 2.0170283200834227, | |
| "learning_rate": 7.866869565784611e-06, | |
| "loss": 0.4074, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3268731878528918, | |
| "grad_norm": 1.3132837770926757, | |
| "learning_rate": 7.86281735044384e-06, | |
| "loss": 0.3554, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.32717839157637724, | |
| "grad_norm": 1.491650229990386, | |
| "learning_rate": 7.858762335741984e-06, | |
| "loss": 0.4311, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.32748359529986265, | |
| "grad_norm": 1.3015830744734889, | |
| "learning_rate": 7.854704525644175e-06, | |
| "loss": 0.4368, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.32778879902334807, | |
| "grad_norm": 1.240655590227942, | |
| "learning_rate": 7.85064392411828e-06, | |
| "loss": 0.3503, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.32809400274683354, | |
| "grad_norm": 1.0370573041527442, | |
| "learning_rate": 7.846580535134895e-06, | |
| "loss": 0.2173, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.32839920647031895, | |
| "grad_norm": 1.490198377421961, | |
| "learning_rate": 7.842514362667341e-06, | |
| "loss": 0.4797, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.32870441019380436, | |
| "grad_norm": 1.2483676742868937, | |
| "learning_rate": 7.83844541069166e-06, | |
| "loss": 0.3117, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.3290096139172898, | |
| "grad_norm": 1.1118596151916136, | |
| "learning_rate": 7.834373683186614e-06, | |
| "loss": 0.3425, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.3293148176407752, | |
| "grad_norm": 1.1498236044067436, | |
| "learning_rate": 7.830299184133676e-06, | |
| "loss": 0.3116, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.32962002136426066, | |
| "grad_norm": 1.330890961706903, | |
| "learning_rate": 7.826221917517034e-06, | |
| "loss": 0.326, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3299252250877461, | |
| "grad_norm": 1.1391107096366442, | |
| "learning_rate": 7.822141887323577e-06, | |
| "loss": 0.2253, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.3302304288112315, | |
| "grad_norm": 1.0899804689249892, | |
| "learning_rate": 7.8180590975429e-06, | |
| "loss": 0.3972, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.3305356325347169, | |
| "grad_norm": 1.1421995399769407, | |
| "learning_rate": 7.813973552167293e-06, | |
| "loss": 0.1951, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.3308408362582024, | |
| "grad_norm": 1.6301828043180877, | |
| "learning_rate": 7.809885255191745e-06, | |
| "loss": 0.5203, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.3311460399816878, | |
| "grad_norm": 1.1015959389663672, | |
| "learning_rate": 7.805794210613934e-06, | |
| "loss": 0.1868, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.3314512437051732, | |
| "grad_norm": 1.0719373857232206, | |
| "learning_rate": 7.801700422434218e-06, | |
| "loss": 0.3546, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.3317564474286586, | |
| "grad_norm": 1.7069619729935679, | |
| "learning_rate": 7.79760389465565e-06, | |
| "loss": 0.2883, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.33206165115214403, | |
| "grad_norm": 1.1374914944508905, | |
| "learning_rate": 7.793504631283952e-06, | |
| "loss": 0.2634, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.3323668548756295, | |
| "grad_norm": 1.407791140920363, | |
| "learning_rate": 7.789402636327526e-06, | |
| "loss": 0.4067, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.3326720585991149, | |
| "grad_norm": 1.1160900354196726, | |
| "learning_rate": 7.785297913797441e-06, | |
| "loss": 0.2304, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.33297726232260033, | |
| "grad_norm": 1.2290853807544064, | |
| "learning_rate": 7.78119046770744e-06, | |
| "loss": 0.3382, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.33328246604608575, | |
| "grad_norm": 1.582064705954116, | |
| "learning_rate": 7.777080302073922e-06, | |
| "loss": 0.4928, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.3335876697695712, | |
| "grad_norm": 1.1020461299123707, | |
| "learning_rate": 7.772967420915948e-06, | |
| "loss": 0.2539, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.33389287349305663, | |
| "grad_norm": 1.2964060291640898, | |
| "learning_rate": 7.768851828255233e-06, | |
| "loss": 0.3398, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.33419807721654204, | |
| "grad_norm": 1.2035385015753215, | |
| "learning_rate": 7.764733528116148e-06, | |
| "loss": 0.2841, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.33450328094002746, | |
| "grad_norm": 1.4026090323213516, | |
| "learning_rate": 7.760612524525708e-06, | |
| "loss": 0.3719, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.3348084846635129, | |
| "grad_norm": 1.44136903296084, | |
| "learning_rate": 7.75648882151357e-06, | |
| "loss": 0.3998, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.33511368838699834, | |
| "grad_norm": 1.491339317646278, | |
| "learning_rate": 7.752362423112032e-06, | |
| "loss": 0.4328, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.33541889211048376, | |
| "grad_norm": 1.0421144222506495, | |
| "learning_rate": 7.74823333335603e-06, | |
| "loss": 0.1908, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.33572409583396917, | |
| "grad_norm": 1.2708409522823558, | |
| "learning_rate": 7.744101556283129e-06, | |
| "loss": 0.3129, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3360292995574546, | |
| "grad_norm": 1.6982843745883154, | |
| "learning_rate": 7.73996709593352e-06, | |
| "loss": 0.4516, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.33633450328094, | |
| "grad_norm": 1.3745112417433676, | |
| "learning_rate": 7.735829956350026e-06, | |
| "loss": 0.3835, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.33663970700442547, | |
| "grad_norm": 1.0289081222992171, | |
| "learning_rate": 7.731690141578076e-06, | |
| "loss": 0.23, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.3369449107279109, | |
| "grad_norm": 1.3309142499739386, | |
| "learning_rate": 7.72754765566573e-06, | |
| "loss": 0.2879, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.3372501144513963, | |
| "grad_norm": 1.7026299273281889, | |
| "learning_rate": 7.72340250266365e-06, | |
| "loss": 0.6118, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.3375553181748817, | |
| "grad_norm": 1.3708586314610167, | |
| "learning_rate": 7.71925468662511e-06, | |
| "loss": 0.3695, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.3378605218983672, | |
| "grad_norm": 1.138227058539724, | |
| "learning_rate": 7.715104211605987e-06, | |
| "loss": 0.3156, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.3381657256218526, | |
| "grad_norm": 1.6312680765955443, | |
| "learning_rate": 7.710951081664758e-06, | |
| "loss": 0.3716, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.338470929345338, | |
| "grad_norm": 1.43186060111883, | |
| "learning_rate": 7.706795300862493e-06, | |
| "loss": 0.4303, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.3387761330688234, | |
| "grad_norm": 1.295302897418865, | |
| "learning_rate": 7.702636873262859e-06, | |
| "loss": 0.2334, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.33908133679230884, | |
| "grad_norm": 1.2674378022051007, | |
| "learning_rate": 7.69847580293211e-06, | |
| "loss": 0.3056, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.3393865405157943, | |
| "grad_norm": 1.326251694716828, | |
| "learning_rate": 7.694312093939084e-06, | |
| "loss": 0.2555, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.3396917442392797, | |
| "grad_norm": 1.525503000572152, | |
| "learning_rate": 7.690145750355198e-06, | |
| "loss": 0.3448, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.33999694796276514, | |
| "grad_norm": 1.3733356980578115, | |
| "learning_rate": 7.685976776254446e-06, | |
| "loss": 0.2901, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.34030215168625055, | |
| "grad_norm": 1.23597237288206, | |
| "learning_rate": 7.681805175713392e-06, | |
| "loss": 0.2144, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.340607355409736, | |
| "grad_norm": 1.5777935019830391, | |
| "learning_rate": 7.677630952811172e-06, | |
| "loss": 0.5106, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.34091255913322144, | |
| "grad_norm": 1.171188713025467, | |
| "learning_rate": 7.673454111629486e-06, | |
| "loss": 0.1928, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.34121776285670685, | |
| "grad_norm": 1.3533361626972884, | |
| "learning_rate": 7.669274656252588e-06, | |
| "loss": 0.2716, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.34152296658019227, | |
| "grad_norm": 1.3706061645549534, | |
| "learning_rate": 7.665092590767298e-06, | |
| "loss": 0.2415, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.3418281703036777, | |
| "grad_norm": 1.444083686347945, | |
| "learning_rate": 7.66090791926298e-06, | |
| "loss": 0.2384, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.34213337402716315, | |
| "grad_norm": 1.7114127557234347, | |
| "learning_rate": 7.65672064583155e-06, | |
| "loss": 0.422, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.34243857775064857, | |
| "grad_norm": 1.5259644507270844, | |
| "learning_rate": 7.652530774567468e-06, | |
| "loss": 0.5226, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.342743781474134, | |
| "grad_norm": 0.9911746164065847, | |
| "learning_rate": 7.648338309567735e-06, | |
| "loss": 0.2267, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.3430489851976194, | |
| "grad_norm": 1.0166151648645703, | |
| "learning_rate": 7.644143254931887e-06, | |
| "loss": 0.2104, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.34335418892110486, | |
| "grad_norm": 1.6720832623689992, | |
| "learning_rate": 7.63994561476199e-06, | |
| "loss": 0.5396, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.3436593926445903, | |
| "grad_norm": 1.5678839432039253, | |
| "learning_rate": 7.635745393162643e-06, | |
| "loss": 0.4492, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.3439645963680757, | |
| "grad_norm": 1.8116112207083335, | |
| "learning_rate": 7.631542594240968e-06, | |
| "loss": 0.3977, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.3442698000915611, | |
| "grad_norm": 1.473465027051407, | |
| "learning_rate": 7.627337222106602e-06, | |
| "loss": 0.3924, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.3445750038150465, | |
| "grad_norm": 1.8018368091168058, | |
| "learning_rate": 7.623129280871706e-06, | |
| "loss": 0.6324, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.344880207538532, | |
| "grad_norm": 1.2871463289874823, | |
| "learning_rate": 7.618918774650947e-06, | |
| "loss": 0.317, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3451854112620174, | |
| "grad_norm": 1.3104766508523527, | |
| "learning_rate": 7.614705707561505e-06, | |
| "loss": 0.1865, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.3454906149855028, | |
| "grad_norm": 1.4958005772283107, | |
| "learning_rate": 7.610490083723059e-06, | |
| "loss": 0.3286, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.34579581870898823, | |
| "grad_norm": 1.3334807622385083, | |
| "learning_rate": 7.606271907257793e-06, | |
| "loss": 0.1803, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.3461010224324737, | |
| "grad_norm": 1.58655509762167, | |
| "learning_rate": 7.602051182290382e-06, | |
| "loss": 0.428, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.3464062261559591, | |
| "grad_norm": 1.6285506850668747, | |
| "learning_rate": 7.597827912947998e-06, | |
| "loss": 0.3046, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.34671142987944453, | |
| "grad_norm": 1.3766123681447917, | |
| "learning_rate": 7.593602103360298e-06, | |
| "loss": 0.2579, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.34701663360292995, | |
| "grad_norm": 1.7542194180322948, | |
| "learning_rate": 7.589373757659424e-06, | |
| "loss": 0.2563, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.34732183732641536, | |
| "grad_norm": 1.1682456208270866, | |
| "learning_rate": 7.585142879979998e-06, | |
| "loss": 0.3068, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.34762704104990083, | |
| "grad_norm": 1.1214377908787607, | |
| "learning_rate": 7.580909474459117e-06, | |
| "loss": 0.1778, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.34793224477338625, | |
| "grad_norm": 1.5446511669234666, | |
| "learning_rate": 7.576673545236349e-06, | |
| "loss": 0.2251, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.34823744849687166, | |
| "grad_norm": 1.2705911290898333, | |
| "learning_rate": 7.572435096453734e-06, | |
| "loss": 0.2938, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.3485426522203571, | |
| "grad_norm": 1.493297862812399, | |
| "learning_rate": 7.5681941322557685e-06, | |
| "loss": 0.3066, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.3488478559438425, | |
| "grad_norm": 1.0677076510295394, | |
| "learning_rate": 7.563950656789416e-06, | |
| "loss": 0.2338, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.34915305966732796, | |
| "grad_norm": 1.2516527217201623, | |
| "learning_rate": 7.559704674204091e-06, | |
| "loss": 0.3347, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.3494582633908134, | |
| "grad_norm": 1.702080362504898, | |
| "learning_rate": 7.555456188651665e-06, | |
| "loss": 0.4124, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.3497634671142988, | |
| "grad_norm": 1.539828192304715, | |
| "learning_rate": 7.551205204286447e-06, | |
| "loss": 0.462, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.3500686708377842, | |
| "grad_norm": 1.3087519763541762, | |
| "learning_rate": 7.546951725265198e-06, | |
| "loss": 0.2483, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.35037387456126967, | |
| "grad_norm": 1.43063223220777, | |
| "learning_rate": 7.542695755747116e-06, | |
| "loss": 0.3615, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.3506790782847551, | |
| "grad_norm": 1.5523078380591047, | |
| "learning_rate": 7.538437299893836e-06, | |
| "loss": 0.5231, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.3509842820082405, | |
| "grad_norm": 1.2101188107160756, | |
| "learning_rate": 7.534176361869418e-06, | |
| "loss": 0.3176, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3512894857317259, | |
| "grad_norm": 1.4239347860105078, | |
| "learning_rate": 7.529912945840359e-06, | |
| "loss": 0.3347, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.35159468945521133, | |
| "grad_norm": 1.3610805029033388, | |
| "learning_rate": 7.5256470559755665e-06, | |
| "loss": 0.3266, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.3518998931786968, | |
| "grad_norm": 1.6132233172860933, | |
| "learning_rate": 7.521378696446381e-06, | |
| "loss": 0.3327, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.3522050969021822, | |
| "grad_norm": 1.2930810424354338, | |
| "learning_rate": 7.517107871426548e-06, | |
| "loss": 0.1709, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.3525103006256676, | |
| "grad_norm": 1.6807461972866704, | |
| "learning_rate": 7.512834585092226e-06, | |
| "loss": 0.3168, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.35281550434915304, | |
| "grad_norm": 1.7732192926139976, | |
| "learning_rate": 7.508558841621981e-06, | |
| "loss": 0.5005, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.3531207080726385, | |
| "grad_norm": 1.4750885241824656, | |
| "learning_rate": 7.504280645196784e-06, | |
| "loss": 0.3684, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.3534259117961239, | |
| "grad_norm": 1.5202750221957153, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.1899, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.35373111551960934, | |
| "grad_norm": 1.1595814112433622, | |
| "learning_rate": 7.495716910217393e-06, | |
| "loss": 0.3105, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.35403631924309475, | |
| "grad_norm": 1.4534366232509097, | |
| "learning_rate": 7.491431380037113e-06, | |
| "loss": 0.3465, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.35434152296658017, | |
| "grad_norm": 1.156548538845248, | |
| "learning_rate": 7.4871434136497e-06, | |
| "loss": 0.2893, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.35464672669006564, | |
| "grad_norm": 1.4538056615012076, | |
| "learning_rate": 7.482853015248075e-06, | |
| "loss": 0.3667, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.35495193041355105, | |
| "grad_norm": 1.6556975684961233, | |
| "learning_rate": 7.478560189027536e-06, | |
| "loss": 0.2493, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.35525713413703647, | |
| "grad_norm": 1.6844460438765754, | |
| "learning_rate": 7.474264939185756e-06, | |
| "loss": 0.1798, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.3555623378605219, | |
| "grad_norm": 1.9475210131391785, | |
| "learning_rate": 7.469967269922777e-06, | |
| "loss": 0.5204, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.35586754158400735, | |
| "grad_norm": 2.793715839902533, | |
| "learning_rate": 7.465667185441009e-06, | |
| "loss": 0.3077, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.35617274530749277, | |
| "grad_norm": 1.4888770645456522, | |
| "learning_rate": 7.4613646899452205e-06, | |
| "loss": 0.4135, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.3564779490309782, | |
| "grad_norm": 1.1987516506211549, | |
| "learning_rate": 7.457059787642541e-06, | |
| "loss": 0.2382, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.3567831527544636, | |
| "grad_norm": 1.4813551853352487, | |
| "learning_rate": 7.452752482742452e-06, | |
| "loss": 0.217, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.357088356477949, | |
| "grad_norm": 1.6930510736367361, | |
| "learning_rate": 7.448442779456781e-06, | |
| "loss": 0.5214, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3573935602014345, | |
| "grad_norm": 2.190573141270535, | |
| "learning_rate": 7.444130681999708e-06, | |
| "loss": 0.2428, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.3576987639249199, | |
| "grad_norm": 1.3779387713699958, | |
| "learning_rate": 7.439816194587748e-06, | |
| "loss": 0.3164, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.3580039676484053, | |
| "grad_norm": 1.092055070131237, | |
| "learning_rate": 7.435499321439754e-06, | |
| "loss": 0.2772, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.3583091713718907, | |
| "grad_norm": 1.3792533977854593, | |
| "learning_rate": 7.431180066776913e-06, | |
| "loss": 0.3562, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.35861437509537614, | |
| "grad_norm": 1.2656833023017504, | |
| "learning_rate": 7.426858434822742e-06, | |
| "loss": 0.2438, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.3589195788188616, | |
| "grad_norm": 1.4217271832774543, | |
| "learning_rate": 7.42253442980308e-06, | |
| "loss": 0.4452, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.359224782542347, | |
| "grad_norm": 1.2647369471621235, | |
| "learning_rate": 7.418208055946088e-06, | |
| "loss": 0.3769, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.35952998626583244, | |
| "grad_norm": 1.5573904799259919, | |
| "learning_rate": 7.413879317482242e-06, | |
| "loss": 0.2497, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.35983518998931785, | |
| "grad_norm": 1.1158649645978311, | |
| "learning_rate": 7.409548218644332e-06, | |
| "loss": 0.3706, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.3601403937128033, | |
| "grad_norm": 1.325219870264579, | |
| "learning_rate": 7.4052147636674545e-06, | |
| "loss": 0.2988, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.36044559743628873, | |
| "grad_norm": 1.339413943736962, | |
| "learning_rate": 7.400878956789011e-06, | |
| "loss": 0.3791, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.36075080115977415, | |
| "grad_norm": 1.3236780103464618, | |
| "learning_rate": 7.396540802248704e-06, | |
| "loss": 0.3051, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.36105600488325956, | |
| "grad_norm": 1.4621900605001774, | |
| "learning_rate": 7.392200304288526e-06, | |
| "loss": 0.4029, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.361361208606745, | |
| "grad_norm": 1.3004193418247747, | |
| "learning_rate": 7.387857467152767e-06, | |
| "loss": 0.3573, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.36166641233023045, | |
| "grad_norm": 1.3403584017356904, | |
| "learning_rate": 7.383512295088002e-06, | |
| "loss": 0.301, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.36197161605371586, | |
| "grad_norm": 1.3326412828848118, | |
| "learning_rate": 7.379164792343091e-06, | |
| "loss": 0.2346, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.3622768197772013, | |
| "grad_norm": 1.3231894845074195, | |
| "learning_rate": 7.37481496316917e-06, | |
| "loss": 0.3924, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.3625820235006867, | |
| "grad_norm": 1.2230476508538641, | |
| "learning_rate": 7.370462811819651e-06, | |
| "loss": 0.3316, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.36288722722417216, | |
| "grad_norm": 1.6324841474899627, | |
| "learning_rate": 7.366108342550217e-06, | |
| "loss": 0.2683, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.3631924309476576, | |
| "grad_norm": 1.1650127142511193, | |
| "learning_rate": 7.361751559618819e-06, | |
| "loss": 0.3151, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.363497634671143, | |
| "grad_norm": 1.7617352605413967, | |
| "learning_rate": 7.357392467285667e-06, | |
| "loss": 0.3985, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.3638028383946284, | |
| "grad_norm": 1.6177177849776165, | |
| "learning_rate": 7.353031069813231e-06, | |
| "loss": 0.3853, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.3641080421181138, | |
| "grad_norm": 1.4672814189844252, | |
| "learning_rate": 7.348667371466238e-06, | |
| "loss": 0.3228, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.3644132458415993, | |
| "grad_norm": 1.429990475762935, | |
| "learning_rate": 7.344301376511659e-06, | |
| "loss": 0.3216, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.3647184495650847, | |
| "grad_norm": 1.2518101936436028, | |
| "learning_rate": 7.339933089218716e-06, | |
| "loss": 0.3541, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.3650236532885701, | |
| "grad_norm": 1.5100278244233902, | |
| "learning_rate": 7.335562513858868e-06, | |
| "loss": 0.3777, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.36532885701205553, | |
| "grad_norm": 1.3864156902279452, | |
| "learning_rate": 7.331189654705816e-06, | |
| "loss": 0.3451, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.365634060735541, | |
| "grad_norm": 1.3148051409795818, | |
| "learning_rate": 7.326814516035491e-06, | |
| "loss": 0.2568, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.3659392644590264, | |
| "grad_norm": 1.1783966682135776, | |
| "learning_rate": 7.322437102126052e-06, | |
| "loss": 0.409, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.36624446818251183, | |
| "grad_norm": 1.3482269405611969, | |
| "learning_rate": 7.318057417257886e-06, | |
| "loss": 0.4714, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36654967190599724, | |
| "grad_norm": 1.6310707549202617, | |
| "learning_rate": 7.3136754657136e-06, | |
| "loss": 0.2499, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.36685487562948266, | |
| "grad_norm": 1.039768581410541, | |
| "learning_rate": 7.309291251778016e-06, | |
| "loss": 0.272, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.3671600793529681, | |
| "grad_norm": 0.9895520991699935, | |
| "learning_rate": 7.304904779738169e-06, | |
| "loss": 0.2435, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.36746528307645354, | |
| "grad_norm": 1.229493544913369, | |
| "learning_rate": 7.300516053883301e-06, | |
| "loss": 0.3142, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.36777048679993896, | |
| "grad_norm": 1.4306220663631386, | |
| "learning_rate": 7.29612507850486e-06, | |
| "loss": 0.3279, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.36807569052342437, | |
| "grad_norm": 1.6062925874875447, | |
| "learning_rate": 7.291731857896493e-06, | |
| "loss": 0.4255, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.3683808942469098, | |
| "grad_norm": 1.268726927448817, | |
| "learning_rate": 7.287336396354039e-06, | |
| "loss": 0.3437, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.36868609797039525, | |
| "grad_norm": 1.6882520342147596, | |
| "learning_rate": 7.282938698175535e-06, | |
| "loss": 0.5873, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.36899130169388067, | |
| "grad_norm": 1.0404068703903802, | |
| "learning_rate": 7.278538767661198e-06, | |
| "loss": 0.2389, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.3692965054173661, | |
| "grad_norm": 1.1653062972796002, | |
| "learning_rate": 7.274136609113433e-06, | |
| "loss": 0.2512, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3696017091408515, | |
| "grad_norm": 1.3873057667640636, | |
| "learning_rate": 7.269732226836821e-06, | |
| "loss": 0.3267, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.36990691286433697, | |
| "grad_norm": 1.5224889687825975, | |
| "learning_rate": 7.265325625138119e-06, | |
| "loss": 0.3719, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.3702121165878224, | |
| "grad_norm": 1.2204715600675486, | |
| "learning_rate": 7.260916808326252e-06, | |
| "loss": 0.2686, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.3705173203113078, | |
| "grad_norm": 1.3842524558223457, | |
| "learning_rate": 7.256505780712314e-06, | |
| "loss": 0.2467, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.3708225240347932, | |
| "grad_norm": 1.7657099340484863, | |
| "learning_rate": 7.252092546609558e-06, | |
| "loss": 0.2798, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.3711277277582786, | |
| "grad_norm": 1.3329438693740925, | |
| "learning_rate": 7.247677110333397e-06, | |
| "loss": 0.2828, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.3714329314817641, | |
| "grad_norm": 1.104320779277888, | |
| "learning_rate": 7.2432594762013945e-06, | |
| "loss": 0.3003, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.3717381352052495, | |
| "grad_norm": 1.2445465814923657, | |
| "learning_rate": 7.238839648533266e-06, | |
| "loss": 0.2837, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.3720433389287349, | |
| "grad_norm": 1.540336275418335, | |
| "learning_rate": 7.234417631650872e-06, | |
| "loss": 0.5497, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.37234854265222034, | |
| "grad_norm": 1.2167853145142045, | |
| "learning_rate": 7.2299934298782105e-06, | |
| "loss": 0.2327, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.3726537463757058, | |
| "grad_norm": 1.6897903723185226, | |
| "learning_rate": 7.225567047541418e-06, | |
| "loss": 0.4309, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.3729589500991912, | |
| "grad_norm": 1.3675420682807005, | |
| "learning_rate": 7.221138488968763e-06, | |
| "loss": 0.3553, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.37326415382267664, | |
| "grad_norm": 1.1874100500472131, | |
| "learning_rate": 7.2167077584906416e-06, | |
| "loss": 0.2332, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.37356935754616205, | |
| "grad_norm": 1.7310995153583675, | |
| "learning_rate": 7.2122748604395765e-06, | |
| "loss": 0.3075, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.37387456126964747, | |
| "grad_norm": 1.3940442942063533, | |
| "learning_rate": 7.207839799150206e-06, | |
| "loss": 0.43, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.37417976499313294, | |
| "grad_norm": 1.6425564976559994, | |
| "learning_rate": 7.203402578959285e-06, | |
| "loss": 0.5069, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.37448496871661835, | |
| "grad_norm": 0.9576849748308918, | |
| "learning_rate": 7.1989632042056816e-06, | |
| "loss": 0.2351, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.37479017244010376, | |
| "grad_norm": 1.3701544258348275, | |
| "learning_rate": 7.1945216792303666e-06, | |
| "loss": 0.3381, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.3750953761635892, | |
| "grad_norm": 1.140854485823984, | |
| "learning_rate": 7.190078008376415e-06, | |
| "loss": 0.2751, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.37540057988707465, | |
| "grad_norm": 1.3323249381262718, | |
| "learning_rate": 7.185632195989005e-06, | |
| "loss": 0.3349, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.37570578361056006, | |
| "grad_norm": 1.0885163567382525, | |
| "learning_rate": 7.1811842464154e-06, | |
| "loss": 0.2448, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.3760109873340455, | |
| "grad_norm": 1.4080823193078549, | |
| "learning_rate": 7.1767341640049595e-06, | |
| "loss": 0.3132, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.3763161910575309, | |
| "grad_norm": 1.353912867702153, | |
| "learning_rate": 7.172281953109128e-06, | |
| "loss": 0.3432, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.3766213947810163, | |
| "grad_norm": 1.3812789171739044, | |
| "learning_rate": 7.167827618081426e-06, | |
| "loss": 0.2137, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.3769265985045018, | |
| "grad_norm": 1.2478616544202277, | |
| "learning_rate": 7.1633711632774605e-06, | |
| "loss": 0.2565, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.3772318022279872, | |
| "grad_norm": 1.4006239352207315, | |
| "learning_rate": 7.158912593054904e-06, | |
| "loss": 0.4235, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.3775370059514726, | |
| "grad_norm": 1.2649826963093111, | |
| "learning_rate": 7.154451911773499e-06, | |
| "loss": 0.3147, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.377842209674958, | |
| "grad_norm": 1.9419141139672966, | |
| "learning_rate": 7.149989123795054e-06, | |
| "loss": 0.2825, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.3781474133984435, | |
| "grad_norm": 1.4118815933485622, | |
| "learning_rate": 7.145524233483434e-06, | |
| "loss": 0.252, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.3784526171219289, | |
| "grad_norm": 1.55813966795455, | |
| "learning_rate": 7.1410572452045635e-06, | |
| "loss": 0.3432, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3787578208454143, | |
| "grad_norm": 0.9637101653261684, | |
| "learning_rate": 7.136588163326417e-06, | |
| "loss": 0.2123, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.37906302456889973, | |
| "grad_norm": 1.4156183777374252, | |
| "learning_rate": 7.1321169922190144e-06, | |
| "loss": 0.4448, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.37936822829238515, | |
| "grad_norm": 1.2011574845111053, | |
| "learning_rate": 7.127643736254424e-06, | |
| "loss": 0.1914, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.3796734320158706, | |
| "grad_norm": 1.2719810910228972, | |
| "learning_rate": 7.123168399806747e-06, | |
| "loss": 0.2365, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.37997863573935603, | |
| "grad_norm": 1.1627952282839493, | |
| "learning_rate": 7.118690987252121e-06, | |
| "loss": 0.2535, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.38028383946284144, | |
| "grad_norm": 1.526033273441638, | |
| "learning_rate": 7.114211502968712e-06, | |
| "loss": 0.563, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.38058904318632686, | |
| "grad_norm": 1.6215492435897827, | |
| "learning_rate": 7.1097299513367166e-06, | |
| "loss": 0.238, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.3808942469098123, | |
| "grad_norm": 1.689691321719832, | |
| "learning_rate": 7.105246336738348e-06, | |
| "loss": 0.1984, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.38119945063329774, | |
| "grad_norm": 3.3706702256948455, | |
| "learning_rate": 7.100760663557841e-06, | |
| "loss": 0.2772, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.38150465435678316, | |
| "grad_norm": 1.5153837503559604, | |
| "learning_rate": 7.0962729361814355e-06, | |
| "loss": 0.2683, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38180985808026857, | |
| "grad_norm": 1.269653690009095, | |
| "learning_rate": 7.0917831589973895e-06, | |
| "loss": 0.2611, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.382115061803754, | |
| "grad_norm": 1.409968651632736, | |
| "learning_rate": 7.0872913363959614e-06, | |
| "loss": 0.3647, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.38242026552723946, | |
| "grad_norm": 1.5218215588345414, | |
| "learning_rate": 7.082797472769408e-06, | |
| "loss": 0.466, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.38272546925072487, | |
| "grad_norm": 2.3890410517745835, | |
| "learning_rate": 7.078301572511984e-06, | |
| "loss": 0.3646, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.3830306729742103, | |
| "grad_norm": 1.641992489517636, | |
| "learning_rate": 7.073803640019936e-06, | |
| "loss": 0.3105, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.3833358766976957, | |
| "grad_norm": 1.299025900561255, | |
| "learning_rate": 7.0693036796914945e-06, | |
| "loss": 0.3399, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.3836410804211811, | |
| "grad_norm": 1.5380435417024074, | |
| "learning_rate": 7.064801695926877e-06, | |
| "loss": 0.2588, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.3839462841446666, | |
| "grad_norm": 1.2372269820552038, | |
| "learning_rate": 7.060297693128277e-06, | |
| "loss": 0.3599, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.384251487868152, | |
| "grad_norm": 1.1614198321930482, | |
| "learning_rate": 7.055791675699863e-06, | |
| "loss": 0.1642, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.3845566915916374, | |
| "grad_norm": 1.4934775483085228, | |
| "learning_rate": 7.051283648047775e-06, | |
| "loss": 0.4615, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.3848618953151228, | |
| "grad_norm": 1.4582552843552379, | |
| "learning_rate": 7.046773614580116e-06, | |
| "loss": 0.3002, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.3851670990386083, | |
| "grad_norm": 4.333102342366705, | |
| "learning_rate": 7.042261579706951e-06, | |
| "loss": 0.3606, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.3854723027620937, | |
| "grad_norm": 1.5981293143466662, | |
| "learning_rate": 7.037747547840303e-06, | |
| "loss": 0.3758, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.3857775064855791, | |
| "grad_norm": 1.1060333427728648, | |
| "learning_rate": 7.033231523394147e-06, | |
| "loss": 0.1347, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.38608271020906454, | |
| "grad_norm": 1.7104158346504792, | |
| "learning_rate": 7.028713510784409e-06, | |
| "loss": 0.6244, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.38638791393254995, | |
| "grad_norm": 1.2041816530098084, | |
| "learning_rate": 7.024193514428953e-06, | |
| "loss": 0.2692, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.3866931176560354, | |
| "grad_norm": 1.2271375340838422, | |
| "learning_rate": 7.0196715387475885e-06, | |
| "loss": 0.2536, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.38699832137952084, | |
| "grad_norm": 1.3208524965326127, | |
| "learning_rate": 7.015147588162061e-06, | |
| "loss": 0.2425, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.38730352510300625, | |
| "grad_norm": 1.6679415198075516, | |
| "learning_rate": 7.010621667096041e-06, | |
| "loss": 0.3849, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.38760872882649167, | |
| "grad_norm": 1.336956505558018, | |
| "learning_rate": 7.0060937799751316e-06, | |
| "loss": 0.2577, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.38791393254997714, | |
| "grad_norm": 1.1363423042029048, | |
| "learning_rate": 7.0015639312268555e-06, | |
| "loss": 0.2241, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.38821913627346255, | |
| "grad_norm": 1.283661967883495, | |
| "learning_rate": 6.997032125280655e-06, | |
| "loss": 0.2903, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.38852433999694796, | |
| "grad_norm": 1.6001822129445207, | |
| "learning_rate": 6.992498366567884e-06, | |
| "loss": 0.2261, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.3888295437204334, | |
| "grad_norm": 1.3026652853973728, | |
| "learning_rate": 6.987962659521808e-06, | |
| "loss": 0.3057, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.3891347474439188, | |
| "grad_norm": 1.4690995307739332, | |
| "learning_rate": 6.983425008577598e-06, | |
| "loss": 0.5416, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.38943995116740426, | |
| "grad_norm": 1.8375650537094952, | |
| "learning_rate": 6.978885418172325e-06, | |
| "loss": 0.3456, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.3897451548908897, | |
| "grad_norm": 2.194246189266676, | |
| "learning_rate": 6.974343892744954e-06, | |
| "loss": 0.2907, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.3900503586143751, | |
| "grad_norm": 1.6774755773457783, | |
| "learning_rate": 6.969800436736347e-06, | |
| "loss": 0.3058, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.3903555623378605, | |
| "grad_norm": 1.4187678667867623, | |
| "learning_rate": 6.965255054589252e-06, | |
| "loss": 0.4065, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.3906607660613459, | |
| "grad_norm": 1.2370997427013408, | |
| "learning_rate": 6.960707750748296e-06, | |
| "loss": 0.276, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3909659697848314, | |
| "grad_norm": 1.2143352462379313, | |
| "learning_rate": 6.956158529659991e-06, | |
| "loss": 0.2169, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.3912711735083168, | |
| "grad_norm": 1.3152454363292938, | |
| "learning_rate": 6.951607395772721e-06, | |
| "loss": 0.3729, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.3915763772318022, | |
| "grad_norm": 1.7624342452965842, | |
| "learning_rate": 6.947054353536742e-06, | |
| "loss": 0.3491, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.39188158095528763, | |
| "grad_norm": 1.0107668961163137, | |
| "learning_rate": 6.942499407404175e-06, | |
| "loss": 0.2528, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.3921867846787731, | |
| "grad_norm": 1.3640381643739714, | |
| "learning_rate": 6.937942561829001e-06, | |
| "loss": 0.2922, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.3924919884022585, | |
| "grad_norm": 1.3678373263857668, | |
| "learning_rate": 6.933383821267062e-06, | |
| "loss": 0.4763, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.39279719212574393, | |
| "grad_norm": 1.4067856510214773, | |
| "learning_rate": 6.928823190176051e-06, | |
| "loss": 0.441, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.39310239584922935, | |
| "grad_norm": 1.1456457967166047, | |
| "learning_rate": 6.924260673015507e-06, | |
| "loss": 0.2726, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.39340759957271476, | |
| "grad_norm": 1.2577366776447958, | |
| "learning_rate": 6.919696274246818e-06, | |
| "loss": 0.2356, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.39371280329620023, | |
| "grad_norm": 1.5018478773369082, | |
| "learning_rate": 6.91512999833321e-06, | |
| "loss": 0.3858, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39401800701968565, | |
| "grad_norm": 1.4382055458480731, | |
| "learning_rate": 6.910561849739743e-06, | |
| "loss": 0.4623, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.39432321074317106, | |
| "grad_norm": 1.4150013204507765, | |
| "learning_rate": 6.905991832933312e-06, | |
| "loss": 0.2598, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.3946284144666565, | |
| "grad_norm": 1.313619408763559, | |
| "learning_rate": 6.901419952382633e-06, | |
| "loss": 0.1856, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.39493361819014194, | |
| "grad_norm": 1.1626329284469816, | |
| "learning_rate": 6.89684621255825e-06, | |
| "loss": 0.2655, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.39523882191362736, | |
| "grad_norm": 1.4286438514880226, | |
| "learning_rate": 6.892270617932523e-06, | |
| "loss": 0.3111, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.3955440256371128, | |
| "grad_norm": 1.467025068592859, | |
| "learning_rate": 6.887693172979624e-06, | |
| "loss": 0.1943, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.3958492293605982, | |
| "grad_norm": 1.5302799032399699, | |
| "learning_rate": 6.883113882175536e-06, | |
| "loss": 0.2896, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.3961544330840836, | |
| "grad_norm": 1.4221040310691428, | |
| "learning_rate": 6.878532749998049e-06, | |
| "loss": 0.2751, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.39645963680756907, | |
| "grad_norm": 1.377710099164584, | |
| "learning_rate": 6.873949780926747e-06, | |
| "loss": 0.2793, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.3967648405310545, | |
| "grad_norm": 1.3417904782871544, | |
| "learning_rate": 6.869364979443019e-06, | |
| "loss": 0.2602, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3970700442545399, | |
| "grad_norm": 1.3467916175186017, | |
| "learning_rate": 6.86477835003004e-06, | |
| "loss": 0.2857, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.3973752479780253, | |
| "grad_norm": 1.4529951304165758, | |
| "learning_rate": 6.860189897172772e-06, | |
| "loss": 0.5097, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.3976804517015108, | |
| "grad_norm": 1.3839926990470812, | |
| "learning_rate": 6.8555996253579645e-06, | |
| "loss": 0.4489, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.3979856554249962, | |
| "grad_norm": 1.4801954354304339, | |
| "learning_rate": 6.85100753907414e-06, | |
| "loss": 0.5139, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.3982908591484816, | |
| "grad_norm": 1.0192216424990155, | |
| "learning_rate": 6.846413642811598e-06, | |
| "loss": 0.231, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.398596062871967, | |
| "grad_norm": 1.4352380952459303, | |
| "learning_rate": 6.841817941062412e-06, | |
| "loss": 0.3393, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.39890126659545244, | |
| "grad_norm": 1.5851110550635767, | |
| "learning_rate": 6.837220438320411e-06, | |
| "loss": 0.41, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.3992064703189379, | |
| "grad_norm": 1.569272508271095, | |
| "learning_rate": 6.832621139081196e-06, | |
| "loss": 0.3163, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.3995116740424233, | |
| "grad_norm": 1.3018066671809359, | |
| "learning_rate": 6.8280200478421185e-06, | |
| "loss": 0.2329, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.39981687776590874, | |
| "grad_norm": 1.660594639958246, | |
| "learning_rate": 6.823417169102282e-06, | |
| "loss": 0.3668, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.40012208148939415, | |
| "grad_norm": 1.522549727556404, | |
| "learning_rate": 6.81881250736254e-06, | |
| "loss": 0.3363, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.4004272852128796, | |
| "grad_norm": 1.497940397892529, | |
| "learning_rate": 6.8142060671254905e-06, | |
| "loss": 0.393, | |
| "step": 1312 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 3276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 328, | |
| "total_flos": 162941137707008.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |