| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.1001068213032199, | |
| "eval_steps": 500, | |
| "global_step": 328, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003052037234854265, | |
| "grad_norm": 19.476922880741295, | |
| "learning_rate": 1.0101010101010103e-07, | |
| "loss": 1.1728, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.000610407446970853, | |
| "grad_norm": 29.879020388476594, | |
| "learning_rate": 2.0202020202020205e-07, | |
| "loss": 1.0955, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0009156111704562796, | |
| "grad_norm": 24.931945947136526, | |
| "learning_rate": 3.0303030303030305e-07, | |
| "loss": 0.9541, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.001220814893941706, | |
| "grad_norm": 27.83214939667906, | |
| "learning_rate": 4.040404040404041e-07, | |
| "loss": 1.0735, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0015260186174271325, | |
| "grad_norm": 21.219233961021736, | |
| "learning_rate": 5.05050505050505e-07, | |
| "loss": 1.0455, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0018312223409125592, | |
| "grad_norm": 20.022707446211225, | |
| "learning_rate": 6.060606060606061e-07, | |
| "loss": 0.9675, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0021364260643979855, | |
| "grad_norm": 26.532427830157193, | |
| "learning_rate": 7.070707070707071e-07, | |
| "loss": 1.1393, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.002441629787883412, | |
| "grad_norm": 27.89728780710031, | |
| "learning_rate": 8.080808080808082e-07, | |
| "loss": 1.0952, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0027468335113688385, | |
| "grad_norm": 20.346264005570532, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 0.9626, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.003052037234854265, | |
| "grad_norm": 18.804489508720884, | |
| "learning_rate": 1.01010101010101e-06, | |
| "loss": 1.0255, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.003357240958339692, | |
| "grad_norm": 19.776534785573535, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.7399, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0036624446818251184, | |
| "grad_norm": 21.16130386460154, | |
| "learning_rate": 1.2121212121212122e-06, | |
| "loss": 0.5413, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0039676484053105445, | |
| "grad_norm": 16.482713371526263, | |
| "learning_rate": 1.3131313131313134e-06, | |
| "loss": 0.5773, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.004272852128795971, | |
| "grad_norm": 10.780528168770594, | |
| "learning_rate": 1.4141414141414143e-06, | |
| "loss": 0.6782, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0045780558522813975, | |
| "grad_norm": 7.0900135030469915, | |
| "learning_rate": 1.5151515151515152e-06, | |
| "loss": 0.9153, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.004883259575766824, | |
| "grad_norm": 8.490445320662754, | |
| "learning_rate": 1.6161616161616164e-06, | |
| "loss": 0.4798, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0051884632992522505, | |
| "grad_norm": 6.677142812986669, | |
| "learning_rate": 1.7171717171717173e-06, | |
| "loss": 0.4782, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.005493667022737677, | |
| "grad_norm": 5.9204247946017485, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 0.3191, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0057988707462231035, | |
| "grad_norm": 5.012462343754674, | |
| "learning_rate": 1.9191919191919192e-06, | |
| "loss": 0.4115, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.00610407446970853, | |
| "grad_norm": 3.9095937836899113, | |
| "learning_rate": 2.02020202020202e-06, | |
| "loss": 0.6158, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.006409278193193957, | |
| "grad_norm": 4.438163815129716, | |
| "learning_rate": 2.1212121212121216e-06, | |
| "loss": 0.7388, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.006714481916679384, | |
| "grad_norm": 3.62875198348435, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.2875, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.00701968564016481, | |
| "grad_norm": 4.963543929599541, | |
| "learning_rate": 2.3232323232323234e-06, | |
| "loss": 0.4662, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.007324889363650237, | |
| "grad_norm": 4.274904100558248, | |
| "learning_rate": 2.4242424242424244e-06, | |
| "loss": 0.5171, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.007630093087135663, | |
| "grad_norm": 2.670885047669819, | |
| "learning_rate": 2.5252525252525258e-06, | |
| "loss": 0.4488, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.007935296810621089, | |
| "grad_norm": 2.6864388610994014, | |
| "learning_rate": 2.6262626262626267e-06, | |
| "loss": 0.372, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.008240500534106516, | |
| "grad_norm": 3.804357369452407, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.2646, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.008545704257591942, | |
| "grad_norm": 4.059008227452532, | |
| "learning_rate": 2.8282828282828286e-06, | |
| "loss": 0.5907, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.008850907981077369, | |
| "grad_norm": 4.9062443629918855, | |
| "learning_rate": 2.9292929292929295e-06, | |
| "loss": 0.2972, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.009156111704562795, | |
| "grad_norm": 3.5391495380267064, | |
| "learning_rate": 3.0303030303030305e-06, | |
| "loss": 0.3821, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.009461315428048222, | |
| "grad_norm": 2.5896920322264854, | |
| "learning_rate": 3.131313131313132e-06, | |
| "loss": 0.4164, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.009766519151533648, | |
| "grad_norm": 3.0230775761822937, | |
| "learning_rate": 3.232323232323233e-06, | |
| "loss": 0.4237, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.010071722875019075, | |
| "grad_norm": 2.8417717057519423, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.3353, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.010376926598504501, | |
| "grad_norm": 2.5789157463945878, | |
| "learning_rate": 3.4343434343434347e-06, | |
| "loss": 0.3769, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.010682130321989928, | |
| "grad_norm": 2.5222241581850096, | |
| "learning_rate": 3.5353535353535356e-06, | |
| "loss": 0.519, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.010987334045475354, | |
| "grad_norm": 2.8704682168269127, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 0.2829, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.01129253776896078, | |
| "grad_norm": 3.24684532820184, | |
| "learning_rate": 3.737373737373738e-06, | |
| "loss": 0.3586, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.011597741492446207, | |
| "grad_norm": 5.24792475783676, | |
| "learning_rate": 3.8383838383838385e-06, | |
| "loss": 0.402, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.011902945215931634, | |
| "grad_norm": 3.111184671834165, | |
| "learning_rate": 3.93939393939394e-06, | |
| "loss": 0.466, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.01220814893941706, | |
| "grad_norm": 3.165565566985893, | |
| "learning_rate": 4.04040404040404e-06, | |
| "loss": 0.2678, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.012513352662902488, | |
| "grad_norm": 2.5486933296193257, | |
| "learning_rate": 4.141414141414142e-06, | |
| "loss": 0.5457, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.012818556386387915, | |
| "grad_norm": 3.4373721012250438, | |
| "learning_rate": 4.242424242424243e-06, | |
| "loss": 0.3862, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.013123760109873341, | |
| "grad_norm": 2.863317221380458, | |
| "learning_rate": 4.343434343434344e-06, | |
| "loss": 0.3601, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.013428963833358768, | |
| "grad_norm": 2.1041128573446035, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.3693, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.013734167556844194, | |
| "grad_norm": 2.286990324679626, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.2513, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01403937128032962, | |
| "grad_norm": 8.793466778432636, | |
| "learning_rate": 4.646464646464647e-06, | |
| "loss": 0.4343, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.014344575003815047, | |
| "grad_norm": 1.8648737533834159, | |
| "learning_rate": 4.747474747474748e-06, | |
| "loss": 0.2631, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.014649778727300474, | |
| "grad_norm": 2.3081781364995324, | |
| "learning_rate": 4.848484848484849e-06, | |
| "loss": 0.2755, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0149549824507859, | |
| "grad_norm": 2.284005369243557, | |
| "learning_rate": 4.94949494949495e-06, | |
| "loss": 0.4186, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.015260186174271327, | |
| "grad_norm": 2.6759709423238096, | |
| "learning_rate": 5.0505050505050515e-06, | |
| "loss": 0.6459, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.015565389897756753, | |
| "grad_norm": 2.8773749120652523, | |
| "learning_rate": 5.151515151515152e-06, | |
| "loss": 0.3324, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.015870593621242178, | |
| "grad_norm": 2.8060164424498786, | |
| "learning_rate": 5.252525252525253e-06, | |
| "loss": 0.3608, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.016175797344727606, | |
| "grad_norm": 2.3060494229726793, | |
| "learning_rate": 5.353535353535354e-06, | |
| "loss": 0.3818, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.01648100106821303, | |
| "grad_norm": 2.073464811557714, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 0.2667, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.01678620479169846, | |
| "grad_norm": 2.3474749655399245, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.35, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.017091408515183884, | |
| "grad_norm": 3.6988890036672086, | |
| "learning_rate": 5.656565656565657e-06, | |
| "loss": 0.284, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.017396612238669312, | |
| "grad_norm": 2.313501192849839, | |
| "learning_rate": 5.7575757575757586e-06, | |
| "loss": 0.3308, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.017701815962154737, | |
| "grad_norm": 2.411936098122121, | |
| "learning_rate": 5.858585858585859e-06, | |
| "loss": 0.3982, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.018007019685640165, | |
| "grad_norm": 2.724660127775508, | |
| "learning_rate": 5.95959595959596e-06, | |
| "loss": 0.3587, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.01831222340912559, | |
| "grad_norm": 3.130895013540925, | |
| "learning_rate": 6.060606060606061e-06, | |
| "loss": 0.3427, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01861742713261102, | |
| "grad_norm": 3.4261489723004614, | |
| "learning_rate": 6.1616161616161615e-06, | |
| "loss": 0.4578, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.018922630856096443, | |
| "grad_norm": 2.413871881063889, | |
| "learning_rate": 6.262626262626264e-06, | |
| "loss": 0.2067, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.01922783457958187, | |
| "grad_norm": 2.0941348505038366, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 0.27, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.019533038303067296, | |
| "grad_norm": 2.2153240133926153, | |
| "learning_rate": 6.464646464646466e-06, | |
| "loss": 0.3298, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.019838242026552724, | |
| "grad_norm": 2.422022070572305, | |
| "learning_rate": 6.565656565656566e-06, | |
| "loss": 0.4894, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02014344575003815, | |
| "grad_norm": 2.45442660843552, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.3684, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.020448649473523577, | |
| "grad_norm": 3.5398238081108304, | |
| "learning_rate": 6.767676767676769e-06, | |
| "loss": 0.4233, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.020753853197009002, | |
| "grad_norm": 2.530397719080883, | |
| "learning_rate": 6.868686868686869e-06, | |
| "loss": 0.2676, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02105905692049443, | |
| "grad_norm": 2.259346305696615, | |
| "learning_rate": 6.969696969696971e-06, | |
| "loss": 0.4409, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.021364260643979855, | |
| "grad_norm": 2.3339543424453764, | |
| "learning_rate": 7.070707070707071e-06, | |
| "loss": 0.3882, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.021669464367465283, | |
| "grad_norm": 2.348843038116063, | |
| "learning_rate": 7.171717171717172e-06, | |
| "loss": 0.3904, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.021974668090950708, | |
| "grad_norm": 2.7011363922899965, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 0.3586, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.022279871814436136, | |
| "grad_norm": 2.6923381814173486, | |
| "learning_rate": 7.373737373737374e-06, | |
| "loss": 0.4331, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.02258507553792156, | |
| "grad_norm": 2.0435337430530924, | |
| "learning_rate": 7.474747474747476e-06, | |
| "loss": 0.2739, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.02289027926140699, | |
| "grad_norm": 2.257183264462076, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 0.4554, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.023195482984892414, | |
| "grad_norm": 2.5384248372961626, | |
| "learning_rate": 7.676767676767677e-06, | |
| "loss": 0.4934, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.023500686708377842, | |
| "grad_norm": 2.1578730127908488, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.3519, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.023805890431863267, | |
| "grad_norm": 2.1316764516757476, | |
| "learning_rate": 7.87878787878788e-06, | |
| "loss": 0.3268, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.024111094155348695, | |
| "grad_norm": 2.095996278024237, | |
| "learning_rate": 7.97979797979798e-06, | |
| "loss": 0.3318, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.02441629787883412, | |
| "grad_norm": 1.9985574049541877, | |
| "learning_rate": 8.08080808080808e-06, | |
| "loss": 0.1852, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02472150160231955, | |
| "grad_norm": 1.7092921737326583, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 0.2412, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.025026705325804977, | |
| "grad_norm": 1.9609482601524066, | |
| "learning_rate": 8.282828282828283e-06, | |
| "loss": 0.3349, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0253319090492904, | |
| "grad_norm": 2.5619254980161412, | |
| "learning_rate": 8.383838383838384e-06, | |
| "loss": 0.3327, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.02563711277277583, | |
| "grad_norm": 2.1734116421771827, | |
| "learning_rate": 8.484848484848486e-06, | |
| "loss": 0.5005, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.025942316496261254, | |
| "grad_norm": 2.4612836321871785, | |
| "learning_rate": 8.585858585858587e-06, | |
| "loss": 0.5919, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.026247520219746683, | |
| "grad_norm": 2.050264187978962, | |
| "learning_rate": 8.686868686868687e-06, | |
| "loss": 0.2654, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.026552723943232107, | |
| "grad_norm": 1.7466792206761999, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 0.2875, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.026857927666717536, | |
| "grad_norm": 1.9114055019911376, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.3317, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.02716313139020296, | |
| "grad_norm": 2.136028617695754, | |
| "learning_rate": 8.98989898989899e-06, | |
| "loss": 0.4322, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.02746833511368839, | |
| "grad_norm": 2.0559196693817303, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.3372, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.027773538837173813, | |
| "grad_norm": 1.6053810559753854, | |
| "learning_rate": 9.191919191919193e-06, | |
| "loss": 0.2833, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.02807874256065924, | |
| "grad_norm": 1.9190338968500587, | |
| "learning_rate": 9.292929292929294e-06, | |
| "loss": 0.2358, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.028383946284144666, | |
| "grad_norm": 1.7424429804531956, | |
| "learning_rate": 9.393939393939396e-06, | |
| "loss": 0.2805, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.028689150007630095, | |
| "grad_norm": 1.5616301594921251, | |
| "learning_rate": 9.494949494949497e-06, | |
| "loss": 0.326, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.02899435373111552, | |
| "grad_norm": 2.6517363851490297, | |
| "learning_rate": 9.595959595959597e-06, | |
| "loss": 0.5839, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.029299557454600948, | |
| "grad_norm": 1.9068377479857994, | |
| "learning_rate": 9.696969696969698e-06, | |
| "loss": 0.4213, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.029604761178086372, | |
| "grad_norm": 2.147263972819766, | |
| "learning_rate": 9.797979797979798e-06, | |
| "loss": 0.3776, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.0299099649015718, | |
| "grad_norm": 2.3466004395170685, | |
| "learning_rate": 9.8989898989899e-06, | |
| "loss": 0.4828, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.030215168625057225, | |
| "grad_norm": 1.9328188798162316, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3816, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.030520372348542654, | |
| "grad_norm": 2.120656679761712, | |
| "learning_rate": 9.999997555414177e-06, | |
| "loss": 0.287, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03082557607202808, | |
| "grad_norm": 1.8272767014289886, | |
| "learning_rate": 9.999990221659095e-06, | |
| "loss": 0.2529, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.031130779795513507, | |
| "grad_norm": 2.108876035097533, | |
| "learning_rate": 9.999977998741925e-06, | |
| "loss": 0.4, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.031435983518998935, | |
| "grad_norm": 2.611227326027621, | |
| "learning_rate": 9.999960886674623e-06, | |
| "loss": 0.5577, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.031741187242484356, | |
| "grad_norm": 2.012760226088087, | |
| "learning_rate": 9.999938885473916e-06, | |
| "loss": 0.2397, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.032046390965969784, | |
| "grad_norm": 3.4069313977643088, | |
| "learning_rate": 9.999911995161323e-06, | |
| "loss": 0.3074, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03235159468945521, | |
| "grad_norm": 1.5281487804348939, | |
| "learning_rate": 9.999880215763133e-06, | |
| "loss": 0.306, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.03265679841294064, | |
| "grad_norm": 1.5733903167529437, | |
| "learning_rate": 9.999843547310427e-06, | |
| "loss": 0.3123, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.03296200213642606, | |
| "grad_norm": 2.2084260837102776, | |
| "learning_rate": 9.999801989839055e-06, | |
| "loss": 0.2686, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03326720585991149, | |
| "grad_norm": 2.0235527329790477, | |
| "learning_rate": 9.999755543389658e-06, | |
| "loss": 0.362, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.03357240958339692, | |
| "grad_norm": 1.4126246608311444, | |
| "learning_rate": 9.999704208007647e-06, | |
| "loss": 0.1868, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03387761330688235, | |
| "grad_norm": 1.9363750145032863, | |
| "learning_rate": 9.999647983743227e-06, | |
| "loss": 0.4674, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.03418281703036777, | |
| "grad_norm": 2.306492812857686, | |
| "learning_rate": 9.999586870651372e-06, | |
| "loss": 0.7454, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.034488020753853196, | |
| "grad_norm": 1.9927578577114744, | |
| "learning_rate": 9.999520868791839e-06, | |
| "loss": 0.2964, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.034793224477338625, | |
| "grad_norm": 2.897230200199283, | |
| "learning_rate": 9.99944997822917e-06, | |
| "loss": 0.3507, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.03509842820082405, | |
| "grad_norm": 1.7040567211820554, | |
| "learning_rate": 9.999374199032682e-06, | |
| "loss": 0.358, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.035403631924309474, | |
| "grad_norm": 1.7684725864001616, | |
| "learning_rate": 9.999293531276475e-06, | |
| "loss": 0.469, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0357088356477949, | |
| "grad_norm": 2.151331613378997, | |
| "learning_rate": 9.999207975039429e-06, | |
| "loss": 0.4007, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.03601403937128033, | |
| "grad_norm": 2.1827006415812678, | |
| "learning_rate": 9.999117530405205e-06, | |
| "loss": 0.373, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.03631924309476576, | |
| "grad_norm": 2.0424756244526283, | |
| "learning_rate": 9.99902219746224e-06, | |
| "loss": 0.4664, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.03662444681825118, | |
| "grad_norm": 2.4438750213097014, | |
| "learning_rate": 9.998921976303757e-06, | |
| "loss": 0.5884, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03692965054173661, | |
| "grad_norm": 1.6168805259489245, | |
| "learning_rate": 9.998816867027753e-06, | |
| "loss": 0.3874, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.03723485426522204, | |
| "grad_norm": 2.4836564854380914, | |
| "learning_rate": 9.99870686973701e-06, | |
| "loss": 0.3865, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.037540057988707465, | |
| "grad_norm": 2.187549263535683, | |
| "learning_rate": 9.998591984539085e-06, | |
| "loss": 0.4419, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.037845261712192886, | |
| "grad_norm": 2.3145724108896366, | |
| "learning_rate": 9.998472211546317e-06, | |
| "loss": 0.5048, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.038150465435678314, | |
| "grad_norm": 2.6043824271784377, | |
| "learning_rate": 9.998347550875825e-06, | |
| "loss": 0.4323, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03845566915916374, | |
| "grad_norm": 1.7266964407358079, | |
| "learning_rate": 9.998218002649507e-06, | |
| "loss": 0.3093, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.03876087288264917, | |
| "grad_norm": 2.3091863655820397, | |
| "learning_rate": 9.99808356699404e-06, | |
| "loss": 0.5394, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.03906607660613459, | |
| "grad_norm": 2.178584103245907, | |
| "learning_rate": 9.997944244040877e-06, | |
| "loss": 0.562, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.03937128032962002, | |
| "grad_norm": 1.4762803065381216, | |
| "learning_rate": 9.997800033926252e-06, | |
| "loss": 0.3012, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.03967648405310545, | |
| "grad_norm": 1.6768704233807339, | |
| "learning_rate": 9.997650936791183e-06, | |
| "loss": 0.3314, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03998168777659088, | |
| "grad_norm": 1.8423584681568375, | |
| "learning_rate": 9.997496952781461e-06, | |
| "loss": 0.5373, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.0402868915000763, | |
| "grad_norm": 1.4926628434179245, | |
| "learning_rate": 9.997338082047656e-06, | |
| "loss": 0.1992, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.040592095223561726, | |
| "grad_norm": 1.6323074947028773, | |
| "learning_rate": 9.997174324745117e-06, | |
| "loss": 0.4872, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.040897298947047155, | |
| "grad_norm": 2.159688005520465, | |
| "learning_rate": 9.997005681033973e-06, | |
| "loss": 0.5076, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.04120250267053258, | |
| "grad_norm": 2.207163038792008, | |
| "learning_rate": 9.996832151079127e-06, | |
| "loss": 0.2677, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.041507706394018004, | |
| "grad_norm": 1.3990677420334965, | |
| "learning_rate": 9.996653735050265e-06, | |
| "loss": 0.2526, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.04181291011750343, | |
| "grad_norm": 1.7368886105229604, | |
| "learning_rate": 9.996470433121847e-06, | |
| "loss": 0.2874, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.04211811384098886, | |
| "grad_norm": 1.8138446424045762, | |
| "learning_rate": 9.996282245473113e-06, | |
| "loss": 0.2986, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.04242331756447429, | |
| "grad_norm": 1.8564789601928355, | |
| "learning_rate": 9.996089172288078e-06, | |
| "loss": 0.3954, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.04272852128795971, | |
| "grad_norm": 1.9085920361180522, | |
| "learning_rate": 9.995891213755536e-06, | |
| "loss": 0.2739, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04303372501144514, | |
| "grad_norm": 1.8924678931794556, | |
| "learning_rate": 9.99568837006906e-06, | |
| "loss": 0.2766, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.04333892873493057, | |
| "grad_norm": 1.8418836037208652, | |
| "learning_rate": 9.995480641426992e-06, | |
| "loss": 0.488, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.043644132458415995, | |
| "grad_norm": 1.6305125707231247, | |
| "learning_rate": 9.99526802803246e-06, | |
| "loss": 0.3045, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.043949336181901416, | |
| "grad_norm": 2.143051665423358, | |
| "learning_rate": 9.995050530093366e-06, | |
| "loss": 0.3567, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.044254539905386844, | |
| "grad_norm": 1.994194545633334, | |
| "learning_rate": 9.994828147822387e-06, | |
| "loss": 0.3655, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.04455974362887227, | |
| "grad_norm": 1.8553346605537173, | |
| "learning_rate": 9.994600881436972e-06, | |
| "loss": 0.3249, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.0448649473523577, | |
| "grad_norm": 2.1613773805709857, | |
| "learning_rate": 9.994368731159351e-06, | |
| "loss": 0.4863, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.04517015107584312, | |
| "grad_norm": 2.199571706523493, | |
| "learning_rate": 9.99413169721653e-06, | |
| "loss": 0.465, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.04547535479932855, | |
| "grad_norm": 1.681707967900651, | |
| "learning_rate": 9.99388977984029e-06, | |
| "loss": 0.3472, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.04578055852281398, | |
| "grad_norm": 1.6586587053140593, | |
| "learning_rate": 9.993642979267184e-06, | |
| "loss": 0.3626, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04608576224629941, | |
| "grad_norm": 2.12592721793332, | |
| "learning_rate": 9.993391295738542e-06, | |
| "loss": 0.3218, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.04639096596978483, | |
| "grad_norm": 1.6765944279655143, | |
| "learning_rate": 9.99313472950047e-06, | |
| "loss": 0.3402, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.046696169693270256, | |
| "grad_norm": 1.6019038139070678, | |
| "learning_rate": 9.992873280803848e-06, | |
| "loss": 0.4554, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.047001373416755685, | |
| "grad_norm": 1.6429860881882794, | |
| "learning_rate": 9.99260694990433e-06, | |
| "loss": 0.4086, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.04730657714024111, | |
| "grad_norm": 1.98592334325083, | |
| "learning_rate": 9.992335737062338e-06, | |
| "loss": 0.5733, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.047611780863726534, | |
| "grad_norm": 1.5624846648417388, | |
| "learning_rate": 9.992059642543076e-06, | |
| "loss": 0.2524, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.04791698458721196, | |
| "grad_norm": 1.4438198320418865, | |
| "learning_rate": 9.991778666616523e-06, | |
| "loss": 0.1756, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.04822218831069739, | |
| "grad_norm": 1.6284817295660008, | |
| "learning_rate": 9.991492809557424e-06, | |
| "loss": 0.4144, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.04852739203418282, | |
| "grad_norm": 1.2236340789910145, | |
| "learning_rate": 9.991202071645298e-06, | |
| "loss": 0.1664, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.04883259575766824, | |
| "grad_norm": 1.4874398163232816, | |
| "learning_rate": 9.99090645316444e-06, | |
| "loss": 0.3323, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04913779948115367, | |
| "grad_norm": 2.5394515927833403, | |
| "learning_rate": 9.990605954403917e-06, | |
| "loss": 0.27, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.0494430032046391, | |
| "grad_norm": 1.7966332314422868, | |
| "learning_rate": 9.990300575657565e-06, | |
| "loss": 0.4453, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.049748206928124525, | |
| "grad_norm": 1.825976682624809, | |
| "learning_rate": 9.989990317223995e-06, | |
| "loss": 0.2646, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.05005341065160995, | |
| "grad_norm": 1.6554541925183588, | |
| "learning_rate": 9.989675179406588e-06, | |
| "loss": 0.445, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.050358614375095374, | |
| "grad_norm": 1.6711133844293076, | |
| "learning_rate": 9.989355162513496e-06, | |
| "loss": 0.3685, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.0506638180985808, | |
| "grad_norm": 1.8033315345252203, | |
| "learning_rate": 9.989030266857644e-06, | |
| "loss": 0.2566, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.05096902182206623, | |
| "grad_norm": 1.6879852444966537, | |
| "learning_rate": 9.988700492756726e-06, | |
| "loss": 0.4086, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.05127422554555166, | |
| "grad_norm": 1.6855038740169574, | |
| "learning_rate": 9.988365840533204e-06, | |
| "loss": 0.3081, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.05157942926903708, | |
| "grad_norm": 2.245121010490438, | |
| "learning_rate": 9.988026310514316e-06, | |
| "loss": 0.5646, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.05188463299252251, | |
| "grad_norm": 1.531117336209479, | |
| "learning_rate": 9.987681903032065e-06, | |
| "loss": 0.3598, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05218983671600794, | |
| "grad_norm": 1.4368727600956301, | |
| "learning_rate": 9.987332618423221e-06, | |
| "loss": 0.3864, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.052495040439493365, | |
| "grad_norm": 2.039026486601271, | |
| "learning_rate": 9.98697845702933e-06, | |
| "loss": 0.2728, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.052800244162978786, | |
| "grad_norm": 1.5481974795842472, | |
| "learning_rate": 9.986619419196704e-06, | |
| "loss": 0.2376, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.053105447886464215, | |
| "grad_norm": 1.583025735121783, | |
| "learning_rate": 9.986255505276418e-06, | |
| "loss": 0.3941, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.05341065160994964, | |
| "grad_norm": 2.025610033619695, | |
| "learning_rate": 9.985886715624326e-06, | |
| "loss": 0.432, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.05371585533343507, | |
| "grad_norm": 1.9370365819159912, | |
| "learning_rate": 9.985513050601037e-06, | |
| "loss": 0.3311, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.05402105905692049, | |
| "grad_norm": 1.534591376747653, | |
| "learning_rate": 9.985134510571936e-06, | |
| "loss": 0.3804, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.05432626278040592, | |
| "grad_norm": 1.5627980520171343, | |
| "learning_rate": 9.984751095907175e-06, | |
| "loss": 0.3991, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.05463146650389135, | |
| "grad_norm": 1.858760828475349, | |
| "learning_rate": 9.984362806981665e-06, | |
| "loss": 0.4124, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.05493667022737678, | |
| "grad_norm": 1.4922057145689682, | |
| "learning_rate": 9.983969644175092e-06, | |
| "loss": 0.2571, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0552418739508622, | |
| "grad_norm": 1.4358215484460224, | |
| "learning_rate": 9.983571607871903e-06, | |
| "loss": 0.3351, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.05554707767434763, | |
| "grad_norm": 1.7105120125454414, | |
| "learning_rate": 9.983168698461312e-06, | |
| "loss": 0.4374, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.055852281397833055, | |
| "grad_norm": 1.4100459259074987, | |
| "learning_rate": 9.982760916337296e-06, | |
| "loss": 0.3958, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.05615748512131848, | |
| "grad_norm": 1.667173817085955, | |
| "learning_rate": 9.982348261898598e-06, | |
| "loss": 0.2867, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.056462688844803904, | |
| "grad_norm": 1.8278737995984025, | |
| "learning_rate": 9.981930735548731e-06, | |
| "loss": 0.3738, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.05676789256828933, | |
| "grad_norm": 1.806852289121097, | |
| "learning_rate": 9.98150833769596e-06, | |
| "loss": 0.5608, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.05707309629177476, | |
| "grad_norm": 1.6986308867720055, | |
| "learning_rate": 9.981081068753324e-06, | |
| "loss": 0.4253, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.05737830001526019, | |
| "grad_norm": 1.6392088091109513, | |
| "learning_rate": 9.98064892913862e-06, | |
| "loss": 0.2444, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.05768350373874561, | |
| "grad_norm": 1.7762995408711126, | |
| "learning_rate": 9.980211919274407e-06, | |
| "loss": 0.3866, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.05798870746223104, | |
| "grad_norm": 1.7144647062044762, | |
| "learning_rate": 9.979770039588013e-06, | |
| "loss": 0.4504, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05829391118571647, | |
| "grad_norm": 1.9069269572943617, | |
| "learning_rate": 9.979323290511517e-06, | |
| "loss": 0.4972, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.058599114909201895, | |
| "grad_norm": 1.831943664409223, | |
| "learning_rate": 9.978871672481774e-06, | |
| "loss": 0.3884, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.058904318632687316, | |
| "grad_norm": 1.60483584957947, | |
| "learning_rate": 9.978415185940383e-06, | |
| "loss": 0.3366, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.059209522356172745, | |
| "grad_norm": 2.041633475935638, | |
| "learning_rate": 9.977953831333718e-06, | |
| "loss": 0.4928, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.05951472607965817, | |
| "grad_norm": 2.1574861604284243, | |
| "learning_rate": 9.977487609112904e-06, | |
| "loss": 0.7092, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.0598199298031436, | |
| "grad_norm": 1.5382345073334531, | |
| "learning_rate": 9.97701651973383e-06, | |
| "loss": 0.2236, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.06012513352662902, | |
| "grad_norm": 2.1479787995768014, | |
| "learning_rate": 9.976540563657143e-06, | |
| "loss": 0.5182, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.06043033725011445, | |
| "grad_norm": 1.8579437774142544, | |
| "learning_rate": 9.976059741348252e-06, | |
| "loss": 0.3093, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.06073554097359988, | |
| "grad_norm": 1.5409701380525285, | |
| "learning_rate": 9.975574053277317e-06, | |
| "loss": 0.2877, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.06104074469708531, | |
| "grad_norm": 1.5474598097011698, | |
| "learning_rate": 9.975083499919264e-06, | |
| "loss": 0.2981, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06134594842057073, | |
| "grad_norm": 1.9202152932180157, | |
| "learning_rate": 9.974588081753773e-06, | |
| "loss": 0.5369, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.06165115214405616, | |
| "grad_norm": 1.4598442515817716, | |
| "learning_rate": 9.974087799265279e-06, | |
| "loss": 0.3696, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.061956355867541585, | |
| "grad_norm": 1.48078814360119, | |
| "learning_rate": 9.973582652942975e-06, | |
| "loss": 0.284, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.06226155959102701, | |
| "grad_norm": 2.100326004155181, | |
| "learning_rate": 9.973072643280813e-06, | |
| "loss": 0.5681, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.06256676331451244, | |
| "grad_norm": 1.976128330719915, | |
| "learning_rate": 9.972557770777496e-06, | |
| "loss": 0.3655, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.06287196703799787, | |
| "grad_norm": 1.2103730393566896, | |
| "learning_rate": 9.972038035936483e-06, | |
| "loss": 0.2471, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.06317717076148328, | |
| "grad_norm": 1.670449906238349, | |
| "learning_rate": 9.971513439265992e-06, | |
| "loss": 0.2184, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.06348237448496871, | |
| "grad_norm": 1.5020544764497652, | |
| "learning_rate": 9.970983981278989e-06, | |
| "loss": 0.3196, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.06378757820845414, | |
| "grad_norm": 1.7833251911345853, | |
| "learning_rate": 9.970449662493195e-06, | |
| "loss": 0.4122, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.06409278193193957, | |
| "grad_norm": 1.4149595334362772, | |
| "learning_rate": 9.96991048343109e-06, | |
| "loss": 0.2947, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.064397985655425, | |
| "grad_norm": 1.5991867680932033, | |
| "learning_rate": 9.969366444619898e-06, | |
| "loss": 0.1902, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.06470318937891043, | |
| "grad_norm": 1.4132064841734169, | |
| "learning_rate": 9.968817546591601e-06, | |
| "loss": 0.3389, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.06500839310239585, | |
| "grad_norm": 1.7671902900221814, | |
| "learning_rate": 9.968263789882926e-06, | |
| "loss": 0.4294, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.06531359682588128, | |
| "grad_norm": 1.5709821497329826, | |
| "learning_rate": 9.96770517503536e-06, | |
| "loss": 0.2765, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.0656188005493667, | |
| "grad_norm": 1.5211731343844295, | |
| "learning_rate": 9.967141702595134e-06, | |
| "loss": 0.387, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.06592400427285212, | |
| "grad_norm": 1.5499265222668686, | |
| "learning_rate": 9.96657337311323e-06, | |
| "loss": 0.4535, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.06622920799633755, | |
| "grad_norm": 1.4736546539447488, | |
| "learning_rate": 9.966000187145383e-06, | |
| "loss": 0.3834, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.06653441171982298, | |
| "grad_norm": 1.3306288958233108, | |
| "learning_rate": 9.965422145252072e-06, | |
| "loss": 0.3172, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.06683961544330841, | |
| "grad_norm": 1.5745937005003143, | |
| "learning_rate": 9.964839247998524e-06, | |
| "loss": 0.2725, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.06714481916679384, | |
| "grad_norm": 1.7546511557153388, | |
| "learning_rate": 9.96425149595472e-06, | |
| "loss": 0.3577, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06745002289027927, | |
| "grad_norm": 2.0422588449754286, | |
| "learning_rate": 9.96365888969538e-06, | |
| "loss": 0.4976, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.0677552266137647, | |
| "grad_norm": 1.4661824124133862, | |
| "learning_rate": 9.963061429799979e-06, | |
| "loss": 0.3672, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.06806043033725011, | |
| "grad_norm": 2.0959067552369666, | |
| "learning_rate": 9.96245911685273e-06, | |
| "loss": 0.5381, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.06836563406073554, | |
| "grad_norm": 1.3296813372997014, | |
| "learning_rate": 9.961851951442599e-06, | |
| "loss": 0.2799, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.06867083778422096, | |
| "grad_norm": 1.7385807765114274, | |
| "learning_rate": 9.96123993416329e-06, | |
| "loss": 0.5183, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.06897604150770639, | |
| "grad_norm": 1.5190119701865645, | |
| "learning_rate": 9.960623065613254e-06, | |
| "loss": 0.4608, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.06928124523119182, | |
| "grad_norm": 1.4393894383331207, | |
| "learning_rate": 9.96000134639569e-06, | |
| "loss": 0.3455, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.06958644895467725, | |
| "grad_norm": 1.7132863682619555, | |
| "learning_rate": 9.959374777118533e-06, | |
| "loss": 0.316, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.06989165267816268, | |
| "grad_norm": 1.3227120889592454, | |
| "learning_rate": 9.958743358394464e-06, | |
| "loss": 0.2467, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.0701968564016481, | |
| "grad_norm": 1.5331153407144422, | |
| "learning_rate": 9.95810709084091e-06, | |
| "loss": 0.3138, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07050206012513352, | |
| "grad_norm": 1.7990748995190806, | |
| "learning_rate": 9.957465975080031e-06, | |
| "loss": 0.4747, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.07080726384861895, | |
| "grad_norm": 1.1638981235859056, | |
| "learning_rate": 9.956820011738736e-06, | |
| "loss": 0.2265, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.07111246757210438, | |
| "grad_norm": 1.5739388418179414, | |
| "learning_rate": 9.956169201448665e-06, | |
| "loss": 0.5066, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.0714176712955898, | |
| "grad_norm": 1.6803933013620869, | |
| "learning_rate": 9.955513544846205e-06, | |
| "loss": 0.4415, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.07172287501907523, | |
| "grad_norm": 1.4014872110785643, | |
| "learning_rate": 9.954853042572479e-06, | |
| "loss": 0.3271, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.07202807874256066, | |
| "grad_norm": 1.5310222689941932, | |
| "learning_rate": 9.954187695273352e-06, | |
| "loss": 0.3289, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.07233328246604609, | |
| "grad_norm": 2.166268226472017, | |
| "learning_rate": 9.953517503599419e-06, | |
| "loss": 0.622, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.07263848618953152, | |
| "grad_norm": 2.258081862277545, | |
| "learning_rate": 9.952842468206019e-06, | |
| "loss": 0.5071, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.07294368991301693, | |
| "grad_norm": 1.7322119894263104, | |
| "learning_rate": 9.952162589753224e-06, | |
| "loss": 0.5097, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.07324889363650236, | |
| "grad_norm": 1.9966284228033864, | |
| "learning_rate": 9.951477868905843e-06, | |
| "loss": 0.2263, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07355409735998779, | |
| "grad_norm": 1.6793267860774614, | |
| "learning_rate": 9.95078830633342e-06, | |
| "loss": 0.2065, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.07385930108347322, | |
| "grad_norm": 2.122564153881175, | |
| "learning_rate": 9.95009390271023e-06, | |
| "loss": 0.2665, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.07416450480695864, | |
| "grad_norm": 1.5852282963187305, | |
| "learning_rate": 9.949394658715289e-06, | |
| "loss": 0.4453, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.07446970853044407, | |
| "grad_norm": 1.7534712016120517, | |
| "learning_rate": 9.948690575032338e-06, | |
| "loss": 0.3628, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.0747749122539295, | |
| "grad_norm": 1.351810586905304, | |
| "learning_rate": 9.947981652349854e-06, | |
| "loss": 0.3984, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.07508011597741493, | |
| "grad_norm": 1.8377506474408298, | |
| "learning_rate": 9.947267891361051e-06, | |
| "loss": 0.3677, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.07538531970090036, | |
| "grad_norm": 1.4655632998364951, | |
| "learning_rate": 9.946549292763865e-06, | |
| "loss": 0.3516, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.07569052342438577, | |
| "grad_norm": 3.240838121636416, | |
| "learning_rate": 9.945825857260967e-06, | |
| "loss": 0.2627, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.0759957271478712, | |
| "grad_norm": 1.4085823215183912, | |
| "learning_rate": 9.945097585559757e-06, | |
| "loss": 0.2716, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.07630093087135663, | |
| "grad_norm": 1.6361471921651585, | |
| "learning_rate": 9.944364478372364e-06, | |
| "loss": 0.3595, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07660613459484206, | |
| "grad_norm": 1.0912978886499554, | |
| "learning_rate": 9.943626536415647e-06, | |
| "loss": 0.1968, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.07691133831832749, | |
| "grad_norm": 1.9515717700893849, | |
| "learning_rate": 9.942883760411188e-06, | |
| "loss": 0.374, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.07721654204181291, | |
| "grad_norm": 1.5560755068838334, | |
| "learning_rate": 9.942136151085302e-06, | |
| "loss": 0.44, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.07752174576529834, | |
| "grad_norm": 1.4843235207715992, | |
| "learning_rate": 9.941383709169024e-06, | |
| "loss": 0.3175, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.07782694948878377, | |
| "grad_norm": 1.5210960196158274, | |
| "learning_rate": 9.94062643539812e-06, | |
| "loss": 0.3722, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.07813215321226918, | |
| "grad_norm": 1.6656094376801425, | |
| "learning_rate": 9.939864330513079e-06, | |
| "loss": 0.3511, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.07843735693575461, | |
| "grad_norm": 1.2732857455769802, | |
| "learning_rate": 9.939097395259108e-06, | |
| "loss": 0.2619, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.07874256065924004, | |
| "grad_norm": 1.8947301386622588, | |
| "learning_rate": 9.938325630386149e-06, | |
| "loss": 0.3933, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.07904776438272547, | |
| "grad_norm": 1.5625416559388712, | |
| "learning_rate": 9.937549036648857e-06, | |
| "loss": 0.4491, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.0793529681062109, | |
| "grad_norm": 1.5125179888703784, | |
| "learning_rate": 9.936767614806612e-06, | |
| "loss": 0.3674, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07965817182969633, | |
| "grad_norm": 1.5026525250547669, | |
| "learning_rate": 9.935981365623516e-06, | |
| "loss": 0.4103, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.07996337555318175, | |
| "grad_norm": 2.3948536293362115, | |
| "learning_rate": 9.93519028986839e-06, | |
| "loss": 0.4009, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.08026857927666718, | |
| "grad_norm": 2.416554371647352, | |
| "learning_rate": 9.934394388314775e-06, | |
| "loss": 0.4265, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.0805737830001526, | |
| "grad_norm": 1.560923734953618, | |
| "learning_rate": 9.933593661740933e-06, | |
| "loss": 0.303, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.08087898672363802, | |
| "grad_norm": 1.6053945705234087, | |
| "learning_rate": 9.932788110929837e-06, | |
| "loss": 0.3295, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.08118419044712345, | |
| "grad_norm": 1.7775437462596928, | |
| "learning_rate": 9.931977736669185e-06, | |
| "loss": 0.2197, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.08148939417060888, | |
| "grad_norm": 1.701318325041301, | |
| "learning_rate": 9.931162539751392e-06, | |
| "loss": 0.3581, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.08179459789409431, | |
| "grad_norm": 1.5974548511363529, | |
| "learning_rate": 9.93034252097358e-06, | |
| "loss": 0.3432, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.08209980161757974, | |
| "grad_norm": 1.8669593065073864, | |
| "learning_rate": 9.929517681137594e-06, | |
| "loss": 0.4133, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.08240500534106517, | |
| "grad_norm": 1.4895827642408586, | |
| "learning_rate": 9.928688021049991e-06, | |
| "loss": 0.3111, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.0827102090645506, | |
| "grad_norm": 1.4317804244871846, | |
| "learning_rate": 9.927853541522041e-06, | |
| "loss": 0.2915, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.08301541278803601, | |
| "grad_norm": 1.252478145781798, | |
| "learning_rate": 9.927014243369727e-06, | |
| "loss": 0.2794, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.08332061651152144, | |
| "grad_norm": 1.6973954865497314, | |
| "learning_rate": 9.926170127413743e-06, | |
| "loss": 0.6183, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.08362582023500686, | |
| "grad_norm": 1.4723277244112698, | |
| "learning_rate": 9.925321194479494e-06, | |
| "loss": 0.2815, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.08393102395849229, | |
| "grad_norm": 1.7075555550514414, | |
| "learning_rate": 9.924467445397097e-06, | |
| "loss": 0.4178, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.08423622768197772, | |
| "grad_norm": 1.5354808046910606, | |
| "learning_rate": 9.923608881001377e-06, | |
| "loss": 0.2355, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.08454143140546315, | |
| "grad_norm": 1.1795750747565834, | |
| "learning_rate": 9.922745502131865e-06, | |
| "loss": 0.3404, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.08484663512894858, | |
| "grad_norm": 1.427067758888222, | |
| "learning_rate": 9.921877309632805e-06, | |
| "loss": 0.3141, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.085151838852434, | |
| "grad_norm": 1.3691564278772157, | |
| "learning_rate": 9.921004304353147e-06, | |
| "loss": 0.287, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.08545704257591942, | |
| "grad_norm": 1.9220775714586407, | |
| "learning_rate": 9.920126487146544e-06, | |
| "loss": 0.6617, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08576224629940485, | |
| "grad_norm": 1.6761030408371134, | |
| "learning_rate": 9.919243858871355e-06, | |
| "loss": 0.466, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.08606745002289028, | |
| "grad_norm": 1.6120747264173168, | |
| "learning_rate": 9.918356420390645e-06, | |
| "loss": 0.5351, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.0863726537463757, | |
| "grad_norm": 1.5236961732014556, | |
| "learning_rate": 9.91746417257218e-06, | |
| "loss": 0.33, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.08667785746986113, | |
| "grad_norm": 1.6328635321860312, | |
| "learning_rate": 9.916567116288434e-06, | |
| "loss": 0.4301, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.08698306119334656, | |
| "grad_norm": 1.4120804188821041, | |
| "learning_rate": 9.915665252416577e-06, | |
| "loss": 0.3025, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.08728826491683199, | |
| "grad_norm": 1.8410843798908767, | |
| "learning_rate": 9.914758581838482e-06, | |
| "loss": 0.5415, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.08759346864031742, | |
| "grad_norm": 1.1807475096034001, | |
| "learning_rate": 9.913847105440725e-06, | |
| "loss": 0.3184, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.08789867236380283, | |
| "grad_norm": 1.52681276111022, | |
| "learning_rate": 9.912930824114577e-06, | |
| "loss": 0.4266, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.08820387608728826, | |
| "grad_norm": 1.4904538614169496, | |
| "learning_rate": 9.91200973875601e-06, | |
| "loss": 0.3404, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.08850907981077369, | |
| "grad_norm": 1.7385111110311349, | |
| "learning_rate": 9.911083850265692e-06, | |
| "loss": 0.3371, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08881428353425912, | |
| "grad_norm": 1.6013762575114376, | |
| "learning_rate": 9.91015315954899e-06, | |
| "loss": 0.4475, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.08911948725774455, | |
| "grad_norm": 1.5474202900018152, | |
| "learning_rate": 9.909217667515964e-06, | |
| "loss": 0.4162, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.08942469098122997, | |
| "grad_norm": 1.875769203080621, | |
| "learning_rate": 9.908277375081371e-06, | |
| "loss": 0.4446, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.0897298947047154, | |
| "grad_norm": 1.4914731218024286, | |
| "learning_rate": 9.907332283164663e-06, | |
| "loss": 0.4274, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.09003509842820083, | |
| "grad_norm": 1.6551811079983538, | |
| "learning_rate": 9.90638239268998e-06, | |
| "loss": 0.4883, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.09034030215168624, | |
| "grad_norm": 1.645510927644492, | |
| "learning_rate": 9.905427704586158e-06, | |
| "loss": 0.4885, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.09064550587517167, | |
| "grad_norm": 1.6759165462483547, | |
| "learning_rate": 9.904468219786727e-06, | |
| "loss": 0.3878, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.0909507095986571, | |
| "grad_norm": 1.596800484010474, | |
| "learning_rate": 9.903503939229901e-06, | |
| "loss": 0.2725, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.09125591332214253, | |
| "grad_norm": 1.4035704196730787, | |
| "learning_rate": 9.902534863858588e-06, | |
| "loss": 0.2147, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.09156111704562796, | |
| "grad_norm": 1.7460761357385464, | |
| "learning_rate": 9.90156099462038e-06, | |
| "loss": 0.3495, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09186632076911339, | |
| "grad_norm": 1.3373562156184522, | |
| "learning_rate": 9.900582332467566e-06, | |
| "loss": 0.342, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.09217152449259881, | |
| "grad_norm": 1.1466755748188362, | |
| "learning_rate": 9.89959887835711e-06, | |
| "loss": 0.1737, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.09247672821608424, | |
| "grad_norm": 1.8078659273922337, | |
| "learning_rate": 9.898610633250669e-06, | |
| "loss": 0.3111, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.09278193193956966, | |
| "grad_norm": 1.5400638324339648, | |
| "learning_rate": 9.897617598114584e-06, | |
| "loss": 0.4746, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.09308713566305508, | |
| "grad_norm": 1.558728128630052, | |
| "learning_rate": 9.896619773919878e-06, | |
| "loss": 0.3085, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.09339233938654051, | |
| "grad_norm": 4.094736926672729, | |
| "learning_rate": 9.895617161642257e-06, | |
| "loss": 0.4664, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.09369754311002594, | |
| "grad_norm": 1.63116898024897, | |
| "learning_rate": 9.89460976226211e-06, | |
| "loss": 0.3878, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.09400274683351137, | |
| "grad_norm": 1.7238364123731507, | |
| "learning_rate": 9.893597576764508e-06, | |
| "loss": 0.2989, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.0943079505569968, | |
| "grad_norm": 1.2496662648050174, | |
| "learning_rate": 9.8925806061392e-06, | |
| "loss": 0.3054, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.09461315428048223, | |
| "grad_norm": 0.8807197003313585, | |
| "learning_rate": 9.891558851380614e-06, | |
| "loss": 0.1904, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09491835800396765, | |
| "grad_norm": 1.5076918479598347, | |
| "learning_rate": 9.890532313487858e-06, | |
| "loss": 0.2679, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.09522356172745307, | |
| "grad_norm": 1.8465691043660122, | |
| "learning_rate": 9.889500993464716e-06, | |
| "loss": 0.5002, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.0955287654509385, | |
| "grad_norm": 1.9183643810942494, | |
| "learning_rate": 9.888464892319647e-06, | |
| "loss": 0.4869, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.09583396917442392, | |
| "grad_norm": 1.6515373264151805, | |
| "learning_rate": 9.887424011065788e-06, | |
| "loss": 0.4507, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.09613917289790935, | |
| "grad_norm": 1.6223391241834122, | |
| "learning_rate": 9.886378350720945e-06, | |
| "loss": 0.3445, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.09644437662139478, | |
| "grad_norm": 1.4416645097808285, | |
| "learning_rate": 9.885327912307604e-06, | |
| "loss": 0.2808, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.09674958034488021, | |
| "grad_norm": 1.4777192121308136, | |
| "learning_rate": 9.88427269685292e-06, | |
| "loss": 0.4335, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.09705478406836564, | |
| "grad_norm": 1.6934694740555867, | |
| "learning_rate": 9.883212705388715e-06, | |
| "loss": 0.4299, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.09735998779185107, | |
| "grad_norm": 1.9031284601590377, | |
| "learning_rate": 9.882147938951489e-06, | |
| "loss": 0.5364, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.09766519151533648, | |
| "grad_norm": 1.990035566558448, | |
| "learning_rate": 9.881078398582406e-06, | |
| "loss": 0.6476, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09797039523882191, | |
| "grad_norm": 1.4458600630840748, | |
| "learning_rate": 9.8800040853273e-06, | |
| "loss": 0.268, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.09827559896230734, | |
| "grad_norm": 1.473557254783057, | |
| "learning_rate": 9.878925000236667e-06, | |
| "loss": 0.3889, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.09858080268579276, | |
| "grad_norm": 1.429462352597184, | |
| "learning_rate": 9.877841144365681e-06, | |
| "loss": 0.3348, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.0988860064092782, | |
| "grad_norm": 1.9126483909533352, | |
| "learning_rate": 9.876752518774167e-06, | |
| "loss": 0.5004, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.09919121013276362, | |
| "grad_norm": 1.528278815830415, | |
| "learning_rate": 9.875659124526622e-06, | |
| "loss": 0.1931, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.09949641385624905, | |
| "grad_norm": 1.6064809314060318, | |
| "learning_rate": 9.874560962692207e-06, | |
| "loss": 0.2627, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.09980161757973448, | |
| "grad_norm": 1.8583002911468363, | |
| "learning_rate": 9.873458034344741e-06, | |
| "loss": 0.4795, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.1001068213032199, | |
| "grad_norm": 2.180040993961252, | |
| "learning_rate": 9.872350340562704e-06, | |
| "loss": 0.3502, | |
| "step": 328 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 3276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 328, | |
| "total_flos": 40670334410752.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |