| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 495, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00202020202020202, |
| "grad_norm": 0.7161276936531067, |
| "learning_rate": 9.97979797979798e-06, |
| "loss": 1.9711, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00404040404040404, |
| "grad_norm": 0.7165152430534363, |
| "learning_rate": 9.95959595959596e-06, |
| "loss": 2.0014, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006060606060606061, |
| "grad_norm": 0.6409142017364502, |
| "learning_rate": 9.939393939393939e-06, |
| "loss": 1.8579, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00808080808080808, |
| "grad_norm": 0.6455299258232117, |
| "learning_rate": 9.91919191919192e-06, |
| "loss": 1.9338, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010101010101010102, |
| "grad_norm": 0.620257556438446, |
| "learning_rate": 9.8989898989899e-06, |
| "loss": 1.9081, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012121212121212121, |
| "grad_norm": 0.5960692763328552, |
| "learning_rate": 9.87878787878788e-06, |
| "loss": 1.867, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014141414141414142, |
| "grad_norm": 0.5806917548179626, |
| "learning_rate": 9.85858585858586e-06, |
| "loss": 1.8566, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01616161616161616, |
| "grad_norm": 0.5505217909812927, |
| "learning_rate": 9.838383838383839e-06, |
| "loss": 1.8, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01818181818181818, |
| "grad_norm": 0.5532991886138916, |
| "learning_rate": 9.81818181818182e-06, |
| "loss": 1.8884, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.020202020202020204, |
| "grad_norm": 0.49693602323532104, |
| "learning_rate": 9.797979797979798e-06, |
| "loss": 1.7941, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 0.49418094754219055, |
| "learning_rate": 9.777777777777779e-06, |
| "loss": 1.7838, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.024242424242424242, |
| "grad_norm": 0.49829062819480896, |
| "learning_rate": 9.757575757575758e-06, |
| "loss": 1.7929, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.026262626262626262, |
| "grad_norm": 0.49077147245407104, |
| "learning_rate": 9.737373737373738e-06, |
| "loss": 1.7614, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.028282828282828285, |
| "grad_norm": 0.5530189871788025, |
| "learning_rate": 9.717171717171719e-06, |
| "loss": 1.8083, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.030303030303030304, |
| "grad_norm": 0.4590587913990021, |
| "learning_rate": 9.696969696969698e-06, |
| "loss": 1.7642, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03232323232323232, |
| "grad_norm": 0.4447866678237915, |
| "learning_rate": 9.676767676767678e-06, |
| "loss": 1.6643, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03434343434343434, |
| "grad_norm": 0.4465409517288208, |
| "learning_rate": 9.656565656565657e-06, |
| "loss": 1.7234, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 0.4881887137889862, |
| "learning_rate": 9.636363636363638e-06, |
| "loss": 1.6956, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03838383838383838, |
| "grad_norm": 0.43465423583984375, |
| "learning_rate": 9.616161616161616e-06, |
| "loss": 1.6792, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04040404040404041, |
| "grad_norm": 0.4478694796562195, |
| "learning_rate": 9.595959595959597e-06, |
| "loss": 1.6767, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04242424242424243, |
| "grad_norm": 0.40964585542678833, |
| "learning_rate": 9.575757575757576e-06, |
| "loss": 1.6158, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 0.44421252608299255, |
| "learning_rate": 9.555555555555556e-06, |
| "loss": 1.694, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.046464646464646465, |
| "grad_norm": 0.46450942754745483, |
| "learning_rate": 9.535353535353537e-06, |
| "loss": 1.6252, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.048484848484848485, |
| "grad_norm": 0.3978925049304962, |
| "learning_rate": 9.515151515151516e-06, |
| "loss": 1.5648, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.050505050505050504, |
| "grad_norm": 0.4238833487033844, |
| "learning_rate": 9.494949494949497e-06, |
| "loss": 1.6422, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.052525252525252523, |
| "grad_norm": 0.4160165786743164, |
| "learning_rate": 9.474747474747475e-06, |
| "loss": 1.5613, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05454545454545454, |
| "grad_norm": 0.4237181544303894, |
| "learning_rate": 9.454545454545456e-06, |
| "loss": 1.5941, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05656565656565657, |
| "grad_norm": 0.4124365448951721, |
| "learning_rate": 9.434343434343435e-06, |
| "loss": 1.6283, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05858585858585859, |
| "grad_norm": 0.3750332295894623, |
| "learning_rate": 9.414141414141414e-06, |
| "loss": 1.5654, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06060606060606061, |
| "grad_norm": 0.41155683994293213, |
| "learning_rate": 9.393939393939396e-06, |
| "loss": 1.5497, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06262626262626263, |
| "grad_norm": 0.3584173619747162, |
| "learning_rate": 9.373737373737375e-06, |
| "loss": 1.5246, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06464646464646465, |
| "grad_norm": 0.3761274516582489, |
| "learning_rate": 9.353535353535354e-06, |
| "loss": 1.5842, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.3488684296607971, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 1.4644, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06868686868686869, |
| "grad_norm": 0.3474116623401642, |
| "learning_rate": 9.313131313131313e-06, |
| "loss": 1.501, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0707070707070707, |
| "grad_norm": 0.3438032269477844, |
| "learning_rate": 9.292929292929294e-06, |
| "loss": 1.5141, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 0.35277971625328064, |
| "learning_rate": 9.272727272727273e-06, |
| "loss": 1.5847, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07474747474747474, |
| "grad_norm": 0.34736958146095276, |
| "learning_rate": 9.252525252525253e-06, |
| "loss": 1.4935, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07676767676767676, |
| "grad_norm": 0.3474046587944031, |
| "learning_rate": 9.232323232323232e-06, |
| "loss": 1.51, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07878787878787878, |
| "grad_norm": 0.3346942961215973, |
| "learning_rate": 9.212121212121213e-06, |
| "loss": 1.5053, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08080808080808081, |
| "grad_norm": 0.35995081067085266, |
| "learning_rate": 9.191919191919193e-06, |
| "loss": 1.3861, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08282828282828283, |
| "grad_norm": 0.33850350975990295, |
| "learning_rate": 9.171717171717172e-06, |
| "loss": 1.424, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08484848484848485, |
| "grad_norm": 0.3318334221839905, |
| "learning_rate": 9.151515151515153e-06, |
| "loss": 1.4537, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08686868686868687, |
| "grad_norm": 0.3250851631164551, |
| "learning_rate": 9.131313131313132e-06, |
| "loss": 1.4415, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 0.30386024713516235, |
| "learning_rate": 9.111111111111112e-06, |
| "loss": 1.3826, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 0.4216459095478058, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 1.3891, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09292929292929293, |
| "grad_norm": 0.33815595507621765, |
| "learning_rate": 9.070707070707072e-06, |
| "loss": 1.3504, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09494949494949495, |
| "grad_norm": 0.36842572689056396, |
| "learning_rate": 9.050505050505052e-06, |
| "loss": 1.4033, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.09696969696969697, |
| "grad_norm": 0.33431506156921387, |
| "learning_rate": 9.030303030303031e-06, |
| "loss": 1.4419, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.09898989898989899, |
| "grad_norm": 0.3316555619239807, |
| "learning_rate": 9.010101010101012e-06, |
| "loss": 1.481, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 0.31679612398147583, |
| "learning_rate": 8.98989898989899e-06, |
| "loss": 1.4128, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10303030303030303, |
| "grad_norm": 0.37177518010139465, |
| "learning_rate": 8.969696969696971e-06, |
| "loss": 1.4847, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10505050505050505, |
| "grad_norm": 0.3365563452243805, |
| "learning_rate": 8.94949494949495e-06, |
| "loss": 1.5136, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10707070707070707, |
| "grad_norm": 0.31179460883140564, |
| "learning_rate": 8.92929292929293e-06, |
| "loss": 1.3844, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 0.34356045722961426, |
| "learning_rate": 8.90909090909091e-06, |
| "loss": 1.3206, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.3672977089881897, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 1.3702, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11313131313131314, |
| "grad_norm": 0.30548205971717834, |
| "learning_rate": 8.86868686868687e-06, |
| "loss": 1.4516, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11515151515151516, |
| "grad_norm": 0.3203405439853668, |
| "learning_rate": 8.84848484848485e-06, |
| "loss": 1.4681, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11717171717171718, |
| "grad_norm": 0.3301260769367218, |
| "learning_rate": 8.82828282828283e-06, |
| "loss": 1.3809, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1191919191919192, |
| "grad_norm": 0.30863118171691895, |
| "learning_rate": 8.808080808080809e-06, |
| "loss": 1.378, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 0.3332017660140991, |
| "learning_rate": 8.787878787878788e-06, |
| "loss": 1.3982, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12323232323232323, |
| "grad_norm": 0.30107933282852173, |
| "learning_rate": 8.767676767676768e-06, |
| "loss": 1.4082, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.12525252525252525, |
| "grad_norm": 0.30208951234817505, |
| "learning_rate": 8.747474747474747e-06, |
| "loss": 1.4039, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.12727272727272726, |
| "grad_norm": 0.33244606852531433, |
| "learning_rate": 8.727272727272728e-06, |
| "loss": 1.3178, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1292929292929293, |
| "grad_norm": 0.31905415654182434, |
| "learning_rate": 8.707070707070707e-06, |
| "loss": 1.3215, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.13131313131313133, |
| "grad_norm": 0.4120834171772003, |
| "learning_rate": 8.686868686868687e-06, |
| "loss": 1.4206, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 0.29965725541114807, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 1.417, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.13535353535353536, |
| "grad_norm": 0.31773456931114197, |
| "learning_rate": 8.646464646464647e-06, |
| "loss": 1.3652, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.13737373737373737, |
| "grad_norm": 0.3313538730144501, |
| "learning_rate": 8.626262626262627e-06, |
| "loss": 1.4158, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1393939393939394, |
| "grad_norm": 0.32019728422164917, |
| "learning_rate": 8.606060606060606e-06, |
| "loss": 1.3246, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1414141414141414, |
| "grad_norm": 0.3173425495624542, |
| "learning_rate": 8.585858585858587e-06, |
| "loss": 1.352, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14343434343434344, |
| "grad_norm": 0.32213637232780457, |
| "learning_rate": 8.565656565656566e-06, |
| "loss": 1.3853, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 0.3139975070953369, |
| "learning_rate": 8.545454545454546e-06, |
| "loss": 1.3577, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14747474747474748, |
| "grad_norm": 0.314394474029541, |
| "learning_rate": 8.525252525252527e-06, |
| "loss": 1.3418, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1494949494949495, |
| "grad_norm": 0.3202640414237976, |
| "learning_rate": 8.505050505050506e-06, |
| "loss": 1.3105, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 0.32140177488327026, |
| "learning_rate": 8.484848484848486e-06, |
| "loss": 1.3431, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.15353535353535352, |
| "grad_norm": 0.308694064617157, |
| "learning_rate": 8.464646464646465e-06, |
| "loss": 1.2795, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 0.3079676628112793, |
| "learning_rate": 8.444444444444446e-06, |
| "loss": 1.3289, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15757575757575756, |
| "grad_norm": 0.3917205333709717, |
| "learning_rate": 8.424242424242425e-06, |
| "loss": 1.411, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1595959595959596, |
| "grad_norm": 0.33000463247299194, |
| "learning_rate": 8.404040404040405e-06, |
| "loss": 1.3653, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.16161616161616163, |
| "grad_norm": 0.3023252487182617, |
| "learning_rate": 8.383838383838384e-06, |
| "loss": 1.324, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16363636363636364, |
| "grad_norm": 0.3326578736305237, |
| "learning_rate": 8.363636363636365e-06, |
| "loss": 1.365, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.16565656565656567, |
| "grad_norm": 0.3222097158432007, |
| "learning_rate": 8.343434343434345e-06, |
| "loss": 1.2692, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.16767676767676767, |
| "grad_norm": 0.3314870595932007, |
| "learning_rate": 8.323232323232324e-06, |
| "loss": 1.3517, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1696969696969697, |
| "grad_norm": 0.312295526266098, |
| "learning_rate": 8.303030303030305e-06, |
| "loss": 1.3228, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1717171717171717, |
| "grad_norm": 0.31697049736976624, |
| "learning_rate": 8.282828282828283e-06, |
| "loss": 1.3023, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.17373737373737375, |
| "grad_norm": 0.3326326608657837, |
| "learning_rate": 8.262626262626264e-06, |
| "loss": 1.3565, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.17575757575757575, |
| "grad_norm": 0.31436386704444885, |
| "learning_rate": 8.242424242424243e-06, |
| "loss": 1.2924, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.3132941424846649, |
| "learning_rate": 8.222222222222222e-06, |
| "loss": 1.3322, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1797979797979798, |
| "grad_norm": 0.3268982470035553, |
| "learning_rate": 8.202020202020202e-06, |
| "loss": 1.3321, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.3064195215702057, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 1.3621, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18383838383838383, |
| "grad_norm": 0.3247116804122925, |
| "learning_rate": 8.161616161616162e-06, |
| "loss": 1.2668, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.18585858585858586, |
| "grad_norm": 0.3569066822528839, |
| "learning_rate": 8.141414141414142e-06, |
| "loss": 1.2769, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.18787878787878787, |
| "grad_norm": 0.3417966961860657, |
| "learning_rate": 8.121212121212121e-06, |
| "loss": 1.3375, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1898989898989899, |
| "grad_norm": 0.3412160575389862, |
| "learning_rate": 8.101010101010102e-06, |
| "loss": 1.3694, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1919191919191919, |
| "grad_norm": 0.3187222182750702, |
| "learning_rate": 8.08080808080808e-06, |
| "loss": 1.324, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.19393939393939394, |
| "grad_norm": 0.35206106305122375, |
| "learning_rate": 8.060606060606061e-06, |
| "loss": 1.2607, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.19595959595959597, |
| "grad_norm": 0.32647690176963806, |
| "learning_rate": 8.04040404040404e-06, |
| "loss": 1.3569, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.19797979797979798, |
| "grad_norm": 0.32524800300598145, |
| "learning_rate": 8.02020202020202e-06, |
| "loss": 1.2589, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.3647509217262268, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.3076, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 0.3915540277957916, |
| "learning_rate": 7.97979797979798e-06, |
| "loss": 1.2947, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.20404040404040405, |
| "grad_norm": 0.3395519554615021, |
| "learning_rate": 7.95959595959596e-06, |
| "loss": 1.3305, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.20606060606060606, |
| "grad_norm": 0.3333810865879059, |
| "learning_rate": 7.93939393939394e-06, |
| "loss": 1.2764, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2080808080808081, |
| "grad_norm": 0.3425975441932678, |
| "learning_rate": 7.91919191919192e-06, |
| "loss": 1.2832, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2101010101010101, |
| "grad_norm": 0.32646429538726807, |
| "learning_rate": 7.898989898989899e-06, |
| "loss": 1.2463, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.21212121212121213, |
| "grad_norm": 0.3389233350753784, |
| "learning_rate": 7.87878787878788e-06, |
| "loss": 1.3357, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.21414141414141413, |
| "grad_norm": 0.39399006962776184, |
| "learning_rate": 7.858585858585859e-06, |
| "loss": 1.2935, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.21616161616161617, |
| "grad_norm": 0.36394181847572327, |
| "learning_rate": 7.838383838383839e-06, |
| "loss": 1.2413, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 0.3306232690811157, |
| "learning_rate": 7.81818181818182e-06, |
| "loss": 1.2499, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2202020202020202, |
| "grad_norm": 0.3697446584701538, |
| "learning_rate": 7.797979797979799e-06, |
| "loss": 1.3104, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.33766692876815796, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 1.2753, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.22424242424242424, |
| "grad_norm": 0.4814659655094147, |
| "learning_rate": 7.757575757575758e-06, |
| "loss": 1.2874, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.22626262626262628, |
| "grad_norm": 0.39757466316223145, |
| "learning_rate": 7.737373737373739e-06, |
| "loss": 1.2381, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.22828282828282828, |
| "grad_norm": 0.36207494139671326, |
| "learning_rate": 7.717171717171717e-06, |
| "loss": 1.2205, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.23030303030303031, |
| "grad_norm": 0.38033750653266907, |
| "learning_rate": 7.696969696969696e-06, |
| "loss": 1.2648, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.23232323232323232, |
| "grad_norm": 0.4587545692920685, |
| "learning_rate": 7.676767676767677e-06, |
| "loss": 1.2546, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.23434343434343435, |
| "grad_norm": 0.4454125463962555, |
| "learning_rate": 7.656565656565658e-06, |
| "loss": 1.2876, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.23636363636363636, |
| "grad_norm": 0.3893223702907562, |
| "learning_rate": 7.636363636363638e-06, |
| "loss": 1.2756, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2383838383838384, |
| "grad_norm": 0.35102713108062744, |
| "learning_rate": 7.616161616161617e-06, |
| "loss": 1.3007, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2404040404040404, |
| "grad_norm": 0.37877577543258667, |
| "learning_rate": 7.595959595959597e-06, |
| "loss": 1.2119, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 0.3598031997680664, |
| "learning_rate": 7.5757575757575764e-06, |
| "loss": 1.2998, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 0.35307177901268005, |
| "learning_rate": 7.555555555555556e-06, |
| "loss": 1.3126, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.24646464646464647, |
| "grad_norm": 0.4488123655319214, |
| "learning_rate": 7.535353535353536e-06, |
| "loss": 1.249, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.24848484848484848, |
| "grad_norm": 0.3422190248966217, |
| "learning_rate": 7.515151515151516e-06, |
| "loss": 1.2519, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2505050505050505, |
| "grad_norm": 0.3509416878223419, |
| "learning_rate": 7.494949494949496e-06, |
| "loss": 1.254, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.25252525252525254, |
| "grad_norm": 0.3425782322883606, |
| "learning_rate": 7.474747474747476e-06, |
| "loss": 1.304, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2545454545454545, |
| "grad_norm": 0.3341624140739441, |
| "learning_rate": 7.454545454545456e-06, |
| "loss": 1.2864, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.25656565656565655, |
| "grad_norm": 0.3653208613395691, |
| "learning_rate": 7.434343434343435e-06, |
| "loss": 1.315, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2585858585858586, |
| "grad_norm": 0.3386072516441345, |
| "learning_rate": 7.414141414141415e-06, |
| "loss": 1.2362, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2606060606060606, |
| "grad_norm": 0.3409993350505829, |
| "learning_rate": 7.393939393939395e-06, |
| "loss": 1.205, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.26262626262626265, |
| "grad_norm": 0.35771581530570984, |
| "learning_rate": 7.373737373737374e-06, |
| "loss": 1.2276, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.26464646464646463, |
| "grad_norm": 0.4764679968357086, |
| "learning_rate": 7.353535353535353e-06, |
| "loss": 1.3022, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.35045522451400757, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 1.2707, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2686868686868687, |
| "grad_norm": 0.3524115979671478, |
| "learning_rate": 7.3131313131313146e-06, |
| "loss": 1.2546, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.27070707070707073, |
| "grad_norm": 0.36653998494148254, |
| "learning_rate": 7.2929292929292934e-06, |
| "loss": 1.2478, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.3623749017715454, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 1.3022, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.27474747474747474, |
| "grad_norm": 0.46625304222106934, |
| "learning_rate": 7.252525252525253e-06, |
| "loss": 1.2532, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2767676767676768, |
| "grad_norm": 0.380654901266098, |
| "learning_rate": 7.232323232323233e-06, |
| "loss": 1.2397, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2787878787878788, |
| "grad_norm": 0.3655640482902527, |
| "learning_rate": 7.212121212121212e-06, |
| "loss": 1.1885, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2808080808080808, |
| "grad_norm": 0.3584921956062317, |
| "learning_rate": 7.191919191919192e-06, |
| "loss": 1.2261, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2828282828282828, |
| "grad_norm": 0.45796817541122437, |
| "learning_rate": 7.171717171717172e-06, |
| "loss": 1.2093, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.28484848484848485, |
| "grad_norm": 0.3415711522102356, |
| "learning_rate": 7.151515151515152e-06, |
| "loss": 1.1708, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2868686868686869, |
| "grad_norm": 0.3456715941429138, |
| "learning_rate": 7.131313131313132e-06, |
| "loss": 1.1643, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 0.40753453969955444, |
| "learning_rate": 7.111111111111112e-06, |
| "loss": 1.1713, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 0.373065322637558, |
| "learning_rate": 7.0909090909090916e-06, |
| "loss": 1.2251, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.29292929292929293, |
| "grad_norm": 0.35757017135620117, |
| "learning_rate": 7.070707070707071e-06, |
| "loss": 1.2142, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.29494949494949496, |
| "grad_norm": 0.34428662061691284, |
| "learning_rate": 7.050505050505051e-06, |
| "loss": 1.2148, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.296969696969697, |
| "grad_norm": 0.37126731872558594, |
| "learning_rate": 7.030303030303031e-06, |
| "loss": 1.2787, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.298989898989899, |
| "grad_norm": 0.4702625870704651, |
| "learning_rate": 7.0101010101010105e-06, |
| "loss": 1.3027, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.301010101010101, |
| "grad_norm": 0.3773735463619232, |
| "learning_rate": 6.98989898989899e-06, |
| "loss": 1.2715, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.3751866817474365, |
| "learning_rate": 6.969696969696971e-06, |
| "loss": 1.2922, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.30505050505050507, |
| "grad_norm": 0.39378979802131653, |
| "learning_rate": 6.9494949494949505e-06, |
| "loss": 1.2464, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.30707070707070705, |
| "grad_norm": 0.43541523814201355, |
| "learning_rate": 6.92929292929293e-06, |
| "loss": 1.2892, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3090909090909091, |
| "grad_norm": 0.38896918296813965, |
| "learning_rate": 6.90909090909091e-06, |
| "loss": 1.239, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 0.3622775673866272, |
| "learning_rate": 6.88888888888889e-06, |
| "loss": 1.2136, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.31313131313131315, |
| "grad_norm": 0.43285271525382996, |
| "learning_rate": 6.868686868686869e-06, |
| "loss": 1.2774, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3151515151515151, |
| "grad_norm": 0.3640473186969757, |
| "learning_rate": 6.848484848484849e-06, |
| "loss": 1.2126, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.31717171717171716, |
| "grad_norm": 0.42588090896606445, |
| "learning_rate": 6.828282828282828e-06, |
| "loss": 1.1799, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3191919191919192, |
| "grad_norm": 0.375704824924469, |
| "learning_rate": 6.808080808080809e-06, |
| "loss": 1.2328, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3212121212121212, |
| "grad_norm": 0.38189056515693665, |
| "learning_rate": 6.787878787878789e-06, |
| "loss": 1.227, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.32323232323232326, |
| "grad_norm": 0.386527419090271, |
| "learning_rate": 6.767676767676769e-06, |
| "loss": 1.2044, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.32525252525252524, |
| "grad_norm": 0.5718291401863098, |
| "learning_rate": 6.747474747474749e-06, |
| "loss": 1.2129, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.32727272727272727, |
| "grad_norm": 0.39799562096595764, |
| "learning_rate": 6.7272727272727275e-06, |
| "loss": 1.2148, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3292929292929293, |
| "grad_norm": 0.5430400967597961, |
| "learning_rate": 6.707070707070707e-06, |
| "loss": 1.1866, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.33131313131313134, |
| "grad_norm": 0.3847930133342743, |
| "learning_rate": 6.686868686868687e-06, |
| "loss": 1.21, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.44060763716697693, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.1995, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.33535353535353535, |
| "grad_norm": 0.37353143095970154, |
| "learning_rate": 6.646464646464646e-06, |
| "loss": 1.1902, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3373737373737374, |
| "grad_norm": 0.42311203479766846, |
| "learning_rate": 6.626262626262627e-06, |
| "loss": 1.2584, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3393939393939394, |
| "grad_norm": 0.38061851263046265, |
| "learning_rate": 6.606060606060607e-06, |
| "loss": 1.2506, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3414141414141414, |
| "grad_norm": 0.39264020323753357, |
| "learning_rate": 6.585858585858586e-06, |
| "loss": 1.2419, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3434343434343434, |
| "grad_norm": 0.39015626907348633, |
| "learning_rate": 6.565656565656566e-06, |
| "loss": 1.2192, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.34545454545454546, |
| "grad_norm": 0.3569429814815521, |
| "learning_rate": 6.545454545454546e-06, |
| "loss": 1.2266, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3474747474747475, |
| "grad_norm": 0.45226484537124634, |
| "learning_rate": 6.525252525252526e-06, |
| "loss": 1.2529, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.34949494949494947, |
| "grad_norm": 0.4128590524196625, |
| "learning_rate": 6.505050505050505e-06, |
| "loss": 1.2238, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3515151515151515, |
| "grad_norm": 0.3912878930568695, |
| "learning_rate": 6.484848484848485e-06, |
| "loss": 1.2674, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.35353535353535354, |
| "grad_norm": 0.5874902606010437, |
| "learning_rate": 6.464646464646466e-06, |
| "loss": 1.3129, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.3880002796649933, |
| "learning_rate": 6.444444444444445e-06, |
| "loss": 1.2228, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.3575757575757576, |
| "grad_norm": 0.5327728390693665, |
| "learning_rate": 6.424242424242425e-06, |
| "loss": 1.1844, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3595959595959596, |
| "grad_norm": 0.3654763996601105, |
| "learning_rate": 6.404040404040405e-06, |
| "loss": 1.1878, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3616161616161616, |
| "grad_norm": 0.4578845500946045, |
| "learning_rate": 6.3838383838383845e-06, |
| "loss": 1.1795, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.49257224798202515, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 1.2089, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3656565656565657, |
| "grad_norm": 0.4164067208766937, |
| "learning_rate": 6.343434343434344e-06, |
| "loss": 1.272, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.36767676767676766, |
| "grad_norm": 0.38031005859375, |
| "learning_rate": 6.323232323232324e-06, |
| "loss": 1.1853, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3696969696969697, |
| "grad_norm": 0.38159313797950745, |
| "learning_rate": 6.303030303030303e-06, |
| "loss": 1.2229, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3717171717171717, |
| "grad_norm": 0.4095843434333801, |
| "learning_rate": 6.282828282828284e-06, |
| "loss": 1.2036, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.37373737373737376, |
| "grad_norm": 0.3795385956764221, |
| "learning_rate": 6.262626262626264e-06, |
| "loss": 1.1978, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.37575757575757573, |
| "grad_norm": 0.3845679759979248, |
| "learning_rate": 6.2424242424242434e-06, |
| "loss": 1.1985, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 0.3987710773944855, |
| "learning_rate": 6.222222222222223e-06, |
| "loss": 1.2439, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3797979797979798, |
| "grad_norm": 0.476406991481781, |
| "learning_rate": 6.202020202020203e-06, |
| "loss": 1.2493, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.38181818181818183, |
| "grad_norm": 0.3847932517528534, |
| "learning_rate": 6.181818181818182e-06, |
| "loss": 1.2102, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3838383838383838, |
| "grad_norm": 0.3880085051059723, |
| "learning_rate": 6.1616161616161615e-06, |
| "loss": 1.2171, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.38585858585858585, |
| "grad_norm": 0.37940821051597595, |
| "learning_rate": 6.141414141414141e-06, |
| "loss": 1.1935, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3878787878787879, |
| "grad_norm": 0.3726148009300232, |
| "learning_rate": 6.121212121212121e-06, |
| "loss": 1.1938, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3898989898989899, |
| "grad_norm": 0.37551644444465637, |
| "learning_rate": 6.1010101010101015e-06, |
| "loss": 1.1923, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.39191919191919194, |
| "grad_norm": 0.3850058913230896, |
| "learning_rate": 6.080808080808081e-06, |
| "loss": 1.201, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3939393939393939, |
| "grad_norm": 0.40154439210891724, |
| "learning_rate": 6.060606060606061e-06, |
| "loss": 1.1739, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.39595959595959596, |
| "grad_norm": 0.42834407091140747, |
| "learning_rate": 6.040404040404041e-06, |
| "loss": 1.2242, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.397979797979798, |
| "grad_norm": 0.3858676254749298, |
| "learning_rate": 6.0202020202020204e-06, |
| "loss": 1.1885, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.38397836685180664, |
| "learning_rate": 6e-06, |
| "loss": 1.1747, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.402020202020202, |
| "grad_norm": 0.39212876558303833, |
| "learning_rate": 5.97979797979798e-06, |
| "loss": 1.1739, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.43060749769210815, |
| "learning_rate": 5.95959595959596e-06, |
| "loss": 1.259, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.40606060606060607, |
| "grad_norm": 0.4071057438850403, |
| "learning_rate": 5.93939393939394e-06, |
| "loss": 1.2328, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4080808080808081, |
| "grad_norm": 0.3919212520122528, |
| "learning_rate": 5.91919191919192e-06, |
| "loss": 1.1906, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4101010101010101, |
| "grad_norm": 0.4213036596775055, |
| "learning_rate": 5.8989898989899e-06, |
| "loss": 1.2457, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4121212121212121, |
| "grad_norm": 0.4471401274204254, |
| "learning_rate": 5.878787878787879e-06, |
| "loss": 1.245, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.41414141414141414, |
| "grad_norm": 0.3926369249820709, |
| "learning_rate": 5.858585858585859e-06, |
| "loss": 1.1918, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4161616161616162, |
| "grad_norm": 0.441599577665329, |
| "learning_rate": 5.838383838383839e-06, |
| "loss": 1.2615, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.41818181818181815, |
| "grad_norm": 0.4350634217262268, |
| "learning_rate": 5.8181818181818185e-06, |
| "loss": 1.2082, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4202020202020202, |
| "grad_norm": 0.4004654288291931, |
| "learning_rate": 5.797979797979798e-06, |
| "loss": 1.2063, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 0.4058932960033417, |
| "learning_rate": 5.777777777777778e-06, |
| "loss": 1.1472, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.42424242424242425, |
| "grad_norm": 0.39488402009010315, |
| "learning_rate": 5.7575757575757586e-06, |
| "loss": 1.1836, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4262626262626263, |
| "grad_norm": 0.40045166015625, |
| "learning_rate": 5.737373737373738e-06, |
| "loss": 1.208, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.42828282828282827, |
| "grad_norm": 0.4182022213935852, |
| "learning_rate": 5.717171717171718e-06, |
| "loss": 1.2055, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4303030303030303, |
| "grad_norm": 0.401506632566452, |
| "learning_rate": 5.696969696969698e-06, |
| "loss": 1.244, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.43232323232323233, |
| "grad_norm": 0.37805262207984924, |
| "learning_rate": 5.6767676767676775e-06, |
| "loss": 1.1536, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.43434343434343436, |
| "grad_norm": 0.3987525701522827, |
| "learning_rate": 5.656565656565657e-06, |
| "loss": 1.1601, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 0.3922836184501648, |
| "learning_rate": 5.636363636363636e-06, |
| "loss": 1.1738, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4383838383838384, |
| "grad_norm": 0.4252990484237671, |
| "learning_rate": 5.616161616161616e-06, |
| "loss": 1.2306, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4404040404040404, |
| "grad_norm": 0.4056296646595001, |
| "learning_rate": 5.595959595959597e-06, |
| "loss": 1.1768, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.44242424242424244, |
| "grad_norm": 0.40049415826797485, |
| "learning_rate": 5.575757575757577e-06, |
| "loss": 1.2135, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.38820382952690125, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.181, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.44646464646464645, |
| "grad_norm": 0.4120016098022461, |
| "learning_rate": 5.5353535353535355e-06, |
| "loss": 1.2093, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4484848484848485, |
| "grad_norm": 0.44070056080818176, |
| "learning_rate": 5.515151515151515e-06, |
| "loss": 1.2054, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4505050505050505, |
| "grad_norm": 0.40790727734565735, |
| "learning_rate": 5.494949494949495e-06, |
| "loss": 1.1838, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.45252525252525255, |
| "grad_norm": 0.42531147599220276, |
| "learning_rate": 5.474747474747475e-06, |
| "loss": 1.229, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.5559154152870178, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 1.1661, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.45656565656565656, |
| "grad_norm": 0.4001394212245941, |
| "learning_rate": 5.434343434343434e-06, |
| "loss": 1.2354, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.4585858585858586, |
| "grad_norm": 0.43856632709503174, |
| "learning_rate": 5.414141414141415e-06, |
| "loss": 1.1432, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.46060606060606063, |
| "grad_norm": 0.40476134419441223, |
| "learning_rate": 5.3939393939393945e-06, |
| "loss": 1.1613, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4626262626262626, |
| "grad_norm": 0.412791907787323, |
| "learning_rate": 5.373737373737374e-06, |
| "loss": 1.2085, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.46464646464646464, |
| "grad_norm": 0.44894886016845703, |
| "learning_rate": 5.353535353535354e-06, |
| "loss": 1.1555, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 0.40787672996520996, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 1.1973, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4686868686868687, |
| "grad_norm": 0.40681204199790955, |
| "learning_rate": 5.313131313131313e-06, |
| "loss": 1.137, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4707070707070707, |
| "grad_norm": 0.38673871755599976, |
| "learning_rate": 5.292929292929293e-06, |
| "loss": 1.145, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4727272727272727, |
| "grad_norm": 0.43359676003456116, |
| "learning_rate": 5.272727272727273e-06, |
| "loss": 1.2187, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.47474747474747475, |
| "grad_norm": 0.4281125068664551, |
| "learning_rate": 5.252525252525253e-06, |
| "loss": 1.2103, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4767676767676768, |
| "grad_norm": 0.3990701138973236, |
| "learning_rate": 5.232323232323233e-06, |
| "loss": 1.2256, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.47878787878787876, |
| "grad_norm": 0.4083038866519928, |
| "learning_rate": 5.212121212121213e-06, |
| "loss": 1.2163, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4808080808080808, |
| "grad_norm": 0.39717841148376465, |
| "learning_rate": 5.191919191919193e-06, |
| "loss": 1.2002, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.48282828282828283, |
| "grad_norm": 0.4372676610946655, |
| "learning_rate": 5.171717171717172e-06, |
| "loss": 1.1944, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 0.41287773847579956, |
| "learning_rate": 5.151515151515152e-06, |
| "loss": 1.1902, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4868686868686869, |
| "grad_norm": 0.44914692640304565, |
| "learning_rate": 5.131313131313132e-06, |
| "loss": 1.1688, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 0.5057622790336609, |
| "learning_rate": 5.1111111111111115e-06, |
| "loss": 1.1299, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4909090909090909, |
| "grad_norm": 0.41322949528694153, |
| "learning_rate": 5.090909090909091e-06, |
| "loss": 1.1641, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.49292929292929294, |
| "grad_norm": 0.41922056674957275, |
| "learning_rate": 5.070707070707072e-06, |
| "loss": 1.211, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.494949494949495, |
| "grad_norm": 0.4766096770763397, |
| "learning_rate": 5.0505050505050515e-06, |
| "loss": 1.2204, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.49696969696969695, |
| "grad_norm": 0.43514490127563477, |
| "learning_rate": 5.030303030303031e-06, |
| "loss": 1.1944, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.498989898989899, |
| "grad_norm": 0.44566211104393005, |
| "learning_rate": 5.010101010101011e-06, |
| "loss": 1.2415, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.501010101010101, |
| "grad_norm": 0.4128602147102356, |
| "learning_rate": 4.98989898989899e-06, |
| "loss": 1.1598, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.503030303030303, |
| "grad_norm": 0.5456644892692566, |
| "learning_rate": 4.9696969696969696e-06, |
| "loss": 1.2071, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 0.416353702545166, |
| "learning_rate": 4.94949494949495e-06, |
| "loss": 1.2018, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5070707070707071, |
| "grad_norm": 0.4307159185409546, |
| "learning_rate": 4.92929292929293e-06, |
| "loss": 1.1076, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.509090909090909, |
| "grad_norm": 0.4292945861816406, |
| "learning_rate": 4.90909090909091e-06, |
| "loss": 1.1467, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5111111111111111, |
| "grad_norm": 0.458187997341156, |
| "learning_rate": 4.888888888888889e-06, |
| "loss": 1.1661, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5131313131313131, |
| "grad_norm": 0.42484790086746216, |
| "learning_rate": 4.868686868686869e-06, |
| "loss": 1.1841, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5151515151515151, |
| "grad_norm": 0.4383080303668976, |
| "learning_rate": 4.848484848484849e-06, |
| "loss": 1.2258, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5171717171717172, |
| "grad_norm": 0.4041128158569336, |
| "learning_rate": 4.8282828282828285e-06, |
| "loss": 1.2129, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5191919191919192, |
| "grad_norm": 0.41394418478012085, |
| "learning_rate": 4.808080808080808e-06, |
| "loss": 1.1662, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5212121212121212, |
| "grad_norm": 0.41534096002578735, |
| "learning_rate": 4.787878787878788e-06, |
| "loss": 1.1357, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5232323232323233, |
| "grad_norm": 0.5621700882911682, |
| "learning_rate": 4.7676767676767685e-06, |
| "loss": 1.1742, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5252525252525253, |
| "grad_norm": 0.45183923840522766, |
| "learning_rate": 4.747474747474748e-06, |
| "loss": 1.1307, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5272727272727272, |
| "grad_norm": 0.4465235471725464, |
| "learning_rate": 4.727272727272728e-06, |
| "loss": 1.1482, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5292929292929293, |
| "grad_norm": 0.40243247151374817, |
| "learning_rate": 4.707070707070707e-06, |
| "loss": 1.1328, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5313131313131313, |
| "grad_norm": 0.4068451523780823, |
| "learning_rate": 4.6868686868686874e-06, |
| "loss": 1.1874, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.4202090799808502, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 1.1382, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5353535353535354, |
| "grad_norm": 0.43421876430511475, |
| "learning_rate": 4.646464646464647e-06, |
| "loss": 1.1409, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5373737373737374, |
| "grad_norm": 0.4315180480480194, |
| "learning_rate": 4.626262626262627e-06, |
| "loss": 1.2075, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5393939393939394, |
| "grad_norm": 0.5113957524299622, |
| "learning_rate": 4.606060606060606e-06, |
| "loss": 1.2027, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5414141414141415, |
| "grad_norm": 0.41647160053253174, |
| "learning_rate": 4.585858585858586e-06, |
| "loss": 1.1589, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5434343434343434, |
| "grad_norm": 0.41890841722488403, |
| "learning_rate": 4.565656565656566e-06, |
| "loss": 1.1552, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.4266463816165924, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 1.0917, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5474747474747474, |
| "grad_norm": 0.4231935143470764, |
| "learning_rate": 4.525252525252526e-06, |
| "loss": 1.2367, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5494949494949495, |
| "grad_norm": 0.41641512513160706, |
| "learning_rate": 4.505050505050506e-06, |
| "loss": 1.1572, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5515151515151515, |
| "grad_norm": 0.44688332080841064, |
| "learning_rate": 4.4848484848484855e-06, |
| "loss": 1.1074, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5535353535353535, |
| "grad_norm": 0.41230785846710205, |
| "learning_rate": 4.464646464646465e-06, |
| "loss": 1.1277, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.4160812497138977, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.1872, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5575757575757576, |
| "grad_norm": 0.4164697825908661, |
| "learning_rate": 4.424242424242425e-06, |
| "loss": 1.1576, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5595959595959596, |
| "grad_norm": 0.40832680463790894, |
| "learning_rate": 4.4040404040404044e-06, |
| "loss": 1.1563, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5616161616161616, |
| "grad_norm": 0.4225139617919922, |
| "learning_rate": 4.383838383838384e-06, |
| "loss": 1.1232, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5636363636363636, |
| "grad_norm": 0.4079049527645111, |
| "learning_rate": 4.363636363636364e-06, |
| "loss": 1.1661, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5656565656565656, |
| "grad_norm": 0.6146241426467896, |
| "learning_rate": 4.343434343434344e-06, |
| "loss": 1.1824, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5676767676767677, |
| "grad_norm": 0.4161895513534546, |
| "learning_rate": 4.323232323232323e-06, |
| "loss": 1.2106, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5696969696969697, |
| "grad_norm": 0.4248459041118622, |
| "learning_rate": 4.303030303030303e-06, |
| "loss": 1.1525, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5717171717171717, |
| "grad_norm": 0.44591110944747925, |
| "learning_rate": 4.282828282828283e-06, |
| "loss": 1.2562, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5737373737373738, |
| "grad_norm": 0.416741281747818, |
| "learning_rate": 4.262626262626263e-06, |
| "loss": 1.1514, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5757575757575758, |
| "grad_norm": 0.4068896174430847, |
| "learning_rate": 4.242424242424243e-06, |
| "loss": 1.1614, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 0.4546835124492645, |
| "learning_rate": 4.222222222222223e-06, |
| "loss": 1.0821, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5797979797979798, |
| "grad_norm": 0.40689557790756226, |
| "learning_rate": 4.2020202020202026e-06, |
| "loss": 1.2122, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 0.4217931628227234, |
| "learning_rate": 4.181818181818182e-06, |
| "loss": 1.1175, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5838383838383838, |
| "grad_norm": 0.4158902168273926, |
| "learning_rate": 4.161616161616162e-06, |
| "loss": 1.1625, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5858585858585859, |
| "grad_norm": 0.4220472276210785, |
| "learning_rate": 4.141414141414142e-06, |
| "loss": 1.0984, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5878787878787879, |
| "grad_norm": 0.42913931608200073, |
| "learning_rate": 4.1212121212121215e-06, |
| "loss": 1.1544, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5898989898989899, |
| "grad_norm": 0.43582457304000854, |
| "learning_rate": 4.101010101010101e-06, |
| "loss": 1.1611, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.591919191919192, |
| "grad_norm": 0.4344398081302643, |
| "learning_rate": 4.080808080808081e-06, |
| "loss": 1.1379, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.593939393939394, |
| "grad_norm": 0.42097654938697815, |
| "learning_rate": 4.060606060606061e-06, |
| "loss": 1.1463, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5959595959595959, |
| "grad_norm": 0.4698149561882019, |
| "learning_rate": 4.04040404040404e-06, |
| "loss": 1.1727, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.597979797979798, |
| "grad_norm": 0.44758716225624084, |
| "learning_rate": 4.02020202020202e-06, |
| "loss": 1.2231, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.4903525114059448, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.1436, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.602020202020202, |
| "grad_norm": 0.4759634733200073, |
| "learning_rate": 3.97979797979798e-06, |
| "loss": 1.2026, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.604040404040404, |
| "grad_norm": 0.5918915271759033, |
| "learning_rate": 3.95959595959596e-06, |
| "loss": 1.184, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.43710240721702576, |
| "learning_rate": 3.93939393939394e-06, |
| "loss": 1.1788, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6080808080808081, |
| "grad_norm": 0.42409440875053406, |
| "learning_rate": 3.9191919191919196e-06, |
| "loss": 1.1728, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6101010101010101, |
| "grad_norm": 0.41567525267601013, |
| "learning_rate": 3.898989898989899e-06, |
| "loss": 1.1293, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6121212121212121, |
| "grad_norm": 0.5063587427139282, |
| "learning_rate": 3.878787878787879e-06, |
| "loss": 1.1833, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6141414141414141, |
| "grad_norm": 0.620876133441925, |
| "learning_rate": 3.858585858585859e-06, |
| "loss": 1.1825, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6161616161616161, |
| "grad_norm": 0.4584016501903534, |
| "learning_rate": 3.8383838383838385e-06, |
| "loss": 1.1357, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6181818181818182, |
| "grad_norm": 0.664551854133606, |
| "learning_rate": 3.818181818181819e-06, |
| "loss": 1.1885, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6202020202020202, |
| "grad_norm": 0.4733007550239563, |
| "learning_rate": 3.7979797979797984e-06, |
| "loss": 1.1535, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 0.4278660714626312, |
| "learning_rate": 3.777777777777778e-06, |
| "loss": 1.1605, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6242424242424243, |
| "grad_norm": 0.4181557595729828, |
| "learning_rate": 3.757575757575758e-06, |
| "loss": 1.1509, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6262626262626263, |
| "grad_norm": 0.42843660712242126, |
| "learning_rate": 3.737373737373738e-06, |
| "loss": 1.0812, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6282828282828283, |
| "grad_norm": 0.6776353120803833, |
| "learning_rate": 3.7171717171717177e-06, |
| "loss": 1.2063, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6303030303030303, |
| "grad_norm": 0.47619807720184326, |
| "learning_rate": 3.6969696969696974e-06, |
| "loss": 1.1347, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6323232323232323, |
| "grad_norm": 0.5270451307296753, |
| "learning_rate": 3.6767676767676767e-06, |
| "loss": 1.2234, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6343434343434343, |
| "grad_norm": 0.43449628353118896, |
| "learning_rate": 3.6565656565656573e-06, |
| "loss": 1.0883, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.48849719762802124, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 1.1207, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6383838383838384, |
| "grad_norm": 0.4676908850669861, |
| "learning_rate": 3.6161616161616163e-06, |
| "loss": 1.1537, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6404040404040404, |
| "grad_norm": 0.433596134185791, |
| "learning_rate": 3.595959595959596e-06, |
| "loss": 1.1987, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6424242424242425, |
| "grad_norm": 0.4653543531894684, |
| "learning_rate": 3.575757575757576e-06, |
| "loss": 1.1527, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6444444444444445, |
| "grad_norm": 0.427843302488327, |
| "learning_rate": 3.555555555555556e-06, |
| "loss": 1.1151, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6464646464646465, |
| "grad_norm": 0.49472782015800476, |
| "learning_rate": 3.5353535353535356e-06, |
| "loss": 1.166, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6484848484848484, |
| "grad_norm": 0.43731406331062317, |
| "learning_rate": 3.5151515151515154e-06, |
| "loss": 1.1905, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6505050505050505, |
| "grad_norm": 0.4756666421890259, |
| "learning_rate": 3.494949494949495e-06, |
| "loss": 1.1234, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6525252525252525, |
| "grad_norm": 0.4782750904560089, |
| "learning_rate": 3.4747474747474752e-06, |
| "loss": 1.1443, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6545454545454545, |
| "grad_norm": 0.464298814535141, |
| "learning_rate": 3.454545454545455e-06, |
| "loss": 1.1924, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6565656565656566, |
| "grad_norm": 0.5392622351646423, |
| "learning_rate": 3.4343434343434347e-06, |
| "loss": 1.1884, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6585858585858586, |
| "grad_norm": 0.4372235834598541, |
| "learning_rate": 3.414141414141414e-06, |
| "loss": 1.1297, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6606060606060606, |
| "grad_norm": 0.4278256595134735, |
| "learning_rate": 3.3939393939393946e-06, |
| "loss": 1.1252, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6626262626262627, |
| "grad_norm": 0.4149866998195648, |
| "learning_rate": 3.3737373737373743e-06, |
| "loss": 1.132, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6646464646464646, |
| "grad_norm": 0.4469216465950012, |
| "learning_rate": 3.3535353535353536e-06, |
| "loss": 1.1454, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.43997472524642944, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.1378, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6686868686868687, |
| "grad_norm": 0.48830243945121765, |
| "learning_rate": 3.3131313131313135e-06, |
| "loss": 1.2211, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6707070707070707, |
| "grad_norm": 0.43039470911026, |
| "learning_rate": 3.292929292929293e-06, |
| "loss": 1.1188, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6727272727272727, |
| "grad_norm": 0.4260430932044983, |
| "learning_rate": 3.272727272727273e-06, |
| "loss": 1.1876, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6747474747474748, |
| "grad_norm": 2.3196213245391846, |
| "learning_rate": 3.2525252525252527e-06, |
| "loss": 1.1765, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6767676767676768, |
| "grad_norm": 0.4350275695323944, |
| "learning_rate": 3.232323232323233e-06, |
| "loss": 1.127, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6787878787878788, |
| "grad_norm": 0.42931365966796875, |
| "learning_rate": 3.2121212121212125e-06, |
| "loss": 1.1641, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6808080808080809, |
| "grad_norm": 0.5864835381507874, |
| "learning_rate": 3.1919191919191923e-06, |
| "loss": 1.1731, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6828282828282828, |
| "grad_norm": 0.5235271453857422, |
| "learning_rate": 3.171717171717172e-06, |
| "loss": 1.1438, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6848484848484848, |
| "grad_norm": 0.43767064809799194, |
| "learning_rate": 3.1515151515151517e-06, |
| "loss": 1.1546, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6868686868686869, |
| "grad_norm": 0.43670475482940674, |
| "learning_rate": 3.131313131313132e-06, |
| "loss": 1.1324, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6888888888888889, |
| "grad_norm": 0.441994309425354, |
| "learning_rate": 3.1111111111111116e-06, |
| "loss": 1.1244, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6909090909090909, |
| "grad_norm": 0.43313199281692505, |
| "learning_rate": 3.090909090909091e-06, |
| "loss": 1.1463, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.692929292929293, |
| "grad_norm": 0.4461485743522644, |
| "learning_rate": 3.0707070707070706e-06, |
| "loss": 1.1102, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.694949494949495, |
| "grad_norm": 0.4676089584827423, |
| "learning_rate": 3.0505050505050508e-06, |
| "loss": 1.2312, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.696969696969697, |
| "grad_norm": 0.5393002033233643, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 1.2007, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6989898989898989, |
| "grad_norm": 0.5098319053649902, |
| "learning_rate": 3.0101010101010102e-06, |
| "loss": 1.1131, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.701010101010101, |
| "grad_norm": 0.6225714683532715, |
| "learning_rate": 2.98989898989899e-06, |
| "loss": 1.1297, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.703030303030303, |
| "grad_norm": 0.43681618571281433, |
| "learning_rate": 2.96969696969697e-06, |
| "loss": 1.1694, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.705050505050505, |
| "grad_norm": 0.4441690146923065, |
| "learning_rate": 2.94949494949495e-06, |
| "loss": 1.1442, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 0.6507865190505981, |
| "learning_rate": 2.9292929292929295e-06, |
| "loss": 1.113, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7090909090909091, |
| "grad_norm": 0.44259369373321533, |
| "learning_rate": 2.9090909090909093e-06, |
| "loss": 1.1898, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.44127604365348816, |
| "learning_rate": 2.888888888888889e-06, |
| "loss": 1.1395, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7131313131313132, |
| "grad_norm": 0.41907399892807007, |
| "learning_rate": 2.868686868686869e-06, |
| "loss": 1.1235, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7151515151515152, |
| "grad_norm": 0.5818850994110107, |
| "learning_rate": 2.848484848484849e-06, |
| "loss": 1.0942, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7171717171717171, |
| "grad_norm": 0.46083900332450867, |
| "learning_rate": 2.8282828282828286e-06, |
| "loss": 1.1183, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7191919191919192, |
| "grad_norm": 0.42110776901245117, |
| "learning_rate": 2.808080808080808e-06, |
| "loss": 1.1243, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7212121212121212, |
| "grad_norm": 0.4376765787601471, |
| "learning_rate": 2.7878787878787885e-06, |
| "loss": 1.1025, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7232323232323232, |
| "grad_norm": 0.4480559229850769, |
| "learning_rate": 2.7676767676767678e-06, |
| "loss": 1.1169, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7252525252525253, |
| "grad_norm": 0.4249851107597351, |
| "learning_rate": 2.7474747474747475e-06, |
| "loss": 1.1385, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.4330328404903412, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 1.1573, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7292929292929293, |
| "grad_norm": 0.5165948867797852, |
| "learning_rate": 2.7070707070707074e-06, |
| "loss": 1.0962, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7313131313131314, |
| "grad_norm": 0.43222978711128235, |
| "learning_rate": 2.686868686868687e-06, |
| "loss": 1.1562, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 0.4787655472755432, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 1.183, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7353535353535353, |
| "grad_norm": 0.47134554386138916, |
| "learning_rate": 2.6464646464646466e-06, |
| "loss": 1.1184, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7373737373737373, |
| "grad_norm": 0.435281902551651, |
| "learning_rate": 2.6262626262626267e-06, |
| "loss": 1.1517, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7393939393939394, |
| "grad_norm": 0.4473310112953186, |
| "learning_rate": 2.6060606060606064e-06, |
| "loss": 1.1593, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7414141414141414, |
| "grad_norm": 0.43103650212287903, |
| "learning_rate": 2.585858585858586e-06, |
| "loss": 1.0713, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7434343434343434, |
| "grad_norm": 0.4542030692100525, |
| "learning_rate": 2.565656565656566e-06, |
| "loss": 1.1458, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7454545454545455, |
| "grad_norm": 0.4651864469051361, |
| "learning_rate": 2.5454545454545456e-06, |
| "loss": 1.1469, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7474747474747475, |
| "grad_norm": 0.43439170718193054, |
| "learning_rate": 2.5252525252525258e-06, |
| "loss": 1.1413, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7494949494949495, |
| "grad_norm": 0.42597660422325134, |
| "learning_rate": 2.5050505050505055e-06, |
| "loss": 1.1276, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7515151515151515, |
| "grad_norm": 0.4265841245651245, |
| "learning_rate": 2.4848484848484848e-06, |
| "loss": 1.1501, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7535353535353535, |
| "grad_norm": 0.4280242919921875, |
| "learning_rate": 2.464646464646465e-06, |
| "loss": 1.125, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 0.45420101284980774, |
| "learning_rate": 2.4444444444444447e-06, |
| "loss": 1.1429, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.4887622594833374, |
| "learning_rate": 2.4242424242424244e-06, |
| "loss": 1.0949, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7595959595959596, |
| "grad_norm": 0.44896116852760315, |
| "learning_rate": 2.404040404040404e-06, |
| "loss": 1.1469, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7616161616161616, |
| "grad_norm": 0.44278374314308167, |
| "learning_rate": 2.3838383838383843e-06, |
| "loss": 1.136, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7636363636363637, |
| "grad_norm": 0.4393131732940674, |
| "learning_rate": 2.363636363636364e-06, |
| "loss": 1.1509, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7656565656565657, |
| "grad_norm": 0.4577363133430481, |
| "learning_rate": 2.3434343434343437e-06, |
| "loss": 1.1389, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7676767676767676, |
| "grad_norm": 0.522461473941803, |
| "learning_rate": 2.3232323232323234e-06, |
| "loss": 1.157, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7696969696969697, |
| "grad_norm": 0.44777238368988037, |
| "learning_rate": 2.303030303030303e-06, |
| "loss": 1.1146, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7717171717171717, |
| "grad_norm": 0.4325833320617676, |
| "learning_rate": 2.282828282828283e-06, |
| "loss": 1.1993, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7737373737373737, |
| "grad_norm": 0.42686280608177185, |
| "learning_rate": 2.262626262626263e-06, |
| "loss": 1.1482, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7757575757575758, |
| "grad_norm": 0.4293493628501892, |
| "learning_rate": 2.2424242424242428e-06, |
| "loss": 1.1388, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.47681450843811035, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.1829, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7797979797979798, |
| "grad_norm": 0.5011666417121887, |
| "learning_rate": 2.2020202020202022e-06, |
| "loss": 1.1261, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7818181818181819, |
| "grad_norm": 0.43567654490470886, |
| "learning_rate": 2.181818181818182e-06, |
| "loss": 1.1542, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7838383838383839, |
| "grad_norm": 0.6387421488761902, |
| "learning_rate": 2.1616161616161617e-06, |
| "loss": 1.1699, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7858585858585858, |
| "grad_norm": 0.4544886648654938, |
| "learning_rate": 2.1414141414141414e-06, |
| "loss": 1.1836, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7878787878787878, |
| "grad_norm": 0.4290023446083069, |
| "learning_rate": 2.1212121212121216e-06, |
| "loss": 1.1207, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7898989898989899, |
| "grad_norm": 0.43866926431655884, |
| "learning_rate": 2.1010101010101013e-06, |
| "loss": 1.1314, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7919191919191919, |
| "grad_norm": 0.4637463688850403, |
| "learning_rate": 2.080808080808081e-06, |
| "loss": 1.1953, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.793939393939394, |
| "grad_norm": 0.46285444498062134, |
| "learning_rate": 2.0606060606060607e-06, |
| "loss": 1.1379, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.795959595959596, |
| "grad_norm": 0.44597333669662476, |
| "learning_rate": 2.0404040404040405e-06, |
| "loss": 1.1593, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.797979797979798, |
| "grad_norm": 0.4328615665435791, |
| "learning_rate": 2.02020202020202e-06, |
| "loss": 1.1368, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.43377038836479187, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.1793, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.802020202020202, |
| "grad_norm": 0.6099244952201843, |
| "learning_rate": 1.97979797979798e-06, |
| "loss": 1.2042, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.804040404040404, |
| "grad_norm": 0.44520944356918335, |
| "learning_rate": 1.9595959595959598e-06, |
| "loss": 1.1492, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.806060606060606, |
| "grad_norm": 0.4510004222393036, |
| "learning_rate": 1.9393939393939395e-06, |
| "loss": 1.1759, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 0.4355102479457855, |
| "learning_rate": 1.9191919191919192e-06, |
| "loss": 1.163, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8101010101010101, |
| "grad_norm": 0.45038437843322754, |
| "learning_rate": 1.8989898989898992e-06, |
| "loss": 1.1667, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8121212121212121, |
| "grad_norm": 0.8860539197921753, |
| "learning_rate": 1.878787878787879e-06, |
| "loss": 1.1704, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8141414141414142, |
| "grad_norm": 0.43954333662986755, |
| "learning_rate": 1.8585858585858588e-06, |
| "loss": 1.1727, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8161616161616162, |
| "grad_norm": 0.4659814238548279, |
| "learning_rate": 1.8383838383838384e-06, |
| "loss": 1.1287, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.42593252658843994, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 1.0651, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8202020202020202, |
| "grad_norm": 0.6211426854133606, |
| "learning_rate": 1.797979797979798e-06, |
| "loss": 1.0999, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8222222222222222, |
| "grad_norm": 0.4256881773471832, |
| "learning_rate": 1.777777777777778e-06, |
| "loss": 1.1509, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8242424242424242, |
| "grad_norm": 0.4392426311969757, |
| "learning_rate": 1.7575757575757577e-06, |
| "loss": 1.1146, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8262626262626263, |
| "grad_norm": 0.684870719909668, |
| "learning_rate": 1.7373737373737376e-06, |
| "loss": 1.142, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8282828282828283, |
| "grad_norm": 0.4318128526210785, |
| "learning_rate": 1.7171717171717173e-06, |
| "loss": 1.1144, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8303030303030303, |
| "grad_norm": 0.5533406138420105, |
| "learning_rate": 1.6969696969696973e-06, |
| "loss": 1.1304, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8323232323232324, |
| "grad_norm": 0.45593488216400146, |
| "learning_rate": 1.6767676767676768e-06, |
| "loss": 1.1562, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8343434343434344, |
| "grad_norm": 0.43928050994873047, |
| "learning_rate": 1.6565656565656567e-06, |
| "loss": 1.167, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8363636363636363, |
| "grad_norm": 0.44358789920806885, |
| "learning_rate": 1.6363636363636365e-06, |
| "loss": 1.0835, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8383838383838383, |
| "grad_norm": 0.702485978603363, |
| "learning_rate": 1.6161616161616164e-06, |
| "loss": 1.1429, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8404040404040404, |
| "grad_norm": 0.4881725311279297, |
| "learning_rate": 1.5959595959595961e-06, |
| "loss": 1.1928, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8424242424242424, |
| "grad_norm": 0.4462536573410034, |
| "learning_rate": 1.5757575757575759e-06, |
| "loss": 1.1068, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 0.43907180428504944, |
| "learning_rate": 1.5555555555555558e-06, |
| "loss": 1.1737, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8464646464646465, |
| "grad_norm": 0.4497542083263397, |
| "learning_rate": 1.5353535353535353e-06, |
| "loss": 1.1413, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 0.45987042784690857, |
| "learning_rate": 1.5151515151515152e-06, |
| "loss": 1.1603, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8505050505050505, |
| "grad_norm": 0.42042702436447144, |
| "learning_rate": 1.494949494949495e-06, |
| "loss": 1.0856, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8525252525252526, |
| "grad_norm": 0.47144535183906555, |
| "learning_rate": 1.474747474747475e-06, |
| "loss": 1.1399, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8545454545454545, |
| "grad_norm": 0.4407351315021515, |
| "learning_rate": 1.4545454545454546e-06, |
| "loss": 1.1233, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8565656565656565, |
| "grad_norm": 0.4383006691932678, |
| "learning_rate": 1.4343434343434346e-06, |
| "loss": 1.1597, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8585858585858586, |
| "grad_norm": 0.43792206048965454, |
| "learning_rate": 1.4141414141414143e-06, |
| "loss": 1.1303, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8606060606060606, |
| "grad_norm": 0.4718475639820099, |
| "learning_rate": 1.3939393939393942e-06, |
| "loss": 1.1693, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8626262626262626, |
| "grad_norm": 0.5578370094299316, |
| "learning_rate": 1.3737373737373738e-06, |
| "loss": 1.1808, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8646464646464647, |
| "grad_norm": 0.5279450416564941, |
| "learning_rate": 1.3535353535353537e-06, |
| "loss": 1.1485, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 0.450605571269989, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 1.1611, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8686868686868687, |
| "grad_norm": 0.47113335132598877, |
| "learning_rate": 1.3131313131313134e-06, |
| "loss": 1.1017, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8707070707070707, |
| "grad_norm": 0.6759155988693237, |
| "learning_rate": 1.292929292929293e-06, |
| "loss": 1.1382, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.5046219825744629, |
| "learning_rate": 1.2727272727272728e-06, |
| "loss": 1.1704, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8747474747474747, |
| "grad_norm": 0.45093533396720886, |
| "learning_rate": 1.2525252525252527e-06, |
| "loss": 1.1564, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8767676767676768, |
| "grad_norm": 0.4605855643749237, |
| "learning_rate": 1.2323232323232325e-06, |
| "loss": 1.1162, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8787878787878788, |
| "grad_norm": 0.4468255043029785, |
| "learning_rate": 1.2121212121212122e-06, |
| "loss": 1.1406, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8808080808080808, |
| "grad_norm": 0.4631853997707367, |
| "learning_rate": 1.1919191919191921e-06, |
| "loss": 1.1961, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8828282828282829, |
| "grad_norm": 0.5156291723251343, |
| "learning_rate": 1.1717171717171719e-06, |
| "loss": 1.2438, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8848484848484849, |
| "grad_norm": 0.4465646743774414, |
| "learning_rate": 1.1515151515151516e-06, |
| "loss": 1.0995, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8868686868686869, |
| "grad_norm": 0.4443415105342865, |
| "learning_rate": 1.1313131313131315e-06, |
| "loss": 1.1126, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.4424268901348114, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.1256, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8909090909090909, |
| "grad_norm": 0.5184109210968018, |
| "learning_rate": 1.090909090909091e-06, |
| "loss": 1.1522, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8929292929292929, |
| "grad_norm": 0.4386366903781891, |
| "learning_rate": 1.0707070707070707e-06, |
| "loss": 1.1185, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8949494949494949, |
| "grad_norm": 0.5085979104042053, |
| "learning_rate": 1.0505050505050506e-06, |
| "loss": 1.1811, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.896969696969697, |
| "grad_norm": 0.470874547958374, |
| "learning_rate": 1.0303030303030304e-06, |
| "loss": 1.1656, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.898989898989899, |
| "grad_norm": 0.43742021918296814, |
| "learning_rate": 1.01010101010101e-06, |
| "loss": 1.1332, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.901010101010101, |
| "grad_norm": 0.44026342034339905, |
| "learning_rate": 9.8989898989899e-07, |
| "loss": 1.1545, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9030303030303031, |
| "grad_norm": 0.43544185161590576, |
| "learning_rate": 9.696969696969698e-07, |
| "loss": 1.1657, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.9050505050505051, |
| "grad_norm": 0.447896808385849, |
| "learning_rate": 9.494949494949496e-07, |
| "loss": 1.1187, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.907070707070707, |
| "grad_norm": 0.6337141394615173, |
| "learning_rate": 9.292929292929294e-07, |
| "loss": 1.1655, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.4487914443016052, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 1.1191, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9111111111111111, |
| "grad_norm": 0.44377103447914124, |
| "learning_rate": 8.88888888888889e-07, |
| "loss": 1.154, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9131313131313131, |
| "grad_norm": 0.4307768940925598, |
| "learning_rate": 8.686868686868688e-07, |
| "loss": 1.124, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.9151515151515152, |
| "grad_norm": 0.4362584054470062, |
| "learning_rate": 8.484848484848486e-07, |
| "loss": 1.1485, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9171717171717172, |
| "grad_norm": 0.4378771483898163, |
| "learning_rate": 8.282828282828284e-07, |
| "loss": 1.1172, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9191919191919192, |
| "grad_norm": 0.4427582919597626, |
| "learning_rate": 8.080808080808082e-07, |
| "loss": 1.1878, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9212121212121213, |
| "grad_norm": 0.4444504380226135, |
| "learning_rate": 7.878787878787879e-07, |
| "loss": 1.151, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9232323232323232, |
| "grad_norm": 0.43979477882385254, |
| "learning_rate": 7.676767676767677e-07, |
| "loss": 1.1231, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9252525252525252, |
| "grad_norm": 0.42842262983322144, |
| "learning_rate": 7.474747474747475e-07, |
| "loss": 1.1246, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9272727272727272, |
| "grad_norm": 0.4698165953159332, |
| "learning_rate": 7.272727272727273e-07, |
| "loss": 1.1578, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9292929292929293, |
| "grad_norm": 0.4842698574066162, |
| "learning_rate": 7.070707070707071e-07, |
| "loss": 1.1334, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9313131313131313, |
| "grad_norm": 0.4647423028945923, |
| "learning_rate": 6.868686868686869e-07, |
| "loss": 1.0744, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.43209952116012573, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 1.1157, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9353535353535354, |
| "grad_norm": 0.48932209610939026, |
| "learning_rate": 6.464646464646465e-07, |
| "loss": 1.1779, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9373737373737374, |
| "grad_norm": 0.4362215995788574, |
| "learning_rate": 6.262626262626264e-07, |
| "loss": 1.1144, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9393939393939394, |
| "grad_norm": 0.4569753408432007, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 1.1489, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9414141414141414, |
| "grad_norm": 0.4431014358997345, |
| "learning_rate": 5.858585858585859e-07, |
| "loss": 1.1146, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9434343434343434, |
| "grad_norm": 0.47941043972969055, |
| "learning_rate": 5.656565656565658e-07, |
| "loss": 1.0851, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9454545454545454, |
| "grad_norm": 0.4418059289455414, |
| "learning_rate": 5.454545454545455e-07, |
| "loss": 1.1051, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9474747474747475, |
| "grad_norm": 0.42367491126060486, |
| "learning_rate": 5.252525252525253e-07, |
| "loss": 1.1348, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9494949494949495, |
| "grad_norm": 0.46153539419174194, |
| "learning_rate": 5.05050505050505e-07, |
| "loss": 1.1295, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9515151515151515, |
| "grad_norm": 0.43902987241744995, |
| "learning_rate": 4.848484848484849e-07, |
| "loss": 1.1583, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9535353535353536, |
| "grad_norm": 0.4670887887477875, |
| "learning_rate": 4.646464646464647e-07, |
| "loss": 1.1267, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9555555555555556, |
| "grad_norm": 0.4860509932041168, |
| "learning_rate": 4.444444444444445e-07, |
| "loss": 1.1155, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9575757575757575, |
| "grad_norm": 0.4374881386756897, |
| "learning_rate": 4.242424242424243e-07, |
| "loss": 1.15, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9595959595959596, |
| "grad_norm": 0.44016000628471375, |
| "learning_rate": 4.040404040404041e-07, |
| "loss": 1.1399, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9616161616161616, |
| "grad_norm": 0.44191956520080566, |
| "learning_rate": 3.838383838383838e-07, |
| "loss": 1.1307, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9636363636363636, |
| "grad_norm": 0.45141059160232544, |
| "learning_rate": 3.6363636363636366e-07, |
| "loss": 1.1289, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9656565656565657, |
| "grad_norm": 0.4429006278514862, |
| "learning_rate": 3.4343434343434344e-07, |
| "loss": 1.1461, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9676767676767677, |
| "grad_norm": 0.4396512806415558, |
| "learning_rate": 3.2323232323232327e-07, |
| "loss": 1.1487, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.44995373487472534, |
| "learning_rate": 3.0303030303030305e-07, |
| "loss": 1.1547, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9717171717171718, |
| "grad_norm": 0.5054865479469299, |
| "learning_rate": 2.828282828282829e-07, |
| "loss": 1.1612, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9737373737373738, |
| "grad_norm": 0.45184507966041565, |
| "learning_rate": 2.6262626262626266e-07, |
| "loss": 1.1788, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9757575757575757, |
| "grad_norm": 0.6350483298301697, |
| "learning_rate": 2.4242424242424244e-07, |
| "loss": 1.0855, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 0.443535178899765, |
| "learning_rate": 2.2222222222222224e-07, |
| "loss": 1.1296, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9797979797979798, |
| "grad_norm": 0.45181265473365784, |
| "learning_rate": 2.0202020202020205e-07, |
| "loss": 1.1336, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9818181818181818, |
| "grad_norm": 0.4365933835506439, |
| "learning_rate": 1.8181818181818183e-07, |
| "loss": 1.1624, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9838383838383838, |
| "grad_norm": 0.43410101532936096, |
| "learning_rate": 1.6161616161616163e-07, |
| "loss": 1.1653, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9858585858585859, |
| "grad_norm": 0.43480247259140015, |
| "learning_rate": 1.4141414141414144e-07, |
| "loss": 1.1541, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9878787878787879, |
| "grad_norm": 0.5730026960372925, |
| "learning_rate": 1.2121212121212122e-07, |
| "loss": 1.1526, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.98989898989899, |
| "grad_norm": 0.43849003314971924, |
| "learning_rate": 1.0101010101010103e-07, |
| "loss": 1.1648, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9919191919191919, |
| "grad_norm": 0.49526941776275635, |
| "learning_rate": 8.080808080808082e-08, |
| "loss": 1.1705, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9939393939393939, |
| "grad_norm": 0.4443527162075043, |
| "learning_rate": 6.060606060606061e-08, |
| "loss": 1.1405, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9959595959595959, |
| "grad_norm": 0.437156617641449, |
| "learning_rate": 4.040404040404041e-08, |
| "loss": 1.1135, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.997979797979798, |
| "grad_norm": 0.4385826885700226, |
| "learning_rate": 2.0202020202020204e-08, |
| "loss": 1.1368, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.44586920738220215, |
| "learning_rate": 0.0, |
| "loss": 1.122, |
| "step": 495 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 495, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.725088714939433e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|