| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.45023865498325855, |
| "eval_steps": 1580, |
| "global_step": 1580, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000284961174040037, |
| "grad_norm": 29.125, |
| "learning_rate": 0.0, |
| "loss": 2.6234, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.000569922348080074, |
| "grad_norm": 21.5, |
| "learning_rate": 6.329113924050633e-07, |
| "loss": 2.6285, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0008548835221201111, |
| "grad_norm": 23.5, |
| "learning_rate": 1.2658227848101265e-06, |
| "loss": 2.6019, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.001139844696160148, |
| "grad_norm": 19.75, |
| "learning_rate": 1.8987341772151901e-06, |
| "loss": 2.5364, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0014248058702001853, |
| "grad_norm": 14.5625, |
| "learning_rate": 2.531645569620253e-06, |
| "loss": 2.2591, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0017097670442402222, |
| "grad_norm": 16.5, |
| "learning_rate": 3.1645569620253167e-06, |
| "loss": 2.2259, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.001994728218280259, |
| "grad_norm": 30.375, |
| "learning_rate": 3.7974683544303802e-06, |
| "loss": 2.757, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.002279689392320296, |
| "grad_norm": 16.375, |
| "learning_rate": 4.430379746835443e-06, |
| "loss": 2.431, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0025646505663603335, |
| "grad_norm": 15.5625, |
| "learning_rate": 5.063291139240506e-06, |
| "loss": 2.3019, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0028496117404003705, |
| "grad_norm": 13.75, |
| "learning_rate": 5.69620253164557e-06, |
| "loss": 2.2385, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0031345729144404075, |
| "grad_norm": 12.8125, |
| "learning_rate": 6.329113924050633e-06, |
| "loss": 2.3134, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0034195340884804444, |
| "grad_norm": 12.875, |
| "learning_rate": 6.9620253164556965e-06, |
| "loss": 2.3087, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0037044952625204814, |
| "grad_norm": 11.0625, |
| "learning_rate": 7.5949367088607605e-06, |
| "loss": 1.9818, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.003989456436560518, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.227848101265822e-06, |
| "loss": 2.0041, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.004274417610600555, |
| "grad_norm": 9.4375, |
| "learning_rate": 8.860759493670886e-06, |
| "loss": 2.0196, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004559378784640592, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.49367088607595e-06, |
| "loss": 1.8285, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00484433995868063, |
| "grad_norm": 7.4375, |
| "learning_rate": 1.0126582278481012e-05, |
| "loss": 1.9578, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.005129301132720667, |
| "grad_norm": 5.625, |
| "learning_rate": 1.0759493670886076e-05, |
| "loss": 1.8962, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.005414262306760704, |
| "grad_norm": 4.125, |
| "learning_rate": 1.139240506329114e-05, |
| "loss": 1.7924, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.005699223480800741, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.2025316455696203e-05, |
| "loss": 1.718, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.005984184654840778, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.2658227848101267e-05, |
| "loss": 1.6582, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.006269145828880815, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.3291139240506329e-05, |
| "loss": 1.6866, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.006554107002920852, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.3924050632911393e-05, |
| "loss": 1.7584, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.006839068176960889, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.4556962025316457e-05, |
| "loss": 1.6102, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.007124029351000926, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.5189873417721521e-05, |
| "loss": 1.6869, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.007408990525040963, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.5822784810126583e-05, |
| "loss": 1.6164, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.007693951699081001, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.6455696202531644e-05, |
| "loss": 1.7301, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.007978912873121037, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.7088607594936708e-05, |
| "loss": 1.3851, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.008263874047161075, |
| "grad_norm": 1.625, |
| "learning_rate": 1.7721518987341772e-05, |
| "loss": 1.6785, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.00854883522120111, |
| "grad_norm": 1.4765625, |
| "learning_rate": 1.8354430379746836e-05, |
| "loss": 1.3419, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008833796395241148, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.89873417721519e-05, |
| "loss": 1.4765, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.009118757569281185, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.962025316455696e-05, |
| "loss": 1.3954, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.009403718743321222, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.0253164556962025e-05, |
| "loss": 1.576, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00968867991736126, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.088607594936709e-05, |
| "loss": 1.4133, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.009973641091401296, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.1518987341772153e-05, |
| "loss": 1.6026, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.010258602265441334, |
| "grad_norm": 1.5859375, |
| "learning_rate": 2.2151898734177217e-05, |
| "loss": 1.6557, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01054356343948137, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.278481012658228e-05, |
| "loss": 1.5608, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.010828524613521408, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.341772151898734e-05, |
| "loss": 1.4184, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.011113485787561444, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.4050632911392405e-05, |
| "loss": 1.6377, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.011398446961601482, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.468354430379747e-05, |
| "loss": 1.3586, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011683408135641518, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.5316455696202533e-05, |
| "loss": 1.4761, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.011968369309681556, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.5949367088607597e-05, |
| "loss": 1.3997, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.012253330483721592, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.6582278481012658e-05, |
| "loss": 1.5065, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01253829165776163, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.7215189873417722e-05, |
| "loss": 1.3738, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.012823252831801668, |
| "grad_norm": 1.25, |
| "learning_rate": 2.7848101265822786e-05, |
| "loss": 1.4361, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.013108214005841704, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.848101265822785e-05, |
| "loss": 1.3365, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.013393175179881742, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.9113924050632914e-05, |
| "loss": 1.3863, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.013678136353921778, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.9746835443037974e-05, |
| "loss": 1.465, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.013963097527961816, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.0379746835443042e-05, |
| "loss": 1.5108, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.014248058702001852, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.10126582278481e-05, |
| "loss": 1.3891, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01453301987604189, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.1645569620253167e-05, |
| "loss": 1.285, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.014817981050081926, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.227848101265823e-05, |
| "loss": 1.3574, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.015102942224121963, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.291139240506329e-05, |
| "loss": 1.3037, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.015387903398162001, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.354430379746836e-05, |
| "loss": 1.3654, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.015672864572202037, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.4177215189873416e-05, |
| "loss": 1.4181, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.015957825746242073, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.4810126582278487e-05, |
| "loss": 1.2465, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.016242786920282113, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.5443037974683544e-05, |
| "loss": 1.29, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.01652774809432215, |
| "grad_norm": 1.25, |
| "learning_rate": 3.607594936708861e-05, |
| "loss": 1.4819, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.016812709268362185, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.670886075949367e-05, |
| "loss": 1.2337, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.01709767044240222, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.7341772151898736e-05, |
| "loss": 1.4545, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01738263161644226, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.79746835443038e-05, |
| "loss": 1.4053, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.017667592790482297, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.8607594936708864e-05, |
| "loss": 1.3166, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.017952553964522333, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.924050632911392e-05, |
| "loss": 1.495, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.01823751513856237, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.987341772151899e-05, |
| "loss": 1.3391, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.01852247631260241, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.050632911392405e-05, |
| "loss": 1.3754, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.018807437486642445, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.113924050632912e-05, |
| "loss": 1.35, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.01909239866068248, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.177215189873418e-05, |
| "loss": 1.359, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.01937735983472252, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.240506329113924e-05, |
| "loss": 1.3325, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.019662321008762557, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.3037974683544305e-05, |
| "loss": 1.4202, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.019947282182802593, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.367088607594937e-05, |
| "loss": 1.2194, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02023224335684263, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.430379746835443e-05, |
| "loss": 1.4287, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02051720453088267, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.49367088607595e-05, |
| "loss": 1.2697, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.020802165704922704, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.556962025316456e-05, |
| "loss": 1.4088, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02108712687896274, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.6202531645569625e-05, |
| "loss": 1.3963, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.021372088053002777, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.683544303797468e-05, |
| "loss": 1.5456, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.021657049227042816, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.7468354430379746e-05, |
| "loss": 1.2655, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.021942010401082852, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.810126582278481e-05, |
| "loss": 1.3796, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02222697157512289, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.8734177215189874e-05, |
| "loss": 1.2177, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.022511932749162928, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.936708860759494e-05, |
| "loss": 1.5553, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.022796893923202964, |
| "grad_norm": 1.1015625, |
| "learning_rate": 5e-05, |
| "loss": 1.3957, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.023081855097243, |
| "grad_norm": 1.15625, |
| "learning_rate": 5.0632911392405066e-05, |
| "loss": 1.4037, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.023366816271283036, |
| "grad_norm": 1.0390625, |
| "learning_rate": 5.1265822784810124e-05, |
| "loss": 1.2413, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.023651777445323076, |
| "grad_norm": 0.98828125, |
| "learning_rate": 5.1898734177215194e-05, |
| "loss": 1.2343, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.023936738619363112, |
| "grad_norm": 0.9453125, |
| "learning_rate": 5.253164556962026e-05, |
| "loss": 1.148, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.024221699793403148, |
| "grad_norm": 1.0859375, |
| "learning_rate": 5.3164556962025316e-05, |
| "loss": 1.5004, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.024506660967443184, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.379746835443038e-05, |
| "loss": 1.2534, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.024791622141483224, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.4430379746835444e-05, |
| "loss": 1.2575, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.02507658331552326, |
| "grad_norm": 1.0859375, |
| "learning_rate": 5.5063291139240514e-05, |
| "loss": 1.2185, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.025361544489563296, |
| "grad_norm": 1.0078125, |
| "learning_rate": 5.569620253164557e-05, |
| "loss": 1.3606, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.025646505663603335, |
| "grad_norm": 1.0, |
| "learning_rate": 5.6329113924050636e-05, |
| "loss": 1.3193, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02593146683764337, |
| "grad_norm": 1.0546875, |
| "learning_rate": 5.69620253164557e-05, |
| "loss": 1.2805, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.026216428011683408, |
| "grad_norm": 1.1796875, |
| "learning_rate": 5.759493670886076e-05, |
| "loss": 1.433, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.026501389185723444, |
| "grad_norm": 0.94140625, |
| "learning_rate": 5.822784810126583e-05, |
| "loss": 1.1702, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.026786350359763483, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.886075949367089e-05, |
| "loss": 1.4432, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.02707131153380352, |
| "grad_norm": 1.1015625, |
| "learning_rate": 5.949367088607595e-05, |
| "loss": 1.419, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.027356272707843556, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.012658227848101e-05, |
| "loss": 1.2796, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.027641233881883595, |
| "grad_norm": 1.1484375, |
| "learning_rate": 6.0759493670886084e-05, |
| "loss": 1.2824, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.02792619505592363, |
| "grad_norm": 1.0234375, |
| "learning_rate": 6.139240506329115e-05, |
| "loss": 1.3208, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.028211156229963667, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.20253164556962e-05, |
| "loss": 1.2591, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.028496117404003703, |
| "grad_norm": 0.984375, |
| "learning_rate": 6.265822784810128e-05, |
| "loss": 1.185, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.028781078578043743, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.329113924050633e-05, |
| "loss": 1.3322, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.02906603975208378, |
| "grad_norm": 1.0703125, |
| "learning_rate": 6.392405063291139e-05, |
| "loss": 1.4192, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.029351000926123815, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.455696202531646e-05, |
| "loss": 1.3446, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.02963596210016385, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.518987341772153e-05, |
| "loss": 1.3051, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.02992092327420389, |
| "grad_norm": 0.93359375, |
| "learning_rate": 6.582278481012658e-05, |
| "loss": 1.2271, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.030205884448243927, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.645569620253165e-05, |
| "loss": 1.3005, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.030490845622283963, |
| "grad_norm": 1.0390625, |
| "learning_rate": 6.708860759493672e-05, |
| "loss": 1.3054, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.030775806796324003, |
| "grad_norm": 0.953125, |
| "learning_rate": 6.772151898734177e-05, |
| "loss": 1.1751, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.03106076797036404, |
| "grad_norm": 1.09375, |
| "learning_rate": 6.835443037974683e-05, |
| "loss": 1.2837, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.031345729144404075, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.89873417721519e-05, |
| "loss": 1.284, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.031630690318444114, |
| "grad_norm": 1.0390625, |
| "learning_rate": 6.962025316455697e-05, |
| "loss": 1.1764, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.03191565149248415, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.025316455696203e-05, |
| "loss": 1.3151, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.032200612666524187, |
| "grad_norm": 1.0390625, |
| "learning_rate": 7.088607594936709e-05, |
| "loss": 1.3396, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.032485573840564226, |
| "grad_norm": 1.046875, |
| "learning_rate": 7.151898734177216e-05, |
| "loss": 1.3189, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03277053501460426, |
| "grad_norm": 1.03125, |
| "learning_rate": 7.215189873417722e-05, |
| "loss": 1.3304, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0330554961886443, |
| "grad_norm": 1.0, |
| "learning_rate": 7.278481012658229e-05, |
| "loss": 1.3019, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.03334045736268433, |
| "grad_norm": 0.94140625, |
| "learning_rate": 7.341772151898734e-05, |
| "loss": 1.1286, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03362541853672437, |
| "grad_norm": 1.078125, |
| "learning_rate": 7.40506329113924e-05, |
| "loss": 1.2951, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03391037971076441, |
| "grad_norm": 0.98828125, |
| "learning_rate": 7.468354430379747e-05, |
| "loss": 1.3084, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.03419534088480444, |
| "grad_norm": 0.99609375, |
| "learning_rate": 7.531645569620254e-05, |
| "loss": 1.2161, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03448030205884448, |
| "grad_norm": 1.0703125, |
| "learning_rate": 7.59493670886076e-05, |
| "loss": 1.3622, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.03476526323288452, |
| "grad_norm": 1.0859375, |
| "learning_rate": 7.658227848101266e-05, |
| "loss": 1.2539, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.035050224406924554, |
| "grad_norm": 1.1015625, |
| "learning_rate": 7.721518987341773e-05, |
| "loss": 1.3334, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.035335185580964594, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.78481012658228e-05, |
| "loss": 1.3351, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.035620146755004634, |
| "grad_norm": 1.0, |
| "learning_rate": 7.848101265822784e-05, |
| "loss": 1.2245, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.035905107929044666, |
| "grad_norm": 1.0546875, |
| "learning_rate": 7.911392405063291e-05, |
| "loss": 1.3739, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.036190069103084706, |
| "grad_norm": 0.95703125, |
| "learning_rate": 7.974683544303798e-05, |
| "loss": 1.2191, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.03647503027712474, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.037974683544304e-05, |
| "loss": 1.304, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.03675999145116478, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.10126582278481e-05, |
| "loss": 1.2241, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.03704495262520482, |
| "grad_norm": 1.03125, |
| "learning_rate": 8.164556962025317e-05, |
| "loss": 1.0792, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03732991379924485, |
| "grad_norm": 0.9609375, |
| "learning_rate": 8.227848101265824e-05, |
| "loss": 1.218, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.03761487497328489, |
| "grad_norm": 1.0703125, |
| "learning_rate": 8.29113924050633e-05, |
| "loss": 1.2508, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.03789983614732493, |
| "grad_norm": 1.03125, |
| "learning_rate": 8.354430379746835e-05, |
| "loss": 1.3057, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.03818479732136496, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.417721518987342e-05, |
| "loss": 1.3126, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.038469758495405, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.481012658227848e-05, |
| "loss": 1.3134, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.03875471966944504, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.544303797468355e-05, |
| "loss": 1.2166, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.039039680843485074, |
| "grad_norm": 1.0, |
| "learning_rate": 8.607594936708861e-05, |
| "loss": 1.2348, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.03932464201752511, |
| "grad_norm": 1.015625, |
| "learning_rate": 8.670886075949367e-05, |
| "loss": 1.1484, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.039609603191565146, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.734177215189874e-05, |
| "loss": 1.3253, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.039894564365605185, |
| "grad_norm": 1.125, |
| "learning_rate": 8.797468354430381e-05, |
| "loss": 1.3117, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.040179525539645225, |
| "grad_norm": 0.91015625, |
| "learning_rate": 8.860759493670887e-05, |
| "loss": 1.1099, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.04046448671368526, |
| "grad_norm": 1.0546875, |
| "learning_rate": 8.924050632911392e-05, |
| "loss": 1.3046, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0407494478877253, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.9873417721519e-05, |
| "loss": 1.2424, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.04103440906176534, |
| "grad_norm": 1.0546875, |
| "learning_rate": 9.050632911392407e-05, |
| "loss": 1.3855, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.04131937023580537, |
| "grad_norm": 1.1171875, |
| "learning_rate": 9.113924050632912e-05, |
| "loss": 1.2624, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04160433140984541, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.177215189873418e-05, |
| "loss": 1.2397, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.04188929258388545, |
| "grad_norm": 1.0546875, |
| "learning_rate": 9.240506329113925e-05, |
| "loss": 1.3379, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.04217425375792548, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.303797468354431e-05, |
| "loss": 1.2333, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.04245921493196552, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.367088607594936e-05, |
| "loss": 1.2861, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.04274417610600555, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.430379746835444e-05, |
| "loss": 1.1509, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04302913728004559, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.493670886075949e-05, |
| "loss": 1.0674, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.04331409845408563, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.556962025316456e-05, |
| "loss": 1.2168, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.043599059628125665, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.620253164556962e-05, |
| "loss": 1.3691, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.043884020802165705, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.683544303797469e-05, |
| "loss": 1.377, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.044168981976205744, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.746835443037975e-05, |
| "loss": 1.1519, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04445394315024578, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.810126582278482e-05, |
| "loss": 1.1929, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.044738904324285816, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.873417721518988e-05, |
| "loss": 1.2098, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.045023865498325856, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.936708860759493e-05, |
| "loss": 1.2152, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.04530882667236589, |
| "grad_norm": 0.89453125, |
| "learning_rate": 0.0001, |
| "loss": 1.143, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.04559378784640593, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.999999053963213e-05, |
| "loss": 1.183, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04587874902044596, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.999996215853209e-05, |
| "loss": 1.1637, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.046163710194486, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.999991485671061e-05, |
| "loss": 1.3169, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.04644867136852604, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.99998486341856e-05, |
| "loss": 1.216, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.04673363254256607, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.999976349098214e-05, |
| "loss": 1.3819, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.04701859371660611, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.999965942713241e-05, |
| "loss": 1.3172, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.04730355489064615, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.99995364426758e-05, |
| "loss": 1.1758, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.047588516064686184, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.999939453765888e-05, |
| "loss": 1.1493, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.047873477238726224, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.999923371213531e-05, |
| "loss": 1.2477, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.048158438412766263, |
| "grad_norm": 0.90234375, |
| "learning_rate": 9.999905396616598e-05, |
| "loss": 1.2091, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.048443399586806296, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.999885529981888e-05, |
| "loss": 1.1595, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.048728360760846336, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.999863771316922e-05, |
| "loss": 1.2418, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.04901332193488637, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.99984012062993e-05, |
| "loss": 1.1701, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.04929828310892641, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.999814577929864e-05, |
| "loss": 1.19, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.04958324428296645, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.99978714322639e-05, |
| "loss": 1.1907, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.04986820545700648, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.999757816529889e-05, |
| "loss": 1.209, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.05015316663104652, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.99972659785146e-05, |
| "loss": 1.152, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.05043812780508656, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.999693487202915e-05, |
| "loss": 1.1788, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.05072308897912659, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.999658484596782e-05, |
| "loss": 1.4722, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.05100805015316663, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.999621590046311e-05, |
| "loss": 1.3028, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.05129301132720667, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.999582803565459e-05, |
| "loss": 1.078, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.051577972501246704, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.999542125168906e-05, |
| "loss": 1.1153, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.05186293367528674, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.999499554872045e-05, |
| "loss": 1.3579, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.052147894849326776, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.999455092690985e-05, |
| "loss": 1.1063, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.052432856023366815, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.999408738642551e-05, |
| "loss": 1.1352, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.052717817197406855, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.999360492744283e-05, |
| "loss": 1.3186, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.05300277837144689, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.99931035501444e-05, |
| "loss": 1.2133, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.05328773954548693, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.999258325471994e-05, |
| "loss": 1.0683, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.05357270071952697, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.999204404136633e-05, |
| "loss": 1.2635, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.053857661893567, |
| "grad_norm": 0.875, |
| "learning_rate": 9.999148591028762e-05, |
| "loss": 1.079, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.05414262306760704, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.999090886169502e-05, |
| "loss": 1.1602, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05442758424164708, |
| "grad_norm": 0.875, |
| "learning_rate": 9.999031289580689e-05, |
| "loss": 1.2393, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.05471254541568711, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.998969801284877e-05, |
| "loss": 1.2645, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.05499750658972715, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.99890642130533e-05, |
| "loss": 1.1608, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.05528246776376719, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.998841149666036e-05, |
| "loss": 1.1396, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.05556742893780722, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.998773986391692e-05, |
| "loss": 0.9592, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.05585239011184726, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.998704931507716e-05, |
| "loss": 1.0386, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.056137351285887295, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.998633985040235e-05, |
| "loss": 1.0778, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.056422312459927335, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.998561147016103e-05, |
| "loss": 1.1591, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.056707273633967374, |
| "grad_norm": 0.90234375, |
| "learning_rate": 9.998486417462879e-05, |
| "loss": 1.2099, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.05699223480800741, |
| "grad_norm": 1.0, |
| "learning_rate": 9.998409796408839e-05, |
| "loss": 1.2088, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.057277195982047446, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.998331283882981e-05, |
| "loss": 1.0929, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.057562157156087486, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.998250879915017e-05, |
| "loss": 1.2388, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.05784711833012752, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.998168584535368e-05, |
| "loss": 1.1363, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.05813207950416756, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.998084397775181e-05, |
| "loss": 1.3204, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.0584170406782076, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.997998319666311e-05, |
| "loss": 1.159, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.05870200185224763, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.997910350241329e-05, |
| "loss": 1.2258, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.05898696302628767, |
| "grad_norm": 0.875, |
| "learning_rate": 9.997820489533529e-05, |
| "loss": 1.1889, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0592719242003277, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.997728737576912e-05, |
| "loss": 1.11, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.05955688537436774, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.997635094406198e-05, |
| "loss": 1.2151, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.05984184654840778, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.997539560056826e-05, |
| "loss": 1.0552, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.060126807722447814, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.997442134564944e-05, |
| "loss": 1.1783, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.060411768896487854, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.997342817967421e-05, |
| "loss": 1.1427, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.06069673007052789, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.997241610301841e-05, |
| "loss": 1.2496, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.060981691244567926, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.997138511606501e-05, |
| "loss": 1.293, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.061266652418607966, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.997033521920415e-05, |
| "loss": 1.2313, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.061551613592648005, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.996926641283314e-05, |
| "loss": 1.1479, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.06183657476668804, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.996817869735642e-05, |
| "loss": 1.0872, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.06212153594072808, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.996707207318558e-05, |
| "loss": 1.2486, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.06240649711476811, |
| "grad_norm": 1.0, |
| "learning_rate": 9.996594654073943e-05, |
| "loss": 1.2349, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.06269145828880815, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.996480210044384e-05, |
| "loss": 1.2223, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06297641946284818, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.996363875273192e-05, |
| "loss": 1.2541, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.06326138063688823, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.996245649804386e-05, |
| "loss": 1.2676, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.06354634181092826, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.996125533682708e-05, |
| "loss": 1.1631, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0638313029849683, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.99600352695361e-05, |
| "loss": 1.1795, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.06411626415900834, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.99587962966326e-05, |
| "loss": 1.1826, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.06440122533304837, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.995753841858546e-05, |
| "loss": 1.0396, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0646861865070884, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.995626163587065e-05, |
| "loss": 1.2267, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.06497114768112845, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.995496594897132e-05, |
| "loss": 1.2575, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.06525610885516848, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.99536513583778e-05, |
| "loss": 1.2338, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.06554107002920852, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.995231786458754e-05, |
| "loss": 1.1742, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06582603120324856, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.995096546810514e-05, |
| "loss": 1.28, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0661109923772886, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.994959416944238e-05, |
| "loss": 1.3021, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.06639595355132863, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.994820396911819e-05, |
| "loss": 1.1945, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.06668091472536866, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.994679486765863e-05, |
| "loss": 1.3251, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.06696587589940871, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.994536686559692e-05, |
| "loss": 1.1902, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.06725083707344874, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.994391996347344e-05, |
| "loss": 1.0474, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.06753579824748877, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.994245416183572e-05, |
| "loss": 1.125, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.06782075942152882, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.994096946123846e-05, |
| "loss": 1.116, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.06810572059556885, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.993946586224346e-05, |
| "loss": 1.1467, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.06839068176960889, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.993794336541972e-05, |
| "loss": 1.0831, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06867564294364893, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.993640197134338e-05, |
| "loss": 1.1449, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.06896060411768896, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.99348416805977e-05, |
| "loss": 1.0175, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.069245565291729, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.993326249377316e-05, |
| "loss": 1.1124, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.06953052646576904, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.993166441146732e-05, |
| "loss": 1.1746, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.06981548763980908, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.993004743428491e-05, |
| "loss": 1.1883, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.07010044881384911, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.992841156283786e-05, |
| "loss": 1.0455, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.07038540998788916, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.992675679774515e-05, |
| "loss": 1.2231, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.07067037116192919, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.9925083139633e-05, |
| "loss": 1.1612, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.07095533233596922, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.992339058913475e-05, |
| "loss": 1.1999, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.07124029351000927, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.992167914689087e-05, |
| "loss": 1.1173, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0715252546840493, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.991994881354903e-05, |
| "loss": 1.0398, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.07181021585808933, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.991819958976396e-05, |
| "loss": 1.1054, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.07209517703212938, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.991643147619762e-05, |
| "loss": 1.0642, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.07238013820616941, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.99146444735191e-05, |
| "loss": 1.1822, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.07266509938020944, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.991283858240462e-05, |
| "loss": 1.1526, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.07295006055424948, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.991101380353756e-05, |
| "loss": 1.2147, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.07323502172828952, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.990917013760841e-05, |
| "loss": 1.2225, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.07351998290232956, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.990730758531489e-05, |
| "loss": 1.0918, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.07380494407636959, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.990542614736178e-05, |
| "loss": 0.9888, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.07408990525040964, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.990352582446108e-05, |
| "loss": 1.03, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07437486642444967, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.990160661733185e-05, |
| "loss": 1.1679, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.0746598275984897, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.989966852670039e-05, |
| "loss": 1.1835, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.07494478877252975, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.989771155330008e-05, |
| "loss": 1.1622, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.07522974994656978, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.989573569787148e-05, |
| "loss": 1.2752, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.07551471112060981, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.989374096116229e-05, |
| "loss": 1.2444, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.07579967229464986, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.989172734392731e-05, |
| "loss": 1.0388, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.07608463346868989, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.988969484692855e-05, |
| "loss": 1.1025, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.07636959464272992, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.988764347093515e-05, |
| "loss": 1.1958, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.07665455581676997, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.988557321672334e-05, |
| "loss": 1.1299, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.07693951699081, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.988348408507657e-05, |
| "loss": 1.1703, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07722447816485004, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.988137607678541e-05, |
| "loss": 1.2471, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.07750943933889008, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.987924919264751e-05, |
| "loss": 1.1829, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.07779440051293011, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.987710343346774e-05, |
| "loss": 1.187, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.07807936168697015, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.987493880005812e-05, |
| "loss": 1.0768, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0783643228610102, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.987275529323772e-05, |
| "loss": 1.0325, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.07864928403505023, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.987055291383285e-05, |
| "loss": 1.0838, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.07893424520909026, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.986833166267691e-05, |
| "loss": 1.2686, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.07921920638313029, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.986609154061047e-05, |
| "loss": 1.1666, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.07950416755717034, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.98638325484812e-05, |
| "loss": 1.2386, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.07978912873121037, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.986155468714394e-05, |
| "loss": 1.1716, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0800740899052504, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.985925795746068e-05, |
| "loss": 1.1143, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.08035905107929045, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.985694236030054e-05, |
| "loss": 1.0497, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.08064401225333048, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.985460789653976e-05, |
| "loss": 1.0611, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.08092897342737052, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.985225456706174e-05, |
| "loss": 1.1523, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.08121393460141056, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.984988237275703e-05, |
| "loss": 1.1303, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.0814988957754506, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.984749131452327e-05, |
| "loss": 1.0736, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.08178385694949063, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.98450813932653e-05, |
| "loss": 1.1636, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.08206881812353067, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.984265260989506e-05, |
| "loss": 1.2484, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0823537792975707, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.984020496533165e-05, |
| "loss": 1.0472, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.08263874047161074, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.983773846050126e-05, |
| "loss": 1.2355, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08292370164565079, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.983525309633729e-05, |
| "loss": 1.1402, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.08320866281969082, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.983274887378022e-05, |
| "loss": 1.1719, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.08349362399373085, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.983022579377768e-05, |
| "loss": 1.1649, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.0837785851677709, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.982768385728446e-05, |
| "loss": 1.2616, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.08406354634181093, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.982512306526245e-05, |
| "loss": 1.2156, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.08434850751585096, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.98225434186807e-05, |
| "loss": 1.3357, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.08463346868989101, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.981994491851537e-05, |
| "loss": 1.3112, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.08491842986393104, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.981732756574979e-05, |
| "loss": 1.2327, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.08520339103797107, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.98146913613744e-05, |
| "loss": 1.242, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.0854883522120111, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.981203630638678e-05, |
| "loss": 1.1656, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08577331338605115, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.980936240179163e-05, |
| "loss": 1.1681, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.08605827456009119, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.980666964860081e-05, |
| "loss": 0.935, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.08634323573413122, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.980395804783329e-05, |
| "loss": 1.071, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.08662819690817126, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.980122760051518e-05, |
| "loss": 1.0744, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0869131580822113, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.979847830767971e-05, |
| "loss": 1.1554, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.08719811925625133, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.979571017036727e-05, |
| "loss": 1.1006, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.08748308043029138, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.979292318962537e-05, |
| "loss": 1.1425, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.08776804160433141, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.979011736650862e-05, |
| "loss": 1.0564, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.08805300277837144, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.97872927020788e-05, |
| "loss": 1.2216, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.08833796395241149, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.97844491974048e-05, |
| "loss": 1.0732, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.08862292512645152, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.978158685356265e-05, |
| "loss": 0.9132, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.08890788630049155, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.97787056716355e-05, |
| "loss": 1.1084, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.0891928474745316, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.977580565271362e-05, |
| "loss": 1.1568, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.08947780864857163, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.977288679789446e-05, |
| "loss": 1.1736, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.08976276982261167, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.976994910828249e-05, |
| "loss": 1.1166, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.09004773099665171, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.976699258498943e-05, |
| "loss": 1.0088, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.09033269217069174, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.976401722913406e-05, |
| "loss": 1.2103, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.09061765334473178, |
| "grad_norm": 0.75, |
| "learning_rate": 9.976102304184229e-05, |
| "loss": 1.1212, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.09090261451877182, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.975801002424715e-05, |
| "loss": 1.2837, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.09118757569281186, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.975497817748886e-05, |
| "loss": 1.1607, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.09147253686685189, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.975192750271467e-05, |
| "loss": 1.1187, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.09175749804089192, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.9748858001079e-05, |
| "loss": 1.2933, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.09204245921493197, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.974576967374343e-05, |
| "loss": 1.0885, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.092327420388972, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.97426625218766e-05, |
| "loss": 1.286, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.09261238156301203, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.97395365466543e-05, |
| "loss": 1.1598, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.09289734273705208, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.973639174925946e-05, |
| "loss": 1.0932, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.09318230391109211, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.973322813088211e-05, |
| "loss": 1.2748, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.09346726508513215, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.973004569271942e-05, |
| "loss": 1.1366, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.09375222625917219, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.972684443597565e-05, |
| "loss": 1.1482, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.09403718743321222, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.972362436186223e-05, |
| "loss": 1.191, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.09432214860725226, |
| "grad_norm": 0.75, |
| "learning_rate": 9.972038547159765e-05, |
| "loss": 1.135, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.0946071097812923, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.97171277664076e-05, |
| "loss": 1.0789, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.09489207095533234, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.971385124752479e-05, |
| "loss": 1.1557, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.09517703212937237, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.971055591618915e-05, |
| "loss": 1.0509, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.09546199330341242, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.970724177364762e-05, |
| "loss": 1.0501, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.09574695447745245, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.970390882115442e-05, |
| "loss": 1.0781, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.09603191565149248, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.970055705997069e-05, |
| "loss": 1.1743, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.09631687682553253, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.969718649136484e-05, |
| "loss": 1.1001, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.09660183799957256, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.969379711661232e-05, |
| "loss": 1.1502, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.09688679917361259, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.969038893699573e-05, |
| "loss": 1.0567, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.09717176034765264, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.968696195380479e-05, |
| "loss": 1.1007, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.09745672152169267, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.968351616833626e-05, |
| "loss": 1.1599, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.0977416826957327, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.968005158189415e-05, |
| "loss": 1.0283, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.09802664386977274, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.967656819578948e-05, |
| "loss": 1.1361, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.09831160504381278, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.967306601134042e-05, |
| "loss": 1.1803, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.09859656621785282, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.966954502987222e-05, |
| "loss": 1.1143, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.09888152739189285, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.966600525271732e-05, |
| "loss": 1.0432, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0991664885659329, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.966244668121518e-05, |
| "loss": 1.2893, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.09945144973997293, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.965886931671245e-05, |
| "loss": 1.0628, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.09973641091401296, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.965527316056282e-05, |
| "loss": 0.9833, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.100021372088053, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.965165821412716e-05, |
| "loss": 1.1057, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.10030633326209304, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.964802447877341e-05, |
| "loss": 1.1369, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.10059129443613307, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.964437195587662e-05, |
| "loss": 0.9499, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.10087625561017312, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.964070064681897e-05, |
| "loss": 1.1867, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.10116121678421315, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.963701055298972e-05, |
| "loss": 1.0414, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.10144617795825318, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.963330167578529e-05, |
| "loss": 1.1084, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.10173113913229323, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.962957401660915e-05, |
| "loss": 1.1254, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.10201610030633326, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.96258275768719e-05, |
| "loss": 1.1325, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1023010614803733, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.962206235799124e-05, |
| "loss": 1.1208, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.10258602265441334, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.961827836139201e-05, |
| "loss": 1.1003, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.10287098382845337, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.96144755885061e-05, |
| "loss": 1.1071, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.10315594500249341, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.961065404077257e-05, |
| "loss": 1.0503, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.10344090617653345, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.960681371963751e-05, |
| "loss": 1.2441, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.10372586735057349, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.960295462655418e-05, |
| "loss": 1.0898, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.10401082852461352, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.959907676298293e-05, |
| "loss": 1.2068, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.10429578969865355, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.959518013039118e-05, |
| "loss": 1.1186, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1045807508726936, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.959126473025347e-05, |
| "loss": 1.1527, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.10486571204673363, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.958733056405143e-05, |
| "loss": 1.0711, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.10515067322077366, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.958337763327385e-05, |
| "loss": 1.0928, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.10543563439481371, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.957940593941655e-05, |
| "loss": 1.0705, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.10572059556885374, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.957541548398249e-05, |
| "loss": 0.9933, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.10600555674289378, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.957140626848169e-05, |
| "loss": 1.1651, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.10629051791693382, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.956737829443132e-05, |
| "loss": 1.1076, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.10657547909097385, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.956333156335564e-05, |
| "loss": 1.1624, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.10686044026501389, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.955926607678596e-05, |
| "loss": 0.9549, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.10714540143905393, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.955518183626073e-05, |
| "loss": 1.0584, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.10743036261309397, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.955107884332549e-05, |
| "loss": 1.1332, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.107715323787134, |
| "grad_norm": 0.75, |
| "learning_rate": 9.954695709953287e-05, |
| "loss": 1.0829, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.10800028496117405, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.95428166064426e-05, |
| "loss": 1.0994, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.10828524613521408, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.953865736562151e-05, |
| "loss": 1.0907, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.10857020730925411, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.953447937864351e-05, |
| "loss": 1.1457, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.10885516848329416, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.953028264708962e-05, |
| "loss": 0.93, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.10914012965733419, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.952606717254793e-05, |
| "loss": 1.1132, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.10942509083137422, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.952183295661365e-05, |
| "loss": 1.0453, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.10971005200541427, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.951758000088906e-05, |
| "loss": 1.0444, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.1099950131794543, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.951330830698356e-05, |
| "loss": 0.9644, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.11027997435349433, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.950901787651358e-05, |
| "loss": 1.0284, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.11056493552753438, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.950470871110274e-05, |
| "loss": 1.0884, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.11084989670157441, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.950038081238166e-05, |
| "loss": 1.1775, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.11113485787561445, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.949603418198808e-05, |
| "loss": 1.0584, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.11141981904965448, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.949166882156681e-05, |
| "loss": 1.0921, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.11170478022369452, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.948728473276982e-05, |
| "loss": 1.0868, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.11198974139773456, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.948288191725607e-05, |
| "loss": 1.0989, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.11227470257177459, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.947846037669166e-05, |
| "loss": 1.1235, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.11255966374581464, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.947402011274977e-05, |
| "loss": 1.0322, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.11284462491985467, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.946956112711066e-05, |
| "loss": 1.1093, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1131295860938947, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.946508342146168e-05, |
| "loss": 1.0425, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.11341454726793475, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.946058699749725e-05, |
| "loss": 1.1546, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.11369950844197478, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.945607185691887e-05, |
| "loss": 1.1101, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.11398446961601481, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.945153800143518e-05, |
| "loss": 1.1504, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11426943079005486, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.944698543276184e-05, |
| "loss": 1.1959, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.11455439196409489, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.944241415262157e-05, |
| "loss": 1.1331, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.11483935313813493, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.943782416274425e-05, |
| "loss": 0.9313, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.11512431431217497, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.94332154648668e-05, |
| "loss": 1.0915, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.115409275486215, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.94285880607332e-05, |
| "loss": 1.1251, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.11569423666025504, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.942394195209454e-05, |
| "loss": 1.1683, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.11597919783429508, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.941927714070897e-05, |
| "loss": 1.1182, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.11626415900833512, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.941459362834173e-05, |
| "loss": 1.0603, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.11654912018237515, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.940989141676512e-05, |
| "loss": 0.9558, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1168340813564152, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.940517050775852e-05, |
| "loss": 1.0887, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.11711904253045523, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.940043090310843e-05, |
| "loss": 1.0875, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.11740400370449526, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.939567260460835e-05, |
| "loss": 1.0975, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1176889648785353, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.93908956140589e-05, |
| "loss": 1.0867, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.11797392605257534, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.938609993326776e-05, |
| "loss": 1.0554, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.11825888722661537, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.938128556404969e-05, |
| "loss": 0.9474, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1185438484006554, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.937645250822652e-05, |
| "loss": 0.9967, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.11882880957469545, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.937160076762714e-05, |
| "loss": 1.0917, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.11911377074873548, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.936673034408752e-05, |
| "loss": 1.1522, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.11939873192277552, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.936184123945073e-05, |
| "loss": 1.0427, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.11968369309681556, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.935693345556685e-05, |
| "loss": 1.0779, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1199686542708556, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.935200699429305e-05, |
| "loss": 1.1378, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.12025361544489563, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.93470618574936e-05, |
| "loss": 1.0899, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.12053857661893567, |
| "grad_norm": 0.75, |
| "learning_rate": 9.93420980470398e-05, |
| "loss": 1.1852, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.12082353779297571, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.933711556481003e-05, |
| "loss": 1.0719, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.12110849896701574, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.933211441268972e-05, |
| "loss": 1.1908, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.12139346014105579, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.932709459257141e-05, |
| "loss": 0.9042, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.12167842131509582, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.932205610635465e-05, |
| "loss": 1.092, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.12196338248913585, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.93169989559461e-05, |
| "loss": 1.1415, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1222483436631759, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.931192314325944e-05, |
| "loss": 1.1678, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.12253330483721593, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.930682867021543e-05, |
| "loss": 1.1055, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.12281826601125596, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.930171553874192e-05, |
| "loss": 0.9951, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.12310322718529601, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.929658375077376e-05, |
| "loss": 1.0517, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.12338818835933604, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.929143330825291e-05, |
| "loss": 1.2472, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.12367314953337608, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.928626421312838e-05, |
| "loss": 1.0593, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.12395811070741611, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.928107646735622e-05, |
| "loss": 1.1089, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.12424307188145615, |
| "grad_norm": 0.75, |
| "learning_rate": 9.927587007289955e-05, |
| "loss": 1.1667, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.12452803305549619, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.927064503172857e-05, |
| "loss": 1.1203, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.12481299422953622, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.926540134582048e-05, |
| "loss": 1.1971, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.12509795540357627, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.926013901715958e-05, |
| "loss": 1.1693, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.1253829165776163, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.925485804773721e-05, |
| "loss": 0.8981, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.12566787775165633, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.924955843955177e-05, |
| "loss": 1.0178, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.12595283892569636, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.924424019460872e-05, |
| "loss": 0.942, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.12623780009973642, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.923890331492055e-05, |
| "loss": 1.0282, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.12652276127377646, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.923354780250681e-05, |
| "loss": 0.91, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.1268077224478165, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.922817365939412e-05, |
| "loss": 1.0535, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.12709268362185652, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.92227808876161e-05, |
| "loss": 1.1114, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.12737764479589656, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.921736948921351e-05, |
| "loss": 1.2053, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1276626059699366, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.921193946623406e-05, |
| "loss": 1.0866, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.12794756714397662, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.920649082073255e-05, |
| "loss": 1.0868, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.12823252831801668, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.920102355477086e-05, |
| "loss": 1.0975, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1285174894920567, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.919553767041785e-05, |
| "loss": 1.13, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.12880245066609675, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.919003316974949e-05, |
| "loss": 1.0777, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.12908741184013678, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.918451005484873e-05, |
| "loss": 1.0536, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.1293723730141768, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.917896832780563e-05, |
| "loss": 0.9851, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.12965733418821684, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.917340799071724e-05, |
| "loss": 1.1509, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.1299422953622569, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.916782904568767e-05, |
| "loss": 1.2241, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.13022725653629694, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.91622314948281e-05, |
| "loss": 1.0991, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.13051221771033697, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.91566153402567e-05, |
| "loss": 1.1697, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.130797178884377, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.915098058409873e-05, |
| "loss": 1.0856, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.13108214005841703, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.914532722848644e-05, |
| "loss": 1.0753, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.13136710123245707, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.913965527555916e-05, |
| "loss": 1.0088, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.13165206240649713, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.913396472746324e-05, |
| "loss": 1.0529, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.13193702358053716, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.912825558635204e-05, |
| "loss": 1.0326, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1322219847545772, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.912252785438603e-05, |
| "loss": 1.0233, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.13250694592861723, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.911678153373262e-05, |
| "loss": 1.0682, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.13279190710265726, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.911101662656633e-05, |
| "loss": 1.0957, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.1330768682766973, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.910523313506868e-05, |
| "loss": 1.0413, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.13336182945073732, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.909943106142823e-05, |
| "loss": 0.9981, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.13364679062477738, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.909361040784057e-05, |
| "loss": 1.1892, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.13393175179881742, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.908777117650833e-05, |
| "loss": 0.9979, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.13421671297285745, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.908191336964115e-05, |
| "loss": 1.1893, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.13450167414689748, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.907603698945569e-05, |
| "loss": 1.1279, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.13478663532093751, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.907014203817571e-05, |
| "loss": 1.0797, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.13507159649497755, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.906422851803189e-05, |
| "loss": 1.1168, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.1353565576690176, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.905829643126204e-05, |
| "loss": 1.062, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.13564151884305764, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.905234578011091e-05, |
| "loss": 1.0797, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.13592648001709767, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.904637656683033e-05, |
| "loss": 1.0835, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.1362114411911377, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.904038879367915e-05, |
| "loss": 0.9696, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.13649640236517774, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.903438246292323e-05, |
| "loss": 1.1078, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.13678136353921777, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.902835757683541e-05, |
| "loss": 1.1059, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.13706632471325783, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.902231413769568e-05, |
| "loss": 0.9766, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.13735128588729786, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.901625214779089e-05, |
| "loss": 0.9869, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.1376362470613379, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.901017160941501e-05, |
| "loss": 1.0538, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.13792120823537793, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.900407252486902e-05, |
| "loss": 1.1201, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.13820616940941796, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.899795489646088e-05, |
| "loss": 1.137, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.138491130583458, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.899181872650562e-05, |
| "loss": 1.1196, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.13877609175749805, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.898566401732523e-05, |
| "loss": 1.0865, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.1390610529315381, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.897949077124876e-05, |
| "loss": 1.0929, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.13934601410557812, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.897329899061225e-05, |
| "loss": 1.0018, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.13963097527961815, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.896708867775874e-05, |
| "loss": 1.1109, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.13991593645365819, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.896085983503833e-05, |
| "loss": 1.0882, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.14020089762769822, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.895461246480812e-05, |
| "loss": 1.0533, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.14048585880173825, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.894834656943217e-05, |
| "loss": 1.0354, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1407708199757783, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.894206215128161e-05, |
| "loss": 1.1812, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.14105578114981834, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.893575921273455e-05, |
| "loss": 1.0564, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.14134074232385838, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.892943775617612e-05, |
| "loss": 1.0851, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.1416257034978984, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.892309778399843e-05, |
| "loss": 0.9474, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.14191066467193844, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.891673929860065e-05, |
| "loss": 1.1343, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.14219562584597847, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.891036230238891e-05, |
| "loss": 0.9444, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.14248058702001853, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.890396679777634e-05, |
| "loss": 1.1215, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14276554819405857, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.889755278718313e-05, |
| "loss": 1.0703, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1430505093680986, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.889112027303642e-05, |
| "loss": 0.9277, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.14333547054213863, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.888466925777036e-05, |
| "loss": 1.0271, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.14362043171617866, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.887819974382612e-05, |
| "loss": 1.0812, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.1439053928902187, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.887171173365184e-05, |
| "loss": 1.0638, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.14419035406425876, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.886520522970271e-05, |
| "loss": 1.1232, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1444753152382988, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.885868023444087e-05, |
| "loss": 1.0195, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.14476027641233882, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.885213675033547e-05, |
| "loss": 1.0605, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.14504523758637886, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.884557477986266e-05, |
| "loss": 1.0994, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.1453301987604189, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.883899432550559e-05, |
| "loss": 1.1552, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.14561515993445892, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.883239538975442e-05, |
| "loss": 1.0431, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.14590012110849895, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.882577797510624e-05, |
| "loss": 1.0683, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.14618508228253901, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.881914208406522e-05, |
| "loss": 1.0933, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.14647004345657905, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.881248771914247e-05, |
| "loss": 0.9927, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.14675500463061908, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.880581488285607e-05, |
| "loss": 1.1179, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1470399658046591, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.879912357773115e-05, |
| "loss": 1.0898, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.14732492697869914, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.87924138062998e-05, |
| "loss": 1.1643, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.14760988815273918, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.878568557110108e-05, |
| "loss": 1.0915, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.14789484932677924, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.877893887468107e-05, |
| "loss": 1.0033, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.14817981050081927, |
| "grad_norm": 0.75, |
| "learning_rate": 9.877217371959277e-05, |
| "loss": 1.1139, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1484647716748593, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.876539010839629e-05, |
| "loss": 1.007, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.14874973284889934, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.87585880436586e-05, |
| "loss": 0.9808, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.14903469402293937, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.87517675279537e-05, |
| "loss": 0.9804, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.1493196551969794, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.87449285638626e-05, |
| "loss": 1.0908, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.14960461637101946, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.873807115397325e-05, |
| "loss": 1.1514, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.1498895775450595, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.87311953008806e-05, |
| "loss": 1.0292, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.15017453871909953, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.872430100718655e-05, |
| "loss": 1.1896, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.15045949989313956, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.871738827550003e-05, |
| "loss": 1.1468, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.1507444610671796, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.871045710843691e-05, |
| "loss": 1.0774, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.15102942224121962, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.870350750862006e-05, |
| "loss": 1.149, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.15131438341525968, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.869653947867928e-05, |
| "loss": 0.9858, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.15159934458929972, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.86895530212514e-05, |
| "loss": 1.2171, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.15188430576333975, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.86825481389802e-05, |
| "loss": 1.0752, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.15216926693737978, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.86755248345164e-05, |
| "loss": 1.0845, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.15245422811141982, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.866848311051775e-05, |
| "loss": 0.9647, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.15273918928545985, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.866142296964893e-05, |
| "loss": 1.1936, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.15302415045949988, |
| "grad_norm": 0.75, |
| "learning_rate": 9.865434441458162e-05, |
| "loss": 1.05, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.15330911163353994, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.864724744799443e-05, |
| "loss": 0.9444, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.15359407280757997, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.864013207257296e-05, |
| "loss": 0.9836, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.15387903398162, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.863299829100978e-05, |
| "loss": 1.0449, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.15416399515566004, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.86258461060044e-05, |
| "loss": 1.0249, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.15444895632970007, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.861867552026334e-05, |
| "loss": 1.0781, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1547339175037401, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.861148653650003e-05, |
| "loss": 1.2094, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.15501887867778016, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.86042791574349e-05, |
| "loss": 1.022, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.1553038398518202, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.859705338579533e-05, |
| "loss": 1.1364, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.15558880102586023, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.858980922431565e-05, |
| "loss": 1.0014, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.15587376219990026, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.858254667573715e-05, |
| "loss": 1.0937, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.1561587233739403, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.857526574280811e-05, |
| "loss": 0.9719, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.15644368454798033, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.856796642828372e-05, |
| "loss": 1.0532, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.1567286457220204, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.856064873492616e-05, |
| "loss": 0.9626, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.15701360689606042, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.855331266550455e-05, |
| "loss": 1.0518, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.15729856807010045, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.854595822279496e-05, |
| "loss": 1.1948, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.15758352924414049, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.853858540958043e-05, |
| "loss": 0.9794, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.15786849041818052, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.853119422865094e-05, |
| "loss": 0.9747, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.15815345159222055, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.852378468280341e-05, |
| "loss": 0.9863, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.15843841276626058, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.851635677484174e-05, |
| "loss": 1.1277, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.15872337394030064, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.850891050757674e-05, |
| "loss": 0.9376, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.15900833511434068, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.85014458838262e-05, |
| "loss": 1.0365, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.1592932962883807, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.849396290641483e-05, |
| "loss": 1.1507, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.15957825746242074, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.848646157817432e-05, |
| "loss": 1.0318, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.15986321863646077, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.847894190194327e-05, |
| "loss": 1.0928, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.1601481798105008, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.847140388056724e-05, |
| "loss": 1.099, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.16043314098454087, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.846384751689872e-05, |
| "loss": 1.0512, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.1607181021585809, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.845627281379714e-05, |
| "loss": 0.9933, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.16100306333262093, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.844867977412892e-05, |
| "loss": 0.9725, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.16128802450666097, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.844106840076734e-05, |
| "loss": 0.9792, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.161572985680701, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.843343869659267e-05, |
| "loss": 0.892, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.16185794685474103, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.84257906644921e-05, |
| "loss": 1.0554, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.1621429080287811, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.841812430735974e-05, |
| "loss": 0.9122, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.16242786920282112, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.841043962809669e-05, |
| "loss": 0.9609, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.16271283037686116, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.840273662961092e-05, |
| "loss": 0.9692, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.1629977915509012, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.839501531481736e-05, |
| "loss": 1.0936, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.16328275272494122, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.838727568663787e-05, |
| "loss": 1.065, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.16356771389898125, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.837951774800125e-05, |
| "loss": 1.0459, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.16385267507302131, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.83717415018432e-05, |
| "loss": 1.1155, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.16413763624706135, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.836394695110639e-05, |
| "loss": 1.0229, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.16442259742110138, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.835613409874038e-05, |
| "loss": 1.0946, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.1647075585951414, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.834830294770165e-05, |
| "loss": 1.1005, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.16499251976918144, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.834045350095364e-05, |
| "loss": 1.0206, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.16527748094322148, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.833258576146671e-05, |
| "loss": 0.986, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1655624421172615, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.832469973221812e-05, |
| "loss": 1.0962, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.16584740329130157, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.831679541619203e-05, |
| "loss": 1.0193, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.1661323644653416, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.830887281637959e-05, |
| "loss": 0.895, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.16641732563938164, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.830093193577881e-05, |
| "loss": 1.0524, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.16670228681342167, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.829297277739465e-05, |
| "loss": 1.0872, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.1669872479874617, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.828499534423894e-05, |
| "loss": 1.1642, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.16727220916150173, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.827699963933048e-05, |
| "loss": 1.1197, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.1675571703355418, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.826898566569495e-05, |
| "loss": 1.0563, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.16784213150958183, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.8260953426365e-05, |
| "loss": 1.1249, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.16812709268362186, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.82529029243801e-05, |
| "loss": 0.987, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1684120538576619, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.824483416278669e-05, |
| "loss": 1.1132, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.16869701503170192, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.823674714463811e-05, |
| "loss": 1.0831, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.16898197620574196, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.82286418729946e-05, |
| "loss": 1.0008, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.16926693737978202, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.822051835092332e-05, |
| "loss": 1.0579, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.16955189855382205, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.821237658149834e-05, |
| "loss": 1.0904, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.16983685972786208, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.820421656780062e-05, |
| "loss": 1.0895, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.17012182090190212, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.819603831291803e-05, |
| "loss": 0.9854, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.17040678207594215, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.818784181994532e-05, |
| "loss": 0.966, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.17069174324998218, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.817962709198421e-05, |
| "loss": 1.0342, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.1709767044240222, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.817139413214323e-05, |
| "loss": 1.1733, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.17126166559806227, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.816314294353785e-05, |
| "loss": 0.9962, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.1715466267721023, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.815487352929048e-05, |
| "loss": 1.1545, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.17183158794614234, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.814658589253037e-05, |
| "loss": 1.0865, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.17211654912018237, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.813828003639367e-05, |
| "loss": 1.0435, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.1724015102942224, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.812995596402346e-05, |
| "loss": 1.0579, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.17268647146826244, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.812161367856968e-05, |
| "loss": 0.9626, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.1729714326423025, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.811325318318916e-05, |
| "loss": 1.0333, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.17325639381634253, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.810487448104568e-05, |
| "loss": 1.0203, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.17354135499038256, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.809647757530981e-05, |
| "loss": 1.1151, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.1738263161644226, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.808806246915909e-05, |
| "loss": 1.074, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.17411127733846263, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.807962916577792e-05, |
| "loss": 1.1281, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.17439623851250266, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.807117766835758e-05, |
| "loss": 0.9904, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.17468119968654272, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.806270798009624e-05, |
| "loss": 1.0156, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.17496616086058275, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.805422010419897e-05, |
| "loss": 1.1171, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.17525112203462279, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.804571404387768e-05, |
| "loss": 1.1166, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.17553608320866282, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.803718980235121e-05, |
| "loss": 1.1091, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.17582104438270285, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.802864738284527e-05, |
| "loss": 1.1542, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.17610600555674288, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.80200867885924e-05, |
| "loss": 1.1295, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.17639096673078294, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.801150802283207e-05, |
| "loss": 0.9426, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.17667592790482298, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.800291108881063e-05, |
| "loss": 1.0658, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.176960889078863, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.799429598978127e-05, |
| "loss": 0.965, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.17724585025290304, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.798566272900404e-05, |
| "loss": 1.0469, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.17753081142694307, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.797701130974597e-05, |
| "loss": 1.1454, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.1778157726009831, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.796834173528081e-05, |
| "loss": 0.8913, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.17810073377502314, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.79596540088893e-05, |
| "loss": 1.0771, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.1783856949490632, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.795094813385898e-05, |
| "loss": 1.1235, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.17867065612310323, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.794222411348429e-05, |
| "loss": 1.0831, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.17895561729714327, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.793348195106652e-05, |
| "loss": 0.9887, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.1792405784711833, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.792472164991384e-05, |
| "loss": 1.0003, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.17952553964522333, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.79159432133413e-05, |
| "loss": 0.9718, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.17981050081926336, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.790714664467073e-05, |
| "loss": 0.9599, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.18009546199330342, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.789833194723094e-05, |
| "loss": 1.0511, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.18038042316734346, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.78894991243575e-05, |
| "loss": 1.0931, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.1806653843413835, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.788064817939291e-05, |
| "loss": 1.2157, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.18095034551542352, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.787177911568648e-05, |
| "loss": 1.0819, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.18123530668946355, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.786289193659441e-05, |
| "loss": 0.8964, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.1815202678635036, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.785398664547973e-05, |
| "loss": 0.9387, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.18180522903754365, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.784506324571232e-05, |
| "loss": 1.0766, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.18209019021158368, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.783612174066894e-05, |
| "loss": 1.0544, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.1823751513856237, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.782716213373318e-05, |
| "loss": 1.1305, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.18266011255966375, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.781818442829552e-05, |
| "loss": 0.9968, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.18294507373370378, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.780918862775319e-05, |
| "loss": 1.0873, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.1832300349077438, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.78001747355104e-05, |
| "loss": 1.0623, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.18351499608178384, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.77911427549781e-05, |
| "loss": 1.0687, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.1837999572558239, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.778209268957414e-05, |
| "loss": 1.016, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.18408491842986394, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.777302454272319e-05, |
| "loss": 1.0787, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.18436987960390397, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.776393831785677e-05, |
| "loss": 1.0606, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.184654840777944, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.775483401841325e-05, |
| "loss": 0.996, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.18493980195198403, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.774571164783782e-05, |
| "loss": 1.0357, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.18522476312602407, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.773657120958252e-05, |
| "loss": 0.9564, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.18550972430006413, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.772741270710626e-05, |
| "loss": 1.2073, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.18579468547410416, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.771823614387469e-05, |
| "loss": 1.0351, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.1860796466481442, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.77090415233604e-05, |
| "loss": 1.0352, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.18636460782218423, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.769982884904276e-05, |
| "loss": 1.0765, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.18664956899622426, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.769059812440799e-05, |
| "loss": 0.9166, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.1869345301702643, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.768134935294912e-05, |
| "loss": 1.1236, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.18721949134430435, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.767208253816602e-05, |
| "loss": 1.0862, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.18750445251834438, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.766279768356539e-05, |
| "loss": 0.9412, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.18778941369238442, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.765349479266078e-05, |
| "loss": 1.0688, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.18807437486642445, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.764417386897249e-05, |
| "loss": 1.1256, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.18835933604046448, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.763483491602773e-05, |
| "loss": 1.01, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.1886442972145045, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.762547793736049e-05, |
| "loss": 1.0967, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.18892925838854457, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.76161029365116e-05, |
| "loss": 1.0862, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.1892142195625846, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.760670991702869e-05, |
| "loss": 1.1208, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.18949918073662464, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.759729888246623e-05, |
| "loss": 0.9391, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.18978414191066467, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.758786983638545e-05, |
| "loss": 0.9891, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.1900691030847047, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.757842278235449e-05, |
| "loss": 1.0646, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.19035406425874474, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.756895772394821e-05, |
| "loss": 1.1456, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.19063902543278477, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.755947466474838e-05, |
| "loss": 0.9196, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.19092398660682483, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.754997360834348e-05, |
| "loss": 1.0306, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.19120894778086486, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.754045455832888e-05, |
| "loss": 0.9923, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.1914939089549049, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.75309175183067e-05, |
| "loss": 0.912, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.19177887012894493, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.752136249188593e-05, |
| "loss": 1.1638, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.19206383130298496, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.751178948268231e-05, |
| "loss": 1.1008, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.192348792477025, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.750219849431841e-05, |
| "loss": 1.0502, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.19263375365106505, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.749258953042362e-05, |
| "loss": 1.0533, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.1929187148251051, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.748296259463407e-05, |
| "loss": 1.2064, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.19320367599914512, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.74733176905928e-05, |
| "loss": 1.0515, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.19348863717318515, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.746365482194952e-05, |
| "loss": 1.0417, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.19377359834722518, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.745397399236085e-05, |
| "loss": 1.0463, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.19405855952126522, |
| "grad_norm": 0.625, |
| "learning_rate": 9.744427520549011e-05, |
| "loss": 0.9756, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.19434352069530528, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.74345584650075e-05, |
| "loss": 0.9914, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.1946284818693453, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.742482377458999e-05, |
| "loss": 1.0467, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.19491344304338534, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.741507113792128e-05, |
| "loss": 0.9449, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.19519840421742538, |
| "grad_norm": 0.875, |
| "learning_rate": 9.740530055869194e-05, |
| "loss": 1.1425, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.1954833653914654, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.739551204059932e-05, |
| "loss": 1.128, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.19576832656550544, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.73857055873475e-05, |
| "loss": 0.9958, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.19605328773954547, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.737588120264743e-05, |
| "loss": 0.9836, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.19633824891358553, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.736603889021676e-05, |
| "loss": 0.9003, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.19662321008762557, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.735617865377997e-05, |
| "loss": 1.0004, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1969081712616656, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.734630049706835e-05, |
| "loss": 0.9171, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.19719313243570563, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.733640442381993e-05, |
| "loss": 1.0012, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.19747809360974566, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.732649043777951e-05, |
| "loss": 1.0959, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.1977630547837857, |
| "grad_norm": 0.75, |
| "learning_rate": 9.731655854269869e-05, |
| "loss": 1.1731, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.19804801595782576, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.730660874233586e-05, |
| "loss": 1.0082, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1983329771318658, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.729664104045618e-05, |
| "loss": 1.1264, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.19861793830590582, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.728665544083154e-05, |
| "loss": 1.0095, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.19890289947994585, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.727665194724067e-05, |
| "loss": 1.0986, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.1991878606539859, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.726663056346903e-05, |
| "loss": 1.0622, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.19947282182802592, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.725659129330885e-05, |
| "loss": 1.0092, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.19975778300206598, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.724653414055916e-05, |
| "loss": 1.2674, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.200042744176106, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.72364591090257e-05, |
| "loss": 1.1078, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.20032770535014605, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.722636620252103e-05, |
| "loss": 1.0586, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.20061266652418608, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.721625542486446e-05, |
| "loss": 0.9358, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.2008976276982261, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.720612677988206e-05, |
| "loss": 1.1108, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.20118258887226614, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.719598027140663e-05, |
| "loss": 1.1469, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2014675500463062, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.718581590327777e-05, |
| "loss": 1.1143, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.20175251122034624, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.717563367934186e-05, |
| "loss": 1.0907, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.20203747239438627, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.716543360345197e-05, |
| "loss": 1.0438, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2023224335684263, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.715521567946797e-05, |
| "loss": 1.0072, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.20260739474246633, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.714497991125644e-05, |
| "loss": 0.9971, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.20289235591650637, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.713472630269081e-05, |
| "loss": 1.0122, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.2031773170905464, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.712445485765114e-05, |
| "loss": 1.06, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.20346227826458646, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.71141655800243e-05, |
| "loss": 0.9886, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.2037472394386265, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.710385847370394e-05, |
| "loss": 0.8964, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.20403220061266653, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.709353354259041e-05, |
| "loss": 0.9988, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.20431716178670656, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.70831907905908e-05, |
| "loss": 1.0312, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2046021229607466, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.707283022161896e-05, |
| "loss": 1.0767, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.20488708413478662, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.706245183959548e-05, |
| "loss": 0.9476, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.20517204530882668, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.705205564844773e-05, |
| "loss": 0.9307, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.20545700648286672, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.704164165210972e-05, |
| "loss": 1.1137, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.20574196765690675, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.70312098545223e-05, |
| "loss": 1.2179, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.20602692883094678, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.702076025963303e-05, |
| "loss": 1.1086, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.20631189000498681, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.701029287139614e-05, |
| "loss": 0.9571, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.20659685117902685, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.699980769377269e-05, |
| "loss": 1.062, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2068818123530669, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.698930473073038e-05, |
| "loss": 1.1846, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.20716677352710694, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.697878398624374e-05, |
| "loss": 0.9158, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.20745173470114697, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.696824546429393e-05, |
| "loss": 0.9724, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.207736695875187, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.695768916886892e-05, |
| "loss": 1.1375, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.20802165704922704, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.694711510396333e-05, |
| "loss": 0.8772, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.20830661822326707, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.693652327357856e-05, |
| "loss": 1.0808, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.2085915793973071, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.692591368172271e-05, |
| "loss": 1.1139, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.20887654057134716, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.691528633241061e-05, |
| "loss": 1.0693, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.2091615017453872, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.69046412296638e-05, |
| "loss": 1.081, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.20944646291942723, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.689397837751058e-05, |
| "loss": 1.0401, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.20973142409346726, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.688329777998586e-05, |
| "loss": 1.0267, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.2100163852675073, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.687259944113138e-05, |
| "loss": 1.0412, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.21030134644154733, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.686188336499555e-05, |
| "loss": 0.916, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2105863076155874, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.685114955563349e-05, |
| "loss": 1.0391, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.21087126878962742, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.6840398017107e-05, |
| "loss": 1.1002, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.21115622996366745, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.682962875348467e-05, |
| "loss": 1.141, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.21144119113770748, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.68188417688417e-05, |
| "loss": 0.9937, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.21172615231174752, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.680803706726007e-05, |
| "loss": 1.2662, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.21201111348578755, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.679721465282845e-05, |
| "loss": 1.1191, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.2122960746598276, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.678637452964217e-05, |
| "loss": 1.1676, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.21258103583386764, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.677551670180331e-05, |
| "loss": 1.0041, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.21286599700790768, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.676464117342063e-05, |
| "loss": 1.0965, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2131509581819477, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.675374794860958e-05, |
| "loss": 0.9738, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.21343591935598774, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.674283703149234e-05, |
| "loss": 1.0534, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.21372088053002777, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.673190842619774e-05, |
| "loss": 0.9893, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.21400584170406783, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.672096213686133e-05, |
| "loss": 0.9149, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.21429080287810787, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.670999816762536e-05, |
| "loss": 1.032, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.2145757640521479, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.669901652263873e-05, |
| "loss": 1.0288, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.21486072522618793, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.668801720605709e-05, |
| "loss": 0.9815, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.21514568640022796, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.667700022204272e-05, |
| "loss": 0.9901, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.215430647574268, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.66659655747646e-05, |
| "loss": 1.1246, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.21571560874830803, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.665491326839843e-05, |
| "loss": 1.1472, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2160005699223481, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.664384330712655e-05, |
| "loss": 0.9106, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.21628553109638812, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.6632755695138e-05, |
| "loss": 1.0301, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.21657049227042816, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.66216504366285e-05, |
| "loss": 1.0129, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2168554534444682, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.661052753580041e-05, |
| "loss": 1.0987, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.21714041461850822, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.659938699686286e-05, |
| "loss": 0.984, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.21742537579254825, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.658822882403156e-05, |
| "loss": 1.0109, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.2177103369665883, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.65770530215289e-05, |
| "loss": 1.0126, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.21799529814062835, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.656585959358402e-05, |
| "loss": 0.9701, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.21828025931466838, |
| "grad_norm": 0.625, |
| "learning_rate": 9.655464854443266e-05, |
| "loss": 0.9953, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2185652204887084, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.654341987831721e-05, |
| "loss": 0.9398, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.21885018166274844, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.653217359948681e-05, |
| "loss": 0.978, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.21913514283678848, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.65209097121972e-05, |
| "loss": 1.0498, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.21942010401082854, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.65096282207108e-05, |
| "loss": 1.021, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.21970506518486857, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.649832912929669e-05, |
| "loss": 0.8944, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.2199900263589086, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.648701244223062e-05, |
| "loss": 1.0971, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.22027498753294864, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.647567816379496e-05, |
| "loss": 1.1193, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.22055994870698867, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.646432629827883e-05, |
| "loss": 0.9676, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2208449098810287, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.645295684997789e-05, |
| "loss": 0.9198, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.22112987105506876, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.644156982319452e-05, |
| "loss": 1.064, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.2214148322291088, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.643016522223776e-05, |
| "loss": 0.943, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.22169979340314883, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.641874305142324e-05, |
| "loss": 1.0012, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.22198475457718886, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.640730331507331e-05, |
| "loss": 1.022, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.2222697157512289, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.639584601751691e-05, |
| "loss": 1.1066, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.22255467692526892, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.638437116308967e-05, |
| "loss": 0.9619, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.22283963809930896, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.637287875613384e-05, |
| "loss": 0.9892, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.22312459927334902, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.636136880099831e-05, |
| "loss": 1.0539, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.22340956044738905, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.634984130203861e-05, |
| "loss": 0.9384, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.22369452162142908, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.633829626361695e-05, |
| "loss": 0.9422, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.22397948279546911, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.632673369010208e-05, |
| "loss": 1.015, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.22426444396950915, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.631515358586952e-05, |
| "loss": 1.0725, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.22454940514354918, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.630355595530129e-05, |
| "loss": 0.9548, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.22483436631758924, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.629194080278614e-05, |
| "loss": 1.0999, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.22511932749162927, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.628030813271939e-05, |
| "loss": 1.1077, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2254042886656693, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.626865794950303e-05, |
| "loss": 1.0716, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.22568924983970934, |
| "grad_norm": 0.625, |
| "learning_rate": 9.625699025754569e-05, |
| "loss": 1.0403, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.22597421101374937, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.624530506126254e-05, |
| "loss": 0.996, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.2262591721877894, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.623360236507546e-05, |
| "loss": 0.984, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.22654413336182946, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.622188217341292e-05, |
| "loss": 0.8911, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.2268290945358695, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.621014449071002e-05, |
| "loss": 0.9672, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.22711405570990953, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.619838932140845e-05, |
| "loss": 1.0569, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.22739901688394956, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.618661666995656e-05, |
| "loss": 0.9946, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.2276839780579896, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.617482654080927e-05, |
| "loss": 0.8449, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.22796893923202963, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.616301893842817e-05, |
| "loss": 1.0208, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22825390040606966, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.615119386728142e-05, |
| "loss": 0.8984, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.22853886158010972, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.613935133184378e-05, |
| "loss": 0.9595, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.22882382275414975, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.612749133659666e-05, |
| "loss": 1.0099, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.22910878392818979, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.611561388602805e-05, |
| "loss": 0.9618, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.22939374510222982, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.610371898463257e-05, |
| "loss": 0.9654, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.22967870627626985, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.60918066369114e-05, |
| "loss": 1.0519, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.22996366745030988, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.607987684737237e-05, |
| "loss": 1.1106, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.23024862862434994, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.606792962052986e-05, |
| "loss": 0.968, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.23053358979838998, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.60559649609049e-05, |
| "loss": 1.0045, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.23081855097243, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.604398287302509e-05, |
| "loss": 0.9767, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.23110351214647004, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.60319833614246e-05, |
| "loss": 1.0603, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.23138847332051007, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.601996643064428e-05, |
| "loss": 1.106, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.2316734344945501, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.600793208523147e-05, |
| "loss": 0.9162, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.23195839566859017, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.599588032974014e-05, |
| "loss": 0.9119, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.2322433568426302, |
| "grad_norm": 0.625, |
| "learning_rate": 9.598381116873088e-05, |
| "loss": 0.9307, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.23252831801667023, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.597172460677079e-05, |
| "loss": 0.9318, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.23281327919071026, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.595962064843367e-05, |
| "loss": 1.0533, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.2330982403647503, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.594749929829979e-05, |
| "loss": 0.9443, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.23338320153879033, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.593536056095606e-05, |
| "loss": 1.0741, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.2336681627128304, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.592320444099595e-05, |
| "loss": 1.0224, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.23395312388687042, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.591103094301952e-05, |
| "loss": 0.9979, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.23423808506091046, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.589884007163341e-05, |
| "loss": 1.0549, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2345230462349505, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.588663183145082e-05, |
| "loss": 1.0795, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.23480800740899052, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.58744062270915e-05, |
| "loss": 1.0403, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.23509296858303055, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.586216326318185e-05, |
| "loss": 1.0365, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.2353779297570706, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.584990294435474e-05, |
| "loss": 0.8826, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.23566289093111065, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.583762527524968e-05, |
| "loss": 1.0307, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.23594785210515068, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.582533026051272e-05, |
| "loss": 0.9801, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.2362328132791907, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.581301790479646e-05, |
| "loss": 1.1224, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.23651777445323074, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.58006882127601e-05, |
| "loss": 1.0178, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.23680273562727078, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.578834118906936e-05, |
| "loss": 1.0178, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.2370876968013108, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.577597683839653e-05, |
| "loss": 0.8616, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.23737265797535087, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.576359516542049e-05, |
| "loss": 1.0821, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.2376576191493909, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.57511961748266e-05, |
| "loss": 1.0931, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.23794258032343094, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.573877987130687e-05, |
| "loss": 0.9808, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.23822754149747097, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.572634625955979e-05, |
| "loss": 1.0453, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.238512502671511, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.571389534429042e-05, |
| "loss": 1.0502, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.23879746384555103, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.570142713021038e-05, |
| "loss": 1.0263, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.2390824250195911, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.56889416220378e-05, |
| "loss": 0.9521, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.23936738619363113, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.567643882449741e-05, |
| "loss": 1.0707, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.23965234736767116, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.566391874232043e-05, |
| "loss": 0.9217, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.2399373085417112, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.565138138024467e-05, |
| "loss": 0.9518, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.24022226971575122, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.563882674301442e-05, |
| "loss": 1.0683, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.24050723088979126, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.562625483538056e-05, |
| "loss": 0.9946, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2407921920638313, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.561366566210048e-05, |
| "loss": 0.8849, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.24107715323787135, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.560105922793811e-05, |
| "loss": 0.9304, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.24136211441191138, |
| "grad_norm": 0.625, |
| "learning_rate": 9.558843553766392e-05, |
| "loss": 1.0837, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.24164707558595142, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.557579459605488e-05, |
| "loss": 1.0149, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.24193203675999145, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.556313640789452e-05, |
| "loss": 1.1387, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.24221699793403148, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.555046097797288e-05, |
| "loss": 0.9633, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2425019591080715, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.553776831108654e-05, |
| "loss": 0.982, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.24278692028211157, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.552505841203856e-05, |
| "loss": 1.0704, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.2430718814561516, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.55123312856386e-05, |
| "loss": 0.9039, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.24335684263019164, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.549958693670276e-05, |
| "loss": 0.9455, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.24364180380423167, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.548682537005369e-05, |
| "loss": 0.9679, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.2439267649782717, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.547404659052057e-05, |
| "loss": 1.002, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.24421172615231174, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.546125060293906e-05, |
| "loss": 0.9739, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.2444966873263518, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.544843741215137e-05, |
| "loss": 1.0245, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.24478164850039183, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.54356070230062e-05, |
| "loss": 1.0823, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.24506660967443186, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.542275944035874e-05, |
| "loss": 1.0684, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2453515708484719, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.54098946690707e-05, |
| "loss": 0.962, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.24563653202251193, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.539701271401031e-05, |
| "loss": 0.9923, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.24592149319655196, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.538411358005231e-05, |
| "loss": 0.9796, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.24620645437059202, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.537119727207791e-05, |
| "loss": 0.9334, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.24649141554463205, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.53582637949748e-05, |
| "loss": 1.0171, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.24677637671867209, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.534531315363723e-05, |
| "loss": 0.9819, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.24706133789271212, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.533234535296591e-05, |
| "loss": 1.115, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.24734629906675215, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.531936039786806e-05, |
| "loss": 0.9978, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.24763126024079218, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.530635829325734e-05, |
| "loss": 1.0792, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.24791622141483222, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.529333904405398e-05, |
| "loss": 0.8426, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.24820118258887228, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.528030265518461e-05, |
| "loss": 0.9491, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.2484861437629123, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.526724913158242e-05, |
| "loss": 0.9119, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.24877110493695234, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.525417847818707e-05, |
| "loss": 0.9728, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.24905606611099237, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.524109069994466e-05, |
| "loss": 0.9545, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.2493410272850324, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.522798580180781e-05, |
| "loss": 1.0756, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.24962598845907244, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.521486378873558e-05, |
| "loss": 0.9064, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.2499109496331125, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.520172466569358e-05, |
| "loss": 0.9686, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.25019591080715253, |
| "grad_norm": 0.625, |
| "learning_rate": 9.518856843765382e-05, |
| "loss": 1.0244, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.25048087198119257, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.517539510959482e-05, |
| "loss": 1.0626, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.2507658331552326, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.516220468650155e-05, |
| "loss": 1.0234, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.25105079432927263, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.514899717336545e-05, |
| "loss": 0.978, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.25133575550331266, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.513577257518447e-05, |
| "loss": 1.0112, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.2516207166773527, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.512253089696296e-05, |
| "loss": 0.9261, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.25190567785139273, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.51092721437118e-05, |
| "loss": 0.9873, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.25219063902543276, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.509599632044827e-05, |
| "loss": 0.945, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.25247560019947285, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.508270343219614e-05, |
| "loss": 1.073, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.2527605613735129, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.506939348398565e-05, |
| "loss": 0.9472, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.2530455225475529, |
| "grad_norm": 0.625, |
| "learning_rate": 9.505606648085346e-05, |
| "loss": 0.942, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.25333048372159295, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.504272242784272e-05, |
| "loss": 0.9839, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.253615444895633, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.5029361330003e-05, |
| "loss": 0.9566, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.253900406069673, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.501598319239036e-05, |
| "loss": 1.0334, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.25418536724371305, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.500258802006726e-05, |
| "loss": 1.0752, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.2544703284177531, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.498917581810265e-05, |
| "loss": 1.0278, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.2547552895917931, |
| "grad_norm": 0.625, |
| "learning_rate": 9.49757465915719e-05, |
| "loss": 1.0858, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.25504025076583314, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.49623003455568e-05, |
| "loss": 0.9767, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.2553252119398732, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.494883708514564e-05, |
| "loss": 0.9732, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.2556101731139132, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.49353568154331e-05, |
| "loss": 1.0778, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.25589513428795324, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.492185954152032e-05, |
| "loss": 0.9255, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.25618009546199333, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.490834526851489e-05, |
| "loss": 0.9085, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.25646505663603336, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.489481400153075e-05, |
| "loss": 0.971, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2567500178100734, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.488126574568838e-05, |
| "loss": 1.0312, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.2570349789841134, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.486770050611463e-05, |
| "loss": 1.1106, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.25731994015815346, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.485411828794278e-05, |
| "loss": 0.9696, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.2576049013321935, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.484051909631253e-05, |
| "loss": 0.9714, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.2578898625062335, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.482690293637004e-05, |
| "loss": 0.858, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.25817482368027356, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.481326981326784e-05, |
| "loss": 1.0682, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.2584597848543136, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.479961973216492e-05, |
| "loss": 1.0548, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.2587447460283536, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.478595269822666e-05, |
| "loss": 1.1323, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.25902970720239366, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.477226871662489e-05, |
| "loss": 1.0222, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.2593146683764337, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.47585677925378e-05, |
| "loss": 1.0132, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2595996295504737, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.474484993115005e-05, |
| "loss": 0.9136, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.2598845907245138, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.473111513765265e-05, |
| "loss": 1.2132, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.26016955189855384, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.471736341724307e-05, |
| "loss": 0.9834, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.2604545130725939, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.470359477512517e-05, |
| "loss": 0.9819, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.2607394742466339, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.468980921650919e-05, |
| "loss": 1.0251, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.26102443542067394, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.467600674661177e-05, |
| "loss": 1.0043, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.26130939659471397, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.466218737065601e-05, |
| "loss": 1.1059, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.261594357768754, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.464835109387133e-05, |
| "loss": 0.9952, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.26187931894279404, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.463449792149362e-05, |
| "loss": 1.0236, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.26216428011683407, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.462062785876509e-05, |
| "loss": 0.9314, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2624492412908741, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.460674091093438e-05, |
| "loss": 1.1074, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.26273420246491413, |
| "grad_norm": 0.625, |
| "learning_rate": 9.459283708325653e-05, |
| "loss": 0.9517, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.26301916363895417, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.457891638099293e-05, |
| "loss": 1.0116, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.26330412481299426, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.45649788094114e-05, |
| "loss": 0.9336, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.2635890859870343, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.455102437378611e-05, |
| "loss": 1.0192, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.2638740471610743, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.453705307939764e-05, |
| "loss": 0.981, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.26415900833511435, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.452306493153292e-05, |
| "loss": 1.1055, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.2644439695091544, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.450905993548527e-05, |
| "loss": 1.1894, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.2647289306831944, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.44950380965544e-05, |
| "loss": 1.0545, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.26501389185723445, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.448099942004636e-05, |
| "loss": 1.1089, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2652988530312745, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.44669439112736e-05, |
| "loss": 1.0321, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.2655838142053145, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.445287157555494e-05, |
| "loss": 0.9055, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.26586877537935455, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.443878241821555e-05, |
| "loss": 1.2172, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.2661537365533946, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.442467644458699e-05, |
| "loss": 0.9766, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.2664386977274346, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.441055366000712e-05, |
| "loss": 1.0756, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.26672365890147465, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.439641406982028e-05, |
| "loss": 1.0067, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.26700862007551474, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.438225767937705e-05, |
| "loss": 0.9499, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.26729358124955477, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.436808449403442e-05, |
| "loss": 1.0982, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.2675785424235948, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.435389451915576e-05, |
| "loss": 0.9502, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.26786350359763483, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.433968776011074e-05, |
| "loss": 1.0042, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.26814846477167487, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.432546422227542e-05, |
| "loss": 0.8678, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.2684334259457149, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.431122391103217e-05, |
| "loss": 1.041, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.26871838711975493, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.429696683176976e-05, |
| "loss": 0.9748, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.26900334829379496, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.428269298988328e-05, |
| "loss": 0.9924, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.269288309467835, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.426840239077414e-05, |
| "loss": 1.0842, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.26957327064187503, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.425409503985014e-05, |
| "loss": 0.9269, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.26985823181591506, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.423977094252537e-05, |
| "loss": 1.0251, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.2701431929899551, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.422543010422029e-05, |
| "loss": 1.0265, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.2704281541639952, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.421107253036168e-05, |
| "loss": 0.9327, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.2707131153380352, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.419669822638267e-05, |
| "loss": 0.9137, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.27099807651207525, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.418230719772269e-05, |
| "loss": 1.0876, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.2712830376861153, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.41678994498275e-05, |
| "loss": 1.0511, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.2715679988601553, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.415347498814925e-05, |
| "loss": 0.9393, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.27185296003419535, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.413903381814638e-05, |
| "loss": 0.9865, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.2721379212082354, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.412457594528357e-05, |
| "loss": 0.8684, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.2724228823822754, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.411010137503192e-05, |
| "loss": 0.8862, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.27270784355631544, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.409561011286885e-05, |
| "loss": 1.1248, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.2729928047303555, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.408110216427804e-05, |
| "loss": 1.0661, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.2732777659043955, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.406657753474952e-05, |
| "loss": 0.9036, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.27356272707843554, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.405203622977963e-05, |
| "loss": 0.8965, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2738476882524756, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.403747825487099e-05, |
| "loss": 1.0589, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.27413264942651566, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.402290361553257e-05, |
| "loss": 1.057, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.2744176106005557, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.400831231727962e-05, |
| "loss": 1.049, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.2747025717745957, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.399370436563372e-05, |
| "loss": 0.8457, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.27498753294863576, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.397907976612272e-05, |
| "loss": 0.9325, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.2752724941226758, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.396443852428078e-05, |
| "loss": 1.0996, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.2755574552967158, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.394978064564836e-05, |
| "loss": 1.0698, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.27584241647075586, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.393510613577225e-05, |
| "loss": 0.9456, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.2761273776447959, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.392041500020545e-05, |
| "loss": 0.8715, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.2764123388188359, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.390570724450733e-05, |
| "loss": 0.9401, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.27669729999287596, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.389098287424351e-05, |
| "loss": 1.0906, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.276982261166916, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.387624189498593e-05, |
| "loss": 1.061, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.277267222340956, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.386148431231276e-05, |
| "loss": 1.0958, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.2775521835149961, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.384671013180852e-05, |
| "loss": 0.9663, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.27783714468903614, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.383191935906395e-05, |
| "loss": 1.011, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.2781221058630762, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.381711199967613e-05, |
| "loss": 1.0356, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.2784070670371162, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.380228805924835e-05, |
| "loss": 0.9148, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.27869202821115624, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.378744754339022e-05, |
| "loss": 0.8963, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.2789769893851963, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.377259045771759e-05, |
| "loss": 1.0365, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.2792619505592363, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.375771680785263e-05, |
| "loss": 1.0531, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.27954691173327634, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.374282659942374e-05, |
| "loss": 0.9912, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.27983187290731637, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.372791983806559e-05, |
| "loss": 1.0263, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.2801168340813564, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.371299652941911e-05, |
| "loss": 0.9641, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.28040179525539644, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.36980566791315e-05, |
| "loss": 1.0648, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.28068675642943647, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.368310029285625e-05, |
| "loss": 0.949, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.2809717176034765, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.366812737625303e-05, |
| "loss": 1.0348, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.2812566787775166, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.365313793498785e-05, |
| "loss": 0.9918, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.2815416399515566, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.363813197473291e-05, |
| "loss": 1.0178, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.28182660112559665, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.36231095011667e-05, |
| "loss": 1.09, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.2821115622996367, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.360807051997394e-05, |
| "loss": 0.9619, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2823965234736767, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.359301503684561e-05, |
| "loss": 0.9254, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.28268148464771675, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.357794305747891e-05, |
| "loss": 0.9638, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.2829664458217568, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.356285458757731e-05, |
| "loss": 0.8641, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.2832514069957968, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.354774963285051e-05, |
| "loss": 1.027, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.28353636816983685, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.353262819901444e-05, |
| "loss": 0.9502, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.2838213293438769, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.351749029179128e-05, |
| "loss": 0.9592, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.2841062905179169, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.350233591690943e-05, |
| "loss": 0.9631, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.28439125169195695, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.348716508010354e-05, |
| "loss": 0.9393, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.284676212865997, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.347197778711446e-05, |
| "loss": 0.934, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.28496117404003707, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.345677404368931e-05, |
| "loss": 1.0546, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2852461352140771, |
| "grad_norm": 0.625, |
| "learning_rate": 9.344155385558138e-05, |
| "loss": 1.0737, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.28553109638811713, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.342631722855024e-05, |
| "loss": 1.0648, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.28581605756215717, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.341106416836165e-05, |
| "loss": 0.8566, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.2861010187361972, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.339579468078757e-05, |
| "loss": 0.9621, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.28638597991023723, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.338050877160623e-05, |
| "loss": 0.9685, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.28667094108427726, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.3365206446602e-05, |
| "loss": 1.1444, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.2869559022583173, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.334988771156556e-05, |
| "loss": 0.9522, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.28724086343235733, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.333455257229369e-05, |
| "loss": 0.9159, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.28752582460639736, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.33192010345895e-05, |
| "loss": 1.1429, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.2878107857804374, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.330383310426216e-05, |
| "loss": 0.9845, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.2880957469544774, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.328844878712716e-05, |
| "loss": 1.0455, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.2883807081285175, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.327304808900617e-05, |
| "loss": 0.9269, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.28866566930255755, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.325763101572701e-05, |
| "loss": 1.0034, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.2889506304765976, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.324219757312374e-05, |
| "loss": 0.9634, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.2892355916506376, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.32267477670366e-05, |
| "loss": 0.9526, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.28952055282467765, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.321128160331201e-05, |
| "loss": 0.878, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.2898055139987177, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.319579908780263e-05, |
| "loss": 0.8794, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.2900904751727577, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.318030022636723e-05, |
| "loss": 0.961, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.29037543634679774, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.316478502487085e-05, |
| "loss": 1.0243, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.2906603975208378, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.314925348918462e-05, |
| "loss": 0.9801, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2909453586948778, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.313370562518596e-05, |
| "loss": 1.038, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.29123031986891784, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.311814143875834e-05, |
| "loss": 0.9654, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.2915152810429579, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.310256093579155e-05, |
| "loss": 0.9441, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.2918002422169979, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.308696412218143e-05, |
| "loss": 0.9697, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.292085203391038, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.307135100383006e-05, |
| "loss": 0.8806, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.29237016456507803, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.30557215866457e-05, |
| "loss": 1.1131, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.29265512573911806, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.304007587654271e-05, |
| "loss": 1.0907, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.2929400869131581, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.302441387944167e-05, |
| "loss": 0.8848, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.2932250480871981, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.300873560126932e-05, |
| "loss": 0.969, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.29351000926123816, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.299304104795854e-05, |
| "loss": 1.0513, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2937949704352782, |
| "grad_norm": 0.625, |
| "learning_rate": 9.297733022544839e-05, |
| "loss": 0.945, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.2940799316093182, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.296160313968408e-05, |
| "loss": 1.0124, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.29436489278335826, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.294585979661694e-05, |
| "loss": 0.9433, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.2946498539573983, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.293010020220452e-05, |
| "loss": 1.0351, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.2949348151314383, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.291432436241048e-05, |
| "loss": 0.974, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.29521977630547835, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.28985322832046e-05, |
| "loss": 0.9246, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.29550473747951844, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.288272397056286e-05, |
| "loss": 0.9589, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.2957896986535585, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.286689943046734e-05, |
| "loss": 0.9765, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.2960746598275985, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.285105866890629e-05, |
| "loss": 0.8761, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.29635962100163854, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.28352016918741e-05, |
| "loss": 0.9729, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2966445821756786, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.281932850537125e-05, |
| "loss": 0.8454, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.2969295433497186, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.280343911540444e-05, |
| "loss": 0.939, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.29721450452375864, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.278753352798639e-05, |
| "loss": 1.1006, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.29749946569779867, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.277161174913605e-05, |
| "loss": 0.9765, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.2977844268718387, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.275567378487841e-05, |
| "loss": 0.9614, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.29806938804587874, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.27397196412447e-05, |
| "loss": 0.9823, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.29835434921991877, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.272374932427215e-05, |
| "loss": 1.0585, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.2986393103939588, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.270776284000417e-05, |
| "loss": 0.9719, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.29892427156799883, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.269176019449028e-05, |
| "loss": 0.982, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.2992092327420389, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.267574139378614e-05, |
| "loss": 0.9483, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.29949419391607895, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.265970644395347e-05, |
| "loss": 0.9968, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.299779155090119, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.264365535106016e-05, |
| "loss": 1.0735, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.300064116264159, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.262758812118015e-05, |
| "loss": 0.9386, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.30034907743819905, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.261150476039354e-05, |
| "loss": 1.0808, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.3006340386122391, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.25954052747865e-05, |
| "loss": 1.0207, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.3009189997862791, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.25792896704513e-05, |
| "loss": 0.9417, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.30120396096031915, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.256315795348635e-05, |
| "loss": 0.9978, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.3014889221343592, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.254701012999612e-05, |
| "loss": 0.805, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.3017738833083992, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.253084620609116e-05, |
| "loss": 1.0497, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.30205884448243925, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.251466618788815e-05, |
| "loss": 0.9615, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3023438056564793, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.249847008150987e-05, |
| "loss": 0.9506, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.30262876683051937, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.248225789308514e-05, |
| "loss": 1.0929, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.3029137280045594, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.246602962874891e-05, |
| "loss": 0.8992, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.30319868917859943, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.244978529464216e-05, |
| "loss": 1.1483, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.30348365035263947, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.243352489691201e-05, |
| "loss": 0.9486, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.3037686115266795, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.241724844171164e-05, |
| "loss": 1.056, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.30405357270071953, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.240095593520028e-05, |
| "loss": 1.053, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.30433853387475956, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.238464738354326e-05, |
| "loss": 1.0057, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.3046234950487996, |
| "grad_norm": 0.625, |
| "learning_rate": 9.2368322792912e-05, |
| "loss": 1.0151, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.30490845622283963, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.235198216948392e-05, |
| "loss": 0.9603, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.30519341739687966, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.233562551944262e-05, |
| "loss": 1.2601, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.3054783785709197, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.231925284897762e-05, |
| "loss": 0.9887, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.30576333974495973, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.230286416428462e-05, |
| "loss": 1.0693, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.30604830091899976, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.228645947156533e-05, |
| "loss": 1.0099, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.30633326209303985, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.227003877702755e-05, |
| "loss": 0.9986, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.3066182232670799, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.225360208688507e-05, |
| "loss": 0.9211, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.3069031844411199, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.223714940735781e-05, |
| "loss": 0.9174, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.30718814561515995, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.22206807446717e-05, |
| "loss": 1.1184, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.3074731067892, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.220419610505873e-05, |
| "loss": 0.9029, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.30775806796324, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.21876954947569e-05, |
| "loss": 1.035, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.30804302913728004, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.21711789200103e-05, |
| "loss": 1.0567, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.3083279903113201, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.215464638706906e-05, |
| "loss": 1.0679, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.3086129514853601, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.213809790218933e-05, |
| "loss": 0.908, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.30889791265940014, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.212153347163326e-05, |
| "loss": 1.051, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.3091828738334402, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.210495310166913e-05, |
| "loss": 1.0482, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3094678350074802, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.208835679857117e-05, |
| "loss": 1.0254, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.30975279618152024, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.207174456861967e-05, |
| "loss": 1.0842, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.31003775735556033, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.205511641810093e-05, |
| "loss": 0.9351, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.31032271852960036, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.203847235330731e-05, |
| "loss": 1.0185, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.3106076797036404, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.202181238053715e-05, |
| "loss": 1.0603, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3108926408776804, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.200513650609484e-05, |
| "loss": 1.0863, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.31117760205172046, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.198844473629076e-05, |
| "loss": 0.9454, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.3114625632257605, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.197173707744134e-05, |
| "loss": 1.0291, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.3117475243998005, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.1955013535869e-05, |
| "loss": 0.9479, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.31203248557384056, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.193827411790215e-05, |
| "loss": 0.9679, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.3123174467478806, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.192151882987528e-05, |
| "loss": 0.9627, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3126024079219206, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.19047476781288e-05, |
| "loss": 1.0748, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.31288736909596065, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.188796066900916e-05, |
| "loss": 0.944, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.3131723302700007, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.187115780886883e-05, |
| "loss": 1.0599, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.3134572914440408, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.185433910406624e-05, |
| "loss": 0.8577, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3137422526180808, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.183750456096587e-05, |
| "loss": 1.0378, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.31402721379212084, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.182065418593811e-05, |
| "loss": 1.0227, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.3143121749661609, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.180378798535943e-05, |
| "loss": 1.025, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.3145971361402009, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.178690596561224e-05, |
| "loss": 0.915, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.31488209731424094, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.177000813308493e-05, |
| "loss": 0.8836, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.31516705848828097, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.17530944941719e-05, |
| "loss": 0.9882, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.315452019662321, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.173616505527352e-05, |
| "loss": 0.9752, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.31573698083636104, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.171921982279612e-05, |
| "loss": 0.9841, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.31602194201040107, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.170225880315207e-05, |
| "loss": 0.8979, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.3163069031844411, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.16852820027596e-05, |
| "loss": 0.94, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.31659186435848113, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.166828942804306e-05, |
| "loss": 1.0453, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.31687682553252117, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.165128108543263e-05, |
| "loss": 0.7893, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.31716178670656126, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.163425698136454e-05, |
| "loss": 1.0799, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.3174467478806013, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.161721712228097e-05, |
| "loss": 0.8786, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.3177317090546413, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.160016151463004e-05, |
| "loss": 0.9087, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.31801667022868135, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.158309016486586e-05, |
| "loss": 1.0729, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.3183016314027214, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.156600307944845e-05, |
| "loss": 1.0363, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.3185865925767614, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.154890026484385e-05, |
| "loss": 0.9994, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.31887155375080145, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.153178172752397e-05, |
| "loss": 0.9361, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.3191565149248415, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.151464747396675e-05, |
| "loss": 0.91, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3194414760988815, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.149749751065605e-05, |
| "loss": 1.0182, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.31972643727292155, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.148033184408166e-05, |
| "loss": 0.996, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.3200113984469616, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.146315048073931e-05, |
| "loss": 1.0218, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.3202963596210016, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.144595342713069e-05, |
| "loss": 1.0173, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.3205813207950417, |
| "grad_norm": 0.625, |
| "learning_rate": 9.142874068976341e-05, |
| "loss": 0.9903, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.32086628196908173, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.141151227515103e-05, |
| "loss": 0.9324, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.32115124314312177, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.139426818981305e-05, |
| "loss": 0.9376, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.3214362043171618, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.137700844027486e-05, |
| "loss": 1.0273, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.32172116549120183, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.135973303306782e-05, |
| "loss": 1.064, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.32200612666524187, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.134244197472917e-05, |
| "loss": 1.0161, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3222910878392819, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.132513527180215e-05, |
| "loss": 0.9926, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.32257604901332193, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.130781293083583e-05, |
| "loss": 1.0905, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.32286101018736196, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.129047495838526e-05, |
| "loss": 0.9715, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.323145971361402, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.127312136101139e-05, |
| "loss": 1.0578, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.32343093253544203, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.125575214528104e-05, |
| "loss": 0.9972, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.32371589370948206, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.1238367317767e-05, |
| "loss": 1.0004, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.3240008548835221, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.122096688504795e-05, |
| "loss": 1.0395, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.3242858160575622, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.120355085370847e-05, |
| "loss": 0.9833, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.3245707772316022, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.118611923033904e-05, |
| "loss": 0.8799, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.32485573840564225, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.116867202153603e-05, |
| "loss": 0.8872, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3251406995796823, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.115120923390174e-05, |
| "loss": 0.9736, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.3254256607537223, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.113373087404433e-05, |
| "loss": 1.0867, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.32571062192776234, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.111623694857786e-05, |
| "loss": 1.0157, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.3259955831018024, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.109872746412233e-05, |
| "loss": 1.0677, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.3262805442758424, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.108120242730356e-05, |
| "loss": 0.9453, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.32656550544988244, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.106366184475326e-05, |
| "loss": 0.8899, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.3268504666239225, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.104610572310908e-05, |
| "loss": 1.0708, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.3271354277979625, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.10285340690145e-05, |
| "loss": 1.0211, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.32742038897200254, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.101094688911888e-05, |
| "loss": 0.9897, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.32770535014604263, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.099334419007751e-05, |
| "loss": 0.9818, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.32799031132008266, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.097572597855146e-05, |
| "loss": 1.0104, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.3282752724941227, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.095809226120775e-05, |
| "loss": 0.8764, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.3285602336681627, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.094044304471923e-05, |
| "loss": 0.925, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.32884519484220276, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.092277833576464e-05, |
| "loss": 1.007, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.3291301560162428, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.090509814102852e-05, |
| "loss": 0.9182, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.3294151171902828, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.088740246720136e-05, |
| "loss": 1.0229, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.32970007836432286, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.086969132097944e-05, |
| "loss": 1.1545, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.3299850395383629, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.085196470906494e-05, |
| "loss": 0.9633, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.3302700007124029, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.083422263816586e-05, |
| "loss": 1.0221, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.33055496188644296, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.081646511499604e-05, |
| "loss": 1.0424, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.330839923060483, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.079869214627522e-05, |
| "loss": 0.9889, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.331124884234523, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.078090373872893e-05, |
| "loss": 0.9719, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.3314098454085631, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.076309989908859e-05, |
| "loss": 1.0511, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.33169480658260314, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.07452806340914e-05, |
| "loss": 0.9553, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.3319797677566432, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.072744595048047e-05, |
| "loss": 0.9829, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.3322647289306832, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.070959585500468e-05, |
| "loss": 0.9834, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.33254969010472324, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.069173035441878e-05, |
| "loss": 1.0448, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.33283465127876327, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.067384945548333e-05, |
| "loss": 0.9865, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.3331196124528033, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.065595316496474e-05, |
| "loss": 0.9739, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.33340457362684334, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.063804148963522e-05, |
| "loss": 1.0073, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.33368953480088337, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.062011443627279e-05, |
| "loss": 0.9504, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.3339744959749234, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.060217201166135e-05, |
| "loss": 1.1431, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.33425945714896343, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.058421422259057e-05, |
| "loss": 0.9542, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.33454441832300347, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.056624107585592e-05, |
| "loss": 0.9396, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.33482937949704356, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.054825257825872e-05, |
| "loss": 0.9858, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.3351143406710836, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.053024873660608e-05, |
| "loss": 1.0003, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.3353993018451236, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.05122295577109e-05, |
| "loss": 0.981, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.33568426301916365, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.049419504839191e-05, |
| "loss": 1.0826, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.3359692241932037, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.047614521547367e-05, |
| "loss": 0.9341, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.3362541853672437, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.045808006578646e-05, |
| "loss": 0.9537, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.33653914654128375, |
| "grad_norm": 0.625, |
| "learning_rate": 9.043999960616643e-05, |
| "loss": 0.9899, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.3368241077153238, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.042190384345546e-05, |
| "loss": 0.9931, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.3371090688893638, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.040379278450128e-05, |
| "loss": 0.9624, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.33739403006340385, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.038566643615735e-05, |
| "loss": 1.0492, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.3376789912374439, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.036752480528297e-05, |
| "loss": 0.8723, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.3379639524114839, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.034936789874319e-05, |
| "loss": 1.0188, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.33824891358552395, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.033119572340887e-05, |
| "loss": 1.0452, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.33853387475956404, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.031300828615662e-05, |
| "loss": 0.9278, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.33881883593360407, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.029480559386882e-05, |
| "loss": 1.0037, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.3391037971076441, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.027658765343365e-05, |
| "loss": 0.9879, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.33938875828168413, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.025835447174505e-05, |
| "loss": 0.8852, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.33967371945572417, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.02401060557027e-05, |
| "loss": 0.9296, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.3399586806297642, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.022184241221209e-05, |
| "loss": 0.9042, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.34024364180380423, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.020356354818444e-05, |
| "loss": 1.0113, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.34052860297784426, |
| "grad_norm": 0.5703125, |
| "learning_rate": 9.018526947053676e-05, |
| "loss": 0.9, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.3408135641518843, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.016696018619179e-05, |
| "loss": 1.04, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.34109852532592433, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.014863570207802e-05, |
| "loss": 1.0158, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.34138348649996436, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.013029602512972e-05, |
| "loss": 0.9121, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.3416684476740044, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.011194116228689e-05, |
| "loss": 1.1199, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.3419534088480444, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.009357112049526e-05, |
| "loss": 1.0077, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3422383700220845, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.007518590670636e-05, |
| "loss": 0.8883, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.34252333119612455, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.00567855278774e-05, |
| "loss": 0.9633, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.3428082923701646, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.003836999097135e-05, |
| "loss": 0.9557, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.3430932535442046, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.001993930295694e-05, |
| "loss": 0.8009, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.34337821471824465, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.000149347080862e-05, |
| "loss": 0.921, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.3436631758922847, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.998303250150653e-05, |
| "loss": 0.9377, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.3439481370663247, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.99645564020366e-05, |
| "loss": 1.0437, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.34423309824036474, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.994606517939043e-05, |
| "loss": 0.97, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.3445180594144048, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.992755884056542e-05, |
| "loss": 0.9603, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.3448030205884448, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.99090373925646e-05, |
| "loss": 1.036, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.34508798176248484, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.989050084239677e-05, |
| "loss": 1.0138, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.3453729429365249, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.987194919707643e-05, |
| "loss": 0.9105, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.34565790411056496, |
| "grad_norm": 0.703125, |
| "learning_rate": 8.985338246362381e-05, |
| "loss": 0.9821, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.345942865284605, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.98348006490648e-05, |
| "loss": 1.0073, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.346227826458645, |
| "grad_norm": 0.65234375, |
| "learning_rate": 8.981620376043108e-05, |
| "loss": 1.0402, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.34651278763268506, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.979759180475992e-05, |
| "loss": 0.8968, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.3467977488067251, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.977896478909445e-05, |
| "loss": 0.9055, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.3470827099807651, |
| "grad_norm": 0.640625, |
| "learning_rate": 8.976032272048333e-05, |
| "loss": 1.0307, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.34736767115480516, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.974166560598102e-05, |
| "loss": 1.0401, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.3476526323288452, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.972299345264767e-05, |
| "loss": 0.9866, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3479375935028852, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.970430626754905e-05, |
| "loss": 0.9936, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.34822255467692526, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.96856040577567e-05, |
| "loss": 0.9534, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.3485075158509653, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.96668868303478e-05, |
| "loss": 0.9965, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.3487924770250053, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.964815459240522e-05, |
| "loss": 1.1044, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.34907743819904535, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.962940735101752e-05, |
| "loss": 0.9528, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.34936239937308544, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.961064511327895e-05, |
| "loss": 1.083, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.3496473605471255, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.959186788628937e-05, |
| "loss": 0.9769, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.3499323217211655, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.957307567715441e-05, |
| "loss": 1.0168, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.35021728289520554, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.95542684929853e-05, |
| "loss": 0.9218, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.35050224406924557, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.953544634089894e-05, |
| "loss": 0.9457, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3507872052432856, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.951660922801792e-05, |
| "loss": 1.1793, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.35107216641732564, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.949775716147047e-05, |
| "loss": 1.014, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.35135712759136567, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.947889014839053e-05, |
| "loss": 0.8847, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.3516420887654057, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.94600081959176e-05, |
| "loss": 0.9572, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.35192704993944574, |
| "grad_norm": 0.546875, |
| "learning_rate": 8.944111131119693e-05, |
| "loss": 0.8447, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.35221201111348577, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.942219950137935e-05, |
| "loss": 1.0276, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.3524969722875258, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.94032727736214e-05, |
| "loss": 0.9163, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.3527819334615659, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.93843311350852e-05, |
| "loss": 0.9324, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.3530668946356059, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.936537459293857e-05, |
| "loss": 0.9592, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.35335185580964595, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.934640315435493e-05, |
| "loss": 1.0246, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.353636816983686, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.932741682651335e-05, |
| "loss": 0.9345, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.353921778157726, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.930841561659853e-05, |
| "loss": 0.9938, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.35420673933176605, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.928939953180084e-05, |
| "loss": 0.912, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.3544917005058061, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.927036857931621e-05, |
| "loss": 1.0043, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.3547766616798461, |
| "grad_norm": 0.625, |
| "learning_rate": 8.925132276634625e-05, |
| "loss": 1.0069, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.35506162285388615, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.923226210009816e-05, |
| "loss": 0.8568, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.3553465840279262, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.921318658778482e-05, |
| "loss": 1.0455, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.3556315452019662, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.919409623662463e-05, |
| "loss": 0.978, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.35591650637600625, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.917499105384168e-05, |
| "loss": 0.9836, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.3562014675500463, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.915587104666567e-05, |
| "loss": 0.9179, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.35648642872408637, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.913673622233187e-05, |
| "loss": 1.0516, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.3567713898981264, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.911758658808118e-05, |
| "loss": 0.9401, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.35705635107216643, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.909842215116013e-05, |
| "loss": 0.939, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.35734131224620647, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.907924291882079e-05, |
| "loss": 0.9655, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.3576262734202465, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.906004889832089e-05, |
| "loss": 1.0238, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.35791123459428653, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.904084009692371e-05, |
| "loss": 0.9008, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.35819619576832656, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.902161652189815e-05, |
| "loss": 0.9832, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.3584811569423666, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.900237818051869e-05, |
| "loss": 1.0039, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.35876611811640663, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.898312508006542e-05, |
| "loss": 1.0734, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.35905107929044666, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.896385722782398e-05, |
| "loss": 1.0488, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3593360404644867, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.894457463108562e-05, |
| "loss": 0.9711, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.3596210016385267, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.892527729714712e-05, |
| "loss": 1.0352, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.3599059628125668, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.890596523331094e-05, |
| "loss": 0.9508, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.36019092398660685, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.8886638446885e-05, |
| "loss": 1.008, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.3604758851606469, |
| "grad_norm": 0.625, |
| "learning_rate": 8.886729694518285e-05, |
| "loss": 0.9686, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.3607608463346869, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.884794073552363e-05, |
| "loss": 0.9213, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.36104580750872695, |
| "grad_norm": 0.625, |
| "learning_rate": 8.882856982523196e-05, |
| "loss": 0.9974, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.361330768682767, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.88091842216381e-05, |
| "loss": 0.9662, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.361615729856807, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.878978393207788e-05, |
| "loss": 0.8937, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.36190069103084704, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.877036896389262e-05, |
| "loss": 0.8857, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3621856522048871, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.875093932442924e-05, |
| "loss": 0.941, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.3624706133789271, |
| "grad_norm": 0.63671875, |
| "learning_rate": 8.873149502104019e-05, |
| "loss": 1.0399, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.36275557455296714, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.871203606108351e-05, |
| "loss": 1.0476, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.3630405357270072, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.869256245192272e-05, |
| "loss": 0.9912, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.3633254969010472, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.867307420092695e-05, |
| "loss": 0.8887, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.3636104580750873, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.865357131547081e-05, |
| "loss": 0.9155, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.3638954192491273, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.863405380293451e-05, |
| "loss": 0.9796, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.36418038042316736, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.861452167070375e-05, |
| "loss": 0.9555, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.3644653415972074, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.859497492616977e-05, |
| "loss": 0.8128, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.3647503027712474, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.857541357672937e-05, |
| "loss": 1.0065, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.36503526394528746, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.855583762978482e-05, |
| "loss": 1.0453, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.3653202251193275, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.853624709274398e-05, |
| "loss": 0.8737, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.3656051862933675, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.851664197302016e-05, |
| "loss": 1.0193, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.36589014746740756, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.849702227803225e-05, |
| "loss": 0.9143, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.3661751086414476, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.847738801520463e-05, |
| "loss": 1.0282, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.3664600698154876, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.84577391919672e-05, |
| "loss": 0.8523, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.36674503098952765, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.843807581575534e-05, |
| "loss": 0.9639, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.3670299921635677, |
| "grad_norm": 0.546875, |
| "learning_rate": 8.841839789400998e-05, |
| "loss": 0.8762, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.3673149533376078, |
| "grad_norm": 0.625, |
| "learning_rate": 8.839870543417752e-05, |
| "loss": 1.0485, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.3675999145116478, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.83789984437099e-05, |
| "loss": 0.9031, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.36788487568568784, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.835927693006453e-05, |
| "loss": 0.981, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.3681698368597279, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.83395409007043e-05, |
| "loss": 1.066, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.3684547980337679, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.831979036309763e-05, |
| "loss": 0.9907, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.36873975920780794, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.830002532471842e-05, |
| "loss": 0.9663, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.36902472038184797, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.828024579304603e-05, |
| "loss": 1.0183, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.369309681555888, |
| "grad_norm": 0.73828125, |
| "learning_rate": 8.826045177556535e-05, |
| "loss": 0.8963, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.36959464272992804, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.824064327976672e-05, |
| "loss": 0.9051, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.36987960390396807, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.822082031314593e-05, |
| "loss": 1.0171, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.3701645650780081, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.820098288320433e-05, |
| "loss": 0.891, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.37044952625204813, |
| "grad_norm": 0.6484375, |
| "learning_rate": 8.818113099744869e-05, |
| "loss": 1.0859, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3707344874260882, |
| "grad_norm": 0.54296875, |
| "learning_rate": 8.816126466339126e-05, |
| "loss": 0.9437, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.37101944860012825, |
| "grad_norm": 0.6484375, |
| "learning_rate": 8.814138388854971e-05, |
| "loss": 0.9826, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.3713044097741683, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.812148868044726e-05, |
| "loss": 0.9527, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.3715893709482083, |
| "grad_norm": 0.640625, |
| "learning_rate": 8.810157904661253e-05, |
| "loss": 1.0328, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.37187433212224835, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.808165499457964e-05, |
| "loss": 0.8642, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.3721592932962884, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.80617165318881e-05, |
| "loss": 0.9816, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.3724442544703284, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.804176366608297e-05, |
| "loss": 0.9911, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.37272921564436845, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.802179640471467e-05, |
| "loss": 1.0987, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.3730141768184085, |
| "grad_norm": 0.625, |
| "learning_rate": 8.800181475533912e-05, |
| "loss": 1.0396, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.3732991379924485, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.798181872551769e-05, |
| "loss": 1.0355, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.37358409916648855, |
| "grad_norm": 0.64453125, |
| "learning_rate": 8.796180832281714e-05, |
| "loss": 1.0007, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.3738690603405286, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.79417835548097e-05, |
| "loss": 0.9801, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.3741540215145686, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.792174442907307e-05, |
| "loss": 0.8304, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.3744389826886087, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.790169095319032e-05, |
| "loss": 0.9567, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.37472394386264873, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.788162313474998e-05, |
| "loss": 0.9262, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.37500890503668877, |
| "grad_norm": 0.71484375, |
| "learning_rate": 8.786154098134604e-05, |
| "loss": 0.9575, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.3752938662107288, |
| "grad_norm": 0.65234375, |
| "learning_rate": 8.784144450057785e-05, |
| "loss": 0.9661, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.37557882738476883, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.782133370005023e-05, |
| "loss": 0.9039, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.37586378855880886, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.78012085873734e-05, |
| "loss": 0.8655, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.3761487497328489, |
| "grad_norm": 0.69140625, |
| "learning_rate": 8.778106917016298e-05, |
| "loss": 1.0576, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.37643371090688893, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.776091545604006e-05, |
| "loss": 0.9396, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.37671867208092896, |
| "grad_norm": 0.6796875, |
| "learning_rate": 8.774074745263106e-05, |
| "loss": 1.1204, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.377003633254969, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.772056516756788e-05, |
| "loss": 0.9742, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.377288594429009, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.770036860848779e-05, |
| "loss": 1.1384, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.37757355560304906, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.768015778303344e-05, |
| "loss": 1.0291, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.37785851677708915, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.765993269885293e-05, |
| "loss": 0.9446, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.3781434779511292, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.763969336359972e-05, |
| "loss": 0.8731, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.3784284391251692, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.761943978493266e-05, |
| "loss": 0.9028, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.37871340029920925, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.759917197051603e-05, |
| "loss": 0.8729, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.3789983614732493, |
| "grad_norm": 0.625, |
| "learning_rate": 8.757888992801945e-05, |
| "loss": 1.005, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.3792833226472893, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.755859366511796e-05, |
| "loss": 0.9928, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.37956828382132934, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.753828318949196e-05, |
| "loss": 0.9139, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.3798532449953694, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.75179585088272e-05, |
| "loss": 0.9584, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.3801382061694094, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.749761963081488e-05, |
| "loss": 1.0471, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.38042316734344944, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.747726656315152e-05, |
| "loss": 0.989, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.3807081285174895, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.7456899313539e-05, |
| "loss": 0.9473, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.3809930896915295, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.743651788968461e-05, |
| "loss": 1.0057, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.38127805086556954, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.741612229930097e-05, |
| "loss": 0.8847, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.38156301203960963, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.739571255010606e-05, |
| "loss": 1.0922, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.38184797321364966, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.737528864982328e-05, |
| "loss": 0.9593, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3821329343876897, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.735485060618127e-05, |
| "loss": 0.9891, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.3824178955617297, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.733439842691412e-05, |
| "loss": 0.9242, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.38270285673576976, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.731393211976123e-05, |
| "loss": 0.9275, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.3829878179098098, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.729345169246734e-05, |
| "loss": 0.916, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.3832727790838498, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.727295715278257e-05, |
| "loss": 1.0325, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.38355774025788986, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.725244850846233e-05, |
| "loss": 0.9395, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.3838427014319299, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.723192576726742e-05, |
| "loss": 0.934, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.3841276626059699, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.721138893696392e-05, |
| "loss": 0.9359, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.38441262378000995, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.71908380253233e-05, |
| "loss": 0.9401, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.38469758495405, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.717027304012228e-05, |
| "loss": 0.9891, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3849825461280901, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.714969398914301e-05, |
| "loss": 1.037, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.3852675073021301, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.712910088017287e-05, |
| "loss": 1.0466, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.38555246847617014, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.71084937210046e-05, |
| "loss": 0.9235, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.3858374296502102, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.708787251943625e-05, |
| "loss": 0.914, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.3861223908242502, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.70672372832712e-05, |
| "loss": 0.9256, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.38640735199829024, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.70465880203181e-05, |
| "loss": 0.9568, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.38669231317233027, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.702592473839097e-05, |
| "loss": 0.9254, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.3869772743463703, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.700524744530907e-05, |
| "loss": 0.9149, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.38726223552041034, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.698455614889703e-05, |
| "loss": 0.9946, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.38754719669445037, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.696385085698468e-05, |
| "loss": 1.0242, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3878321578684904, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.694313157740727e-05, |
| "loss": 1.0245, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.38811711904253043, |
| "grad_norm": 0.65234375, |
| "learning_rate": 8.692239831800524e-05, |
| "loss": 1.0409, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.38840208021657047, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.690165108662438e-05, |
| "loss": 0.8971, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.38868704139061055, |
| "grad_norm": 0.625, |
| "learning_rate": 8.688088989111573e-05, |
| "loss": 1.0359, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.3889720025646506, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.686011473933566e-05, |
| "loss": 0.977, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.3892569637386906, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.683932563914576e-05, |
| "loss": 0.9534, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.38954192491273065, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.681852259841297e-05, |
| "loss": 0.8909, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.3898268860867707, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.679770562500943e-05, |
| "loss": 0.98, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.3901118472608107, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.67768747268126e-05, |
| "loss": 0.9297, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.39039680843485075, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.675602991170521e-05, |
| "loss": 0.898, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3906817696088908, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.673517118757524e-05, |
| "loss": 1.0496, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.3909667307829308, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.671429856231593e-05, |
| "loss": 1.0079, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.39125169195697085, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.66934120438258e-05, |
| "loss": 0.919, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.3915366531310109, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.667251164000859e-05, |
| "loss": 1.0264, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.3918216143050509, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.665159735877335e-05, |
| "loss": 1.0051, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.39210657547909095, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.663066920803435e-05, |
| "loss": 0.8353, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.39239153665313103, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.66097271957111e-05, |
| "loss": 1.0129, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.39267649782717107, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.658877132972835e-05, |
| "loss": 0.9286, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.3929614590012111, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.656780161801615e-05, |
| "loss": 1.0064, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.39324642017525113, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.654681806850971e-05, |
| "loss": 1.0259, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.39353138134929116, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.652582068914952e-05, |
| "loss": 1.0131, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.3938163425233312, |
| "grad_norm": 0.65625, |
| "learning_rate": 8.65048094878813e-05, |
| "loss": 1.0303, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.39410130369737123, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.648378447265603e-05, |
| "loss": 1.0413, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.39438626487141126, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.646274565142984e-05, |
| "loss": 0.9901, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.3946712260454513, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.644169303216414e-05, |
| "loss": 0.8797, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.39495618721949133, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.642062662282557e-05, |
| "loss": 0.9751, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.39524114839353136, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.639954643138595e-05, |
| "loss": 0.8173, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.3955261095675714, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.637845246582234e-05, |
| "loss": 0.9654, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.3958110707416115, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.635734473411701e-05, |
| "loss": 0.987, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.3960960319156515, |
| "grad_norm": 0.6484375, |
| "learning_rate": 8.633622324425745e-05, |
| "loss": 1.049, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.39638099308969155, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.63150880042363e-05, |
| "loss": 1.0071, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.3966659542637316, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.629393902205149e-05, |
| "loss": 0.9277, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.3969509154377716, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.62727763057061e-05, |
| "loss": 0.844, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.39723587661181164, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.625159986320838e-05, |
| "loss": 0.9436, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.3975208377858517, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.623040970257183e-05, |
| "loss": 0.9101, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.3978057989598917, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.620920583181513e-05, |
| "loss": 0.9121, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.39809076013393174, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.618798825896213e-05, |
| "loss": 0.9445, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.3983757213079718, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.616675699204185e-05, |
| "loss": 0.9364, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.3986606824820118, |
| "grad_norm": 0.625, |
| "learning_rate": 8.614551203908853e-05, |
| "loss": 1.036, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.39894564365605184, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.612425340814159e-05, |
| "loss": 0.9296, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3992306048300919, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.61029811072456e-05, |
| "loss": 0.8705, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.39951556600413196, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.608169514445028e-05, |
| "loss": 0.9125, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.399800527178172, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.606039552781058e-05, |
| "loss": 0.9602, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.400085488352212, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.60390822653866e-05, |
| "loss": 0.9155, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.40037044952625206, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.601775536524355e-05, |
| "loss": 0.9916, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.4006554107002921, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.599641483545189e-05, |
| "loss": 0.9282, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.4009403718743321, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.597506068408717e-05, |
| "loss": 1.0603, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.40122533304837216, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.59536929192301e-05, |
| "loss": 1.0229, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.4015102942224122, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.593231154896659e-05, |
| "loss": 0.894, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.4017952553964522, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.591091658138761e-05, |
| "loss": 0.8648, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.40208021657049225, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.58895080245894e-05, |
| "loss": 0.9662, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.4023651777445323, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.586808588667323e-05, |
| "loss": 1.0265, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.4026501389185723, |
| "grad_norm": 0.625, |
| "learning_rate": 8.584665017574556e-05, |
| "loss": 1.0178, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.4029351000926124, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.582520089991798e-05, |
| "loss": 1.0322, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.40322006126665244, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.580373806730724e-05, |
| "loss": 0.9437, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.4035050224406925, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.578226168603513e-05, |
| "loss": 0.9531, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.4037899836147325, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.576077176422867e-05, |
| "loss": 0.9953, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.40407494478877254, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.573926831001995e-05, |
| "loss": 0.8791, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.40435990596281257, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.571775133154622e-05, |
| "loss": 0.9967, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.4046448671368526, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.569622083694977e-05, |
| "loss": 0.9516, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.40492982831089264, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.567467683437811e-05, |
| "loss": 1.0403, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.40521478948493267, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.565311933198376e-05, |
| "loss": 0.9238, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.4054997506589727, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.563154833792444e-05, |
| "loss": 0.9464, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.40578471183301273, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.56099638603629e-05, |
| "loss": 0.9654, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.40606967300705277, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.558836590746705e-05, |
| "loss": 0.9547, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.4063546341810928, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.556675448740985e-05, |
| "loss": 0.9017, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.4066395953551329, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.554512960836937e-05, |
| "loss": 1.0015, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.4069245565291729, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.552349127852883e-05, |
| "loss": 0.9377, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.40720951770321295, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.550183950607644e-05, |
| "loss": 0.8432, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.407494478877253, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.548017429920559e-05, |
| "loss": 0.9568, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.407779440051293, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.545849566611467e-05, |
| "loss": 1.0386, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.40806440122533305, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.543680361500725e-05, |
| "loss": 0.9595, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.4083493623993731, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.541509815409186e-05, |
| "loss": 0.9505, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.4086343235734131, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.539337929158222e-05, |
| "loss": 0.9916, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.40891928474745315, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.537164703569703e-05, |
| "loss": 0.9698, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.4092042459214932, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.53499013946601e-05, |
| "loss": 0.9859, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.4094892070955332, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.532814237670032e-05, |
| "loss": 1.0218, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.40977416826957325, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.530636999005162e-05, |
| "loss": 0.8932, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.41005912944361333, |
| "grad_norm": 0.61328125, |
| "learning_rate": 8.528458424295297e-05, |
| "loss": 1.0003, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.41034409061765337, |
| "grad_norm": 0.64453125, |
| "learning_rate": 8.526278514364843e-05, |
| "loss": 0.9291, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.4106290517916934, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.524097270038712e-05, |
| "loss": 0.8347, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.41091401296573343, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.521914692142313e-05, |
| "loss": 1.0188, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.41119897413977347, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.519730781501571e-05, |
| "loss": 0.9563, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.4114839353138135, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.51754553894291e-05, |
| "loss": 0.9416, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.41176889648785353, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.515358965293256e-05, |
| "loss": 0.8633, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.41205385766189356, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.513171061380041e-05, |
| "loss": 0.9688, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.4123388188359336, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.510981828031199e-05, |
| "loss": 0.8366, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.41262378000997363, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.508791266075171e-05, |
| "loss": 1.068, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.41290874118401366, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.506599376340895e-05, |
| "loss": 0.8938, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.4131937023580537, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.504406159657816e-05, |
| "loss": 1.0014, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4134786635320937, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.502211616855878e-05, |
| "loss": 1.0489, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.4137636247061338, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.50001574876553e-05, |
| "loss": 0.9896, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.41404858588017385, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.49781855621772e-05, |
| "loss": 0.9565, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.4143335470542139, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.495620040043896e-05, |
| "loss": 0.9884, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.4146185082282539, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.493420201076014e-05, |
| "loss": 0.8105, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.41490346940229395, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.49121904014652e-05, |
| "loss": 0.9667, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.415188430576334, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.48901655808837e-05, |
| "loss": 0.9508, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.415473391750374, |
| "grad_norm": 0.66015625, |
| "learning_rate": 8.486812755735011e-05, |
| "loss": 1.0046, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.41575835292441404, |
| "grad_norm": 0.546875, |
| "learning_rate": 8.484607633920398e-05, |
| "loss": 0.8572, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.4160433140984541, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.48240119347898e-05, |
| "loss": 0.9747, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4163282752724941, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.480193435245708e-05, |
| "loss": 0.9554, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.41661323644653414, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.477984360056028e-05, |
| "loss": 0.9253, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.4168981976205742, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.475773968745888e-05, |
| "loss": 0.9911, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.4171831587946142, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.473562262151733e-05, |
| "loss": 0.8795, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.4174681199686543, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.471349241110503e-05, |
| "loss": 0.9323, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.4177530811426943, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.469134906459642e-05, |
| "loss": 1.0185, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.41803804231673436, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.46691925903708e-05, |
| "loss": 1.0109, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.4183230034907744, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.464702299681258e-05, |
| "loss": 0.9631, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.4186079646648144, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.462484029231102e-05, |
| "loss": 0.9222, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.41889292583885446, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.460264448526038e-05, |
| "loss": 1.0884, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.4191778870128945, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.458043558405989e-05, |
| "loss": 0.9434, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.4194628481869345, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.455821359711374e-05, |
| "loss": 0.8854, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.41974780936097456, |
| "grad_norm": 0.546875, |
| "learning_rate": 8.453597853283103e-05, |
| "loss": 0.8779, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.4200327705350146, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.451373039962584e-05, |
| "loss": 0.8777, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.4203177317090546, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.44914692059172e-05, |
| "loss": 1.0149, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.42060269288309465, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.446919496012909e-05, |
| "loss": 0.9547, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.42088765405713474, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.444690767069037e-05, |
| "loss": 0.9941, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.4211726152311748, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.442460734603492e-05, |
| "loss": 0.9185, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.4214575764052148, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.440229399460147e-05, |
| "loss": 0.9171, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.42174253757925484, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.437996762483376e-05, |
| "loss": 0.846, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.42202749875329487, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.435762824518039e-05, |
| "loss": 1.0084, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.4223124599273349, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.433527586409493e-05, |
| "loss": 0.8722, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.42259742110137494, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.431291049003584e-05, |
| "loss": 0.9551, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.42288238227541497, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.42905321314665e-05, |
| "loss": 0.9697, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.423167343449455, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.426814079685521e-05, |
| "loss": 1.0667, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.42345230462349503, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.42457364946752e-05, |
| "loss": 1.029, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.42373726579753507, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.422331923340457e-05, |
| "loss": 0.899, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.4240222269715751, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.420088902152636e-05, |
| "loss": 1.0318, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.42430718814561513, |
| "grad_norm": 0.546875, |
| "learning_rate": 8.417844586752845e-05, |
| "loss": 0.8648, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.4245921493196552, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.41559897799037e-05, |
| "loss": 1.043, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.42487711049369525, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.413352076714982e-05, |
| "loss": 0.9173, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.4251620716677353, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.411103883776941e-05, |
| "loss": 0.9297, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.4254470328417753, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.408854400026996e-05, |
| "loss": 1.0108, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.42573199401581535, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.406603626316382e-05, |
| "loss": 0.9977, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.4260169551898554, |
| "grad_norm": 0.64453125, |
| "learning_rate": 8.404351563496831e-05, |
| "loss": 1.2144, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.4263019163638954, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.402098212420553e-05, |
| "loss": 0.9607, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.42658687753793545, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.399843573940248e-05, |
| "loss": 0.8164, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.4268718387119755, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.397587648909107e-05, |
| "loss": 0.987, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.4271567998860155, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.395330438180803e-05, |
| "loss": 0.9275, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.42744176106005555, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.393071942609501e-05, |
| "loss": 0.877, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4277267222340956, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.390812163049845e-05, |
| "loss": 1.0126, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.42801168340813567, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.388551100356971e-05, |
| "loss": 0.9069, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.4282966445821757, |
| "grad_norm": 0.72265625, |
| "learning_rate": 8.386288755386499e-05, |
| "loss": 1.0696, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.42858160575621573, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.384025128994532e-05, |
| "loss": 0.9017, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.42886656693025577, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.38176022203766e-05, |
| "loss": 0.9497, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.4291515281042958, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.37949403537296e-05, |
| "loss": 0.9465, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.42943648927833583, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.377226569857985e-05, |
| "loss": 0.9455, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.42972145045237586, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.374957826350781e-05, |
| "loss": 0.9299, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.4300064116264159, |
| "grad_norm": 0.51953125, |
| "learning_rate": 8.372687805709873e-05, |
| "loss": 0.8362, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.43029137280045593, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.37041650879427e-05, |
| "loss": 1.0492, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.43057633397449596, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.368143936463465e-05, |
| "loss": 0.9604, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.430861295148536, |
| "grad_norm": 0.54296875, |
| "learning_rate": 8.365870089577431e-05, |
| "loss": 0.8321, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.431146256322576, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.363594968996628e-05, |
| "loss": 1.0465, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.43143121749661606, |
| "grad_norm": 0.640625, |
| "learning_rate": 8.361318575581992e-05, |
| "loss": 1.0719, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.43171617867065615, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.359040910194946e-05, |
| "loss": 0.9021, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.4320011398446962, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.35676197369739e-05, |
| "loss": 0.9804, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.4322861010187362, |
| "grad_norm": 0.84765625, |
| "learning_rate": 8.354481766951712e-05, |
| "loss": 0.8445, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.43257106219277625, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.35220029082077e-05, |
| "loss": 0.8799, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.4328560233668163, |
| "grad_norm": 0.67578125, |
| "learning_rate": 8.349917546167909e-05, |
| "loss": 1.0454, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.4331409845408563, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.347633533856956e-05, |
| "loss": 0.9651, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.43342594571489634, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.345348254752214e-05, |
| "loss": 1.0017, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.4337109068889364, |
| "grad_norm": 0.625, |
| "learning_rate": 8.343061709718465e-05, |
| "loss": 0.9982, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.4339958680629764, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.340773899620971e-05, |
| "loss": 1.0013, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.43428082923701644, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.338484825325476e-05, |
| "loss": 1.0574, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.4345657904110565, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.336194487698194e-05, |
| "loss": 0.9522, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.4348507515850965, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.333902887605829e-05, |
| "loss": 0.9004, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.4351357127591366, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.331610025915551e-05, |
| "loss": 0.9407, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.4354206739331766, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.329315903495016e-05, |
| "loss": 0.9734, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.43570563510721666, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.327020521212352e-05, |
| "loss": 0.8726, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.4359905962812567, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.324723879936164e-05, |
| "loss": 0.9743, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.4362755574552967, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.32242598053554e-05, |
| "loss": 0.9513, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.43656051862933676, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.320126823880032e-05, |
| "loss": 0.9463, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.4368454798033768, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.31782641083968e-05, |
| "loss": 0.7969, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.4371304409774168, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.315524742284992e-05, |
| "loss": 0.9063, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.43741540215145686, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.313221819086954e-05, |
| "loss": 0.8677, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.4377003633254969, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.310917642117024e-05, |
| "loss": 1.0184, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.4379853244995369, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.30861221224714e-05, |
| "loss": 0.8789, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.43827028567357695, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.306305530349708e-05, |
| "loss": 0.8259, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.438555246847617, |
| "grad_norm": 0.59375, |
| "learning_rate": 8.303997597297612e-05, |
| "loss": 0.9158, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.4388402080216571, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.301688413964207e-05, |
| "loss": 1.0237, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4391251691956971, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.29937798122332e-05, |
| "loss": 0.9758, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.43941013036973714, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.297066299949255e-05, |
| "loss": 0.8428, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.4396950915437772, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.294753371016786e-05, |
| "loss": 0.927, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.4399800527178172, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.29243919530116e-05, |
| "loss": 1.0507, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.44026501389185724, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.290123773678093e-05, |
| "loss": 0.8273, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.44054997506589727, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.287807107023776e-05, |
| "loss": 0.8995, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.4408349362399373, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.28548919621487e-05, |
| "loss": 0.9847, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.44111989741397734, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.283170042128508e-05, |
| "loss": 0.9554, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.44140485858801737, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.280849645642288e-05, |
| "loss": 0.9638, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.4416898197620574, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.278528007634286e-05, |
| "loss": 0.9492, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.44197478093609743, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.27620512898304e-05, |
| "loss": 0.9208, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.4422597421101375, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.273881010567566e-05, |
| "loss": 0.9669, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.44254470328417755, |
| "grad_norm": 0.55078125, |
| "learning_rate": 8.271555653267342e-05, |
| "loss": 0.9314, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.4428296644582176, |
| "grad_norm": 0.6328125, |
| "learning_rate": 8.269229057962318e-05, |
| "loss": 1.1307, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.4431146256322576, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.266901225532911e-05, |
| "loss": 0.9241, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.44339958680629765, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.264572156860008e-05, |
| "loss": 0.8712, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.4436845479803377, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.262241852824964e-05, |
| "loss": 0.9752, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.4439695091543777, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.259910314309598e-05, |
| "loss": 0.9804, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.44425447032841775, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.257577542196198e-05, |
| "loss": 0.948, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.4445394315024578, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.255243537367523e-05, |
| "loss": 0.9416, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.4448243926764978, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.252908300706792e-05, |
| "loss": 0.9388, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.44510935385053785, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.250571833097693e-05, |
| "loss": 0.929, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.4453943150245779, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.248234135424382e-05, |
| "loss": 0.9338, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.4456792761986179, |
| "grad_norm": 0.54296875, |
| "learning_rate": 8.245895208571475e-05, |
| "loss": 0.92, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.445964237372658, |
| "grad_norm": 0.6640625, |
| "learning_rate": 8.243555053424057e-05, |
| "loss": 0.9015, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.44624919854669803, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.241213670867679e-05, |
| "loss": 0.7701, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.44653415972073807, |
| "grad_norm": 0.54296875, |
| "learning_rate": 8.238871061788353e-05, |
| "loss": 0.8525, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.4468191208947781, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.236527227072558e-05, |
| "loss": 0.9254, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.44710408206881813, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.234182167607235e-05, |
| "loss": 0.8946, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.44738904324285816, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.231835884279788e-05, |
| "loss": 0.8708, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4476740044168982, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.229488377978087e-05, |
| "loss": 0.7654, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.44795896559093823, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.22713964959046e-05, |
| "loss": 0.9207, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.44824392676497826, |
| "grad_norm": 0.58203125, |
| "learning_rate": 8.224789700005704e-05, |
| "loss": 0.9206, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.4485288879390183, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.222438530113071e-05, |
| "loss": 0.9539, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.4488138491130583, |
| "grad_norm": 0.59765625, |
| "learning_rate": 8.220086140802281e-05, |
| "loss": 0.9634, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.44909881028709836, |
| "grad_norm": 0.5859375, |
| "learning_rate": 8.217732532963513e-05, |
| "loss": 0.9676, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.4493837714611384, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.215377707487404e-05, |
| "loss": 0.9847, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.4496687326351785, |
| "grad_norm": 0.57421875, |
| "learning_rate": 8.213021665265057e-05, |
| "loss": 0.9436, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.4499536938092185, |
| "grad_norm": 0.625, |
| "learning_rate": 8.210664407188032e-05, |
| "loss": 0.9658, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.45023865498325855, |
| "grad_norm": 0.62109375, |
| "learning_rate": 8.20830593414835e-05, |
| "loss": 1.0291, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.45023865498325855, |
| "eval_loss": 0.9564817547798157, |
| "eval_model_preparation_time": 0.0565, |
| "eval_runtime": 300.2915, |
| "eval_samples_per_second": 5.138, |
| "eval_steps_per_second": 1.285, |
| "step": 1580 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 5265, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1580, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.1604940962994954e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|