{
  "best_metric": 1.0219863653182983,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 1.4205607476635513,
  "eval_steps": 25,
  "global_step": 190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007476635514018692,
      "grad_norm": 53.585628509521484,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 5.7506,
      "step": 1
    },
    {
      "epoch": 0.007476635514018692,
      "eval_loss": 6.5497517585754395,
      "eval_runtime": 1.7647,
      "eval_samples_per_second": 28.333,
      "eval_steps_per_second": 3.967,
      "step": 1
    },
    {
      "epoch": 0.014953271028037384,
      "grad_norm": 51.50555419921875,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 6.2904,
      "step": 2
    },
    {
      "epoch": 0.022429906542056073,
      "grad_norm": 35.29224395751953,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 6.3377,
      "step": 3
    },
    {
      "epoch": 0.029906542056074768,
      "grad_norm": 28.75082778930664,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 6.1761,
      "step": 4
    },
    {
      "epoch": 0.037383177570093455,
      "grad_norm": 21.41317367553711,
      "learning_rate": 5.555555555555556e-05,
      "loss": 5.9294,
      "step": 5
    },
    {
      "epoch": 0.044859813084112146,
      "grad_norm": 17.97064971923828,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.5866,
      "step": 6
    },
    {
      "epoch": 0.052336448598130844,
      "grad_norm": 10.495800018310547,
      "learning_rate": 7.777777777777778e-05,
      "loss": 5.4007,
      "step": 7
    },
    {
      "epoch": 0.059813084112149535,
      "grad_norm": 9.666411399841309,
      "learning_rate": 8.888888888888889e-05,
      "loss": 4.8408,
      "step": 8
    },
    {
      "epoch": 0.06728971962616823,
      "grad_norm": 9.070466041564941,
      "learning_rate": 0.0001,
      "loss": 4.3133,
      "step": 9
    },
    {
      "epoch": 0.07476635514018691,
      "grad_norm": 8.968005180358887,
      "learning_rate": 9.999322180262823e-05,
      "loss": 3.9532,
      "step": 10
    },
    {
      "epoch": 0.08224299065420561,
      "grad_norm": 8.822087287902832,
      "learning_rate": 9.997288925246668e-05,
      "loss": 3.5377,
      "step": 11
    },
    {
      "epoch": 0.08971962616822429,
      "grad_norm": 6.6545939445495605,
      "learning_rate": 9.993900847476147e-05,
      "loss": 3.1288,
      "step": 12
    },
    {
      "epoch": 0.09719626168224299,
      "grad_norm": 6.504652500152588,
      "learning_rate": 9.989158967620588e-05,
      "loss": 2.662,
      "step": 13
    },
    {
      "epoch": 0.10467289719626169,
      "grad_norm": 6.791845321655273,
      "learning_rate": 9.983064714186548e-05,
      "loss": 2.686,
      "step": 14
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 6.590876579284668,
      "learning_rate": 9.975619923087478e-05,
      "loss": 2.4704,
      "step": 15
    },
    {
      "epoch": 0.11962616822429907,
      "grad_norm": 4.255295753479004,
      "learning_rate": 9.966826837090643e-05,
      "loss": 2.2264,
      "step": 16
    },
    {
      "epoch": 0.12710280373831775,
      "grad_norm": 4.624362468719482,
      "learning_rate": 9.956688105141482e-05,
      "loss": 2.1681,
      "step": 17
    },
    {
      "epoch": 0.13457943925233645,
      "grad_norm": 3.850867986679077,
      "learning_rate": 9.945206781565605e-05,
      "loss": 2.1567,
      "step": 18
    },
    {
      "epoch": 0.14205607476635515,
      "grad_norm": 5.03740119934082,
      "learning_rate": 9.932386325148672e-05,
      "loss": 1.9059,
      "step": 19
    },
    {
      "epoch": 0.14953271028037382,
      "grad_norm": 7.313295364379883,
      "learning_rate": 9.918230598094414e-05,
      "loss": 1.849,
      "step": 20
    },
    {
      "epoch": 0.15700934579439252,
      "grad_norm": 5.069641590118408,
      "learning_rate": 9.902743864861138e-05,
      "loss": 1.7289,
      "step": 21
    },
    {
      "epoch": 0.16448598130841122,
      "grad_norm": 2.52390718460083,
      "learning_rate": 9.885930790877044e-05,
      "loss": 1.5924,
      "step": 22
    },
    {
      "epoch": 0.17196261682242991,
      "grad_norm": 2.989380359649658,
      "learning_rate": 9.867796441134754e-05,
      "loss": 1.5416,
      "step": 23
    },
    {
      "epoch": 0.17943925233644858,
      "grad_norm": 3.9914767742156982,
      "learning_rate": 9.84834627866545e-05,
      "loss": 1.558,
      "step": 24
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 2.8886358737945557,
      "learning_rate": 9.82758616289314e-05,
      "loss": 1.526,
      "step": 25
    },
    {
      "epoch": 0.18691588785046728,
      "eval_loss": 1.497947335243225,
      "eval_runtime": 1.7334,
      "eval_samples_per_second": 28.846,
      "eval_steps_per_second": 4.038,
      "step": 25
    },
    {
      "epoch": 0.19439252336448598,
      "grad_norm": 2.935326337814331,
      "learning_rate": 9.805522347869479e-05,
      "loss": 1.4824,
      "step": 26
    },
    {
      "epoch": 0.20186915887850468,
      "grad_norm": 3.524118185043335,
      "learning_rate": 9.78216148038971e-05,
      "loss": 1.4853,
      "step": 27
    },
    {
      "epoch": 0.20934579439252338,
      "grad_norm": 2.3418080806732178,
      "learning_rate": 9.757510597990301e-05,
      "loss": 1.3454,
      "step": 28
    },
    {
      "epoch": 0.21682242990654205,
      "grad_norm": 2.0094268321990967,
      "learning_rate": 9.731577126828865e-05,
      "loss": 1.4325,
      "step": 29
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 2.4824633598327637,
      "learning_rate": 9.704368879447005e-05,
      "loss": 1.4706,
      "step": 30
    },
    {
      "epoch": 0.23177570093457944,
      "grad_norm": 2.506647825241089,
      "learning_rate": 9.675894052416765e-05,
      "loss": 1.7467,
      "step": 31
    },
    {
      "epoch": 0.23925233644859814,
      "grad_norm": 2.892395257949829,
      "learning_rate": 9.64616122387137e-05,
      "loss": 1.6818,
      "step": 32
    },
    {
      "epoch": 0.2467289719626168,
      "grad_norm": 3.7638962268829346,
      "learning_rate": 9.615179350921063e-05,
      "loss": 2.4594,
      "step": 33
    },
    {
      "epoch": 0.2542056074766355,
      "grad_norm": 2.2599539756774902,
      "learning_rate": 9.58295776695472e-05,
      "loss": 1.7344,
      "step": 34
    },
    {
      "epoch": 0.2616822429906542,
      "grad_norm": 2.3030145168304443,
      "learning_rate": 9.549506178828152e-05,
      "loss": 1.3113,
      "step": 35
    },
    {
      "epoch": 0.2691588785046729,
      "grad_norm": 1.872995138168335,
      "learning_rate": 9.514834663939882e-05,
      "loss": 1.1947,
      "step": 36
    },
    {
      "epoch": 0.2766355140186916,
      "grad_norm": 2.171834945678711,
      "learning_rate": 9.478953667195292e-05,
      "loss": 1.3481,
      "step": 37
    },
    {
      "epoch": 0.2841121495327103,
      "grad_norm": 1.6435407400131226,
      "learning_rate": 9.441873997860061e-05,
      "loss": 1.1591,
      "step": 38
    },
    {
      "epoch": 0.29158878504672897,
      "grad_norm": 1.8728747367858887,
      "learning_rate": 9.403606826303847e-05,
      "loss": 1.166,
      "step": 39
    },
    {
      "epoch": 0.29906542056074764,
      "grad_norm": 1.5220937728881836,
      "learning_rate": 9.364163680635166e-05,
      "loss": 1.1966,
      "step": 40
    },
    {
      "epoch": 0.30654205607476637,
      "grad_norm": 1.9457396268844604,
      "learning_rate": 9.323556443228521e-05,
      "loss": 1.115,
      "step": 41
    },
    {
      "epoch": 0.31401869158878504,
      "grad_norm": 1.6101148128509521,
      "learning_rate": 9.281797347144796e-05,
      "loss": 1.1116,
      "step": 42
    },
    {
      "epoch": 0.32149532710280376,
      "grad_norm": 1.4722403287887573,
      "learning_rate": 9.238898972446005e-05,
      "loss": 1.1254,
      "step": 43
    },
    {
      "epoch": 0.32897196261682243,
      "grad_norm": 1.5298360586166382,
      "learning_rate": 9.194874242405508e-05,
      "loss": 1.0772,
      "step": 44
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 1.4249259233474731,
      "learning_rate": 9.149736419614837e-05,
      "loss": 1.0679,
      "step": 45
    },
    {
      "epoch": 0.34392523364485983,
      "grad_norm": 1.617456316947937,
      "learning_rate": 9.103499101988296e-05,
      "loss": 1.1213,
      "step": 46
    },
    {
      "epoch": 0.3514018691588785,
      "grad_norm": 1.6541962623596191,
      "learning_rate": 9.056176218666543e-05,
      "loss": 1.1241,
      "step": 47
    },
    {
      "epoch": 0.35887850467289717,
      "grad_norm": 1.584012508392334,
      "learning_rate": 9.007782025820393e-05,
      "loss": 1.0767,
      "step": 48
    },
    {
      "epoch": 0.3663551401869159,
      "grad_norm": 1.4249681234359741,
      "learning_rate": 8.958331102356102e-05,
      "loss": 1.1389,
      "step": 49
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 1.602219581604004,
      "learning_rate": 8.907838345523424e-05,
      "loss": 1.2053,
      "step": 50
    },
    {
      "epoch": 0.37383177570093457,
      "eval_loss": 1.1656819581985474,
      "eval_runtime": 1.8134,
      "eval_samples_per_second": 27.573,
      "eval_steps_per_second": 3.86,
      "step": 50
    },
    {
      "epoch": 0.3813084112149533,
      "grad_norm": 1.888527274131775,
      "learning_rate": 8.856318966427766e-05,
      "loss": 1.3811,
      "step": 51
    },
    {
      "epoch": 0.38878504672897196,
      "grad_norm": 1.4429353475570679,
      "learning_rate": 8.803788485447791e-05,
      "loss": 1.1332,
      "step": 52
    },
    {
      "epoch": 0.39626168224299063,
      "grad_norm": 1.5478978157043457,
      "learning_rate": 8.750262727559867e-05,
      "loss": 0.932,
      "step": 53
    },
    {
      "epoch": 0.40373831775700936,
      "grad_norm": 1.5630110502243042,
      "learning_rate": 8.695757817570717e-05,
      "loss": 1.1636,
      "step": 54
    },
    {
      "epoch": 0.411214953271028,
      "grad_norm": 1.6892582178115845,
      "learning_rate": 8.640290175259794e-05,
      "loss": 1.0162,
      "step": 55
    },
    {
      "epoch": 0.41869158878504675,
      "grad_norm": 1.607128620147705,
      "learning_rate": 8.58387651043276e-05,
      "loss": 1.1545,
      "step": 56
    },
    {
      "epoch": 0.4261682242990654,
      "grad_norm": 1.8287376165390015,
      "learning_rate": 8.526533817887597e-05,
      "loss": 1.1185,
      "step": 57
    },
    {
      "epoch": 0.4336448598130841,
      "grad_norm": 1.4400948286056519,
      "learning_rate": 8.468279372294879e-05,
      "loss": 0.8956,
      "step": 58
    },
    {
      "epoch": 0.4411214953271028,
      "grad_norm": 1.4424753189086914,
      "learning_rate": 8.409130722993716e-05,
      "loss": 0.9624,
      "step": 59
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 1.5252015590667725,
      "learning_rate": 8.349105688704965e-05,
      "loss": 1.0351,
      "step": 60
    },
    {
      "epoch": 0.45607476635514016,
      "grad_norm": 2.396437406539917,
      "learning_rate": 8.28822235216328e-05,
      "loss": 1.3683,
      "step": 61
    },
    {
      "epoch": 0.4635514018691589,
      "grad_norm": 1.8390008211135864,
      "learning_rate": 8.22649905466962e-05,
      "loss": 1.4461,
      "step": 62
    },
    {
      "epoch": 0.47102803738317756,
      "grad_norm": 1.6097524166107178,
      "learning_rate": 8.163954390565895e-05,
      "loss": 1.2682,
      "step": 63
    },
    {
      "epoch": 0.4785046728971963,
      "grad_norm": 1.5528298616409302,
      "learning_rate": 8.100607201633341e-05,
      "loss": 1.3143,
      "step": 64
    },
    {
      "epoch": 0.48598130841121495,
      "grad_norm": 2.06070876121521,
      "learning_rate": 8.03647657141638e-05,
      "loss": 1.5265,
      "step": 65
    },
    {
      "epoch": 0.4934579439252336,
      "grad_norm": 3.6317155361175537,
      "learning_rate": 7.971581819473646e-05,
      "loss": 2.6631,
      "step": 66
    },
    {
      "epoch": 0.5009345794392523,
      "grad_norm": 1.9490833282470703,
      "learning_rate": 7.905942495557893e-05,
      "loss": 1.5979,
      "step": 67
    },
    {
      "epoch": 0.508411214953271,
      "grad_norm": 1.641648769378662,
      "learning_rate": 7.839578373726587e-05,
      "loss": 1.0137,
      "step": 68
    },
    {
      "epoch": 0.5158878504672897,
      "grad_norm": 1.5076510906219482,
      "learning_rate": 7.772509446384883e-05,
      "loss": 1.0979,
      "step": 69
    },
    {
      "epoch": 0.5233644859813084,
      "grad_norm": 1.3256382942199707,
      "learning_rate": 7.704755918262877e-05,
      "loss": 0.974,
      "step": 70
    },
    {
      "epoch": 0.5308411214953271,
      "grad_norm": 1.267953872680664,
      "learning_rate": 7.636338200328847e-05,
      "loss": 0.935,
      "step": 71
    },
    {
      "epoch": 0.5383177570093458,
      "grad_norm": 1.5867618322372437,
      "learning_rate": 7.567276903640388e-05,
      "loss": 1.0266,
      "step": 72
    },
    {
      "epoch": 0.5457943925233645,
      "grad_norm": 2.156276226043701,
      "learning_rate": 7.49759283313526e-05,
      "loss": 0.9141,
      "step": 73
    },
    {
      "epoch": 0.5532710280373832,
      "grad_norm": 1.622321605682373,
      "learning_rate": 7.427306981363847e-05,
      "loss": 0.8034,
      "step": 74
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 1.727579951286316,
      "learning_rate": 7.356440522165072e-05,
      "loss": 0.8998,
      "step": 75
    },
    {
      "epoch": 0.5607476635514018,
      "eval_loss": 1.0979422330856323,
      "eval_runtime": 1.7942,
      "eval_samples_per_second": 27.867,
      "eval_steps_per_second": 3.901,
      "step": 75
    },
    {
      "epoch": 0.5682242990654206,
      "grad_norm": 1.4198564291000366,
      "learning_rate": 7.28501480428771e-05,
      "loss": 0.9308,
      "step": 76
    },
    {
      "epoch": 0.5757009345794393,
      "grad_norm": 1.5244777202606201,
      "learning_rate": 7.213051344959015e-05,
      "loss": 0.8892,
      "step": 77
    },
    {
      "epoch": 0.5831775700934579,
      "grad_norm": 1.2492246627807617,
      "learning_rate": 7.140571823402581e-05,
      "loss": 0.9154,
      "step": 78
    },
    {
      "epoch": 0.5906542056074766,
      "grad_norm": 1.314004898071289,
      "learning_rate": 7.06759807430741e-05,
      "loss": 0.8424,
      "step": 79
    },
    {
      "epoch": 0.5981308411214953,
      "grad_norm": 1.3923033475875854,
      "learning_rate": 6.994152081250139e-05,
      "loss": 0.9523,
      "step": 80
    },
    {
      "epoch": 0.6056074766355141,
      "grad_norm": 1.9231942892074585,
      "learning_rate": 6.920255970072414e-05,
      "loss": 0.9735,
      "step": 81
    },
    {
      "epoch": 0.6130841121495327,
      "grad_norm": 1.9869779348373413,
      "learning_rate": 6.845932002215419e-05,
      "loss": 0.9535,
      "step": 82
    },
    {
      "epoch": 0.6205607476635514,
      "grad_norm": 1.4400379657745361,
      "learning_rate": 6.771202568013538e-05,
      "loss": 0.8512,
      "step": 83
    },
    {
      "epoch": 0.6280373831775701,
      "grad_norm": 1.6403017044067383,
      "learning_rate": 6.696090179949188e-05,
      "loss": 1.0716,
      "step": 84
    },
    {
      "epoch": 0.6355140186915887,
      "grad_norm": 1.586094856262207,
      "learning_rate": 6.620617465870877e-05,
      "loss": 0.9685,
      "step": 85
    },
    {
      "epoch": 0.6429906542056075,
      "grad_norm": 1.352432131767273,
      "learning_rate": 6.544807162176478e-05,
      "loss": 1.0035,
      "step": 86
    },
    {
      "epoch": 0.6504672897196262,
      "grad_norm": 1.3724721670150757,
      "learning_rate": 6.468682106963829e-05,
      "loss": 1.1021,
      "step": 87
    },
    {
      "epoch": 0.6579439252336449,
      "grad_norm": 1.259173035621643,
      "learning_rate": 6.39226523315067e-05,
      "loss": 0.8772,
      "step": 88
    },
    {
      "epoch": 0.6654205607476635,
      "grad_norm": 1.35273277759552,
      "learning_rate": 6.315579561566031e-05,
      "loss": 1.024,
      "step": 89
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 1.3475384712219238,
      "learning_rate": 6.238648194015137e-05,
      "loss": 1.0481,
      "step": 90
    },
    {
      "epoch": 0.680373831775701,
      "grad_norm": 1.4782145023345947,
      "learning_rate": 6.16149430631992e-05,
      "loss": 0.9756,
      "step": 91
    },
    {
      "epoch": 0.6878504672897197,
      "grad_norm": 1.314041018486023,
      "learning_rate": 6.084141141337213e-05,
      "loss": 1.113,
      "step": 92
    },
    {
      "epoch": 0.6953271028037383,
      "grad_norm": 1.2568014860153198,
      "learning_rate": 6.006612001956774e-05,
      "loss": 0.9907,
      "step": 93
    },
    {
      "epoch": 0.702803738317757,
      "grad_norm": 1.3871257305145264,
      "learning_rate": 5.928930244081214e-05,
      "loss": 1.2216,
      "step": 94
    },
    {
      "epoch": 0.7102803738317757,
      "grad_norm": 1.4609795808792114,
      "learning_rate": 5.851119269589963e-05,
      "loss": 0.9784,
      "step": 95
    },
    {
      "epoch": 0.7177570093457943,
      "grad_norm": 1.5408998727798462,
      "learning_rate": 5.773202519289364e-05,
      "loss": 0.9439,
      "step": 96
    },
    {
      "epoch": 0.7252336448598131,
      "grad_norm": 1.5144058465957642,
      "learning_rate": 5.695203465851068e-05,
      "loss": 1.2598,
      "step": 97
    },
    {
      "epoch": 0.7327102803738318,
      "grad_norm": 1.4548237323760986,
      "learning_rate": 5.617145606740804e-05,
      "loss": 1.2112,
      "step": 98
    },
    {
      "epoch": 0.7401869158878505,
      "grad_norm": 2.4008612632751465,
      "learning_rate": 5.5390524571397106e-05,
      "loss": 2.154,
      "step": 99
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 2.0451629161834717,
      "learning_rate": 5.46094754286029e-05,
      "loss": 1.3008,
      "step": 100
    },
    {
      "epoch": 0.7476635514018691,
      "eval_loss": 1.081913709640503,
      "eval_runtime": 1.7949,
      "eval_samples_per_second": 27.857,
      "eval_steps_per_second": 3.9,
      "step": 100
    },
    {
      "epoch": 0.7551401869158878,
      "grad_norm": 1.8308786153793335,
      "learning_rate": 5.382854393259197e-05,
      "loss": 0.8989,
      "step": 101
    },
    {
      "epoch": 0.7626168224299066,
      "grad_norm": 1.6299549341201782,
      "learning_rate": 5.3047965341489344e-05,
      "loss": 1.0614,
      "step": 102
    },
    {
      "epoch": 0.7700934579439253,
      "grad_norm": 1.342459797859192,
      "learning_rate": 5.226797480710638e-05,
      "loss": 1.0972,
      "step": 103
    },
    {
      "epoch": 0.7775700934579439,
      "grad_norm": 1.3016060590744019,
      "learning_rate": 5.1488807304100386e-05,
      "loss": 0.9202,
      "step": 104
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 1.2006447315216064,
      "learning_rate": 5.071069755918787e-05,
      "loss": 0.9442,
      "step": 105
    },
    {
      "epoch": 0.7925233644859813,
      "grad_norm": 1.2381112575531006,
      "learning_rate": 4.9933879980432284e-05,
      "loss": 0.8637,
      "step": 106
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3466088771820068,
      "learning_rate": 4.9158588586627895e-05,
      "loss": 0.9609,
      "step": 107
    },
    {
      "epoch": 0.8074766355140187,
      "grad_norm": 1.3996555805206299,
      "learning_rate": 4.8385056936800786e-05,
      "loss": 0.971,
      "step": 108
    },
    {
      "epoch": 0.8149532710280374,
      "grad_norm": 1.1782788038253784,
      "learning_rate": 4.7613518059848614e-05,
      "loss": 0.9633,
      "step": 109
    },
    {
      "epoch": 0.822429906542056,
      "grad_norm": 1.092534065246582,
      "learning_rate": 4.684420438433971e-05,
      "loss": 0.7657,
      "step": 110
    },
    {
      "epoch": 0.8299065420560747,
      "grad_norm": 1.141908049583435,
      "learning_rate": 4.607734766849332e-05,
      "loss": 0.9123,
      "step": 111
    },
    {
      "epoch": 0.8373831775700935,
      "grad_norm": 1.3389331102371216,
      "learning_rate": 4.531317893036172e-05,
      "loss": 0.9046,
      "step": 112
    },
    {
      "epoch": 0.8448598130841122,
      "grad_norm": 1.2002415657043457,
      "learning_rate": 4.455192837823523e-05,
      "loss": 0.8237,
      "step": 113
    },
    {
      "epoch": 0.8523364485981308,
      "grad_norm": 1.2427459955215454,
      "learning_rate": 4.379382534129125e-05,
      "loss": 0.9271,
      "step": 114
    },
    {
      "epoch": 0.8598130841121495,
      "grad_norm": 1.2061138153076172,
      "learning_rate": 4.303909820050814e-05,
      "loss": 0.9193,
      "step": 115
    },
    {
      "epoch": 0.8672897196261682,
      "grad_norm": 1.336917757987976,
      "learning_rate": 4.228797431986463e-05,
      "loss": 0.8825,
      "step": 116
    },
    {
      "epoch": 0.874766355140187,
      "grad_norm": 1.0712493658065796,
      "learning_rate": 4.154067997784581e-05,
      "loss": 0.8476,
      "step": 117
    },
    {
      "epoch": 0.8822429906542056,
      "grad_norm": 1.3190851211547852,
      "learning_rate": 4.079744029927587e-05,
      "loss": 0.977,
      "step": 118
    },
    {
      "epoch": 0.8897196261682243,
      "grad_norm": 1.257728934288025,
      "learning_rate": 4.005847918749863e-05,
      "loss": 0.8868,
      "step": 119
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 1.2771947383880615,
      "learning_rate": 3.932401925692591e-05,
      "loss": 0.948,
      "step": 120
    },
    {
      "epoch": 0.9046728971962616,
      "grad_norm": 1.36766517162323,
      "learning_rate": 3.8594281765974204e-05,
      "loss": 0.9731,
      "step": 121
    },
    {
      "epoch": 0.9121495327102803,
      "grad_norm": 1.3747360706329346,
      "learning_rate": 3.786948655040987e-05,
      "loss": 0.9178,
      "step": 122
    },
    {
      "epoch": 0.9196261682242991,
      "grad_norm": 1.2876604795455933,
      "learning_rate": 3.714985195712292e-05,
      "loss": 1.0227,
      "step": 123
    },
    {
      "epoch": 0.9271028037383178,
      "grad_norm": 1.2210800647735596,
      "learning_rate": 3.643559477834928e-05,
      "loss": 0.9963,
      "step": 124
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 1.3463104963302612,
      "learning_rate": 3.572693018636152e-05,
      "loss": 0.9492,
      "step": 125
    },
    {
      "epoch": 0.9345794392523364,
      "eval_loss": 1.0178673267364502,
      "eval_runtime": 1.8163,
      "eval_samples_per_second": 27.528,
      "eval_steps_per_second": 3.854,
      "step": 125
    },
    {
      "epoch": 0.9420560747663551,
      "grad_norm": 1.2226606607437134,
      "learning_rate": 3.5024071668647405e-05,
      "loss": 0.9361,
      "step": 126
    },
    {
      "epoch": 0.9495327102803738,
      "grad_norm": 1.3024946451187134,
      "learning_rate": 3.432723096359614e-05,
      "loss": 1.0504,
      "step": 127
    },
    {
      "epoch": 0.9570093457943926,
      "grad_norm": 1.3780845403671265,
      "learning_rate": 3.363661799671154e-05,
      "loss": 1.0951,
      "step": 128
    },
    {
      "epoch": 0.9644859813084112,
      "grad_norm": 1.4091452360153198,
      "learning_rate": 3.2952440817371225e-05,
      "loss": 0.934,
      "step": 129
    },
    {
      "epoch": 0.9719626168224299,
      "grad_norm": 1.4428718090057373,
      "learning_rate": 3.2274905536151187e-05,
      "loss": 1.1048,
      "step": 130
    },
    {
      "epoch": 0.9794392523364486,
      "grad_norm": 1.5878864526748657,
      "learning_rate": 3.160421626273415e-05,
      "loss": 1.2798,
      "step": 131
    },
    {
      "epoch": 0.9869158878504672,
      "grad_norm": 3.471273899078369,
      "learning_rate": 3.094057504442107e-05,
      "loss": 2.45,
      "step": 132
    },
    {
      "epoch": 0.994392523364486,
      "grad_norm": 1.5296839475631714,
      "learning_rate": 3.0284181805263556e-05,
      "loss": 1.1224,
      "step": 133
    },
    {
      "epoch": 1.0018691588785047,
      "grad_norm": 1.9310905933380127,
      "learning_rate": 2.963523428583621e-05,
      "loss": 1.7417,
      "step": 134
    },
    {
      "epoch": 1.0093457943925233,
      "grad_norm": 1.5308727025985718,
      "learning_rate": 2.8993927983666613e-05,
      "loss": 1.0866,
      "step": 135
    },
    {
      "epoch": 1.016822429906542,
      "grad_norm": 1.4650174379348755,
      "learning_rate": 2.836045609434107e-05,
      "loss": 0.769,
      "step": 136
    },
    {
      "epoch": 1.0242990654205608,
      "grad_norm": 1.3912580013275146,
      "learning_rate": 2.7735009453303806e-05,
      "loss": 0.9367,
      "step": 137
    },
    {
      "epoch": 1.0317757009345794,
      "grad_norm": 1.2151148319244385,
      "learning_rate": 2.7117776478367228e-05,
      "loss": 0.8128,
      "step": 138
    },
    {
      "epoch": 1.0392523364485982,
      "grad_norm": 1.2486802339553833,
      "learning_rate": 2.650894311295034e-05,
      "loss": 0.8115,
      "step": 139
    },
    {
      "epoch": 1.0467289719626167,
      "grad_norm": 1.2411141395568848,
      "learning_rate": 2.5908692770062843e-05,
      "loss": 0.9227,
      "step": 140
    },
    {
      "epoch": 1.0542056074766355,
      "grad_norm": 1.0788848400115967,
      "learning_rate": 2.531720627705123e-05,
      "loss": 0.7906,
      "step": 141
    },
    {
      "epoch": 1.0616822429906543,
      "grad_norm": 1.148887276649475,
      "learning_rate": 2.4734661821124045e-05,
      "loss": 0.6868,
      "step": 142
    },
    {
      "epoch": 1.0691588785046728,
      "grad_norm": 1.1361629962921143,
      "learning_rate": 2.4161234895672416e-05,
      "loss": 0.8188,
      "step": 143
    },
    {
      "epoch": 1.0766355140186916,
      "grad_norm": 1.2399373054504395,
      "learning_rate": 2.359709824740207e-05,
      "loss": 0.7538,
      "step": 144
    },
    {
      "epoch": 1.0841121495327102,
      "grad_norm": 1.1857571601867676,
      "learning_rate": 2.3042421824292836e-05,
      "loss": 0.7866,
      "step": 145
    },
    {
      "epoch": 1.091588785046729,
      "grad_norm": 1.3761775493621826,
      "learning_rate": 2.249737272440135e-05,
      "loss": 0.803,
      "step": 146
    },
    {
      "epoch": 1.0990654205607477,
      "grad_norm": 1.153091549873352,
      "learning_rate": 2.196211514552208e-05,
      "loss": 0.7357,
      "step": 147
    },
    {
      "epoch": 1.1065420560747663,
      "grad_norm": 1.0790941715240479,
      "learning_rate": 2.1436810335722354e-05,
      "loss": 0.766,
      "step": 148
    },
    {
      "epoch": 1.114018691588785,
      "grad_norm": 1.2671778202056885,
      "learning_rate": 2.092161654476577e-05,
      "loss": 0.8744,
      "step": 149
    },
    {
      "epoch": 1.1214953271028036,
      "grad_norm": 1.2150834798812866,
      "learning_rate": 2.0416688976438993e-05,
      "loss": 0.8142,
      "step": 150
    },
    {
      "epoch": 1.1214953271028036,
      "eval_loss": 1.0219863653182983,
      "eval_runtime": 1.7535,
      "eval_samples_per_second": 28.515,
      "eval_steps_per_second": 3.992,
      "step": 150
    },
    {
      "epoch": 1.1289719626168224,
      "grad_norm": 1.3583935499191284,
      "learning_rate": 1.9922179741796086e-05,
      "loss": 0.8198,
      "step": 151
    },
    {
      "epoch": 1.1364485981308412,
      "grad_norm": 1.284717321395874,
      "learning_rate": 1.9438237813334586e-05,
      "loss": 0.7878,
      "step": 152
    },
    {
      "epoch": 1.1439252336448598,
      "grad_norm": 1.3621183633804321,
      "learning_rate": 1.8965008980117037e-05,
      "loss": 0.9232,
      "step": 153
    },
    {
      "epoch": 1.1514018691588785,
      "grad_norm": 1.6327229738235474,
      "learning_rate": 1.850263580385163e-05,
      "loss": 1.0294,
      "step": 154
    },
    {
      "epoch": 1.158878504672897,
      "grad_norm": 1.386734127998352,
      "learning_rate": 1.8051257575944925e-05,
      "loss": 0.8834,
      "step": 155
    },
    {
      "epoch": 1.1663551401869159,
      "grad_norm": 1.375533938407898,
      "learning_rate": 1.7611010275539962e-05,
      "loss": 0.9483,
      "step": 156
    },
    {
      "epoch": 1.1738317757009347,
      "grad_norm": 1.242241382598877,
      "learning_rate": 1.718202652855205e-05,
      "loss": 0.8194,
      "step": 157
    },
    {
      "epoch": 1.1813084112149532,
      "grad_norm": 1.247066617012024,
      "learning_rate": 1.6764435567714794e-05,
      "loss": 0.8326,
      "step": 158
    },
    {
      "epoch": 1.188785046728972,
      "grad_norm": 1.4270540475845337,
      "learning_rate": 1.6358363193648352e-05,
      "loss": 0.8584,
      "step": 159
    },
    {
      "epoch": 1.1962616822429906,
      "grad_norm": 1.225496768951416,
      "learning_rate": 1.5963931736961547e-05,
      "loss": 0.8475,
      "step": 160
    },
    {
      "epoch": 1.2037383177570093,
      "grad_norm": 1.2226568460464478,
      "learning_rate": 1.5581260021399396e-05,
      "loss": 0.828,
      "step": 161
    },
    {
      "epoch": 1.2112149532710281,
      "grad_norm": 1.387080192565918,
      "learning_rate": 1.5210463328047095e-05,
      "loss": 0.8902,
      "step": 162
    },
    {
      "epoch": 1.2186915887850467,
      "grad_norm": 1.2809566259384155,
      "learning_rate": 1.4851653360601179e-05,
      "loss": 0.9188,
      "step": 163
    },
    {
      "epoch": 1.2261682242990655,
      "grad_norm": 1.4872632026672363,
      "learning_rate": 1.4504938211718489e-05,
      "loss": 1.1853,
      "step": 164
    },
    {
      "epoch": 1.233644859813084,
      "grad_norm": 1.3789664506912231,
      "learning_rate": 1.4170422330452816e-05,
      "loss": 0.9331,
      "step": 165
    },
    {
      "epoch": 1.2411214953271028,
      "grad_norm": 1.7620553970336914,
      "learning_rate": 1.384820649078939e-05,
      "loss": 1.3851,
      "step": 166
    },
    {
      "epoch": 1.2485981308411216,
      "grad_norm": 2.51485013961792,
      "learning_rate": 1.3538387761286303e-05,
      "loss": 1.7585,
      "step": 167
    },
    {
      "epoch": 1.2560747663551401,
      "grad_norm": 1.4379189014434814,
      "learning_rate": 1.3241059475832373e-05,
      "loss": 0.9246,
      "step": 168
    },
    {
      "epoch": 1.263551401869159,
      "grad_norm": 1.3618923425674438,
      "learning_rate": 1.2956311205529943e-05,
      "loss": 0.8608,
      "step": 169
    },
    {
      "epoch": 1.2710280373831775,
      "grad_norm": 1.3011233806610107,
      "learning_rate": 1.268422873171136e-05,
      "loss": 0.8322,
      "step": 170
    },
    {
      "epoch": 1.2785046728971963,
      "grad_norm": 1.5399248600006104,
      "learning_rate": 1.2424894020096997e-05,
      "loss": 0.7588,
      "step": 171
    },
    {
      "epoch": 1.2859813084112148,
      "grad_norm": 1.449872374534607,
      "learning_rate": 1.217838519610291e-05,
      "loss": 0.857,
      "step": 172
    },
    {
      "epoch": 1.2934579439252336,
      "grad_norm": 1.3477046489715576,
      "learning_rate": 1.1944776521305213e-05,
      "loss": 0.8627,
      "step": 173
    },
    {
      "epoch": 1.3009345794392524,
      "grad_norm": 1.3076852560043335,
      "learning_rate": 1.1724138371068603e-05,
      "loss": 0.9005,
      "step": 174
    },
    {
      "epoch": 1.308411214953271,
      "grad_norm": 1.294968843460083,
      "learning_rate": 1.1516537213345519e-05,
      "loss": 0.7639,
      "step": 175
    },
    {
      "epoch": 1.308411214953271,
      "eval_loss": 1.0100014209747314,
      "eval_runtime": 1.729,
      "eval_samples_per_second": 28.918,
      "eval_steps_per_second": 4.049,
      "step": 175
    },
    {
      "epoch": 1.3158878504672897,
      "grad_norm": 1.3219696283340454,
      "learning_rate": 1.1322035588652484e-05,
      "loss": 0.7752,
      "step": 176
    },
    {
      "epoch": 1.3233644859813083,
      "grad_norm": 1.1848926544189453,
      "learning_rate": 1.1140692091229556e-05,
      "loss": 0.7759,
      "step": 177
    },
    {
      "epoch": 1.330841121495327,
      "grad_norm": 1.1485064029693604,
      "learning_rate": 1.0972561351388622e-05,
      "loss": 0.7454,
      "step": 178
    },
    {
      "epoch": 1.3383177570093459,
      "grad_norm": 1.1740100383758545,
      "learning_rate": 1.0817694019055866e-05,
      "loss": 0.761,
      "step": 179
    },
    {
      "epoch": 1.3457943925233644,
      "grad_norm": 1.3378069400787354,
      "learning_rate": 1.0676136748513286e-05,
      "loss": 0.8535,
      "step": 180
    },
    {
      "epoch": 1.3532710280373832,
      "grad_norm": 1.2721531391143799,
      "learning_rate": 1.0547932184343948e-05,
      "loss": 0.8117,
      "step": 181
    },
    {
      "epoch": 1.3607476635514018,
      "grad_norm": 1.255110740661621,
      "learning_rate": 1.043311894858519e-05,
      "loss": 0.8114,
      "step": 182
    },
    {
      "epoch": 1.3682242990654205,
      "grad_norm": 1.184085726737976,
      "learning_rate": 1.033173162909358e-05,
      "loss": 0.7484,
      "step": 183
    },
    {
      "epoch": 1.3757009345794393,
      "grad_norm": 1.2864772081375122,
      "learning_rate": 1.0243800769125222e-05,
      "loss": 0.8197,
      "step": 184
    },
    {
      "epoch": 1.3831775700934579,
      "grad_norm": 1.3960767984390259,
      "learning_rate": 1.0169352858134525e-05,
      "loss": 0.8416,
      "step": 185
    },
    {
      "epoch": 1.3906542056074767,
      "grad_norm": 1.6105817556381226,
      "learning_rate": 1.0108410323794131e-05,
      "loss": 0.8156,
      "step": 186
    },
    {
      "epoch": 1.3981308411214952,
      "grad_norm": 1.4161114692687988,
      "learning_rate": 1.0060991525238538e-05,
      "loss": 0.8663,
      "step": 187
    },
    {
      "epoch": 1.405607476635514,
      "grad_norm": 1.3891263008117676,
      "learning_rate": 1.0027110747533332e-05,
      "loss": 0.9249,
      "step": 188
    },
    {
      "epoch": 1.4130841121495328,
      "grad_norm": 1.4171258211135864,
      "learning_rate": 1.0006778197371774e-05,
      "loss": 0.837,
      "step": 189
    },
    {
      "epoch": 1.4205607476635513,
      "grad_norm": 1.4086953401565552,
      "learning_rate": 1e-05,
      "loss": 0.9393,
      "step": 190
    }
  ],
  "logging_steps": 1,
  "max_steps": 190,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 80,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.916153511660749e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}