| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9420289855072463, |
| "eval_steps": 1, |
| "global_step": 68, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028985507246376812, |
| "eval_loss": 3.890916585922241, |
| "eval_runtime": 2.366, |
| "eval_samples_per_second": 253.597, |
| "eval_steps_per_second": 31.7, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.057971014492753624, |
| "grad_norm": 251.42497029296223, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 3.8462, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.057971014492753624, |
| "eval_loss": 3.1606125831604004, |
| "eval_runtime": 2.3735, |
| "eval_samples_per_second": 252.789, |
| "eval_steps_per_second": 31.599, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "eval_loss": 1.4003069400787354, |
| "eval_runtime": 2.3791, |
| "eval_samples_per_second": 252.195, |
| "eval_steps_per_second": 31.524, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "grad_norm": 174.88885660985272, |
| "learning_rate": 9.994161134161635e-06, |
| "loss": 2.3026, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "eval_loss": 0.5247076749801636, |
| "eval_runtime": 2.3571, |
| "eval_samples_per_second": 254.551, |
| "eval_steps_per_second": 31.819, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.14492753623188406, |
| "eval_loss": 0.25349560379981995, |
| "eval_runtime": 2.3667, |
| "eval_samples_per_second": 253.52, |
| "eval_steps_per_second": 31.69, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 9.541839408808285, |
| "learning_rate": 9.947531997255256e-06, |
| "loss": 0.3725, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "eval_loss": 0.12238868325948715, |
| "eval_runtime": 2.3709, |
| "eval_samples_per_second": 253.068, |
| "eval_steps_per_second": 31.633, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.2028985507246377, |
| "eval_loss": 0.07106433808803558, |
| "eval_runtime": 2.3595, |
| "eval_samples_per_second": 254.287, |
| "eval_steps_per_second": 31.786, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "grad_norm": 8.373395170519098, |
| "learning_rate": 9.854709087130261e-06, |
| "loss": 0.1704, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "eval_loss": 0.07050631195306778, |
| "eval_runtime": 2.374, |
| "eval_samples_per_second": 252.739, |
| "eval_steps_per_second": 31.592, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "eval_loss": 0.0841919556260109, |
| "eval_runtime": 2.3733, |
| "eval_samples_per_second": 252.81, |
| "eval_steps_per_second": 31.601, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "grad_norm": 9.328057178580242, |
| "learning_rate": 9.716559066288716e-06, |
| "loss": 0.0719, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "eval_loss": 0.06837386637926102, |
| "eval_runtime": 2.402, |
| "eval_samples_per_second": 249.788, |
| "eval_steps_per_second": 31.223, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3188405797101449, |
| "eval_loss": 0.08372741937637329, |
| "eval_runtime": 2.3771, |
| "eval_samples_per_second": 252.413, |
| "eval_steps_per_second": 31.552, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 8.195685627940097, |
| "learning_rate": 9.534371804252727e-06, |
| "loss": 0.0719, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "eval_loss": 0.07937659323215485, |
| "eval_runtime": 2.3703, |
| "eval_samples_per_second": 253.131, |
| "eval_steps_per_second": 31.641, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.37681159420289856, |
| "eval_loss": 0.06787987053394318, |
| "eval_runtime": 2.3654, |
| "eval_samples_per_second": 253.659, |
| "eval_steps_per_second": 31.707, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "grad_norm": 3.0846120042199954, |
| "learning_rate": 9.309848334400247e-06, |
| "loss": 0.0729, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "eval_loss": 0.060705069452524185, |
| "eval_runtime": 2.3698, |
| "eval_samples_per_second": 253.186, |
| "eval_steps_per_second": 31.648, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "eval_loss": 0.06819155067205429, |
| "eval_runtime": 2.3712, |
| "eval_samples_per_second": 253.037, |
| "eval_steps_per_second": 31.63, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "grad_norm": 3.7022895578403414, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.0639, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "eval_loss": 0.06595086306333542, |
| "eval_runtime": 2.3702, |
| "eval_samples_per_second": 253.148, |
| "eval_steps_per_second": 31.643, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.4927536231884058, |
| "eval_loss": 0.06074570491909981, |
| "eval_runtime": 2.3929, |
| "eval_samples_per_second": 250.74, |
| "eval_steps_per_second": 31.342, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 2.6201997383559235, |
| "learning_rate": 8.742553740855507e-06, |
| "loss": 0.0659, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "eval_loss": 0.060938794165849686, |
| "eval_runtime": 2.3734, |
| "eval_samples_per_second": 252.797, |
| "eval_steps_per_second": 31.6, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5507246376811594, |
| "eval_loss": 0.05989724025130272, |
| "eval_runtime": 2.386, |
| "eval_samples_per_second": 251.47, |
| "eval_steps_per_second": 31.434, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 1.5759739495214995, |
| "learning_rate": 8.405079293933986e-06, |
| "loss": 0.0584, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "eval_loss": 0.05950001999735832, |
| "eval_runtime": 2.3751, |
| "eval_samples_per_second": 252.625, |
| "eval_steps_per_second": 31.578, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "eval_loss": 0.057929884642362595, |
| "eval_runtime": 2.3951, |
| "eval_samples_per_second": 250.515, |
| "eval_steps_per_second": 31.314, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "grad_norm": 0.9083257875769617, |
| "learning_rate": 8.035812539093557e-06, |
| "loss": 0.059, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "eval_loss": 0.05716191604733467, |
| "eval_runtime": 2.3793, |
| "eval_samples_per_second": 252.176, |
| "eval_steps_per_second": 31.522, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "eval_loss": 0.05785393714904785, |
| "eval_runtime": 2.3743, |
| "eval_samples_per_second": 252.704, |
| "eval_steps_per_second": 31.588, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 9.258583060973042, |
| "learning_rate": 7.638201220530664e-06, |
| "loss": 0.1069, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "eval_loss": 0.06170507147908211, |
| "eval_runtime": 2.3968, |
| "eval_samples_per_second": 250.337, |
| "eval_steps_per_second": 31.292, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7246376811594203, |
| "eval_loss": 0.06007671728730202, |
| "eval_runtime": 2.375, |
| "eval_samples_per_second": 252.631, |
| "eval_steps_per_second": 31.579, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "grad_norm": 2.788879674143748, |
| "learning_rate": 7.215957727996208e-06, |
| "loss": 0.0585, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "eval_loss": 0.05631522089242935, |
| "eval_runtime": 2.4038, |
| "eval_samples_per_second": 249.609, |
| "eval_steps_per_second": 31.201, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "eval_loss": 0.05981193110346794, |
| "eval_runtime": 2.3841, |
| "eval_samples_per_second": 251.665, |
| "eval_steps_per_second": 31.458, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "grad_norm": 3.982184927790719, |
| "learning_rate": 6.773024435212678e-06, |
| "loss": 0.097, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "eval_loss": 0.05898861214518547, |
| "eval_runtime": 2.3921, |
| "eval_samples_per_second": 250.824, |
| "eval_steps_per_second": 31.353, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8405797101449275, |
| "eval_loss": 0.05481765791773796, |
| "eval_runtime": 2.3767, |
| "eval_samples_per_second": 252.451, |
| "eval_steps_per_second": 31.556, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.18833058180333875, |
| "learning_rate": 6.313536890992935e-06, |
| "loss": 0.059, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "eval_loss": 0.05593809857964516, |
| "eval_runtime": 2.3764, |
| "eval_samples_per_second": 252.478, |
| "eval_steps_per_second": 31.56, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8985507246376812, |
| "eval_loss": 0.05695917829871178, |
| "eval_runtime": 2.39, |
| "eval_samples_per_second": 251.049, |
| "eval_steps_per_second": 31.381, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "grad_norm": 3.4944330077548207, |
| "learning_rate": 5.841785206735192e-06, |
| "loss": 0.0695, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "eval_loss": 0.05482754111289978, |
| "eval_runtime": 2.3734, |
| "eval_samples_per_second": 252.799, |
| "eval_steps_per_second": 31.6, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "eval_loss": 0.055433232337236404, |
| "eval_runtime": 2.3729, |
| "eval_samples_per_second": 252.86, |
| "eval_steps_per_second": 31.607, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "grad_norm": 2.742927364863374, |
| "learning_rate": 5.362174000808813e-06, |
| "loss": 0.0533, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "eval_loss": 0.05639192834496498, |
| "eval_runtime": 2.3727, |
| "eval_samples_per_second": 252.873, |
| "eval_steps_per_second": 31.609, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0144927536231885, |
| "eval_loss": 0.054112281650304794, |
| "eval_runtime": 2.37, |
| "eval_samples_per_second": 253.168, |
| "eval_steps_per_second": 31.646, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0144927536231885, |
| "grad_norm": 1.5488276127209792, |
| "learning_rate": 4.87918127381934e-06, |
| "loss": 0.0544, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.0144927536231885, |
| "eval_loss": 0.0547836609184742, |
| "eval_runtime": 2.372, |
| "eval_samples_per_second": 252.956, |
| "eval_steps_per_second": 31.619, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.0434782608695652, |
| "eval_loss": 0.05551725998520851, |
| "eval_runtime": 2.376, |
| "eval_samples_per_second": 252.528, |
| "eval_steps_per_second": 31.566, |
| "step": 37 |
| }, |
| { |
| "epoch": 1.0724637681159421, |
| "grad_norm": 2.1899578742270838, |
| "learning_rate": 4.397316598723385e-06, |
| "loss": 0.0555, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.0724637681159421, |
| "eval_loss": 0.05312129110097885, |
| "eval_runtime": 2.3838, |
| "eval_samples_per_second": 251.695, |
| "eval_steps_per_second": 31.462, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.1014492753623188, |
| "eval_loss": 0.053158555179834366, |
| "eval_runtime": 2.3919, |
| "eval_samples_per_second": 250.844, |
| "eval_steps_per_second": 31.355, |
| "step": 39 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "grad_norm": 2.1733146824122724, |
| "learning_rate": 3.92107901616097e-06, |
| "loss": 0.0524, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "eval_loss": 0.05355316773056984, |
| "eval_runtime": 2.3826, |
| "eval_samples_per_second": 251.826, |
| "eval_steps_per_second": 31.478, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1594202898550725, |
| "eval_loss": 0.05187664180994034, |
| "eval_runtime": 2.3848, |
| "eval_samples_per_second": 251.594, |
| "eval_steps_per_second": 31.449, |
| "step": 41 |
| }, |
| { |
| "epoch": 1.1884057971014492, |
| "grad_norm": 2.315639907284586, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.0641, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.1884057971014492, |
| "eval_loss": 0.05204891413450241, |
| "eval_runtime": 2.3997, |
| "eval_samples_per_second": 250.028, |
| "eval_steps_per_second": 31.254, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.2173913043478262, |
| "eval_loss": 0.052227165549993515, |
| "eval_runtime": 2.3864, |
| "eval_samples_per_second": 251.429, |
| "eval_steps_per_second": 31.429, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.2463768115942029, |
| "grad_norm": 1.5853052624042796, |
| "learning_rate": 3.0031770821715233e-06, |
| "loss": 0.0494, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.2463768115942029, |
| "eval_loss": 0.05136393383145332, |
| "eval_runtime": 2.3856, |
| "eval_samples_per_second": 251.513, |
| "eval_steps_per_second": 31.439, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.2753623188405796, |
| "eval_loss": 0.051076941192150116, |
| "eval_runtime": 2.3923, |
| "eval_samples_per_second": 250.8, |
| "eval_steps_per_second": 31.35, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 0.2342975899295018, |
| "learning_rate": 2.57008293378697e-06, |
| "loss": 0.0502, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "eval_loss": 0.05139908567070961, |
| "eval_runtime": 2.3812, |
| "eval_samples_per_second": 251.974, |
| "eval_steps_per_second": 31.497, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "eval_loss": 0.05105065554380417, |
| "eval_runtime": 2.4018, |
| "eval_samples_per_second": 249.811, |
| "eval_steps_per_second": 31.226, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.3623188405797102, |
| "grad_norm": 1.150174671283694, |
| "learning_rate": 2.159676266344222e-06, |
| "loss": 0.0482, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.3623188405797102, |
| "eval_loss": 0.050515007227659225, |
| "eval_runtime": 2.3849, |
| "eval_samples_per_second": 251.588, |
| "eval_steps_per_second": 31.449, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.391304347826087, |
| "eval_loss": 0.05112989619374275, |
| "eval_runtime": 2.3861, |
| "eval_samples_per_second": 251.452, |
| "eval_steps_per_second": 31.432, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.4202898550724639, |
| "grad_norm": 0.9809479332869758, |
| "learning_rate": 1.7757889363191484e-06, |
| "loss": 0.0472, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4202898550724639, |
| "eval_loss": 0.050852496176958084, |
| "eval_runtime": 2.3888, |
| "eval_samples_per_second": 251.167, |
| "eval_steps_per_second": 31.396, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4492753623188406, |
| "eval_loss": 0.04979565367102623, |
| "eval_runtime": 2.3955, |
| "eval_samples_per_second": 250.469, |
| "eval_steps_per_second": 31.309, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "grad_norm": 0.7181974427552101, |
| "learning_rate": 1.4220051962793952e-06, |
| "loss": 0.0478, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "eval_loss": 0.04979529604315758, |
| "eval_runtime": 2.3784, |
| "eval_samples_per_second": 252.271, |
| "eval_steps_per_second": 31.534, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.5072463768115942, |
| "eval_loss": 0.050219178199768066, |
| "eval_runtime": 2.3756, |
| "eval_samples_per_second": 252.57, |
| "eval_steps_per_second": 31.571, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.5362318840579712, |
| "grad_norm": 1.7774492629444423, |
| "learning_rate": 1.1016282296838887e-06, |
| "loss": 0.055, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.5362318840579712, |
| "eval_loss": 0.04986535757780075, |
| "eval_runtime": 2.3832, |
| "eval_samples_per_second": 251.765, |
| "eval_steps_per_second": 31.471, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.5652173913043477, |
| "eval_loss": 0.04925783351063728, |
| "eval_runtime": 2.3863, |
| "eval_samples_per_second": 251.431, |
| "eval_steps_per_second": 31.429, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.5942028985507246, |
| "grad_norm": 0.7119057580240911, |
| "learning_rate": 8.176493099488664e-07, |
| "loss": 0.0459, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.5942028985507246, |
| "eval_loss": 0.049321793019771576, |
| "eval_runtime": 2.3816, |
| "eval_samples_per_second": 251.931, |
| "eval_steps_per_second": 31.491, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.6231884057971016, |
| "eval_loss": 0.049656953662633896, |
| "eval_runtime": 2.3857, |
| "eval_samples_per_second": 251.499, |
| "eval_steps_per_second": 31.437, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "grad_norm": 1.6066866518550498, |
| "learning_rate": 5.727198717339511e-07, |
| "loss": 0.0492, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "eval_loss": 0.04972606897354126, |
| "eval_runtime": 2.3792, |
| "eval_samples_per_second": 252.181, |
| "eval_steps_per_second": 31.523, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.681159420289855, |
| "eval_loss": 0.04940681904554367, |
| "eval_runtime": 2.3921, |
| "eval_samples_per_second": 250.828, |
| "eval_steps_per_second": 31.354, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.710144927536232, |
| "grad_norm": 1.5684134897039972, |
| "learning_rate": 3.691267552111183e-07, |
| "loss": 0.0504, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.710144927536232, |
| "eval_loss": 0.04902585968375206, |
| "eval_runtime": 2.3816, |
| "eval_samples_per_second": 251.932, |
| "eval_steps_per_second": 31.491, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "eval_loss": 0.04878399893641472, |
| "eval_runtime": 2.3923, |
| "eval_samples_per_second": 250.809, |
| "eval_steps_per_second": 31.351, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.7681159420289854, |
| "grad_norm": 3.0252941822207946, |
| "learning_rate": 2.0877085445416889e-07, |
| "loss": 0.0564, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.7681159420289854, |
| "eval_loss": 0.04881977662444115, |
| "eval_runtime": 2.3726, |
| "eval_samples_per_second": 252.892, |
| "eval_steps_per_second": 31.611, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.7971014492753623, |
| "eval_loss": 0.04876958206295967, |
| "eval_runtime": 2.383, |
| "eval_samples_per_second": 251.783, |
| "eval_steps_per_second": 31.473, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "grad_norm": 0.5275348821410851, |
| "learning_rate": 9.314936930293283e-08, |
| "loss": 0.0503, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "eval_loss": 0.048753608018159866, |
| "eval_runtime": 2.3935, |
| "eval_samples_per_second": 250.679, |
| "eval_steps_per_second": 31.335, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.855072463768116, |
| "eval_loss": 0.04874425381422043, |
| "eval_runtime": 2.3788, |
| "eval_samples_per_second": 252.224, |
| "eval_steps_per_second": 31.528, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.8840579710144927, |
| "grad_norm": 0.7635061728851181, |
| "learning_rate": 2.3341826411756863e-08, |
| "loss": 0.0495, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.8840579710144927, |
| "eval_loss": 0.048726994544267654, |
| "eval_runtime": 2.3833, |
| "eval_samples_per_second": 251.751, |
| "eval_steps_per_second": 31.469, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.9130434782608696, |
| "eval_loss": 0.048674505203962326, |
| "eval_runtime": 2.3749, |
| "eval_samples_per_second": 252.645, |
| "eval_steps_per_second": 31.581, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.9420289855072463, |
| "grad_norm": 0.4446688344675618, |
| "learning_rate": 0.0, |
| "loss": 0.0446, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.9420289855072463, |
| "eval_loss": 0.048731766641139984, |
| "eval_runtime": 2.3934, |
| "eval_samples_per_second": 250.692, |
| "eval_steps_per_second": 31.336, |
| "step": 68 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 68, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 35518238687232.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|