| { | |
| "best_metric": 0.0003887661441694945, | |
| "best_model_checkpoint": "Models/t5-base-class-gen/checkpoint-3200", | |
| "epoch": 4.662379421221865, | |
| "eval_steps": 100, | |
| "global_step": 5800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08038585209003216, | |
| "grad_norm": 1.386023998260498, | |
| "learning_rate": 3.936334405144695e-05, | |
| "loss": 0.6335, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08038585209003216, | |
| "eval_loss": 0.08590172976255417, | |
| "eval_runtime": 0.1746, | |
| "eval_samples_per_second": 171.859, | |
| "eval_steps_per_second": 22.915, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1607717041800643, | |
| "grad_norm": 0.95208740234375, | |
| "learning_rate": 3.872025723472669e-05, | |
| "loss": 0.1377, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1607717041800643, | |
| "eval_loss": 0.04498327895998955, | |
| "eval_runtime": 0.1731, | |
| "eval_samples_per_second": 173.336, | |
| "eval_steps_per_second": 23.112, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24115755627009647, | |
| "grad_norm": 1.106736660003662, | |
| "learning_rate": 3.8077170418006436e-05, | |
| "loss": 0.0849, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.24115755627009647, | |
| "eval_loss": 0.03304059058427811, | |
| "eval_runtime": 0.2272, | |
| "eval_samples_per_second": 132.057, | |
| "eval_steps_per_second": 17.608, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3215434083601286, | |
| "grad_norm": 0.9386335015296936, | |
| "learning_rate": 3.743408360128617e-05, | |
| "loss": 0.059, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3215434083601286, | |
| "eval_loss": 0.016175953671336174, | |
| "eval_runtime": 0.2536, | |
| "eval_samples_per_second": 118.301, | |
| "eval_steps_per_second": 15.774, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.40192926045016075, | |
| "grad_norm": 0.7505399584770203, | |
| "learning_rate": 3.679099678456592e-05, | |
| "loss": 0.0471, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.40192926045016075, | |
| "eval_loss": 0.010730231180787086, | |
| "eval_runtime": 0.2933, | |
| "eval_samples_per_second": 102.3, | |
| "eval_steps_per_second": 13.64, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.48231511254019294, | |
| "grad_norm": 0.05818118155002594, | |
| "learning_rate": 3.614790996784566e-05, | |
| "loss": 0.0368, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.48231511254019294, | |
| "eval_loss": 0.008968004025518894, | |
| "eval_runtime": 0.1746, | |
| "eval_samples_per_second": 171.788, | |
| "eval_steps_per_second": 22.905, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5627009646302251, | |
| "grad_norm": 0.0749644786119461, | |
| "learning_rate": 3.5504823151125405e-05, | |
| "loss": 0.0232, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5627009646302251, | |
| "eval_loss": 0.007880235090851784, | |
| "eval_runtime": 0.1711, | |
| "eval_samples_per_second": 175.365, | |
| "eval_steps_per_second": 23.382, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6430868167202572, | |
| "grad_norm": 0.26040318608283997, | |
| "learning_rate": 3.486173633440515e-05, | |
| "loss": 0.0244, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6430868167202572, | |
| "eval_loss": 0.007417692337185144, | |
| "eval_runtime": 0.1745, | |
| "eval_samples_per_second": 171.912, | |
| "eval_steps_per_second": 22.922, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7234726688102894, | |
| "grad_norm": 0.19101421535015106, | |
| "learning_rate": 3.421864951768489e-05, | |
| "loss": 0.0201, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7234726688102894, | |
| "eval_loss": 0.007583172060549259, | |
| "eval_runtime": 0.1833, | |
| "eval_samples_per_second": 163.635, | |
| "eval_steps_per_second": 21.818, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8038585209003215, | |
| "grad_norm": 0.18456044793128967, | |
| "learning_rate": 3.3575562700964637e-05, | |
| "loss": 0.0206, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8038585209003215, | |
| "eval_loss": 0.006357308477163315, | |
| "eval_runtime": 0.1846, | |
| "eval_samples_per_second": 162.547, | |
| "eval_steps_per_second": 21.673, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8842443729903537, | |
| "grad_norm": 0.34773826599121094, | |
| "learning_rate": 3.2932475884244374e-05, | |
| "loss": 0.0228, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8842443729903537, | |
| "eval_loss": 0.007683805655688047, | |
| "eval_runtime": 0.1718, | |
| "eval_samples_per_second": 174.574, | |
| "eval_steps_per_second": 23.277, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9646302250803859, | |
| "grad_norm": 0.20785485208034515, | |
| "learning_rate": 3.228938906752412e-05, | |
| "loss": 0.0212, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9646302250803859, | |
| "eval_loss": 0.006169555243104696, | |
| "eval_runtime": 0.1672, | |
| "eval_samples_per_second": 179.46, | |
| "eval_steps_per_second": 23.928, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.045016077170418, | |
| "grad_norm": 0.6145943999290466, | |
| "learning_rate": 3.164630225080386e-05, | |
| "loss": 0.0144, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.045016077170418, | |
| "eval_loss": 0.006269162520766258, | |
| "eval_runtime": 0.1787, | |
| "eval_samples_per_second": 167.852, | |
| "eval_steps_per_second": 22.38, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1254019292604502, | |
| "grad_norm": 0.07819650322198868, | |
| "learning_rate": 3.1003215434083605e-05, | |
| "loss": 0.0108, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1254019292604502, | |
| "eval_loss": 0.007830055430531502, | |
| "eval_runtime": 0.1706, | |
| "eval_samples_per_second": 175.836, | |
| "eval_steps_per_second": 23.445, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.2057877813504823, | |
| "grad_norm": 0.22531314194202423, | |
| "learning_rate": 3.0360128617363346e-05, | |
| "loss": 0.0127, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2057877813504823, | |
| "eval_loss": 0.005049354862421751, | |
| "eval_runtime": 0.1729, | |
| "eval_samples_per_second": 173.534, | |
| "eval_steps_per_second": 23.138, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2861736334405145, | |
| "grad_norm": 0.025266777724027634, | |
| "learning_rate": 2.971704180064309e-05, | |
| "loss": 0.0119, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2861736334405145, | |
| "eval_loss": 0.005167185328900814, | |
| "eval_runtime": 0.1769, | |
| "eval_samples_per_second": 169.631, | |
| "eval_steps_per_second": 22.617, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3665594855305465, | |
| "grad_norm": 0.08020277321338654, | |
| "learning_rate": 2.9073954983922834e-05, | |
| "loss": 0.0099, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3665594855305465, | |
| "eval_loss": 0.002216967288404703, | |
| "eval_runtime": 0.1741, | |
| "eval_samples_per_second": 172.331, | |
| "eval_steps_per_second": 22.977, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4469453376205788, | |
| "grad_norm": 0.07968125492334366, | |
| "learning_rate": 2.8430868167202574e-05, | |
| "loss": 0.0123, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.4469453376205788, | |
| "eval_loss": 0.0026903103571385145, | |
| "eval_runtime": 0.1673, | |
| "eval_samples_per_second": 179.365, | |
| "eval_steps_per_second": 23.915, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.527331189710611, | |
| "grad_norm": 0.20680592954158783, | |
| "learning_rate": 2.7787781350482318e-05, | |
| "loss": 0.0108, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.527331189710611, | |
| "eval_loss": 0.0027133109979331493, | |
| "eval_runtime": 0.2477, | |
| "eval_samples_per_second": 121.116, | |
| "eval_steps_per_second": 16.149, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.607717041800643, | |
| "grad_norm": 0.7855786085128784, | |
| "learning_rate": 2.714469453376206e-05, | |
| "loss": 0.014, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.607717041800643, | |
| "eval_loss": 0.0028839909937232733, | |
| "eval_runtime": 0.1692, | |
| "eval_samples_per_second": 177.293, | |
| "eval_steps_per_second": 23.639, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.6881028938906752, | |
| "grad_norm": 0.12237449735403061, | |
| "learning_rate": 2.6501607717041802e-05, | |
| "loss": 0.0095, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.6881028938906752, | |
| "eval_loss": 0.0036916760727763176, | |
| "eval_runtime": 0.2775, | |
| "eval_samples_per_second": 108.096, | |
| "eval_steps_per_second": 14.413, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.7684887459807075, | |
| "grad_norm": 0.37831100821495056, | |
| "learning_rate": 2.5858520900321543e-05, | |
| "loss": 0.011, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.7684887459807075, | |
| "eval_loss": 0.0021040281280875206, | |
| "eval_runtime": 0.1705, | |
| "eval_samples_per_second": 175.999, | |
| "eval_steps_per_second": 23.467, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.8488745980707395, | |
| "grad_norm": 0.049582913517951965, | |
| "learning_rate": 2.521543408360129e-05, | |
| "loss": 0.0102, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.8488745980707395, | |
| "eval_loss": 0.0018795446958392859, | |
| "eval_runtime": 0.1653, | |
| "eval_samples_per_second": 181.47, | |
| "eval_steps_per_second": 24.196, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.9292604501607717, | |
| "grad_norm": 0.06450924277305603, | |
| "learning_rate": 2.457234726688103e-05, | |
| "loss": 0.0101, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.9292604501607717, | |
| "eval_loss": 0.001975016202777624, | |
| "eval_runtime": 0.1716, | |
| "eval_samples_per_second": 174.862, | |
| "eval_steps_per_second": 23.315, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.009646302250804, | |
| "grad_norm": 0.06440392136573792, | |
| "learning_rate": 2.3929260450160775e-05, | |
| "loss": 0.01, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.009646302250804, | |
| "eval_loss": 0.0010833271080628037, | |
| "eval_runtime": 0.1729, | |
| "eval_samples_per_second": 173.502, | |
| "eval_steps_per_second": 23.134, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.090032154340836, | |
| "grad_norm": 0.06326356530189514, | |
| "learning_rate": 2.3286173633440515e-05, | |
| "loss": 0.0077, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.090032154340836, | |
| "eval_loss": 0.001661359565332532, | |
| "eval_runtime": 0.1798, | |
| "eval_samples_per_second": 166.856, | |
| "eval_steps_per_second": 22.248, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.170418006430868, | |
| "grad_norm": 0.19675737619400024, | |
| "learning_rate": 2.264308681672026e-05, | |
| "loss": 0.0072, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.170418006430868, | |
| "eval_loss": 0.0009880892466753721, | |
| "eval_runtime": 0.2313, | |
| "eval_samples_per_second": 129.68, | |
| "eval_steps_per_second": 17.291, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.2508038585209005, | |
| "grad_norm": 0.5421108603477478, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.0074, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.2508038585209005, | |
| "eval_loss": 0.002171145286411047, | |
| "eval_runtime": 0.1872, | |
| "eval_samples_per_second": 160.223, | |
| "eval_steps_per_second": 21.363, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.3311897106109325, | |
| "grad_norm": 0.5133712291717529, | |
| "learning_rate": 2.1356913183279743e-05, | |
| "loss": 0.0086, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.3311897106109325, | |
| "eval_loss": 0.001456312253139913, | |
| "eval_runtime": 0.1745, | |
| "eval_samples_per_second": 171.895, | |
| "eval_steps_per_second": 22.919, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.4115755627009645, | |
| "grad_norm": 0.4056571125984192, | |
| "learning_rate": 2.0713826366559487e-05, | |
| "loss": 0.0065, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.4115755627009645, | |
| "eval_loss": 0.0018532021204009652, | |
| "eval_runtime": 0.1763, | |
| "eval_samples_per_second": 170.132, | |
| "eval_steps_per_second": 22.684, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.491961414790997, | |
| "grad_norm": 0.29752230644226074, | |
| "learning_rate": 2.0070739549839228e-05, | |
| "loss": 0.0072, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.491961414790997, | |
| "eval_loss": 0.001285334350541234, | |
| "eval_runtime": 0.1764, | |
| "eval_samples_per_second": 170.115, | |
| "eval_steps_per_second": 22.682, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.572347266881029, | |
| "grad_norm": 0.2506803572177887, | |
| "learning_rate": 1.9427652733118975e-05, | |
| "loss": 0.0078, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.572347266881029, | |
| "eval_loss": 0.0003887661441694945, | |
| "eval_runtime": 0.2322, | |
| "eval_samples_per_second": 129.197, | |
| "eval_steps_per_second": 17.226, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.652733118971061, | |
| "grad_norm": 0.8498000502586365, | |
| "learning_rate": 1.8784565916398715e-05, | |
| "loss": 0.0073, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.652733118971061, | |
| "eval_loss": 0.0005741061177104712, | |
| "eval_runtime": 0.1749, | |
| "eval_samples_per_second": 171.507, | |
| "eval_steps_per_second": 22.868, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.733118971061093, | |
| "grad_norm": 0.056631457060575485, | |
| "learning_rate": 1.814147909967846e-05, | |
| "loss": 0.0066, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.733118971061093, | |
| "eval_loss": 0.000957026903051883, | |
| "eval_runtime": 0.1744, | |
| "eval_samples_per_second": 171.987, | |
| "eval_steps_per_second": 22.932, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.8135048231511255, | |
| "grad_norm": 0.017297716811299324, | |
| "learning_rate": 1.74983922829582e-05, | |
| "loss": 0.0072, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.8135048231511255, | |
| "eval_loss": 0.0007342658936977386, | |
| "eval_runtime": 0.1696, | |
| "eval_samples_per_second": 176.903, | |
| "eval_steps_per_second": 23.587, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.8938906752411575, | |
| "grad_norm": 0.6115002036094666, | |
| "learning_rate": 1.6855305466237944e-05, | |
| "loss": 0.008, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.8938906752411575, | |
| "eval_loss": 0.0007112031453289092, | |
| "eval_runtime": 0.1781, | |
| "eval_samples_per_second": 168.482, | |
| "eval_steps_per_second": 22.464, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.97427652733119, | |
| "grad_norm": 0.158920019865036, | |
| "learning_rate": 1.6212218649517684e-05, | |
| "loss": 0.0076, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.97427652733119, | |
| "eval_loss": 0.0015783592825755477, | |
| "eval_runtime": 0.208, | |
| "eval_samples_per_second": 144.199, | |
| "eval_steps_per_second": 19.226, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.054662379421222, | |
| "grad_norm": 0.2712903916835785, | |
| "learning_rate": 1.5569131832797428e-05, | |
| "loss": 0.0073, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.054662379421222, | |
| "eval_loss": 0.0012706245761364698, | |
| "eval_runtime": 0.1692, | |
| "eval_samples_per_second": 177.289, | |
| "eval_steps_per_second": 23.639, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.135048231511254, | |
| "grad_norm": 0.4465363323688507, | |
| "learning_rate": 1.492604501607717e-05, | |
| "loss": 0.0057, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.135048231511254, | |
| "eval_loss": 0.0017842828528955579, | |
| "eval_runtime": 0.1697, | |
| "eval_samples_per_second": 176.806, | |
| "eval_steps_per_second": 23.574, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.215434083601286, | |
| "grad_norm": 0.25834837555885315, | |
| "learning_rate": 1.4282958199356913e-05, | |
| "loss": 0.0054, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.215434083601286, | |
| "eval_loss": 0.001442342414520681, | |
| "eval_runtime": 0.1703, | |
| "eval_samples_per_second": 176.201, | |
| "eval_steps_per_second": 23.494, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.2958199356913185, | |
| "grad_norm": 0.1956845223903656, | |
| "learning_rate": 1.3639871382636658e-05, | |
| "loss": 0.0054, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.2958199356913185, | |
| "eval_loss": 0.001215717988088727, | |
| "eval_runtime": 0.182, | |
| "eval_samples_per_second": 164.818, | |
| "eval_steps_per_second": 21.976, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.3762057877813505, | |
| "grad_norm": 0.06578990817070007, | |
| "learning_rate": 1.29967845659164e-05, | |
| "loss": 0.0062, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.3762057877813505, | |
| "eval_loss": 0.002128337509930134, | |
| "eval_runtime": 0.1734, | |
| "eval_samples_per_second": 173.009, | |
| "eval_steps_per_second": 23.068, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.4565916398713825, | |
| "grad_norm": 0.017272261902689934, | |
| "learning_rate": 1.2360128617363345e-05, | |
| "loss": 0.0048, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.4565916398713825, | |
| "eval_loss": 0.0026242188178002834, | |
| "eval_runtime": 0.218, | |
| "eval_samples_per_second": 137.619, | |
| "eval_steps_per_second": 18.349, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.536977491961415, | |
| "grad_norm": 0.008806917816400528, | |
| "learning_rate": 1.1717041800643088e-05, | |
| "loss": 0.0062, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.536977491961415, | |
| "eval_loss": 0.002198620932176709, | |
| "eval_runtime": 0.2168, | |
| "eval_samples_per_second": 138.401, | |
| "eval_steps_per_second": 18.453, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.617363344051447, | |
| "grad_norm": 0.013797425664961338, | |
| "learning_rate": 1.107395498392283e-05, | |
| "loss": 0.0059, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.617363344051447, | |
| "eval_loss": 0.002305293455719948, | |
| "eval_runtime": 0.1711, | |
| "eval_samples_per_second": 175.368, | |
| "eval_steps_per_second": 23.382, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.697749196141479, | |
| "grad_norm": 0.07389205694198608, | |
| "learning_rate": 1.0430868167202572e-05, | |
| "loss": 0.0044, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.697749196141479, | |
| "eval_loss": 0.002617767546325922, | |
| "eval_runtime": 0.2272, | |
| "eval_samples_per_second": 132.034, | |
| "eval_steps_per_second": 17.605, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.778135048231511, | |
| "grad_norm": 0.41877493262290955, | |
| "learning_rate": 9.787781350482316e-06, | |
| "loss": 0.0049, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.778135048231511, | |
| "eval_loss": 0.0019422216573730111, | |
| "eval_runtime": 0.2456, | |
| "eval_samples_per_second": 122.127, | |
| "eval_steps_per_second": 16.284, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.8585209003215435, | |
| "grad_norm": 0.2227245718240738, | |
| "learning_rate": 9.144694533762058e-06, | |
| "loss": 0.0062, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.8585209003215435, | |
| "eval_loss": 0.001634993706829846, | |
| "eval_runtime": 0.1871, | |
| "eval_samples_per_second": 160.368, | |
| "eval_steps_per_second": 21.382, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.9389067524115755, | |
| "grad_norm": 0.24094010889530182, | |
| "learning_rate": 8.5016077170418e-06, | |
| "loss": 0.0055, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.9389067524115755, | |
| "eval_loss": 0.0020372075960040092, | |
| "eval_runtime": 0.1737, | |
| "eval_samples_per_second": 172.716, | |
| "eval_steps_per_second": 23.029, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.019292604501608, | |
| "grad_norm": 0.0924796536564827, | |
| "learning_rate": 7.858520900321544e-06, | |
| "loss": 0.0051, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.019292604501608, | |
| "eval_loss": 0.0011331220157444477, | |
| "eval_runtime": 0.1743, | |
| "eval_samples_per_second": 172.114, | |
| "eval_steps_per_second": 22.949, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.09967845659164, | |
| "grad_norm": 0.045692551881074905, | |
| "learning_rate": 7.215434083601287e-06, | |
| "loss": 0.004, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.09967845659164, | |
| "eval_loss": 0.0016930572455748916, | |
| "eval_runtime": 0.1768, | |
| "eval_samples_per_second": 169.716, | |
| "eval_steps_per_second": 22.629, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.180064308681672, | |
| "grad_norm": 0.012550954706966877, | |
| "learning_rate": 6.572347266881029e-06, | |
| "loss": 0.0049, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 4.180064308681672, | |
| "eval_loss": 0.0016865974757820368, | |
| "eval_runtime": 0.1688, | |
| "eval_samples_per_second": 177.694, | |
| "eval_steps_per_second": 23.693, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 4.260450160771704, | |
| "grad_norm": 0.11495041847229004, | |
| "learning_rate": 5.929260450160772e-06, | |
| "loss": 0.0034, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 4.260450160771704, | |
| "eval_loss": 0.0018789003370329738, | |
| "eval_runtime": 0.177, | |
| "eval_samples_per_second": 169.461, | |
| "eval_steps_per_second": 22.595, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 4.340836012861736, | |
| "grad_norm": 0.31747967004776, | |
| "learning_rate": 5.286173633440515e-06, | |
| "loss": 0.0039, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 4.340836012861736, | |
| "eval_loss": 0.0019645672291517258, | |
| "eval_runtime": 0.1686, | |
| "eval_samples_per_second": 177.95, | |
| "eval_steps_per_second": 23.727, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 4.421221864951768, | |
| "grad_norm": 0.03923821821808815, | |
| "learning_rate": 4.643086816720258e-06, | |
| "loss": 0.0037, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.421221864951768, | |
| "eval_loss": 0.001890690764412284, | |
| "eval_runtime": 0.1768, | |
| "eval_samples_per_second": 169.698, | |
| "eval_steps_per_second": 22.626, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.501607717041801, | |
| "grad_norm": 0.0037739709950983524, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0046, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 4.501607717041801, | |
| "eval_loss": 0.00146665854845196, | |
| "eval_runtime": 0.1675, | |
| "eval_samples_per_second": 179.094, | |
| "eval_steps_per_second": 23.879, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 4.581993569131833, | |
| "grad_norm": 0.2434912621974945, | |
| "learning_rate": 3.356913183279743e-06, | |
| "loss": 0.0056, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 4.581993569131833, | |
| "eval_loss": 0.0013827175134792924, | |
| "eval_runtime": 0.1747, | |
| "eval_samples_per_second": 171.749, | |
| "eval_steps_per_second": 22.9, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 4.662379421221865, | |
| "grad_norm": 0.26639288663864136, | |
| "learning_rate": 2.7138263665594855e-06, | |
| "loss": 0.0035, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 4.662379421221865, | |
| "eval_loss": 0.0014188647037371993, | |
| "eval_runtime": 0.2919, | |
| "eval_samples_per_second": 102.775, | |
| "eval_steps_per_second": 13.703, | |
| "step": 5800 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 6220, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2292205513512960.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |