| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0144927536231885, |
| "eval_steps": 1, |
| "global_step": 35, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028985507246376812, |
| "eval_loss": 3.890916585922241, |
| "eval_runtime": 2.366, |
| "eval_samples_per_second": 253.597, |
| "eval_steps_per_second": 31.7, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.057971014492753624, |
| "grad_norm": 251.42497029296223, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 3.8462, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.057971014492753624, |
| "eval_loss": 3.1606125831604004, |
| "eval_runtime": 2.3735, |
| "eval_samples_per_second": 252.789, |
| "eval_steps_per_second": 31.599, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "eval_loss": 1.4003069400787354, |
| "eval_runtime": 2.3791, |
| "eval_samples_per_second": 252.195, |
| "eval_steps_per_second": 31.524, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "grad_norm": 174.88885660985272, |
| "learning_rate": 9.994161134161635e-06, |
| "loss": 2.3026, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.11594202898550725, |
| "eval_loss": 0.5247076749801636, |
| "eval_runtime": 2.3571, |
| "eval_samples_per_second": 254.551, |
| "eval_steps_per_second": 31.819, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.14492753623188406, |
| "eval_loss": 0.25349560379981995, |
| "eval_runtime": 2.3667, |
| "eval_samples_per_second": 253.52, |
| "eval_steps_per_second": 31.69, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 9.541839408808285, |
| "learning_rate": 9.947531997255256e-06, |
| "loss": 0.3725, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "eval_loss": 0.12238868325948715, |
| "eval_runtime": 2.3709, |
| "eval_samples_per_second": 253.068, |
| "eval_steps_per_second": 31.633, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.2028985507246377, |
| "eval_loss": 0.07106433808803558, |
| "eval_runtime": 2.3595, |
| "eval_samples_per_second": 254.287, |
| "eval_steps_per_second": 31.786, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "grad_norm": 8.373395170519098, |
| "learning_rate": 9.854709087130261e-06, |
| "loss": 0.1704, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2318840579710145, |
| "eval_loss": 0.07050631195306778, |
| "eval_runtime": 2.374, |
| "eval_samples_per_second": 252.739, |
| "eval_steps_per_second": 31.592, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "eval_loss": 0.0841919556260109, |
| "eval_runtime": 2.3733, |
| "eval_samples_per_second": 252.81, |
| "eval_steps_per_second": 31.601, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "grad_norm": 9.328057178580242, |
| "learning_rate": 9.716559066288716e-06, |
| "loss": 0.0719, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "eval_loss": 0.06837386637926102, |
| "eval_runtime": 2.402, |
| "eval_samples_per_second": 249.788, |
| "eval_steps_per_second": 31.223, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3188405797101449, |
| "eval_loss": 0.08372741937637329, |
| "eval_runtime": 2.3771, |
| "eval_samples_per_second": 252.413, |
| "eval_steps_per_second": 31.552, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 8.195685627940097, |
| "learning_rate": 9.534371804252727e-06, |
| "loss": 0.0719, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "eval_loss": 0.07937659323215485, |
| "eval_runtime": 2.3703, |
| "eval_samples_per_second": 253.131, |
| "eval_steps_per_second": 31.641, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.37681159420289856, |
| "eval_loss": 0.06787987053394318, |
| "eval_runtime": 2.3654, |
| "eval_samples_per_second": 253.659, |
| "eval_steps_per_second": 31.707, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "grad_norm": 3.0846120042199954, |
| "learning_rate": 9.309848334400247e-06, |
| "loss": 0.0729, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4057971014492754, |
| "eval_loss": 0.060705069452524185, |
| "eval_runtime": 2.3698, |
| "eval_samples_per_second": 253.186, |
| "eval_steps_per_second": 31.648, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "eval_loss": 0.06819155067205429, |
| "eval_runtime": 2.3712, |
| "eval_samples_per_second": 253.037, |
| "eval_steps_per_second": 31.63, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "grad_norm": 3.7022895578403414, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.0639, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.463768115942029, |
| "eval_loss": 0.06595086306333542, |
| "eval_runtime": 2.3702, |
| "eval_samples_per_second": 253.148, |
| "eval_steps_per_second": 31.643, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.4927536231884058, |
| "eval_loss": 0.06074570491909981, |
| "eval_runtime": 2.3929, |
| "eval_samples_per_second": 250.74, |
| "eval_steps_per_second": 31.342, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 2.6201997383559235, |
| "learning_rate": 8.742553740855507e-06, |
| "loss": 0.0659, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "eval_loss": 0.060938794165849686, |
| "eval_runtime": 2.3734, |
| "eval_samples_per_second": 252.797, |
| "eval_steps_per_second": 31.6, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5507246376811594, |
| "eval_loss": 0.05989724025130272, |
| "eval_runtime": 2.386, |
| "eval_samples_per_second": 251.47, |
| "eval_steps_per_second": 31.434, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 1.5759739495214995, |
| "learning_rate": 8.405079293933986e-06, |
| "loss": 0.0584, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "eval_loss": 0.05950001999735832, |
| "eval_runtime": 2.3751, |
| "eval_samples_per_second": 252.625, |
| "eval_steps_per_second": 31.578, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "eval_loss": 0.057929884642362595, |
| "eval_runtime": 2.3951, |
| "eval_samples_per_second": 250.515, |
| "eval_steps_per_second": 31.314, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "grad_norm": 0.9083257875769617, |
| "learning_rate": 8.035812539093557e-06, |
| "loss": 0.059, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6376811594202898, |
| "eval_loss": 0.05716191604733467, |
| "eval_runtime": 2.3793, |
| "eval_samples_per_second": 252.176, |
| "eval_steps_per_second": 31.522, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "eval_loss": 0.05785393714904785, |
| "eval_runtime": 2.3743, |
| "eval_samples_per_second": 252.704, |
| "eval_steps_per_second": 31.588, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 9.258583060973042, |
| "learning_rate": 7.638201220530664e-06, |
| "loss": 0.1069, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "eval_loss": 0.06170507147908211, |
| "eval_runtime": 2.3968, |
| "eval_samples_per_second": 250.337, |
| "eval_steps_per_second": 31.292, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7246376811594203, |
| "eval_loss": 0.06007671728730202, |
| "eval_runtime": 2.375, |
| "eval_samples_per_second": 252.631, |
| "eval_steps_per_second": 31.579, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "grad_norm": 2.788879674143748, |
| "learning_rate": 7.215957727996208e-06, |
| "loss": 0.0585, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.7536231884057971, |
| "eval_loss": 0.05631522089242935, |
| "eval_runtime": 2.4038, |
| "eval_samples_per_second": 249.609, |
| "eval_steps_per_second": 31.201, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "eval_loss": 0.05981193110346794, |
| "eval_runtime": 2.3841, |
| "eval_samples_per_second": 251.665, |
| "eval_steps_per_second": 31.458, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "grad_norm": 3.982184927790719, |
| "learning_rate": 6.773024435212678e-06, |
| "loss": 0.097, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8115942028985508, |
| "eval_loss": 0.05898861214518547, |
| "eval_runtime": 2.3921, |
| "eval_samples_per_second": 250.824, |
| "eval_steps_per_second": 31.353, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8405797101449275, |
| "eval_loss": 0.05481765791773796, |
| "eval_runtime": 2.3767, |
| "eval_samples_per_second": 252.451, |
| "eval_steps_per_second": 31.556, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.18833058180333875, |
| "learning_rate": 6.313536890992935e-06, |
| "loss": 0.059, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "eval_loss": 0.05593809857964516, |
| "eval_runtime": 2.3764, |
| "eval_samples_per_second": 252.478, |
| "eval_steps_per_second": 31.56, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8985507246376812, |
| "eval_loss": 0.05695917829871178, |
| "eval_runtime": 2.39, |
| "eval_samples_per_second": 251.049, |
| "eval_steps_per_second": 31.381, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "grad_norm": 3.4944330077548207, |
| "learning_rate": 5.841785206735192e-06, |
| "loss": 0.0695, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "eval_loss": 0.05482754111289978, |
| "eval_runtime": 2.3734, |
| "eval_samples_per_second": 252.799, |
| "eval_steps_per_second": 31.6, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "eval_loss": 0.055433232337236404, |
| "eval_runtime": 2.3729, |
| "eval_samples_per_second": 252.86, |
| "eval_steps_per_second": 31.607, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "grad_norm": 2.742927364863374, |
| "learning_rate": 5.362174000808813e-06, |
| "loss": 0.0533, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.9855072463768116, |
| "eval_loss": 0.05639192834496498, |
| "eval_runtime": 2.3727, |
| "eval_samples_per_second": 252.873, |
| "eval_steps_per_second": 31.609, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0144927536231885, |
| "eval_loss": 0.054112281650304794, |
| "eval_runtime": 2.37, |
| "eval_samples_per_second": 253.168, |
| "eval_steps_per_second": 31.646, |
| "step": 35 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 68, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 18479969206272.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|