| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.929233772571987, | |
| "eval_steps": 50, | |
| "global_step": 1280, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3904343582235237, | |
| "grad_norm": 0.5675944685935974, | |
| "learning_rate": 9.609375e-05, | |
| "loss": 1.5678, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3904343582235237, | |
| "eval_loss": 1.53541898727417, | |
| "eval_runtime": 11.6265, | |
| "eval_samples_per_second": 37.156, | |
| "eval_steps_per_second": 18.578, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7808687164470474, | |
| "grad_norm": 0.5330150127410889, | |
| "learning_rate": 9.21875e-05, | |
| "loss": 1.5019, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7808687164470474, | |
| "eval_loss": 1.4973269701004028, | |
| "eval_runtime": 11.5507, | |
| "eval_samples_per_second": 37.4, | |
| "eval_steps_per_second": 18.7, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.16398243045388, | |
| "grad_norm": 0.5410779714584351, | |
| "learning_rate": 8.828125000000001e-05, | |
| "loss": 1.4244, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.16398243045388, | |
| "eval_loss": 1.43968665599823, | |
| "eval_runtime": 11.5205, | |
| "eval_samples_per_second": 37.498, | |
| "eval_steps_per_second": 18.749, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.5544167886774036, | |
| "grad_norm": 0.5698382258415222, | |
| "learning_rate": 8.4375e-05, | |
| "loss": 1.4023, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.5544167886774036, | |
| "eval_loss": 1.4115360975265503, | |
| "eval_runtime": 11.5294, | |
| "eval_samples_per_second": 37.469, | |
| "eval_steps_per_second": 18.735, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.9448511469009273, | |
| "grad_norm": 0.5761227607727051, | |
| "learning_rate": 8.046875e-05, | |
| "loss": 1.3693, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.9448511469009273, | |
| "eval_loss": 1.3649152517318726, | |
| "eval_runtime": 11.5198, | |
| "eval_samples_per_second": 37.501, | |
| "eval_steps_per_second": 18.75, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.32796486090776, | |
| "grad_norm": 0.5856680274009705, | |
| "learning_rate": 7.65625e-05, | |
| "loss": 1.302, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.32796486090776, | |
| "eval_loss": 1.3940138816833496, | |
| "eval_runtime": 11.5409, | |
| "eval_samples_per_second": 37.432, | |
| "eval_steps_per_second": 18.716, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.7183992191312836, | |
| "grad_norm": 0.5735114216804504, | |
| "learning_rate": 7.265625000000001e-05, | |
| "loss": 1.3063, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.7183992191312836, | |
| "eval_loss": 1.3489629030227661, | |
| "eval_runtime": 11.5502, | |
| "eval_samples_per_second": 37.402, | |
| "eval_steps_per_second": 18.701, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.101512933138116, | |
| "grad_norm": 0.6013683676719666, | |
| "learning_rate": 6.875e-05, | |
| "loss": 1.2507, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.101512933138116, | |
| "eval_loss": 1.3298077583312988, | |
| "eval_runtime": 11.5601, | |
| "eval_samples_per_second": 37.37, | |
| "eval_steps_per_second": 18.685, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.49194729136164, | |
| "grad_norm": 0.6187678575515747, | |
| "learning_rate": 6.484375e-05, | |
| "loss": 1.2463, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.49194729136164, | |
| "eval_loss": 1.2986701726913452, | |
| "eval_runtime": 11.5395, | |
| "eval_samples_per_second": 37.437, | |
| "eval_steps_per_second": 18.718, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.8823816495851635, | |
| "grad_norm": 0.5973629951477051, | |
| "learning_rate": 6.0937500000000004e-05, | |
| "loss": 1.2315, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.8823816495851635, | |
| "eval_loss": 1.2973381280899048, | |
| "eval_runtime": 11.5491, | |
| "eval_samples_per_second": 37.405, | |
| "eval_steps_per_second": 18.703, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.265495363591996, | |
| "grad_norm": 0.6226805448532104, | |
| "learning_rate": 5.703125e-05, | |
| "loss": 1.1778, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 4.265495363591996, | |
| "eval_loss": 1.2510361671447754, | |
| "eval_runtime": 11.5281, | |
| "eval_samples_per_second": 37.474, | |
| "eval_steps_per_second": 18.737, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 4.65592972181552, | |
| "grad_norm": 0.64255690574646, | |
| "learning_rate": 5.3125000000000004e-05, | |
| "loss": 1.1819, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 4.65592972181552, | |
| "eval_loss": 1.2529098987579346, | |
| "eval_runtime": 11.5409, | |
| "eval_samples_per_second": 37.432, | |
| "eval_steps_per_second": 18.716, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.039043435822352, | |
| "grad_norm": 0.6386131048202515, | |
| "learning_rate": 4.921875e-05, | |
| "loss": 1.1507, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 5.039043435822352, | |
| "eval_loss": 1.2271380424499512, | |
| "eval_runtime": 11.5699, | |
| "eval_samples_per_second": 37.338, | |
| "eval_steps_per_second": 18.669, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 5.4294777940458765, | |
| "grad_norm": 0.6771230101585388, | |
| "learning_rate": 4.5312500000000004e-05, | |
| "loss": 1.134, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 5.4294777940458765, | |
| "eval_loss": 1.2191808223724365, | |
| "eval_runtime": 11.5238, | |
| "eval_samples_per_second": 37.488, | |
| "eval_steps_per_second": 18.744, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 5.819912152269399, | |
| "grad_norm": 0.6427966952323914, | |
| "learning_rate": 4.140625e-05, | |
| "loss": 1.1258, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 5.819912152269399, | |
| "eval_loss": 1.2103700637817383, | |
| "eval_runtime": 11.5062, | |
| "eval_samples_per_second": 37.545, | |
| "eval_steps_per_second": 18.772, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 6.203025866276232, | |
| "grad_norm": 0.6937867403030396, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 1.0941, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 6.203025866276232, | |
| "eval_loss": 1.2105975151062012, | |
| "eval_runtime": 11.55, | |
| "eval_samples_per_second": 37.403, | |
| "eval_steps_per_second": 18.701, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 6.593460224499756, | |
| "grad_norm": 0.719428300857544, | |
| "learning_rate": 3.359375e-05, | |
| "loss": 1.0923, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 6.593460224499756, | |
| "eval_loss": 1.1799763441085815, | |
| "eval_runtime": 11.536, | |
| "eval_samples_per_second": 37.448, | |
| "eval_steps_per_second": 18.724, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 6.98389458272328, | |
| "grad_norm": 0.65595942735672, | |
| "learning_rate": 2.96875e-05, | |
| "loss": 1.0796, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 6.98389458272328, | |
| "eval_loss": 1.1729077100753784, | |
| "eval_runtime": 11.5363, | |
| "eval_samples_per_second": 37.447, | |
| "eval_steps_per_second": 18.724, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 7.367008296730113, | |
| "grad_norm": 0.7260088920593262, | |
| "learning_rate": 2.578125e-05, | |
| "loss": 1.0371, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 7.367008296730113, | |
| "eval_loss": 1.1634416580200195, | |
| "eval_runtime": 11.5405, | |
| "eval_samples_per_second": 37.433, | |
| "eval_steps_per_second": 18.717, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 7.7574426549536355, | |
| "grad_norm": 0.6944181323051453, | |
| "learning_rate": 2.1875e-05, | |
| "loss": 1.0466, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 7.7574426549536355, | |
| "eval_loss": 1.154969573020935, | |
| "eval_runtime": 11.532, | |
| "eval_samples_per_second": 37.461, | |
| "eval_steps_per_second": 18.731, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 8.140556368960468, | |
| "grad_norm": 0.7572025060653687, | |
| "learning_rate": 1.796875e-05, | |
| "loss": 1.0228, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 8.140556368960468, | |
| "eval_loss": 1.1517966985702515, | |
| "eval_runtime": 11.5429, | |
| "eval_samples_per_second": 37.426, | |
| "eval_steps_per_second": 18.713, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 8.530990727183992, | |
| "grad_norm": 0.6960224509239197, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "loss": 1.0231, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 8.530990727183992, | |
| "eval_loss": 1.108694314956665, | |
| "eval_runtime": 11.5162, | |
| "eval_samples_per_second": 37.512, | |
| "eval_steps_per_second": 18.756, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 8.921425085407517, | |
| "grad_norm": 0.6743898391723633, | |
| "learning_rate": 1.0156250000000001e-05, | |
| "loss": 1.0164, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 8.921425085407517, | |
| "eval_loss": 1.1212413311004639, | |
| "eval_runtime": 11.5308, | |
| "eval_samples_per_second": 37.465, | |
| "eval_steps_per_second": 18.732, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 9.304538799414349, | |
| "grad_norm": 0.7794139385223389, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.9863, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 9.304538799414349, | |
| "eval_loss": 1.1227957010269165, | |
| "eval_runtime": 11.5133, | |
| "eval_samples_per_second": 37.522, | |
| "eval_steps_per_second": 18.761, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 9.694973157637872, | |
| "grad_norm": 0.7152210474014282, | |
| "learning_rate": 2.3437500000000002e-06, | |
| "loss": 1.005, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 9.694973157637872, | |
| "eval_loss": 1.1129647493362427, | |
| "eval_runtime": 11.5181, | |
| "eval_samples_per_second": 37.506, | |
| "eval_steps_per_second": 18.753, | |
| "step": 1250 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1280, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.541080433502454e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |