{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 11235,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22251891410769917,
      "grad_norm": 3.266571044921875,
      "learning_rate": 6.211882510013351e-05,
      "loss": 1.7094,
      "step": 500
    },
    {
      "epoch": 0.22251891410769917,
      "eval_loss": 1.6306489706039429,
      "eval_runtime": 46.6788,
      "eval_samples_per_second": 10.712,
      "eval_steps_per_second": 5.356,
      "step": 500
    },
    {
      "epoch": 0.44503782821539833,
      "grad_norm": 4.2562665939331055,
      "learning_rate": 5.922607921673342e-05,
      "loss": 1.3127,
      "step": 1000
    },
    {
      "epoch": 0.44503782821539833,
      "eval_loss": 1.467544436454773,
      "eval_runtime": 50.253,
      "eval_samples_per_second": 9.95,
      "eval_steps_per_second": 4.975,
      "step": 1000
    },
    {
      "epoch": 0.6675567423230975,
      "grad_norm": 2.5388565063476562,
      "learning_rate": 5.633333333333333e-05,
      "loss": 1.2187,
      "step": 1500
    },
    {
      "epoch": 0.6675567423230975,
      "eval_loss": 1.4355698823928833,
      "eval_runtime": 43.8218,
      "eval_samples_per_second": 11.41,
      "eval_steps_per_second": 5.705,
      "step": 1500
    },
    {
      "epoch": 0.8900756564307967,
      "grad_norm": 2.982630729675293,
      "learning_rate": 5.344058744993324e-05,
      "loss": 1.1802,
      "step": 2000
    },
    {
      "epoch": 0.8900756564307967,
      "eval_loss": 1.3988703489303589,
      "eval_runtime": 45.8023,
      "eval_samples_per_second": 10.916,
      "eval_steps_per_second": 5.458,
      "step": 2000
    },
    {
      "epoch": 1.1125945705384959,
      "grad_norm": 2.919635057449341,
      "learning_rate": 5.054784156653315e-05,
      "loss": 1.1498,
      "step": 2500
    },
    {
      "epoch": 1.1125945705384959,
      "eval_loss": 1.380990743637085,
      "eval_runtime": 42.6058,
      "eval_samples_per_second": 11.735,
      "eval_steps_per_second": 5.868,
      "step": 2500
    },
    {
      "epoch": 1.335113484646195,
      "grad_norm": 2.252101421356201,
      "learning_rate": 4.765509568313306e-05,
      "loss": 1.0942,
      "step": 3000
    },
    {
      "epoch": 1.335113484646195,
      "eval_loss": 1.3706434965133667,
      "eval_runtime": 48.4635,
      "eval_samples_per_second": 10.317,
      "eval_steps_per_second": 5.159,
      "step": 3000
    },
    {
      "epoch": 1.557632398753894,
      "grad_norm": 7.0890302658081055,
      "learning_rate": 4.4768135291499773e-05,
      "loss": 1.0883,
      "step": 3500
    },
    {
      "epoch": 1.557632398753894,
      "eval_loss": 1.355405569076538,
      "eval_runtime": 38.7656,
      "eval_samples_per_second": 12.898,
      "eval_steps_per_second": 6.449,
      "step": 3500
    },
    {
      "epoch": 1.7801513128615931,
      "grad_norm": 2.3868777751922607,
      "learning_rate": 4.187538940809969e-05,
      "loss": 1.0742,
      "step": 4000
    },
    {
      "epoch": 1.7801513128615931,
      "eval_loss": 1.3445924520492554,
      "eval_runtime": 35.778,
      "eval_samples_per_second": 13.975,
      "eval_steps_per_second": 6.988,
      "step": 4000
    },
    {
      "epoch": 2.0026702269692924,
      "grad_norm": 2.5143444538116455,
      "learning_rate": 3.898264352469959e-05,
      "loss": 1.0859,
      "step": 4500
    },
    {
      "epoch": 2.0026702269692924,
      "eval_loss": 1.3386626243591309,
      "eval_runtime": 38.0062,
      "eval_samples_per_second": 13.156,
      "eval_steps_per_second": 6.578,
      "step": 4500
    },
    {
      "epoch": 2.2251891410769917,
      "grad_norm": 3.203813076019287,
      "learning_rate": 3.608989764129951e-05,
      "loss": 1.0543,
      "step": 5000
    },
    {
      "epoch": 2.2251891410769917,
      "eval_loss": 1.3348820209503174,
      "eval_runtime": 38.1414,
      "eval_samples_per_second": 13.109,
      "eval_steps_per_second": 6.555,
      "step": 5000
    },
    {
      "epoch": 2.4477080551846906,
      "grad_norm": 2.6094539165496826,
      "learning_rate": 3.319715175789942e-05,
      "loss": 1.0367,
      "step": 5500
    },
    {
      "epoch": 2.4477080551846906,
      "eval_loss": 1.3313384056091309,
      "eval_runtime": 38.134,
      "eval_samples_per_second": 13.112,
      "eval_steps_per_second": 6.556,
      "step": 5500
    },
    {
      "epoch": 2.67022696929239,
      "grad_norm": 4.17645263671875,
      "learning_rate": 3.030440587449933e-05,
      "loss": 1.0344,
      "step": 6000
    },
    {
      "epoch": 2.67022696929239,
      "eval_loss": 1.3270087242126465,
      "eval_runtime": 36.8681,
      "eval_samples_per_second": 13.562,
      "eval_steps_per_second": 6.781,
      "step": 6000
    },
    {
      "epoch": 2.8927458834000888,
      "grad_norm": 11.839244842529297,
      "learning_rate": 2.741165999109924e-05,
      "loss": 0.9874,
      "step": 6500
    },
    {
      "epoch": 2.8927458834000888,
      "eval_loss": 1.3242576122283936,
      "eval_runtime": 36.662,
      "eval_samples_per_second": 13.638,
      "eval_steps_per_second": 6.819,
      "step": 6500
    },
    {
      "epoch": 3.115264797507788,
      "grad_norm": 3.2487409114837646,
      "learning_rate": 2.451891410769915e-05,
      "loss": 0.9706,
      "step": 7000
    },
    {
      "epoch": 3.115264797507788,
      "eval_loss": 1.320884346961975,
      "eval_runtime": 37.4219,
      "eval_samples_per_second": 13.361,
      "eval_steps_per_second": 6.681,
      "step": 7000
    },
    {
      "epoch": 3.3377837116154874,
      "grad_norm": 2.8250956535339355,
      "learning_rate": 2.1626168224299063e-05,
      "loss": 0.9774,
      "step": 7500
    },
    {
      "epoch": 3.3377837116154874,
      "eval_loss": 1.3206593990325928,
      "eval_runtime": 36.929,
      "eval_samples_per_second": 13.539,
      "eval_steps_per_second": 6.77,
      "step": 7500
    },
    {
      "epoch": 3.5603026257231862,
      "grad_norm": 3.585261821746826,
      "learning_rate": 1.8739207832665775e-05,
      "loss": 0.996,
      "step": 8000
    },
    {
      "epoch": 3.5603026257231862,
      "eval_loss": 1.319943904876709,
      "eval_runtime": 37.6693,
      "eval_samples_per_second": 13.273,
      "eval_steps_per_second": 6.637,
      "step": 8000
    },
    {
      "epoch": 3.7828215398308855,
      "grad_norm": 3.064767837524414,
      "learning_rate": 1.5846461949265685e-05,
      "loss": 0.9925,
      "step": 8500
    },
    {
      "epoch": 3.7828215398308855,
      "eval_loss": 1.315324306488037,
      "eval_runtime": 37.9324,
      "eval_samples_per_second": 13.181,
      "eval_steps_per_second": 6.591,
      "step": 8500
    },
    {
      "epoch": 4.005340453938585,
      "grad_norm": 4.483344554901123,
      "learning_rate": 1.2959501557632397e-05,
      "loss": 0.9888,
      "step": 9000
    },
    {
      "epoch": 4.005340453938585,
      "eval_loss": 1.3151663541793823,
      "eval_runtime": 37.0808,
      "eval_samples_per_second": 13.484,
      "eval_steps_per_second": 6.742,
      "step": 9000
    },
    {
      "epoch": 4.227859368046284,
      "grad_norm": 3.749549388885498,
      "learning_rate": 1.0066755674232309e-05,
      "loss": 0.9621,
      "step": 9500
    },
    {
      "epoch": 4.227859368046284,
      "eval_loss": 1.3176747560501099,
      "eval_runtime": 37.5664,
      "eval_samples_per_second": 13.31,
      "eval_steps_per_second": 6.655,
      "step": 9500
    },
    {
      "epoch": 4.4503782821539835,
      "grad_norm": 3.061640501022339,
      "learning_rate": 7.1740097908322204e-06,
      "loss": 0.9529,
      "step": 10000
    },
    {
      "epoch": 4.4503782821539835,
      "eval_loss": 1.318975567817688,
      "eval_runtime": 37.2821,
      "eval_samples_per_second": 13.411,
      "eval_steps_per_second": 6.706,
      "step": 10000
    },
    {
      "epoch": 4.672897196261682,
      "grad_norm": 3.4453513622283936,
      "learning_rate": 4.281263907432131e-06,
      "loss": 0.9458,
      "step": 10500
    },
    {
      "epoch": 4.672897196261682,
      "eval_loss": 1.3179384469985962,
      "eval_runtime": 37.5006,
      "eval_samples_per_second": 13.333,
      "eval_steps_per_second": 6.667,
      "step": 10500
    },
    {
      "epoch": 4.895416110369381,
      "grad_norm": 3.4908862113952637,
      "learning_rate": 1.3885180240320425e-06,
      "loss": 0.9557,
      "step": 11000
    },
    {
      "epoch": 4.895416110369381,
      "eval_loss": 1.3164401054382324,
      "eval_runtime": 37.3637,
      "eval_samples_per_second": 13.382,
      "eval_steps_per_second": 6.691,
      "step": 11000
    }
  ],
  "logging_steps": 500,
  "max_steps": 11235,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.237323417580544e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}