{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3699849693606197,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018499248468030985,
      "grad_norm": 2.7567262649536133,
      "learning_rate": 9.9e-06,
      "loss": 2.3993,
      "step": 100
    },
    {
      "epoch": 0.03699849693606197,
      "grad_norm": 2.469092845916748,
      "learning_rate": 1.9900000000000003e-05,
      "loss": 1.8863,
      "step": 200
    },
    {
      "epoch": 0.05549774540409296,
      "grad_norm": 2.287935256958008,
      "learning_rate": 1.987638906230491e-05,
      "loss": 1.6964,
      "step": 300
    },
    {
      "epoch": 0.07399699387212394,
      "grad_norm": 2.158879518508911,
      "learning_rate": 1.9751529529279564e-05,
      "loss": 1.6172,
      "step": 400
    },
    {
      "epoch": 0.09249624234015492,
      "grad_norm": 2.3301937580108643,
      "learning_rate": 1.9626669996254215e-05,
      "loss": 1.5604,
      "step": 500
    },
    {
      "epoch": 0.09249624234015492,
      "eval_loss": 1.5311822891235352,
      "eval_runtime": 836.5935,
      "eval_samples_per_second": 22.154,
      "eval_steps_per_second": 11.077,
      "step": 500
    },
    {
      "epoch": 0.11099549080818592,
      "grad_norm": 1.958460807800293,
      "learning_rate": 1.950181046322887e-05,
      "loss": 1.5221,
      "step": 600
    },
    {
      "epoch": 0.1294947392762169,
      "grad_norm": 2.088949680328369,
      "learning_rate": 1.9376950930203523e-05,
      "loss": 1.5003,
      "step": 700
    },
    {
      "epoch": 0.14799398774424788,
      "grad_norm": 2.262972593307495,
      "learning_rate": 1.9252091397178178e-05,
      "loss": 1.4501,
      "step": 800
    },
    {
      "epoch": 0.16649323621227888,
      "grad_norm": 1.8716174364089966,
      "learning_rate": 1.912723186415283e-05,
      "loss": 1.4359,
      "step": 900
    },
    {
      "epoch": 0.18499248468030985,
      "grad_norm": 1.6942836046218872,
      "learning_rate": 1.9002372331127483e-05,
      "loss": 1.4317,
      "step": 1000
    },
    {
      "epoch": 0.18499248468030985,
      "eval_loss": 1.4133305549621582,
      "eval_runtime": 838.9381,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 11.046,
      "step": 1000
    },
    {
      "epoch": 0.20349173314834085,
      "grad_norm": 2.1511948108673096,
      "learning_rate": 1.8877512798102137e-05,
      "loss": 1.4053,
      "step": 1100
    },
    {
      "epoch": 0.22199098161637185,
      "grad_norm": 1.7339736223220825,
      "learning_rate": 1.875265326507679e-05,
      "loss": 1.3915,
      "step": 1200
    },
    {
      "epoch": 0.24049023008440282,
      "grad_norm": 1.9543105363845825,
      "learning_rate": 1.8627793732051446e-05,
      "loss": 1.3825,
      "step": 1300
    },
    {
      "epoch": 0.2589894785524338,
      "grad_norm": 1.7334178686141968,
      "learning_rate": 1.8502934199026097e-05,
      "loss": 1.3708,
      "step": 1400
    },
    {
      "epoch": 0.2774887270204648,
      "grad_norm": 1.5893480777740479,
      "learning_rate": 1.837807466600075e-05,
      "loss": 1.3594,
      "step": 1500
    },
    {
      "epoch": 0.2774887270204648,
      "eval_loss": 1.3516819477081299,
      "eval_runtime": 838.2447,
      "eval_samples_per_second": 22.11,
      "eval_steps_per_second": 11.055,
      "step": 1500
    },
    {
      "epoch": 0.29598797548849576,
      "grad_norm": 1.9415546655654907,
      "learning_rate": 1.8253215132975405e-05,
      "loss": 1.3602,
      "step": 1600
    },
    {
      "epoch": 0.3144872239565268,
      "grad_norm": 1.6256144046783447,
      "learning_rate": 1.812835559995006e-05,
      "loss": 1.3297,
      "step": 1700
    },
    {
      "epoch": 0.33298647242455776,
      "grad_norm": 1.6166146993637085,
      "learning_rate": 1.800349606692471e-05,
      "loss": 1.3273,
      "step": 1800
    },
    {
      "epoch": 0.3514857208925887,
      "grad_norm": 1.7110105752944946,
      "learning_rate": 1.7878636533899365e-05,
      "loss": 1.2984,
      "step": 1900
    },
    {
      "epoch": 0.3699849693606197,
      "grad_norm": 1.4126442670822144,
      "learning_rate": 1.7753777000874016e-05,
      "loss": 1.3113,
      "step": 2000
    },
    {
      "epoch": 0.3699849693606197,
      "eval_loss": 1.3114128112792969,
      "eval_runtime": 836.695,
      "eval_samples_per_second": 22.151,
      "eval_steps_per_second": 11.076,
      "step": 2000
    }
  ],
  "logging_steps": 100,
  "max_steps": 16218,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6722690048e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}