| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.7914012738853504, | |
| "eval_steps": 7000, | |
| "global_step": 63000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.19904458598726116, | |
| "grad_norm": 0.15043310821056366, | |
| "learning_rate": 9.94455501848166e-06, | |
| "loss": 0.0057, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.19904458598726116, | |
| "eval_loss": 0.002587760565802455, | |
| "eval_runtime": 385.9016, | |
| "eval_samples_per_second": 182.264, | |
| "eval_steps_per_second": 22.783, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.3980891719745223, | |
| "grad_norm": 0.0009809082839637995, | |
| "learning_rate": 8.900666645603615e-06, | |
| "loss": 0.0019, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.3980891719745223, | |
| "eval_loss": 0.0018288933206349611, | |
| "eval_runtime": 386.974, | |
| "eval_samples_per_second": 181.759, | |
| "eval_steps_per_second": 22.72, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.5971337579617835, | |
| "grad_norm": 0.004145281855016947, | |
| "learning_rate": 7.79517234842501e-06, | |
| "loss": 0.0016, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.5971337579617835, | |
| "eval_loss": 0.0012058253632858396, | |
| "eval_runtime": 385.6472, | |
| "eval_samples_per_second": 182.384, | |
| "eval_steps_per_second": 22.798, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.7961783439490446, | |
| "grad_norm": 0.0015833042562007904, | |
| "learning_rate": 6.689678051246407e-06, | |
| "loss": 0.001, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.7961783439490446, | |
| "eval_loss": 0.0009336507064290345, | |
| "eval_runtime": 386.8698, | |
| "eval_samples_per_second": 181.808, | |
| "eval_steps_per_second": 22.726, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.9952229299363057, | |
| "grad_norm": 0.0003439185384195298, | |
| "learning_rate": 5.5843417269596545e-06, | |
| "loss": 0.001, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.9952229299363057, | |
| "eval_loss": 0.0008668347145430744, | |
| "eval_runtime": 386.7927, | |
| "eval_samples_per_second": 181.844, | |
| "eval_steps_per_second": 22.731, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.194267515923567, | |
| "grad_norm": 0.0005341056967154145, | |
| "learning_rate": 4.478847429781049e-06, | |
| "loss": 0.0007, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.194267515923567, | |
| "eval_loss": 0.0008429304580204189, | |
| "eval_runtime": 387.0524, | |
| "eval_samples_per_second": 181.722, | |
| "eval_steps_per_second": 22.715, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.393312101910828, | |
| "grad_norm": 0.00042841769754886627, | |
| "learning_rate": 3.3735111054942974e-06, | |
| "loss": 0.0004, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.393312101910828, | |
| "eval_loss": 0.0009025875478982925, | |
| "eval_runtime": 390.6459, | |
| "eval_samples_per_second": 180.051, | |
| "eval_steps_per_second": 22.506, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.5923566878980893, | |
| "grad_norm": 0.0007971890736371279, | |
| "learning_rate": 2.2678588354238415e-06, | |
| "loss": 0.0003, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.5923566878980893, | |
| "eval_loss": 0.0009143418865278363, | |
| "eval_runtime": 388.6682, | |
| "eval_samples_per_second": 180.967, | |
| "eval_steps_per_second": 22.621, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.7914012738853504, | |
| "grad_norm": 0.02023099735379219, | |
| "learning_rate": 1.162522511137089e-06, | |
| "loss": 0.0002, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.7914012738853504, | |
| "eval_loss": 0.0008290820405818522, | |
| "eval_runtime": 389.2129, | |
| "eval_samples_per_second": 180.713, | |
| "eval_steps_per_second": 22.589, | |
| "step": 63000 | |
| } | |
| ], | |
| "logging_steps": 7000, | |
| "max_steps": 70336, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 7000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |