{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5923566878980893, "eval_steps": 7000, "global_step": 56000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19904458598726116, "grad_norm": 0.2248842865228653, "learning_rate": 9.947398350867217e-06, "loss": 0.0083, "step": 7000 }, { "epoch": 0.19904458598726116, "eval_loss": 0.002906730631366372, "eval_runtime": 281.988, "eval_samples_per_second": 249.429, "eval_steps_per_second": 31.179, "step": 7000 }, { "epoch": 0.3980891719745223, "grad_norm": 0.0002081295824609697, "learning_rate": 8.900350699819912e-06, "loss": 0.0026, "step": 14000 }, { "epoch": 0.3980891719745223, "eval_loss": 0.0018650980200618505, "eval_runtime": 284.0569, "eval_samples_per_second": 247.612, "eval_steps_per_second": 30.952, "step": 14000 }, { "epoch": 0.5971337579617835, "grad_norm": 0.010839835740625858, "learning_rate": 7.795014375533159e-06, "loss": 0.0015, "step": 21000 }, { "epoch": 0.5971337579617835, "eval_loss": 0.001360387192107737, "eval_runtime": 285.5315, "eval_samples_per_second": 246.334, "eval_steps_per_second": 30.792, "step": 21000 }, { "epoch": 0.7961783439490446, "grad_norm": 0.00013278079859446734, "learning_rate": 6.689836024138259e-06, "loss": 0.0013, "step": 28000 }, { "epoch": 0.7961783439490446, "eval_loss": 0.0011233221739530563, "eval_runtime": 285.4742, "eval_samples_per_second": 246.383, "eval_steps_per_second": 30.798, "step": 28000 }, { "epoch": 0.9952229299363057, "grad_norm": 0.00014249606465455145, "learning_rate": 5.5841837540678024e-06, "loss": 0.0013, "step": 35000 }, { "epoch": 0.9952229299363057, "eval_loss": 0.0010357595747336745, "eval_runtime": 286.0827, "eval_samples_per_second": 245.859, "eval_steps_per_second": 30.732, "step": 35000 }, { "epoch": 1.194267515923567, "grad_norm": 0.0002924288564827293, "learning_rate": 4.478689456889198e-06, "loss": 0.0008, "step": 42000 }, { "epoch": 1.194267515923567, "eval_loss": 0.0010089210700243711, "eval_runtime": 282.9155, "eval_samples_per_second": 248.611, "eval_steps_per_second": 31.076, "step": 42000 }, { "epoch": 1.393312101910828, "grad_norm": 0.00017198333807755262, "learning_rate": 3.3733531326024454e-06, "loss": 0.0005, "step": 49000 }, { "epoch": 1.393312101910828, "eval_loss": 0.0009262111852876842, "eval_runtime": 285.6294, "eval_samples_per_second": 246.249, "eval_steps_per_second": 30.781, "step": 49000 }, { "epoch": 1.5923566878980893, "grad_norm": 2.850917553587351e-05, "learning_rate": 2.2678588354238415e-06, "loss": 0.0003, "step": 56000 }, { "epoch": 1.5923566878980893, "eval_loss": 0.0008692654664628208, "eval_runtime": 281.4473, "eval_samples_per_second": 249.908, "eval_steps_per_second": 31.239, "step": 56000 } ], "logging_steps": 7000, "max_steps": 70336, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 7000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }