{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21528525296017223, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021528525296017224, "grad_norm": 0.3708704113960266, "learning_rate": 2.9265847744427305e-05, "loss": 0.9806, "step": 10 }, { "epoch": 0.04305705059203445, "grad_norm": 0.3818190097808838, "learning_rate": 2.7135254915624213e-05, "loss": 0.9308, "step": 20 }, { "epoch": 0.06458557588805167, "grad_norm": 0.3616221249103546, "learning_rate": 2.3816778784387097e-05, "loss": 0.8707, "step": 30 }, { "epoch": 0.0861141011840689, "grad_norm": 0.35507309436798096, "learning_rate": 1.963525491562421e-05, "loss": 0.8331, "step": 40 }, { "epoch": 0.10764262648008611, "grad_norm": 0.3332822322845459, "learning_rate": 1.5e-05, "loss": 0.7764, "step": 50 }, { "epoch": 0.12917115177610333, "grad_norm": 0.3367880880832672, "learning_rate": 1.036474508437579e-05, "loss": 0.749, "step": 60 }, { "epoch": 0.15069967707212056, "grad_norm": 0.3377549946308136, "learning_rate": 6.1832212156129045e-06, "loss": 0.7373, "step": 70 }, { "epoch": 0.1722282023681378, "grad_norm": 0.33790719509124756, "learning_rate": 2.86474508437579e-06, "loss": 0.7164, "step": 80 }, { "epoch": 0.193756727664155, "grad_norm": 0.34763064980506897, "learning_rate": 7.341522555726971e-07, "loss": 0.6908, "step": 90 }, { "epoch": 0.21528525296017223, "grad_norm": 0.3527802526950836, "learning_rate": 0.0, "loss": 0.7082, "step": 100 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.901593108591821e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }