{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.32, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 0.987720251083374, "learning_rate": 3.997524171965045e-05, "loss": 4.506, "step": 100 }, { "epoch": 0.032, "grad_norm": 1.1270077228546143, "learning_rate": 3.990002677172515e-05, "loss": 4.1703, "step": 200 }, { "epoch": 0.048, "grad_norm": 0.9228858351707458, "learning_rate": 3.9774542629091646e-05, "loss": 3.9996, "step": 300 }, { "epoch": 0.064, "grad_norm": 0.9696429371833801, "learning_rate": 3.9599106275584746e-05, "loss": 3.9142, "step": 400 }, { "epoch": 0.08, "grad_norm": 0.9501475691795349, "learning_rate": 3.937416087865917e-05, "loss": 3.8441, "step": 500 }, { "epoch": 0.096, "grad_norm": 0.9516975283622742, "learning_rate": 3.91002746699101e-05, "loss": 3.7967, "step": 600 }, { "epoch": 0.112, "grad_norm": 0.8569780588150024, "learning_rate": 3.877813950967087e-05, "loss": 3.7618, "step": 700 }, { "epoch": 0.128, "grad_norm": 0.940584659576416, "learning_rate": 3.8408569139313696e-05, "loss": 3.6315, "step": 800 }, { "epoch": 0.144, "grad_norm": 0.9224157333374023, "learning_rate": 3.799249712566837e-05, "loss": 3.6187, "step": 900 }, { "epoch": 0.16, "grad_norm": 0.8730221390724182, "learning_rate": 3.753097450275138e-05, "loss": 3.5771, "step": 1000 }, { "epoch": 0.176, "grad_norm": 0.8778759837150574, "learning_rate": 3.7025167116762844e-05, "loss": 3.5414, "step": 1100 }, { "epoch": 0.192, "grad_norm": 1.054726004600525, "learning_rate": 3.647635268105776e-05, "loss": 3.5168, "step": 1200 }, { "epoch": 0.208, "grad_norm": 0.9288316965103149, "learning_rate": 3.5885917548531206e-05, "loss": 3.4751, "step": 1300 }, { "epoch": 0.224, "grad_norm": 1.053566813468933, "learning_rate": 3.52553532095706e-05, "loss": 3.487, "step": 1400 }, { "epoch": 0.24, "grad_norm": 0.9158502817153931, "learning_rate": 3.458625252442156e-05, "loss": 3.4344, "step": 1500 }, { "epoch": 0.256, "grad_norm": 1.1394481658935547, "learning_rate": 3.388030569948466e-05, "loss": 3.4369, "step": 1600 }, { "epoch": 0.272, "grad_norm": 0.8391066789627075, "learning_rate": 3.313929601770737e-05, "loss": 3.3938, "step": 1700 }, { "epoch": 0.288, "grad_norm": 0.8551831841468811, "learning_rate": 3.23650953338566e-05, "loss": 3.3531, "step": 1800 }, { "epoch": 0.304, "grad_norm": 0.8429365754127502, "learning_rate": 3.155965934605104e-05, "loss": 3.3752, "step": 1900 }, { "epoch": 0.32, "grad_norm": 0.998236358165741, "learning_rate": 3.072502265549804e-05, "loss": 3.3798, "step": 2000 } ], "logging_steps": 100, "max_steps": 6250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.4483807633408e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }