| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.32, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.987720251083374, | |
| "learning_rate": 3.997524171965045e-05, | |
| "loss": 4.506, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 1.1270077228546143, | |
| "learning_rate": 3.990002677172515e-05, | |
| "loss": 4.1703, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.9228858351707458, | |
| "learning_rate": 3.9774542629091646e-05, | |
| "loss": 3.9996, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.9696429371833801, | |
| "learning_rate": 3.9599106275584746e-05, | |
| "loss": 3.9142, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.9501475691795349, | |
| "learning_rate": 3.937416087865917e-05, | |
| "loss": 3.8441, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.9516975283622742, | |
| "learning_rate": 3.91002746699101e-05, | |
| "loss": 3.7967, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.8569780588150024, | |
| "learning_rate": 3.877813950967087e-05, | |
| "loss": 3.7618, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.940584659576416, | |
| "learning_rate": 3.8408569139313696e-05, | |
| "loss": 3.6315, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.9224157333374023, | |
| "learning_rate": 3.799249712566837e-05, | |
| "loss": 3.6187, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8730221390724182, | |
| "learning_rate": 3.753097450275138e-05, | |
| "loss": 3.5771, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.8778759837150574, | |
| "learning_rate": 3.7025167116762844e-05, | |
| "loss": 3.5414, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 1.054726004600525, | |
| "learning_rate": 3.647635268105776e-05, | |
| "loss": 3.5168, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.9288316965103149, | |
| "learning_rate": 3.5885917548531206e-05, | |
| "loss": 3.4751, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 1.053566813468933, | |
| "learning_rate": 3.52553532095706e-05, | |
| "loss": 3.487, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9158502817153931, | |
| "learning_rate": 3.458625252442156e-05, | |
| "loss": 3.4344, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 1.1394481658935547, | |
| "learning_rate": 3.388030569948466e-05, | |
| "loss": 3.4369, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.8391066789627075, | |
| "learning_rate": 3.313929601770737e-05, | |
| "loss": 3.3938, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.8551831841468811, | |
| "learning_rate": 3.23650953338566e-05, | |
| "loss": 3.3531, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.8429365754127502, | |
| "learning_rate": 3.155965934605104e-05, | |
| "loss": 3.3752, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.998236358165741, | |
| "learning_rate": 3.072502265549804e-05, | |
| "loss": 3.3798, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 6250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.4483807633408e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |