{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2018,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.049554013875123884,
      "grad_norm": 0.09496160596609116,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.3288,
      "step": 100
    },
    {
      "epoch": 0.09910802775024777,
      "grad_norm": 0.21523982286453247,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.2852,
      "step": 200
    },
    {
      "epoch": 0.14866204162537167,
      "grad_norm": 0.3396208584308624,
      "learning_rate": 2e-05,
      "loss": 2.2673,
      "step": 300
    },
    {
      "epoch": 0.19821605550049554,
      "grad_norm": 0.455649197101593,
      "learning_rate": 1.9833270328249614e-05,
      "loss": 2.167,
      "step": 400
    },
    {
      "epoch": 0.24777006937561943,
      "grad_norm": 0.5028106570243835,
      "learning_rate": 1.933864106968685e-05,
      "loss": 2.1722,
      "step": 500
    },
    {
      "epoch": 0.29732408325074333,
      "grad_norm": 0.5224338173866272,
      "learning_rate": 1.8532606099095376e-05,
      "loss": 2.1245,
      "step": 600
    },
    {
      "epoch": 0.3468780971258672,
      "grad_norm": 0.6227980256080627,
      "learning_rate": 1.7442043405688393e-05,
      "loss": 2.054,
      "step": 700
    },
    {
      "epoch": 0.39643211100099107,
      "grad_norm": 0.6063210368156433,
      "learning_rate": 1.61033188214449e-05,
      "loss": 2.0739,
      "step": 800
    },
    {
      "epoch": 0.44598612487611494,
      "grad_norm": 0.6256656646728516,
      "learning_rate": 1.4561073368463916e-05,
      "loss": 2.0641,
      "step": 900
    },
    {
      "epoch": 0.49554013875123887,
      "grad_norm": 0.7533057332038879,
      "learning_rate": 1.2866734662372244e-05,
      "loss": 2.0057,
      "step": 1000
    },
    {
      "epoch": 0.5450941526263627,
      "grad_norm": 0.6291965842247009,
      "learning_rate": 1.107680201043002e-05,
      "loss": 2.0111,
      "step": 1100
    },
    {
      "epoch": 0.5946481665014867,
      "grad_norm": 0.782930850982666,
      "learning_rate": 9.250962389339966e-06,
      "loss": 2.0145,
      "step": 1200
    },
    {
      "epoch": 0.6442021803766105,
      "grad_norm": 0.7603575587272644,
      "learning_rate": 7.450100127240713e-06,
      "loss": 1.9896,
      "step": 1300
    },
    {
      "epoch": 0.6937561942517344,
      "grad_norm": 0.7139636874198914,
      "learning_rate": 5.7342666588977705e-06,
      "loss": 2.0316,
      "step": 1400
    },
    {
      "epoch": 0.7433102081268583,
      "grad_norm": 0.7095770835876465,
      "learning_rate": 4.160678054502153e-06,
      "loss": 1.9755,
      "step": 1500
    },
    {
      "epoch": 0.7928642220019821,
      "grad_norm": 0.7470089197158813,
      "learning_rate": 2.7818070963500776e-06,
      "loss": 1.934,
      "step": 1600
    },
    {
      "epoch": 0.8424182358771061,
      "grad_norm": 0.9231148362159729,
      "learning_rate": 1.643633524889301e-06,
      "loss": 2.0226,
      "step": 1700
    },
    {
      "epoch": 0.8919722497522299,
      "grad_norm": 0.8875738978385925,
      "learning_rate": 7.841108013127585e-07,
      "loss": 1.9803,
      "step": 1800
    },
    {
      "epoch": 0.9415262636273538,
      "grad_norm": 0.9144193530082703,
      "learning_rate": 2.3190051393322754e-07,
      "loss": 1.9554,
      "step": 1900
    },
    {
      "epoch": 0.9910802775024777,
      "grad_norm": 0.7741144299507141,
      "learning_rate": 5.41663074110188e-09,
      "loss": 1.9692,
      "step": 2000
    },
    {
      "epoch": 1.0,
      "step": 2018,
      "total_flos": 3.6664480530432e+16,
      "train_loss": 2.0711384613521036,
      "train_runtime": 629.2276,
      "train_samples_per_second": 6.413,
      "train_steps_per_second": 3.207
    }
  ],
  "logging_steps": 100,
  "max_steps": 2018,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6664480530432e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}