{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 154,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06527947776417789,
      "grad_norm": 0.3659420311450958,
      "learning_rate": 4.999421254949728e-05,
      "loss": 0.5493,
      "step": 10
    },
    {
      "epoch": 0.13055895552835578,
      "grad_norm": 0.3503095209598541,
      "learning_rate": 4.930295491572653e-05,
      "loss": 0.5735,
      "step": 20
    },
    {
      "epoch": 0.19583843329253367,
      "grad_norm": 0.42860716581344604,
      "learning_rate": 4.749077174130608e-05,
      "loss": 0.5553,
      "step": 30
    },
    {
      "epoch": 0.26111791105671156,
      "grad_norm": 0.4349290430545807,
      "learning_rate": 4.464124637696786e-05,
      "loss": 0.5709,
      "step": 40
    },
    {
      "epoch": 0.3263973888208894,
      "grad_norm": 0.4874759018421173,
      "learning_rate": 4.088580752225444e-05,
      "loss": 0.5424,
      "step": 50
    },
    {
      "epoch": 0.39167686658506734,
      "grad_norm": 0.39663174748420715,
      "learning_rate": 3.639766733771147e-05,
      "loss": 0.5106,
      "step": 60
    },
    {
      "epoch": 0.4569563443492452,
      "grad_norm": 0.3940886855125427,
      "learning_rate": 3.1383832377904675e-05,
      "loss": 0.5367,
      "step": 70
    },
    {
      "epoch": 0.5222358221134231,
      "grad_norm": 0.4165942966938019,
      "learning_rate": 2.607555582511326e-05,
      "loss": 0.5536,
      "step": 80
    },
    {
      "epoch": 0.587515299877601,
      "grad_norm": 0.37471580505371094,
      "learning_rate": 2.071767139546306e-05,
      "loss": 0.5372,
      "step": 90
    },
    {
      "epoch": 0.6527947776417788,
      "grad_norm": 0.37760552763938904,
      "learning_rate": 1.5557300869900876e-05,
      "loss": 0.5255,
      "step": 100
    },
    {
      "epoch": 0.7180742554059567,
      "grad_norm": 0.3830636143684387,
      "learning_rate": 1.0832456092722062e-05,
      "loss": 0.5326,
      "step": 110
    },
    {
      "epoch": 0.7833537331701347,
      "grad_norm": 0.3630097210407257,
      "learning_rate": 6.761061147837808e-06,
      "loss": 0.5391,
      "step": 120
    },
    {
      "epoch": 0.8486332109343125,
      "grad_norm": 0.4070269465446472,
      "learning_rate": 3.5309010431049283e-06,
      "loss": 0.5158,
      "step": 130
    },
    {
      "epoch": 0.9139126886984904,
      "grad_norm": 0.3441726267337799,
      "learning_rate": 1.2909604996957092e-06,
      "loss": 0.5276,
      "step": 140
    },
    {
      "epoch": 0.9791921664626683,
      "grad_norm": 0.34463751316070557,
      "learning_rate": 1.4455232765120396e-07,
      "loss": 0.5374,
      "step": 150
    }
  ],
  "logging_steps": 10,
  "max_steps": 154,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.526035444970086e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}