| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9957081545064378, | |
| "eval_steps": 1000, | |
| "global_step": 174, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05722460658082976, | |
| "grad_norm": 2.8843040386286187, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2427, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.11444921316165951, | |
| "grad_norm": 0.9017664305111632, | |
| "learning_rate": 9.909643486313533e-06, | |
| "loss": 0.5328, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.17167381974248927, | |
| "grad_norm": 0.797068637259448, | |
| "learning_rate": 9.641839665080363e-06, | |
| "loss": 0.4447, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22889842632331903, | |
| "grad_norm": 0.6941745476559829, | |
| "learning_rate": 9.206267664155906e-06, | |
| "loss": 0.4017, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2861230329041488, | |
| "grad_norm": 0.6250591769825486, | |
| "learning_rate": 8.61867019052535e-06, | |
| "loss": 0.3776, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.34334763948497854, | |
| "grad_norm": 0.6177949791797019, | |
| "learning_rate": 7.900284547855992e-06, | |
| "loss": 0.3662, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4005722460658083, | |
| "grad_norm": 0.6776944915753278, | |
| "learning_rate": 7.0770750650094335e-06, | |
| "loss": 0.3658, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.45779685264663805, | |
| "grad_norm": 0.7027875485112575, | |
| "learning_rate": 6.178794677547138e-06, | |
| "loss": 0.3553, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5150214592274678, | |
| "grad_norm": 0.6301404025031555, | |
| "learning_rate": 5.237909579118713e-06, | |
| "loss": 0.3317, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5722460658082976, | |
| "grad_norm": 0.6840316408079424, | |
| "learning_rate": 4.2884258086335755e-06, | |
| "loss": 0.3428, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6294706723891274, | |
| "grad_norm": 0.6028809547153681, | |
| "learning_rate": 3.3646601834128924e-06, | |
| "loss": 0.331, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6866952789699571, | |
| "grad_norm": 0.6463003155720976, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.3252, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7439198855507868, | |
| "grad_norm": 0.5511586120399353, | |
| "learning_rate": 1.7256963302735752e-06, | |
| "loss": 0.3167, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8011444921316166, | |
| "grad_norm": 0.6770595845177203, | |
| "learning_rate": 1.0697345262860638e-06, | |
| "loss": 0.326, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.8583690987124464, | |
| "grad_norm": 0.5474842641924246, | |
| "learning_rate": 5.558227567253832e-07, | |
| "loss": 0.3133, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9155937052932761, | |
| "grad_norm": 0.5872523545390601, | |
| "learning_rate": 2.0253513192751374e-07, | |
| "loss": 0.3195, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.9728183118741058, | |
| "grad_norm": 0.5924289357218945, | |
| "learning_rate": 2.264038713457706e-08, | |
| "loss": 0.3216, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9957081545064378, | |
| "step": 174, | |
| "total_flos": 599683779002368.0, | |
| "train_loss": 0.40995535357245083, | |
| "train_runtime": 10019.3178, | |
| "train_samples_per_second": 2.232, | |
| "train_steps_per_second": 0.017 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 174, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 599683779002368.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |