{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26936026936026936,
      "grad_norm": 0.5318887233734131,
      "learning_rate": 0.00019,
      "loss": 2.3031,
      "step": 20
    },
    {
      "epoch": 0.5387205387205387,
      "grad_norm": 0.6192853450775146,
      "learning_rate": 0.00039000000000000005,
      "loss": 2.0771,
      "step": 40
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 0.5707560777664185,
      "learning_rate": 0.0004918181818181818,
      "loss": 1.7597,
      "step": 60
    },
    {
      "epoch": 1.0673400673400673,
      "grad_norm": 0.6496685147285461,
      "learning_rate": 0.00047363636363636363,
      "loss": 1.8263,
      "step": 80
    },
    {
      "epoch": 1.3367003367003367,
      "grad_norm": 0.6331773400306702,
      "learning_rate": 0.00045545454545454546,
      "loss": 1.6477,
      "step": 100
    },
    {
      "epoch": 1.606060606060606,
      "grad_norm": 0.7595996260643005,
      "learning_rate": 0.0004372727272727273,
      "loss": 1.7032,
      "step": 120
    },
    {
      "epoch": 1.8754208754208754,
      "grad_norm": 0.7511059641838074,
      "learning_rate": 0.00041909090909090905,
      "loss": 1.7872,
      "step": 140
    },
    {
      "epoch": 2.1346801346801345,
      "grad_norm": 0.7932320833206177,
      "learning_rate": 0.0004009090909090909,
      "loss": 1.6912,
      "step": 160
    },
    {
      "epoch": 2.404040404040404,
      "grad_norm": 0.8974000811576843,
      "learning_rate": 0.00038272727272727276,
      "loss": 1.6086,
      "step": 180
    },
    {
      "epoch": 2.6734006734006734,
      "grad_norm": 1.2329163551330566,
      "learning_rate": 0.0003645454545454546,
      "loss": 1.6717,
      "step": 200
    },
    {
      "epoch": 2.942760942760943,
      "grad_norm": 0.8940255045890808,
      "learning_rate": 0.0003463636363636364,
      "loss": 1.6426,
      "step": 220
    },
    {
      "epoch": 3.202020202020202,
      "grad_norm": 1.3482205867767334,
      "learning_rate": 0.0003281818181818182,
      "loss": 1.5154,
      "step": 240
    },
    {
      "epoch": 3.4713804713804715,
      "grad_norm": 1.4195595979690552,
      "learning_rate": 0.00031,
      "loss": 1.5668,
      "step": 260
    },
    {
      "epoch": 3.7407407407407405,
      "grad_norm": 1.263625144958496,
      "learning_rate": 0.0002918181818181818,
      "loss": 1.4962,
      "step": 280
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.809946060180664,
      "learning_rate": 0.00027363636363636365,
      "loss": 1.6482,
      "step": 300
    },
    {
      "epoch": 4.269360269360269,
      "grad_norm": 0.6825782060623169,
      "learning_rate": 0.0002554545454545454,
      "loss": 1.5437,
      "step": 320
    },
    {
      "epoch": 4.538720538720539,
      "grad_norm": 0.4899812340736389,
      "learning_rate": 0.00023727272727272727,
      "loss": 1.342,
      "step": 340
    },
    {
      "epoch": 4.808080808080808,
      "grad_norm": 1.1138042211532593,
      "learning_rate": 0.0002190909090909091,
      "loss": 1.4861,
      "step": 360
    },
    {
      "epoch": 5.0673400673400675,
      "grad_norm": 1.300628900527954,
      "learning_rate": 0.0002009090909090909,
      "loss": 1.4108,
      "step": 380
    },
    {
      "epoch": 5.3367003367003365,
      "grad_norm": 1.3766148090362549,
      "learning_rate": 0.00018272727272727275,
      "loss": 1.3262,
      "step": 400
    },
    {
      "epoch": 5.606060606060606,
      "grad_norm": 1.7345802783966064,
      "learning_rate": 0.00016454545454545454,
      "loss": 1.3737,
      "step": 420
    },
    {
      "epoch": 5.875420875420875,
      "grad_norm": 1.6616499423980713,
      "learning_rate": 0.00014636363636363637,
      "loss": 1.428,
      "step": 440
    },
    {
      "epoch": 6.134680134680135,
      "grad_norm": 2.957304000854492,
      "learning_rate": 0.00012818181818181817,
      "loss": 1.3526,
      "step": 460
    },
    {
      "epoch": 6.404040404040404,
      "grad_norm": 2.1497538089752197,
      "learning_rate": 0.00011,
      "loss": 1.2766,
      "step": 480
    },
    {
      "epoch": 6.673400673400673,
      "grad_norm": 1.914229393005371,
      "learning_rate": 9.181818181818182e-05,
      "loss": 1.3351,
      "step": 500
    },
    {
      "epoch": 6.942760942760943,
      "grad_norm": 1.7284122705459595,
      "learning_rate": 7.363636363636364e-05,
      "loss": 1.3482,
      "step": 520
    },
    {
      "epoch": 7.202020202020202,
      "grad_norm": 1.9111416339874268,
      "learning_rate": 5.545454545454546e-05,
      "loss": 1.2894,
      "step": 540
    },
    {
      "epoch": 7.4713804713804715,
      "grad_norm": 1.9759680032730103,
      "learning_rate": 3.727272727272727e-05,
      "loss": 1.2795,
      "step": 560
    },
    {
      "epoch": 7.7407407407407405,
      "grad_norm": 1.2780520915985107,
      "learning_rate": 1.9090909090909094e-05,
      "loss": 1.268,
      "step": 580
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.6921839714050293,
      "learning_rate": 9.090909090909091e-07,
      "loss": 1.2543,
      "step": 600
    }
  ],
  "logging_steps": 20,
  "max_steps": 600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.047361640326758e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}