{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 13.041884816753926,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.418848167539267,
      "grad_norm": 1.1237713098526,
      "learning_rate": 4.9979013702509664e-05,
      "loss": 1.4003,
      "num_input_tokens_seen": 63440,
      "step": 10
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.5457362532615662,
      "learning_rate": 4.9906513710563894e-05,
      "loss": 0.1699,
      "num_input_tokens_seen": 127024,
      "step": 20
    },
    {
      "epoch": 1.2931937172774868,
      "grad_norm": 0.47847944498062134,
      "learning_rate": 4.9782391165565324e-05,
      "loss": 0.1719,
      "num_input_tokens_seen": 194352,
      "step": 30
    },
    {
      "epoch": 1.7120418848167538,
      "grad_norm": 0.8392484188079834,
      "learning_rate": 4.960690333044279e-05,
      "loss": 0.1433,
      "num_input_tokens_seen": 257776,
      "step": 40
    },
    {
      "epoch": 2.167539267015707,
      "grad_norm": 0.6877206563949585,
      "learning_rate": 4.938041393053273e-05,
      "loss": 0.1352,
      "num_input_tokens_seen": 325440,
      "step": 50
    },
    {
      "epoch": 2.5863874345549736,
      "grad_norm": 0.5719232559204102,
      "learning_rate": 4.910339239970286e-05,
      "loss": 0.1067,
      "num_input_tokens_seen": 388576,
      "step": 60
    },
    {
      "epoch": 3.0418848167539267,
      "grad_norm": 0.9559742212295532,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.1101,
      "num_input_tokens_seen": 456256,
      "step": 70
    },
    {
      "epoch": 3.4607329842931938,
      "grad_norm": 0.4461997151374817,
      "learning_rate": 4.8400153168490414e-05,
      "loss": 0.0852,
      "num_input_tokens_seen": 518864,
      "step": 80
    },
    {
      "epoch": 3.8795811518324608,
      "grad_norm": 0.5407759547233582,
      "learning_rate": 4.7975393038803754e-05,
      "loss": 0.0897,
      "num_input_tokens_seen": 582464,
      "step": 90
    },
    {
      "epoch": 4.335078534031414,
      "grad_norm": 0.7839125990867615,
      "learning_rate": 4.750301289855128e-05,
      "loss": 0.0928,
      "num_input_tokens_seen": 649664,
      "step": 100
    },
    {
      "epoch": 4.7539267015706805,
      "grad_norm": 0.41905662417411804,
      "learning_rate": 4.69839918277092e-05,
      "loss": 0.0748,
      "num_input_tokens_seen": 712944,
      "step": 110
    },
    {
      "epoch": 5.209424083769633,
      "grad_norm": 0.5274420976638794,
      "learning_rate": 4.641940557670478e-05,
      "loss": 0.0805,
      "num_input_tokens_seen": 780128,
      "step": 120
    },
    {
      "epoch": 5.628272251308901,
      "grad_norm": 0.5005943775177002,
      "learning_rate": 4.581042433675921e-05,
      "loss": 0.07,
      "num_input_tokens_seen": 844064,
      "step": 130
    },
    {
      "epoch": 6.0837696335078535,
      "grad_norm": 0.5864464044570923,
      "learning_rate": 4.5158310314487706e-05,
      "loss": 0.0728,
      "num_input_tokens_seen": 911488,
      "step": 140
    },
    {
      "epoch": 6.50261780104712,
      "grad_norm": 0.4119901657104492,
      "learning_rate": 4.446441511578351e-05,
      "loss": 0.0641,
      "num_input_tokens_seen": 975216,
      "step": 150
    },
    {
      "epoch": 6.9214659685863875,
      "grad_norm": 0.47534871101379395,
      "learning_rate": 4.373017694440827e-05,
      "loss": 0.0679,
      "num_input_tokens_seen": 1037504,
      "step": 160
    },
    {
      "epoch": 7.37696335078534,
      "grad_norm": 0.5238337516784668,
      "learning_rate": 4.295711762109515e-05,
      "loss": 0.0682,
      "num_input_tokens_seen": 1104512,
      "step": 170
    },
    {
      "epoch": 7.795811518324607,
      "grad_norm": 0.5485618710517883,
      "learning_rate": 4.214683942934291e-05,
      "loss": 0.0596,
      "num_input_tokens_seen": 1168112,
      "step": 180
    },
    {
      "epoch": 8.25130890052356,
      "grad_norm": 0.544316291809082,
      "learning_rate": 4.130102179443877e-05,
      "loss": 0.0641,
      "num_input_tokens_seen": 1235104,
      "step": 190
    },
    {
      "epoch": 8.670157068062828,
      "grad_norm": 0.5107344388961792,
      "learning_rate": 4.042141780259292e-05,
      "loss": 0.0631,
      "num_input_tokens_seen": 1298512,
      "step": 200
    },
    {
      "epoch": 9.12565445026178,
      "grad_norm": 0.3618321120738983,
      "learning_rate": 3.9509850567399774e-05,
      "loss": 0.0687,
      "num_input_tokens_seen": 1365312,
      "step": 210
    },
    {
      "epoch": 9.544502617801047,
      "grad_norm": 0.5421671271324158,
      "learning_rate": 3.856820945115655e-05,
      "loss": 0.0548,
      "num_input_tokens_seen": 1429344,
      "step": 220
    },
    {
      "epoch": 9.963350785340314,
      "grad_norm": 0.5248610973358154,
      "learning_rate": 3.759844614887141e-05,
      "loss": 0.057,
      "num_input_tokens_seen": 1492960,
      "step": 230
    },
    {
      "epoch": 10.418848167539267,
      "grad_norm": 0.4770837724208832,
      "learning_rate": 3.6602570643077556e-05,
      "loss": 0.0586,
      "num_input_tokens_seen": 1559488,
      "step": 240
    },
    {
      "epoch": 10.837696335078533,
      "grad_norm": 0.5850111246109009,
      "learning_rate": 3.5582647037837445e-05,
      "loss": 0.0529,
      "num_input_tokens_seen": 1622784,
      "step": 250
    },
    {
      "epoch": 11.293193717277488,
      "grad_norm": 0.5014153122901917,
      "learning_rate": 3.454078928057196e-05,
      "loss": 0.0535,
      "num_input_tokens_seen": 1690544,
      "step": 260
    },
    {
      "epoch": 11.712041884816754,
      "grad_norm": 0.5679153203964233,
      "learning_rate": 3.347915678058152e-05,
      "loss": 0.0469,
      "num_input_tokens_seen": 1754032,
      "step": 270
    },
    {
      "epoch": 12.167539267015707,
      "grad_norm": 0.3811012804508209,
      "learning_rate": 3.239994993334059e-05,
      "loss": 0.0531,
      "num_input_tokens_seen": 1820944,
      "step": 280
    },
    {
      "epoch": 12.586387434554974,
      "grad_norm": 0.6256040334701538,
      "learning_rate": 3.1305405559842016e-05,
      "loss": 0.0475,
      "num_input_tokens_seen": 1884656,
      "step": 290
    },
    {
      "epoch": 13.041884816753926,
      "grad_norm": 0.6022323966026306,
      "learning_rate": 3.0197792270443982e-05,
      "loss": 0.0472,
      "num_input_tokens_seen": 1951488,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 690,
  "num_input_tokens_seen": 1951488,
  "num_train_epochs": 30,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.374931508795802e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}