{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 17.376963350785342,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.418848167539267,
      "grad_norm": 1.1237713098526,
      "learning_rate": 4.9979013702509664e-05,
      "loss": 1.4003,
      "num_input_tokens_seen": 63440,
      "step": 10
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.5457362532615662,
      "learning_rate": 4.9906513710563894e-05,
      "loss": 0.1699,
      "num_input_tokens_seen": 127024,
      "step": 20
    },
    {
      "epoch": 1.2931937172774868,
      "grad_norm": 0.47847944498062134,
      "learning_rate": 4.9782391165565324e-05,
      "loss": 0.1719,
      "num_input_tokens_seen": 194352,
      "step": 30
    },
    {
      "epoch": 1.7120418848167538,
      "grad_norm": 0.8392484188079834,
      "learning_rate": 4.960690333044279e-05,
      "loss": 0.1433,
      "num_input_tokens_seen": 257776,
      "step": 40
    },
    {
      "epoch": 2.167539267015707,
      "grad_norm": 0.6877206563949585,
      "learning_rate": 4.938041393053273e-05,
      "loss": 0.1352,
      "num_input_tokens_seen": 325440,
      "step": 50
    },
    {
      "epoch": 2.5863874345549736,
      "grad_norm": 0.5719232559204102,
      "learning_rate": 4.910339239970286e-05,
      "loss": 0.1067,
      "num_input_tokens_seen": 388576,
      "step": 60
    },
    {
      "epoch": 3.0418848167539267,
      "grad_norm": 0.9559742212295532,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.1101,
      "num_input_tokens_seen": 456256,
      "step": 70
    },
    {
      "epoch": 3.4607329842931938,
      "grad_norm": 0.4461997151374817,
      "learning_rate": 4.8400153168490414e-05,
      "loss": 0.0852,
      "num_input_tokens_seen": 518864,
      "step": 80
    },
    {
      "epoch": 3.8795811518324608,
      "grad_norm": 0.5407759547233582,
      "learning_rate": 4.7975393038803754e-05,
      "loss": 0.0897,
      "num_input_tokens_seen": 582464,
      "step": 90
    },
    {
      "epoch": 4.335078534031414,
      "grad_norm": 0.7839125990867615,
      "learning_rate": 4.750301289855128e-05,
      "loss": 0.0928,
      "num_input_tokens_seen": 649664,
      "step": 100
    },
    {
      "epoch": 4.7539267015706805,
      "grad_norm": 0.40812286734580994,
      "learning_rate": 4.69839918277092e-05,
      "loss": 0.0748,
      "num_input_tokens_seen": 712944,
      "step": 110
    },
    {
      "epoch": 5.209424083769633,
      "grad_norm": 0.5133504867553711,
      "learning_rate": 4.641940557670478e-05,
      "loss": 0.0801,
      "num_input_tokens_seen": 780128,
      "step": 120
    },
    {
      "epoch": 5.628272251308901,
      "grad_norm": 0.4961135685443878,
      "learning_rate": 4.581042433675921e-05,
      "loss": 0.0702,
      "num_input_tokens_seen": 844064,
      "step": 130
    },
    {
      "epoch": 6.0837696335078535,
      "grad_norm": 0.5816908478736877,
      "learning_rate": 4.5158310314487706e-05,
      "loss": 0.0735,
      "num_input_tokens_seen": 911488,
      "step": 140
    },
    {
      "epoch": 6.50261780104712,
      "grad_norm": 0.4245397448539734,
      "learning_rate": 4.446441511578351e-05,
      "loss": 0.0632,
      "num_input_tokens_seen": 975216,
      "step": 150
    },
    {
      "epoch": 6.9214659685863875,
      "grad_norm": 0.6995230913162231,
      "learning_rate": 4.373017694440827e-05,
      "loss": 0.0665,
      "num_input_tokens_seen": 1037504,
      "step": 160
    },
    {
      "epoch": 7.37696335078534,
      "grad_norm": 0.5000202059745789,
      "learning_rate": 4.295711762109515e-05,
      "loss": 0.0676,
      "num_input_tokens_seen": 1104512,
      "step": 170
    },
    {
      "epoch": 7.795811518324607,
      "grad_norm": 0.5529599785804749,
      "learning_rate": 4.214683942934291e-05,
      "loss": 0.0612,
      "num_input_tokens_seen": 1168112,
      "step": 180
    },
    {
      "epoch": 8.25130890052356,
      "grad_norm": 0.5237070322036743,
      "learning_rate": 4.130102179443877e-05,
      "loss": 0.0642,
      "num_input_tokens_seen": 1235104,
      "step": 190
    },
    {
      "epoch": 8.670157068062828,
      "grad_norm": 0.4693180024623871,
      "learning_rate": 4.042141780259292e-05,
      "loss": 0.0587,
      "num_input_tokens_seen": 1298512,
      "step": 200
    },
    {
      "epoch": 9.12565445026178,
      "grad_norm": 0.32981690764427185,
      "learning_rate": 3.9509850567399774e-05,
      "loss": 0.0688,
      "num_input_tokens_seen": 1365312,
      "step": 210
    },
    {
      "epoch": 9.544502617801047,
      "grad_norm": 0.5822737812995911,
      "learning_rate": 3.856820945115655e-05,
      "loss": 0.0533,
      "num_input_tokens_seen": 1429344,
      "step": 220
    },
    {
      "epoch": 9.963350785340314,
      "grad_norm": 0.559769332408905,
      "learning_rate": 3.759844614887141e-05,
      "loss": 0.0568,
      "num_input_tokens_seen": 1492960,
      "step": 230
    },
    {
      "epoch": 10.418848167539267,
      "grad_norm": 0.504461944103241,
      "learning_rate": 3.6602570643077556e-05,
      "loss": 0.0569,
      "num_input_tokens_seen": 1559488,
      "step": 240
    },
    {
      "epoch": 10.837696335078533,
      "grad_norm": 0.5406277179718018,
      "learning_rate": 3.5582647037837445e-05,
      "loss": 0.052,
      "num_input_tokens_seen": 1622784,
      "step": 250
    },
    {
      "epoch": 11.293193717277488,
      "grad_norm": 0.4200195074081421,
      "learning_rate": 3.454078928057196e-05,
      "loss": 0.0521,
      "num_input_tokens_seen": 1690544,
      "step": 260
    },
    {
      "epoch": 11.712041884816754,
      "grad_norm": 0.8368775844573975,
      "learning_rate": 3.347915678058152e-05,
      "loss": 0.047,
      "num_input_tokens_seen": 1754032,
      "step": 270
    },
    {
      "epoch": 12.167539267015707,
      "grad_norm": 0.3906252682209015,
      "learning_rate": 3.239994993334059e-05,
      "loss": 0.051,
      "num_input_tokens_seen": 1820944,
      "step": 280
    },
    {
      "epoch": 12.586387434554974,
      "grad_norm": 0.7284943461418152,
      "learning_rate": 3.1305405559842016e-05,
      "loss": 0.0467,
      "num_input_tokens_seen": 1884656,
      "step": 290
    },
    {
      "epoch": 13.041884816753926,
      "grad_norm": 0.5412812232971191,
      "learning_rate": 3.0197792270443982e-05,
      "loss": 0.0471,
      "num_input_tokens_seen": 1951488,
      "step": 300
    },
    {
      "epoch": 13.460732984293193,
      "grad_norm": 0.6882799863815308,
      "learning_rate": 2.907940576282856e-05,
      "loss": 0.0395,
      "num_input_tokens_seen": 2015008,
      "step": 310
    },
    {
      "epoch": 13.879581151832461,
      "grad_norm": 0.528359591960907,
      "learning_rate": 2.7952564063817704e-05,
      "loss": 0.0407,
      "num_input_tokens_seen": 2077264,
      "step": 320
    },
    {
      "epoch": 14.335078534031414,
      "grad_norm": 0.5524467825889587,
      "learning_rate": 2.6819602724908742e-05,
      "loss": 0.0399,
      "num_input_tokens_seen": 2143504,
      "step": 330
    },
    {
      "epoch": 14.75392670157068,
      "grad_norm": 0.5251730680465698,
      "learning_rate": 2.5682869981487152e-05,
      "loss": 0.0371,
      "num_input_tokens_seen": 2206768,
      "step": 340
    },
    {
      "epoch": 15.209424083769633,
      "grad_norm": 1.0152957439422607,
      "learning_rate": 2.4544721885750217e-05,
      "loss": 0.0387,
      "num_input_tokens_seen": 2275200,
      "step": 350
    },
    {
      "epoch": 15.6282722513089,
      "grad_norm": 0.7967268824577332,
      "learning_rate": 2.3407517423429015e-05,
      "loss": 0.0327,
      "num_input_tokens_seen": 2337952,
      "step": 360
    },
    {
      "epoch": 16.083769633507853,
      "grad_norm": 0.6417545676231384,
      "learning_rate": 2.2273613624430255e-05,
      "loss": 0.0313,
      "num_input_tokens_seen": 2405232,
      "step": 370
    },
    {
      "epoch": 16.50261780104712,
      "grad_norm": 0.6872904896736145,
      "learning_rate": 2.1145360677531924e-05,
      "loss": 0.0218,
      "num_input_tokens_seen": 2468304,
      "step": 380
    },
    {
      "epoch": 16.921465968586386,
      "grad_norm": 0.6166151165962219,
      "learning_rate": 2.0025097059258046e-05,
      "loss": 0.0256,
      "num_input_tokens_seen": 2531808,
      "step": 390
    },
    {
      "epoch": 17.376963350785342,
      "grad_norm": 0.50600665807724,
      "learning_rate": 1.8915144687029106e-05,
      "loss": 0.0221,
      "num_input_tokens_seen": 2599280,
      "step": 400
    }
  ],
  "logging_steps": 10,
  "max_steps": 690,
  "num_input_tokens_seen": 2599280,
  "num_train_epochs": 30,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1154970987672371e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}