{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9967081199707388,
  "eval_steps": 500,
  "global_step": 682,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029261155815654718,
      "grad_norm": 3.175921153645058,
      "learning_rate": 5e-06,
      "loss": 0.8015,
      "step": 10
    },
    {
      "epoch": 0.058522311631309436,
      "grad_norm": 1.3396777412592173,
      "learning_rate": 5e-06,
      "loss": 0.7413,
      "step": 20
    },
    {
      "epoch": 0.08778346744696415,
      "grad_norm": 1.2916710481318188,
      "learning_rate": 5e-06,
      "loss": 0.7082,
      "step": 30
    },
    {
      "epoch": 0.11704462326261887,
      "grad_norm": 1.0905232711210158,
      "learning_rate": 5e-06,
      "loss": 0.6969,
      "step": 40
    },
    {
      "epoch": 0.14630577907827358,
      "grad_norm": 1.1424161224062557,
      "learning_rate": 5e-06,
      "loss": 0.687,
      "step": 50
    },
    {
      "epoch": 0.1755669348939283,
      "grad_norm": 0.8849186232279377,
      "learning_rate": 5e-06,
      "loss": 0.6763,
      "step": 60
    },
    {
      "epoch": 0.20482809070958302,
      "grad_norm": 0.5905751294504246,
      "learning_rate": 5e-06,
      "loss": 0.6697,
      "step": 70
    },
    {
      "epoch": 0.23408924652523774,
      "grad_norm": 0.401552296803231,
      "learning_rate": 5e-06,
      "loss": 0.6646,
      "step": 80
    },
    {
      "epoch": 0.26335040234089246,
      "grad_norm": 0.41681057235289837,
      "learning_rate": 5e-06,
      "loss": 0.6618,
      "step": 90
    },
    {
      "epoch": 0.29261155815654716,
      "grad_norm": 0.42307500226012484,
      "learning_rate": 5e-06,
      "loss": 0.6543,
      "step": 100
    },
    {
      "epoch": 0.3218727139722019,
      "grad_norm": 0.4153399483829215,
      "learning_rate": 5e-06,
      "loss": 0.6631,
      "step": 110
    },
    {
      "epoch": 0.3511338697878566,
      "grad_norm": 0.3802571943412753,
      "learning_rate": 5e-06,
      "loss": 0.668,
      "step": 120
    },
    {
      "epoch": 0.38039502560351135,
      "grad_norm": 0.3888172781548047,
      "learning_rate": 5e-06,
      "loss": 0.6518,
      "step": 130
    },
    {
      "epoch": 0.40965618141916604,
      "grad_norm": 0.3637230321906593,
      "learning_rate": 5e-06,
      "loss": 0.6525,
      "step": 140
    },
    {
      "epoch": 0.4389173372348208,
      "grad_norm": 0.3510916975433033,
      "learning_rate": 5e-06,
      "loss": 0.6479,
      "step": 150
    },
    {
      "epoch": 0.4681784930504755,
      "grad_norm": 0.38320736963066604,
      "learning_rate": 5e-06,
      "loss": 0.6498,
      "step": 160
    },
    {
      "epoch": 0.49743964886613024,
      "grad_norm": 0.3675280277212547,
      "learning_rate": 5e-06,
      "loss": 0.6549,
      "step": 170
    },
    {
      "epoch": 0.5267008046817849,
      "grad_norm": 0.33533114288412946,
      "learning_rate": 5e-06,
      "loss": 0.6545,
      "step": 180
    },
    {
      "epoch": 0.5559619604974396,
      "grad_norm": 0.35908138669562806,
      "learning_rate": 5e-06,
      "loss": 0.643,
      "step": 190
    },
    {
      "epoch": 0.5852231163130943,
      "grad_norm": 0.34531459956691446,
      "learning_rate": 5e-06,
      "loss": 0.6442,
      "step": 200
    },
    {
      "epoch": 0.6144842721287491,
      "grad_norm": 0.3410477096323079,
      "learning_rate": 5e-06,
      "loss": 0.6508,
      "step": 210
    },
    {
      "epoch": 0.6437454279444038,
      "grad_norm": 0.35442684790995665,
      "learning_rate": 5e-06,
      "loss": 0.6388,
      "step": 220
    },
    {
      "epoch": 0.6730065837600585,
      "grad_norm": 0.36210107670842623,
      "learning_rate": 5e-06,
      "loss": 0.6387,
      "step": 230
    },
    {
      "epoch": 0.7022677395757132,
      "grad_norm": 0.3320010813437129,
      "learning_rate": 5e-06,
      "loss": 0.6456,
      "step": 240
    },
    {
      "epoch": 0.731528895391368,
      "grad_norm": 0.3431211501390758,
      "learning_rate": 5e-06,
      "loss": 0.644,
      "step": 250
    },
    {
      "epoch": 0.7607900512070227,
      "grad_norm": 0.34117908000054753,
      "learning_rate": 5e-06,
      "loss": 0.6466,
      "step": 260
    },
    {
      "epoch": 0.7900512070226774,
      "grad_norm": 0.3767554348039686,
      "learning_rate": 5e-06,
      "loss": 0.6489,
      "step": 270
    },
    {
      "epoch": 0.8193123628383321,
      "grad_norm": 0.3445121930815649,
      "learning_rate": 5e-06,
      "loss": 0.6477,
      "step": 280
    },
    {
      "epoch": 0.8485735186539868,
      "grad_norm": 0.334717792180829,
      "learning_rate": 5e-06,
      "loss": 0.6399,
      "step": 290
    },
    {
      "epoch": 0.8778346744696416,
      "grad_norm": 0.35014998369274103,
      "learning_rate": 5e-06,
      "loss": 0.6318,
      "step": 300
    },
    {
      "epoch": 0.9070958302852963,
      "grad_norm": 0.35107697366353213,
      "learning_rate": 5e-06,
      "loss": 0.6448,
      "step": 310
    },
    {
      "epoch": 0.936356986100951,
      "grad_norm": 0.33329010626592986,
      "learning_rate": 5e-06,
      "loss": 0.6368,
      "step": 320
    },
    {
      "epoch": 0.9656181419166057,
      "grad_norm": 0.375641145522458,
      "learning_rate": 5e-06,
      "loss": 0.6401,
      "step": 330
    },
    {
      "epoch": 0.9948792977322605,
      "grad_norm": 0.35063232128414173,
      "learning_rate": 5e-06,
      "loss": 0.6361,
      "step": 340
    },
    {
      "epoch": 0.9978054133138259,
      "eval_loss": 0.6406120657920837,
      "eval_runtime": 342.7414,
      "eval_samples_per_second": 26.869,
      "eval_steps_per_second": 0.42,
      "step": 341
    },
    {
      "epoch": 1.025237746891002,
      "grad_norm": 0.3689065333115297,
      "learning_rate": 5e-06,
      "loss": 0.6512,
      "step": 350
    },
    {
      "epoch": 1.054498902706657,
      "grad_norm": 0.35245512949324603,
      "learning_rate": 5e-06,
      "loss": 0.6053,
      "step": 360
    },
    {
      "epoch": 1.0837600585223117,
      "grad_norm": 0.4097569215434824,
      "learning_rate": 5e-06,
      "loss": 0.6087,
      "step": 370
    },
    {
      "epoch": 1.1130212143379663,
      "grad_norm": 0.33409266355289313,
      "learning_rate": 5e-06,
      "loss": 0.6061,
      "step": 380
    },
    {
      "epoch": 1.142282370153621,
      "grad_norm": 0.39236431067207433,
      "learning_rate": 5e-06,
      "loss": 0.6114,
      "step": 390
    },
    {
      "epoch": 1.1715435259692757,
      "grad_norm": 0.35552129157860685,
      "learning_rate": 5e-06,
      "loss": 0.6129,
      "step": 400
    },
    {
      "epoch": 1.2008046817849305,
      "grad_norm": 0.3510300665384777,
      "learning_rate": 5e-06,
      "loss": 0.6125,
      "step": 410
    },
    {
      "epoch": 1.2300658376005853,
      "grad_norm": 0.40194969560487526,
      "learning_rate": 5e-06,
      "loss": 0.6007,
      "step": 420
    },
    {
      "epoch": 1.2593269934162399,
      "grad_norm": 0.3583207046748375,
      "learning_rate": 5e-06,
      "loss": 0.6108,
      "step": 430
    },
    {
      "epoch": 1.2885881492318947,
      "grad_norm": 0.344522115799079,
      "learning_rate": 5e-06,
      "loss": 0.6036,
      "step": 440
    },
    {
      "epoch": 1.3178493050475493,
      "grad_norm": 0.36157379087095487,
      "learning_rate": 5e-06,
      "loss": 0.6073,
      "step": 450
    },
    {
      "epoch": 1.347110460863204,
      "grad_norm": 0.3791515880385353,
      "learning_rate": 5e-06,
      "loss": 0.6049,
      "step": 460
    },
    {
      "epoch": 1.3763716166788589,
      "grad_norm": 0.3220281951589229,
      "learning_rate": 5e-06,
      "loss": 0.6023,
      "step": 470
    },
    {
      "epoch": 1.4056327724945135,
      "grad_norm": 0.3692472075176485,
      "learning_rate": 5e-06,
      "loss": 0.6031,
      "step": 480
    },
    {
      "epoch": 1.4348939283101683,
      "grad_norm": 0.3464968328092362,
      "learning_rate": 5e-06,
      "loss": 0.6095,
      "step": 490
    },
    {
      "epoch": 1.464155084125823,
      "grad_norm": 0.3395757657902888,
      "learning_rate": 5e-06,
      "loss": 0.5978,
      "step": 500
    },
    {
      "epoch": 1.4934162399414777,
      "grad_norm": 0.3472100724313473,
      "learning_rate": 5e-06,
      "loss": 0.6059,
      "step": 510
    },
    {
      "epoch": 1.5226773957571325,
      "grad_norm": 0.3853659527551535,
      "learning_rate": 5e-06,
      "loss": 0.6082,
      "step": 520
    },
    {
      "epoch": 1.5519385515727873,
      "grad_norm": 0.35016819999626914,
      "learning_rate": 5e-06,
      "loss": 0.6068,
      "step": 530
    },
    {
      "epoch": 1.5811997073884418,
      "grad_norm": 0.36893370536117037,
      "learning_rate": 5e-06,
      "loss": 0.6134,
      "step": 540
    },
    {
      "epoch": 1.6104608632040964,
      "grad_norm": 0.35806464799349075,
      "learning_rate": 5e-06,
      "loss": 0.6044,
      "step": 550
    },
    {
      "epoch": 1.6397220190197512,
      "grad_norm": 0.34859310509445396,
      "learning_rate": 5e-06,
      "loss": 0.6061,
      "step": 560
    },
    {
      "epoch": 1.668983174835406,
      "grad_norm": 0.3249373585587818,
      "learning_rate": 5e-06,
      "loss": 0.5968,
      "step": 570
    },
    {
      "epoch": 1.6982443306510606,
      "grad_norm": 0.35605026962044284,
      "learning_rate": 5e-06,
      "loss": 0.6061,
      "step": 580
    },
    {
      "epoch": 1.7275054864667154,
      "grad_norm": 0.40161934702478636,
      "learning_rate": 5e-06,
      "loss": 0.6063,
      "step": 590
    },
    {
      "epoch": 1.7567666422823702,
      "grad_norm": 0.33968611303586627,
      "learning_rate": 5e-06,
      "loss": 0.6032,
      "step": 600
    },
    {
      "epoch": 1.7860277980980248,
      "grad_norm": 0.343315623639703,
      "learning_rate": 5e-06,
      "loss": 0.6056,
      "step": 610
    },
    {
      "epoch": 1.8152889539136796,
      "grad_norm": 0.32594291618046917,
      "learning_rate": 5e-06,
      "loss": 0.6061,
      "step": 620
    },
    {
      "epoch": 1.8445501097293344,
      "grad_norm": 0.35524107661998305,
      "learning_rate": 5e-06,
      "loss": 0.6052,
      "step": 630
    },
    {
      "epoch": 1.873811265544989,
      "grad_norm": 0.32891502990480825,
      "learning_rate": 5e-06,
      "loss": 0.6,
      "step": 640
    },
    {
      "epoch": 1.9030724213606436,
      "grad_norm": 0.362729694863619,
      "learning_rate": 5e-06,
      "loss": 0.6057,
      "step": 650
    },
    {
      "epoch": 1.9323335771762986,
      "grad_norm": 0.3500563113476431,
      "learning_rate": 5e-06,
      "loss": 0.6062,
      "step": 660
    },
    {
      "epoch": 1.9615947329919532,
      "grad_norm": 0.33206893588035147,
      "learning_rate": 5e-06,
      "loss": 0.595,
      "step": 670
    },
    {
      "epoch": 1.9908558888076078,
      "grad_norm": 0.35566741239182464,
      "learning_rate": 5e-06,
      "loss": 0.6095,
      "step": 680
    },
    {
      "epoch": 1.9967081199707388,
      "eval_loss": 0.6331745386123657,
      "eval_runtime": 342.0192,
      "eval_samples_per_second": 26.925,
      "eval_steps_per_second": 0.421,
      "step": 682
    },
    {
      "epoch": 1.9967081199707388,
      "step": 682,
      "total_flos": 1429904000286720.0,
      "train_loss": 0.6341017776570362,
      "train_runtime": 36533.8026,
      "train_samples_per_second": 9.578,
      "train_steps_per_second": 0.019
    }
  ],
  "logging_steps": 10,
  "max_steps": 682,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1429904000286720.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|