{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 594,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.050505050505050504,
      "grad_norm": 15.79202210209821,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.8701,
      "step": 10
    },
    {
      "epoch": 0.10101010101010101,
      "grad_norm": 3.0942332223409834,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5733,
      "step": 20
    },
    {
      "epoch": 0.15151515151515152,
      "grad_norm": 4.298646784667104,
      "learning_rate": 5e-06,
      "loss": 0.6136,
      "step": 30
    },
    {
      "epoch": 0.20202020202020202,
      "grad_norm": 2.703078685427566,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5472,
      "step": 40
    },
    {
      "epoch": 0.25252525252525254,
      "grad_norm": 29.98527759901386,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5259,
      "step": 50
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 8.787895364111028,
      "learning_rate": 1e-05,
      "loss": 0.5159,
      "step": 60
    },
    {
      "epoch": 0.35353535353535354,
      "grad_norm": 2.714725648499293,
      "learning_rate": 9.991349683972435e-06,
      "loss": 0.5982,
      "step": 70
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 2.393261010163645,
      "learning_rate": 9.965428667076687e-06,
      "loss": 0.6628,
      "step": 80
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 15.838479639161053,
      "learning_rate": 9.922326639307918e-06,
      "loss": 0.5424,
      "step": 90
    },
    {
      "epoch": 0.5050505050505051,
      "grad_norm": 2.726593770710591,
      "learning_rate": 9.86219273913078e-06,
      "loss": 0.6873,
      "step": 100
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 3.6530539945981584,
      "learning_rate": 9.785235037441473e-06,
      "loss": 0.6324,
      "step": 110
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 2.2910687177923896,
      "learning_rate": 9.691719817616148e-06,
      "loss": 0.4908,
      "step": 120
    },
    {
      "epoch": 0.6565656565656566,
      "grad_norm": 2.749982467389248,
      "learning_rate": 9.581970654136752e-06,
      "loss": 0.577,
      "step": 130
    },
    {
      "epoch": 0.7070707070707071,
      "grad_norm": 2.621644636283292,
      "learning_rate": 9.45636729298243e-06,
      "loss": 0.5327,
      "step": 140
    },
    {
      "epoch": 0.7575757575757576,
      "grad_norm": 2.1198799176542376,
      "learning_rate": 9.315344337660422e-06,
      "loss": 0.6347,
      "step": 150
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 1.7972487002464925,
      "learning_rate": 9.159389745423003e-06,
      "loss": 0.6343,
      "step": 160
    },
    {
      "epoch": 0.8585858585858586,
      "grad_norm": 2.5406708998399763,
      "learning_rate": 8.98904313887369e-06,
      "loss": 0.6402,
      "step": 170
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 7.266719521619973,
      "learning_rate": 8.804893938804839e-06,
      "loss": 0.5157,
      "step": 180
    },
    {
      "epoch": 0.9595959595959596,
      "grad_norm": 13.316489295838588,
      "learning_rate": 8.607579324727175e-06,
      "loss": 0.5619,
      "step": 190
    },
    {
      "epoch": 1.0101010101010102,
      "grad_norm": 1.8872961310512126,
      "learning_rate": 8.397782030148147e-06,
      "loss": 0.4112,
      "step": 200
    },
    {
      "epoch": 1.0606060606060606,
      "grad_norm": 1.676003362944885,
      "learning_rate": 8.176227980227693e-06,
      "loss": 0.488,
      "step": 210
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 2.282289630564045,
      "learning_rate": 7.943683779985412e-06,
      "loss": 0.4915,
      "step": 220
    },
    {
      "epoch": 1.1616161616161615,
      "grad_norm": 2.4871416093603376,
      "learning_rate": 7.700954061750295e-06,
      "loss": 0.4777,
      "step": 230
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 2.5839494868810826,
      "learning_rate": 7.4488787010311425e-06,
      "loss": 0.6122,
      "step": 240
    },
    {
      "epoch": 1.2626262626262625,
      "grad_norm": 10.641920140721124,
      "learning_rate": 7.188329910441154e-06,
      "loss": 0.4543,
      "step": 250
    },
    {
      "epoch": 1.3131313131313131,
      "grad_norm": 3.0307123950276402,
      "learning_rate": 6.920209221732007e-06,
      "loss": 0.4843,
      "step": 260
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 2.122645461889907,
      "learning_rate": 6.64544436638005e-06,
      "loss": 0.4867,
      "step": 270
    },
    {
      "epoch": 1.4141414141414141,
      "grad_norm": 2.631666798742427,
      "learning_rate": 6.364986065518106e-06,
      "loss": 0.4209,
      "step": 280
    },
    {
      "epoch": 1.4646464646464645,
      "grad_norm": 2.9904146392503024,
      "learning_rate": 6.079804740320181e-06,
      "loss": 0.4111,
      "step": 290
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 3.8938710402714247,
      "learning_rate": 5.790887154221521e-06,
      "loss": 0.4576,
      "step": 300
    },
    {
      "epoch": 1.5656565656565657,
      "grad_norm": 1.4401476185203614,
      "learning_rate": 5.499232998592399e-06,
      "loss": 0.4169,
      "step": 310
    },
    {
      "epoch": 1.6161616161616161,
      "grad_norm": 1.7931080573819276,
      "learning_rate": 5.20585143367959e-06,
      "loss": 0.4786,
      "step": 320
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 2.6304765740012592,
      "learning_rate": 4.911757596784358e-06,
      "loss": 0.4335,
      "step": 330
    },
    {
      "epoch": 1.7171717171717171,
      "grad_norm": 1.7093124594050306,
      "learning_rate": 4.617969089759066e-06,
      "loss": 0.4032,
      "step": 340
    },
    {
      "epoch": 1.7676767676767677,
      "grad_norm": 1.7751425049397478,
      "learning_rate": 4.325502457976126e-06,
      "loss": 0.4026,
      "step": 350
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.5604623454463842,
      "learning_rate": 4.035369672952516e-06,
      "loss": 0.4396,
      "step": 360
    },
    {
      "epoch": 1.8686868686868687,
      "grad_norm": 4.974012035659653,
      "learning_rate": 3.7485746308004013e-06,
      "loss": 0.4729,
      "step": 370
    },
    {
      "epoch": 1.9191919191919191,
      "grad_norm": 1.6364473729173084,
      "learning_rate": 3.466109678619681e-06,
      "loss": 0.4446,
      "step": 380
    },
    {
      "epoch": 1.9696969696969697,
      "grad_norm": 1.9395028093472382,
      "learning_rate": 3.1889521808515888e-06,
      "loss": 0.3533,
      "step": 390
    },
    {
      "epoch": 2.0202020202020203,
      "grad_norm": 1.8373408967141356,
      "learning_rate": 2.9180611374741623e-06,
      "loss": 0.3406,
      "step": 400
    },
    {
      "epoch": 2.0707070707070705,
      "grad_norm": 3.399858195905016,
      "learning_rate": 2.6543738657411033e-06,
      "loss": 0.2985,
      "step": 410
    },
    {
      "epoch": 2.121212121212121,
      "grad_norm": 3.2351800580212973,
      "learning_rate": 2.3988027569455895e-06,
      "loss": 0.3004,
      "step": 420
    },
    {
      "epoch": 2.1717171717171717,
      "grad_norm": 4.35904988804887,
      "learning_rate": 2.1522321194310577e-06,
      "loss": 0.3149,
      "step": 430
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 3.1884018644318024,
      "learning_rate": 1.915515118772555e-06,
      "loss": 0.2804,
      "step": 440
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 2.0429924860959217,
      "learning_rate": 1.689470825715998e-06,
      "loss": 0.2729,
      "step": 450
    },
    {
      "epoch": 2.323232323232323,
      "grad_norm": 2.648622548442594,
      "learning_rate": 1.4748813820898554e-06,
      "loss": 0.3002,
      "step": 460
    },
    {
      "epoch": 2.3737373737373737,
      "grad_norm": 1.3508677209033961,
      "learning_rate": 1.272489294495548e-06,
      "loss": 0.2667,
      "step": 470
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 3.366120177530913,
      "learning_rate": 1.0829948651407374e-06,
      "loss": 0.3297,
      "step": 480
    },
    {
      "epoch": 2.474747474747475,
      "grad_norm": 1.9426057690340581,
      "learning_rate": 9.070537687051817e-07,
      "loss": 0.2862,
      "step": 490
    },
    {
      "epoch": 2.525252525252525,
      "grad_norm": 2.997630345355421,
      "learning_rate": 7.452747836234392e-07,
      "loss": 0.2782,
      "step": 500
    },
    {
      "epoch": 2.5757575757575757,
      "grad_norm": 1.9235000947632515,
      "learning_rate": 5.982176856345445e-07,
      "loss": 0.2541,
      "step": 510
    },
    {
      "epoch": 2.6262626262626263,
      "grad_norm": 2.042417538987798,
      "learning_rate": 4.663913108871726e-07,
      "loss": 0.2649,
      "step": 520
    },
    {
      "epoch": 2.676767676767677,
      "grad_norm": 2.096088188603427,
      "learning_rate": 3.5025179530225995e-07,
      "loss": 0.2737,
      "step": 530
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 2.0861722071465194,
      "learning_rate": 2.5020099628504603e-07,
      "loss": 0.2369,
      "step": 540
    },
    {
      "epoch": 2.7777777777777777,
      "grad_norm": 2.5284934932886927,
      "learning_rate": 1.6658510224765333e-07,
      "loss": 0.3142,
      "step": 550
    },
    {
      "epoch": 2.8282828282828283,
      "grad_norm": 1.653966938488298,
      "learning_rate": 9.969343475342285e-08,
      "loss": 0.3054,
      "step": 560
    },
    {
      "epoch": 2.878787878787879,
      "grad_norm": 1.9790588803483495,
      "learning_rate": 4.975744742772848e-08,
      "loss": 0.3217,
      "step": 570
    },
    {
      "epoch": 2.929292929292929,
      "grad_norm": 2.220544003340401,
      "learning_rate": 1.69499250991767e-08,
      "loss": 0.2967,
      "step": 580
    },
    {
      "epoch": 2.9797979797979797,
      "grad_norm": 2.3170461682627947,
      "learning_rate": 1.3843859422574269e-09,
      "loss": 0.2458,
      "step": 590
    },
    {
      "epoch": 3.0,
      "step": 594,
      "total_flos": 31949362233344.0,
      "train_loss": 0.4421948113224723,
      "train_runtime": 1063.9965,
      "train_samples_per_second": 4.466,
      "train_steps_per_second": 0.558
    }
  ],
  "logging_steps": 10,
  "max_steps": 594,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 31949362233344.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}