| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 914, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03281378178835111, |
| "grad_norm": 7.324607115951123, |
| "learning_rate": 9.782608695652175e-07, |
| "loss": 1.6507, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06562756357670221, |
| "grad_norm": 2.6718330452258137, |
| "learning_rate": 2.065217391304348e-06, |
| "loss": 1.5293, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09844134536505332, |
| "grad_norm": 2.0449760430297435, |
| "learning_rate": 3.152173913043479e-06, |
| "loss": 1.3882, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13125512715340443, |
| "grad_norm": 1.7054134520698259, |
| "learning_rate": 4.239130434782609e-06, |
| "loss": 1.3212, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16406890894175555, |
| "grad_norm": 1.5625383091861629, |
| "learning_rate": 5.3260869565217395e-06, |
| "loss": 1.2339, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.19688269073010664, |
| "grad_norm": 1.5467201317331245, |
| "learning_rate": 6.41304347826087e-06, |
| "loss": 1.2209, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22969647251845776, |
| "grad_norm": 1.5441409779630197, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.1981, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.26251025430680885, |
| "grad_norm": 1.5810387088382716, |
| "learning_rate": 8.586956521739131e-06, |
| "loss": 1.1633, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.29532403609515995, |
| "grad_norm": 1.4632034044381061, |
| "learning_rate": 9.673913043478262e-06, |
| "loss": 1.1815, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3281378178835111, |
| "grad_norm": 1.6769268374195203, |
| "learning_rate": 9.998215114657564e-06, |
| "loss": 1.1741, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3609515996718622, |
| "grad_norm": 1.564606037108119, |
| "learning_rate": 9.98947588668843e-06, |
| "loss": 1.13, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3937653814602133, |
| "grad_norm": 1.4659283460430306, |
| "learning_rate": 9.973467196782484e-06, |
| "loss": 1.1339, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4265791632485644, |
| "grad_norm": 1.3900414131292986, |
| "learning_rate": 9.950212368945013e-06, |
| "loss": 1.1501, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4593929450369155, |
| "grad_norm": 1.533573901481892, |
| "learning_rate": 9.91974528450737e-06, |
| "loss": 1.1374, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4922067268252666, |
| "grad_norm": 1.4930324222767686, |
| "learning_rate": 9.882110332763275e-06, |
| "loss": 1.1316, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5250205086136177, |
| "grad_norm": 1.4649309232116048, |
| "learning_rate": 9.83736234629543e-06, |
| "loss": 1.1199, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5578342904019689, |
| "grad_norm": 1.4519140933698538, |
| "learning_rate": 9.785566521086695e-06, |
| "loss": 1.1163, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5906480721903199, |
| "grad_norm": 1.4591675768161227, |
| "learning_rate": 9.726798321532205e-06, |
| "loss": 1.125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.623461853978671, |
| "grad_norm": 1.3084115670512635, |
| "learning_rate": 9.661143370490846e-06, |
| "loss": 1.1385, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6562756357670222, |
| "grad_norm": 1.428698415469623, |
| "learning_rate": 9.588697324536254e-06, |
| "loss": 1.0995, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6890894175553732, |
| "grad_norm": 1.3195726786177668, |
| "learning_rate": 9.509565734589105e-06, |
| "loss": 1.105, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7219031993437244, |
| "grad_norm": 1.4644538779508807, |
| "learning_rate": 9.423863892133754e-06, |
| "loss": 1.0949, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 1.4442654685795682, |
| "learning_rate": 9.33171666124326e-06, |
| "loss": 1.1097, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7875307629204266, |
| "grad_norm": 1.4628666514015816, |
| "learning_rate": 9.233258296657547e-06, |
| "loss": 1.0915, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8203445447087777, |
| "grad_norm": 1.4584300507718668, |
| "learning_rate": 9.128632248179761e-06, |
| "loss": 1.0952, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8531583264971287, |
| "grad_norm": 1.3907705989649481, |
| "learning_rate": 9.017990951675764e-06, |
| "loss": 1.1072, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8859721082854799, |
| "grad_norm": 1.44611189547518, |
| "learning_rate": 8.901495606981339e-06, |
| "loss": 1.0908, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.918785890073831, |
| "grad_norm": 1.4283544277374125, |
| "learning_rate": 8.779315943040629e-06, |
| "loss": 1.0934, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9515996718621821, |
| "grad_norm": 1.3679340439375278, |
| "learning_rate": 8.65162997061802e-06, |
| "loss": 1.0902, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9844134536505332, |
| "grad_norm": 1.4053097778093493, |
| "learning_rate": 8.518623722943747e-06, |
| "loss": 1.0826, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0164068908941755, |
| "grad_norm": 1.358051135209502, |
| "learning_rate": 8.380490984671105e-06, |
| "loss": 1.0004, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.0492206726825266, |
| "grad_norm": 1.4920803109368017, |
| "learning_rate": 8.23743300954015e-06, |
| "loss": 0.9529, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0820344544708778, |
| "grad_norm": 1.391421362612517, |
| "learning_rate": 8.089658227159239e-06, |
| "loss": 0.9108, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.114848236259229, |
| "grad_norm": 1.6241278760522855, |
| "learning_rate": 7.937381939331628e-06, |
| "loss": 0.9279, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.14766201804758, |
| "grad_norm": 1.4776439598223472, |
| "learning_rate": 7.780826006369586e-06, |
| "loss": 0.9332, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.1804757998359312, |
| "grad_norm": 1.531082298396227, |
| "learning_rate": 7.620218523852987e-06, |
| "loss": 0.9503, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.2132895816242821, |
| "grad_norm": 1.4148212125540105, |
| "learning_rate": 7.4557934903034035e-06, |
| "loss": 0.9409, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.2461033634126333, |
| "grad_norm": 1.569873004540387, |
| "learning_rate": 7.287790466257854e-06, |
| "loss": 0.9228, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.2789171452009844, |
| "grad_norm": 1.50216408338208, |
| "learning_rate": 7.116454225238909e-06, |
| "loss": 0.9354, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.3117309269893356, |
| "grad_norm": 1.4646843748186438, |
| "learning_rate": 6.942034397129702e-06, |
| "loss": 0.9372, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3445447087776867, |
| "grad_norm": 1.5763941611606018, |
| "learning_rate": 6.764785104473411e-06, |
| "loss": 0.9169, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3773584905660377, |
| "grad_norm": 1.4076340592270304, |
| "learning_rate": 6.584964592227135e-06, |
| "loss": 0.9235, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.4101722723543888, |
| "grad_norm": 1.5649588493962718, |
| "learning_rate": 6.402834851509564e-06, |
| "loss": 0.926, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.44298605414274, |
| "grad_norm": 1.370995330259522, |
| "learning_rate": 6.2186612378906545e-06, |
| "loss": 0.9327, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.475799835931091, |
| "grad_norm": 1.4405579132350896, |
| "learning_rate": 6.0327120847794415e-06, |
| "loss": 0.9461, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.5086136177194422, |
| "grad_norm": 1.5139954304026377, |
| "learning_rate": 5.845258312473252e-06, |
| "loss": 0.9479, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.5414273995077932, |
| "grad_norm": 1.5381263145609851, |
| "learning_rate": 5.656573033437932e-06, |
| "loss": 0.9217, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.5742411812961445, |
| "grad_norm": 1.5176651174503644, |
| "learning_rate": 5.466931154394171e-06, |
| "loss": 0.9402, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.6070549630844955, |
| "grad_norm": 1.64200137668156, |
| "learning_rate": 5.276608975789683e-06, |
| "loss": 0.925, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.6398687448728466, |
| "grad_norm": 1.4813527495999301, |
| "learning_rate": 5.085883789240764e-06, |
| "loss": 0.9268, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.6726825266611978, |
| "grad_norm": 1.5116269108263887, |
| "learning_rate": 4.8950334735297746e-06, |
| "loss": 0.9095, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.7054963084495487, |
| "grad_norm": 1.4490396822436944, |
| "learning_rate": 4.704336089747135e-06, |
| "loss": 0.9341, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.7383100902379, |
| "grad_norm": 1.5628125198488163, |
| "learning_rate": 4.514069476167716e-06, |
| "loss": 0.932, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.771123872026251, |
| "grad_norm": 1.4687886059027409, |
| "learning_rate": 4.324510843451851e-06, |
| "loss": 0.9311, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.8039376538146021, |
| "grad_norm": 1.5227999474981233, |
| "learning_rate": 4.135936370760759e-06, |
| "loss": 0.9046, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.8367514356029533, |
| "grad_norm": 1.4388610554454382, |
| "learning_rate": 3.9486208033748315e-06, |
| "loss": 0.9378, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 1.3581055291483148, |
| "learning_rate": 3.762837052401004e-06, |
| "loss": 0.9235, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.9023789991796556, |
| "grad_norm": 1.5099266943158, |
| "learning_rate": 3.5788557971524695e-06, |
| "loss": 0.9444, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.9351927809680065, |
| "grad_norm": 1.5169803759126903, |
| "learning_rate": 3.3969450907799966e-06, |
| "loss": 0.9279, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.9680065627563577, |
| "grad_norm": 1.3979513991441848, |
| "learning_rate": 3.217369969729476e-06, |
| "loss": 0.9115, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.0032813781788352, |
| "grad_norm": 2.105088244846462, |
| "learning_rate": 3.0403920675946826e-06, |
| "loss": 0.8327, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.036095159967186, |
| "grad_norm": 1.6690335971877808, |
| "learning_rate": 2.8662692339278387e-06, |
| "loss": 0.7782, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.0689089417555375, |
| "grad_norm": 1.705302067748598, |
| "learning_rate": 2.6952551585633947e-06, |
| "loss": 0.7875, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.1017227235438884, |
| "grad_norm": 1.6066988675182772, |
| "learning_rate": 2.52759900200232e-06, |
| "loss": 0.7708, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.1345365053322394, |
| "grad_norm": 1.5203406830167119, |
| "learning_rate": 2.3635450323954773e-06, |
| "loss": 0.7927, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.1673502871205907, |
| "grad_norm": 1.626389642459301, |
| "learning_rate": 2.2033322696549197e-06, |
| "loss": 0.7885, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.2001640689089417, |
| "grad_norm": 1.7664606984797573, |
| "learning_rate": 2.0471941372116793e-06, |
| "loss": 0.7626, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.232977850697293, |
| "grad_norm": 1.5793686740276833, |
| "learning_rate": 1.8953581219273987e-06, |
| "loss": 0.7754, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.265791632485644, |
| "grad_norm": 1.6724958298486645, |
| "learning_rate": 1.7480454426552773e-06, |
| "loss": 0.7783, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.298605414273995, |
| "grad_norm": 1.5819324740235794, |
| "learning_rate": 1.6054707279332865e-06, |
| "loss": 0.7705, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.3314191960623463, |
| "grad_norm": 1.6323457128335996, |
| "learning_rate": 1.4678417032791653e-06, |
| "loss": 0.7699, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.364232977850697, |
| "grad_norm": 1.7857816578985155, |
| "learning_rate": 1.335358888542862e-06, |
| "loss": 0.7526, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.3970467596390486, |
| "grad_norm": 1.5935449756290072, |
| "learning_rate": 1.20821530575733e-06, |
| "loss": 0.7918, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.4298605414273995, |
| "grad_norm": 1.693473193818772, |
| "learning_rate": 1.0865961979133245e-06, |
| "loss": 0.7815, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.462674323215751, |
| "grad_norm": 1.6977380314021282, |
| "learning_rate": 9.706787590679685e-07, |
| "loss": 0.7731, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.495488105004102, |
| "grad_norm": 1.701374033891568, |
| "learning_rate": 8.606318761802584e-07, |
| "loss": 0.7666, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.5283018867924527, |
| "grad_norm": 1.8239897351347403, |
| "learning_rate": 7.566158830496917e-07, |
| "loss": 0.7657, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.561115668580804, |
| "grad_norm": 1.7594557186973525, |
| "learning_rate": 6.587823267164911e-07, |
| "loss": 0.7798, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.593929450369155, |
| "grad_norm": 1.6074400677010736, |
| "learning_rate": 5.672737466637701e-07, |
| "loss": 0.7816, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.626743232157506, |
| "grad_norm": 1.589497024974972, |
| "learning_rate": 4.822234671433552e-07, |
| "loss": 0.7837, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.6595570139458573, |
| "grad_norm": 1.5745731469005892, |
| "learning_rate": 4.03755402927804e-07, |
| "loss": 0.7747, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.6923707957342082, |
| "grad_norm": 1.6156048098137663, |
| "learning_rate": 3.319838787716634e-07, |
| "loss": 0.7793, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.7251845775225596, |
| "grad_norm": 1.6695478870964964, |
| "learning_rate": 2.6701346284499e-07, |
| "loss": 0.7542, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.7579983593109105, |
| "grad_norm": 1.5107440160015784, |
| "learning_rate": 2.0893881438180275e-07, |
| "loss": 0.7844, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.790812141099262, |
| "grad_norm": 1.6477028499246302, |
| "learning_rate": 1.578445457654637e-07, |
| "loss": 0.7643, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.823625922887613, |
| "grad_norm": 1.5854989560185238, |
| "learning_rate": 1.1380509925189853e-07, |
| "loss": 0.7673, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.8564397046759638, |
| "grad_norm": 1.6787763051906506, |
| "learning_rate": 7.688463851028227e-08, |
| "loss": 0.769, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.889253486464315, |
| "grad_norm": 1.581911801204859, |
| "learning_rate": 4.713695513920147e-08, |
| "loss": 0.7799, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.922067268252666, |
| "grad_norm": 1.697466745242019, |
| "learning_rate": 2.4605390294497043e-08, |
| "loss": 0.785, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.954881050041017, |
| "grad_norm": 1.6425019567953216, |
| "learning_rate": 9.322771542978892e-09, |
| "loss": 0.7753, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.9876948318293683, |
| "grad_norm": 1.5783466963735138, |
| "learning_rate": 1.3113650340046413e-09, |
| "loss": 0.7911, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 914, |
| "total_flos": 181229773520896.0, |
| "train_loss": 0.266992773216715, |
| "train_runtime": 3529.0775, |
| "train_samples_per_second": 16.577, |
| "train_steps_per_second": 0.259 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 915, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 181229773520896.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|