| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 838, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02386634844868735, |
| "grad_norm": 22.198865776871017, |
| "learning_rate": 1.0714285714285714e-06, |
| "loss": 1.5985, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0477326968973747, |
| "grad_norm": 3.2541165998224404, |
| "learning_rate": 2.261904761904762e-06, |
| "loss": 1.1786, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07159904534606205, |
| "grad_norm": 2.5041591504453575, |
| "learning_rate": 3.4523809523809528e-06, |
| "loss": 0.9456, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0954653937947494, |
| "grad_norm": 2.080495232890991, |
| "learning_rate": 4.642857142857144e-06, |
| "loss": 0.8296, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11933174224343675, |
| "grad_norm": 1.9634409779528466, |
| "learning_rate": 5.833333333333334e-06, |
| "loss": 0.7578, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1431980906921241, |
| "grad_norm": 1.697242706017287, |
| "learning_rate": 7.023809523809524e-06, |
| "loss": 0.7161, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16706443914081145, |
| "grad_norm": 1.8172294891473797, |
| "learning_rate": 8.214285714285714e-06, |
| "loss": 0.7254, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1909307875894988, |
| "grad_norm": 1.5681357090324912, |
| "learning_rate": 9.404761904761905e-06, |
| "loss": 0.6908, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21479713603818615, |
| "grad_norm": 1.652119506861618, |
| "learning_rate": 9.998915020921847e-06, |
| "loss": 0.6295, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2386634844868735, |
| "grad_norm": 1.8575385662659074, |
| "learning_rate": 9.990238013323298e-06, |
| "loss": 0.7128, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26252983293556087, |
| "grad_norm": 1.845005460336626, |
| "learning_rate": 9.972899059486629e-06, |
| "loss": 0.6907, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2863961813842482, |
| "grad_norm": 1.3478623294347822, |
| "learning_rate": 9.946928255989507e-06, |
| "loss": 0.6847, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31026252983293556, |
| "grad_norm": 1.4018840185317787, |
| "learning_rate": 9.912370682385866e-06, |
| "loss": 0.6866, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3341288782816229, |
| "grad_norm": 1.8134575466661624, |
| "learning_rate": 9.86928632295779e-06, |
| "loss": 0.6826, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.35799522673031026, |
| "grad_norm": 1.7336473448072804, |
| "learning_rate": 9.817749962596115e-06, |
| "loss": 0.6763, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3818615751789976, |
| "grad_norm": 1.5075876283514944, |
| "learning_rate": 9.757851056990446e-06, |
| "loss": 0.7218, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.40572792362768495, |
| "grad_norm": 1.5160111965341203, |
| "learning_rate": 9.689693577353917e-06, |
| "loss": 0.6813, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4295942720763723, |
| "grad_norm": 1.5232856099293663, |
| "learning_rate": 9.613395829952233e-06, |
| "loss": 0.7066, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.45346062052505964, |
| "grad_norm": 1.7920470499490964, |
| "learning_rate": 9.529090250750234e-06, |
| "loss": 0.6921, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.477326968973747, |
| "grad_norm": 1.2375823623473665, |
| "learning_rate": 9.436923175532442e-06, |
| "loss": 0.6796, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5011933174224343, |
| "grad_norm": 1.319341491851884, |
| "learning_rate": 9.337054585896596e-06, |
| "loss": 0.7005, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5250596658711217, |
| "grad_norm": 1.527221302956167, |
| "learning_rate": 9.229657831561082e-06, |
| "loss": 0.6867, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.548926014319809, |
| "grad_norm": 1.2837983849777186, |
| "learning_rate": 9.114919329468283e-06, |
| "loss": 0.6256, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5727923627684964, |
| "grad_norm": 1.4385178716193245, |
| "learning_rate": 8.993038240206114e-06, |
| "loss": 0.6633, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5966587112171837, |
| "grad_norm": 1.3575850712285624, |
| "learning_rate": 8.864226122309423e-06, |
| "loss": 0.649, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6205250596658711, |
| "grad_norm": 1.3696512745412937, |
| "learning_rate": 8.728706565041296e-06, |
| "loss": 0.6946, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6443914081145584, |
| "grad_norm": 1.3839657939910561, |
| "learning_rate": 8.586714800291704e-06, |
| "loss": 0.6845, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6682577565632458, |
| "grad_norm": 1.2964537533820486, |
| "learning_rate": 8.438497294267117e-06, |
| "loss": 0.6991, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6921241050119332, |
| "grad_norm": 1.4244871422623, |
| "learning_rate": 8.28431131967984e-06, |
| "loss": 0.6726, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7159904534606205, |
| "grad_norm": 1.5421494025118538, |
| "learning_rate": 8.124424509179648e-06, |
| "loss": 0.6935, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7398568019093079, |
| "grad_norm": 1.223951035339654, |
| "learning_rate": 7.959114390802894e-06, |
| "loss": 0.6645, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7637231503579952, |
| "grad_norm": 1.2107381990131096, |
| "learning_rate": 7.78866790624538e-06, |
| "loss": 0.6774, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7875894988066826, |
| "grad_norm": 1.468623638321339, |
| "learning_rate": 7.613380912795225e-06, |
| "loss": 0.6811, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8114558472553699, |
| "grad_norm": 1.1793519797847982, |
| "learning_rate": 7.4335576697902546e-06, |
| "loss": 0.6705, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8353221957040573, |
| "grad_norm": 1.199432894712693, |
| "learning_rate": 7.249510310491268e-06, |
| "loss": 0.6694, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8591885441527446, |
| "grad_norm": 1.5006522362419077, |
| "learning_rate": 7.0615583002879465e-06, |
| "loss": 0.6541, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.883054892601432, |
| "grad_norm": 1.3307392430470173, |
| "learning_rate": 6.870027882177791e-06, |
| "loss": 0.6804, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9069212410501193, |
| "grad_norm": 1.5324129053990416, |
| "learning_rate": 6.675251510480662e-06, |
| "loss": 0.6797, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9307875894988067, |
| "grad_norm": 1.6200646040789068, |
| "learning_rate": 6.477567273771807e-06, |
| "loss": 0.6413, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.954653937947494, |
| "grad_norm": 1.469059513496822, |
| "learning_rate": 6.277318308035109e-06, |
| "loss": 0.6785, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9785202863961814, |
| "grad_norm": 1.2149644700216862, |
| "learning_rate": 6.074852201055121e-06, |
| "loss": 0.6639, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0023866348448687, |
| "grad_norm": 1.0025525829016835, |
| "learning_rate": 5.870520389081782e-06, |
| "loss": 0.6222, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.026252983293556, |
| "grad_norm": 1.295913557732094, |
| "learning_rate": 5.664677546815043e-06, |
| "loss": 0.5493, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.0501193317422435, |
| "grad_norm": 1.1487163460854986, |
| "learning_rate": 5.457680971768258e-06, |
| "loss": 0.5344, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0739856801909309, |
| "grad_norm": 1.3219574700118175, |
| "learning_rate": 5.249889964078965e-06, |
| "loss": 0.5275, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.097852028639618, |
| "grad_norm": 1.3770329891016075, |
| "learning_rate": 5.041665202843543e-06, |
| "loss": 0.5603, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1217183770883055, |
| "grad_norm": 1.331643194868804, |
| "learning_rate": 4.833368120058317e-06, |
| "loss": 0.5159, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.1455847255369929, |
| "grad_norm": 1.2515049886681966, |
| "learning_rate": 4.6253602732537685e-06, |
| "loss": 0.5565, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1694510739856803, |
| "grad_norm": 1.4687022123622233, |
| "learning_rate": 4.418002717910887e-06, |
| "loss": 0.5413, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.1933174224343674, |
| "grad_norm": 1.353625693587157, |
| "learning_rate": 4.2116553807489255e-06, |
| "loss": 0.5313, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2171837708830548, |
| "grad_norm": 1.3516251877378511, |
| "learning_rate": 4.006676434972474e-06, |
| "loss": 0.5359, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.2410501193317423, |
| "grad_norm": 1.4559533629414523, |
| "learning_rate": 3.803421678562213e-06, |
| "loss": 0.5283, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.2649164677804297, |
| "grad_norm": 1.3830404444432236, |
| "learning_rate": 3.602243916688548e-06, |
| "loss": 0.5256, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.288782816229117, |
| "grad_norm": 1.1347395367280129, |
| "learning_rate": 3.403492349320101e-06, |
| "loss": 0.5065, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3126491646778042, |
| "grad_norm": 1.369934655860597, |
| "learning_rate": 3.2075119650900166e-06, |
| "loss": 0.5185, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3365155131264916, |
| "grad_norm": 1.1407666119413948, |
| "learning_rate": 3.0146429424722277e-06, |
| "loss": 0.5269, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.360381861575179, |
| "grad_norm": 1.194998910872438, |
| "learning_rate": 2.82522005930708e-06, |
| "loss": 0.5579, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.3842482100238662, |
| "grad_norm": 1.4048755491651306, |
| "learning_rate": 2.6395721117012648e-06, |
| "loss": 0.5326, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4081145584725536, |
| "grad_norm": 1.2124990054259197, |
| "learning_rate": 2.458021343310713e-06, |
| "loss": 0.5767, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.431980906921241, |
| "grad_norm": 1.2008255986572944, |
| "learning_rate": 2.2808828859970905e-06, |
| "loss": 0.5421, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4558472553699284, |
| "grad_norm": 1.5274746306867921, |
| "learning_rate": 2.108464212828786e-06, |
| "loss": 0.537, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.4797136038186158, |
| "grad_norm": 1.2483775974884175, |
| "learning_rate": 1.9410646043758737e-06, |
| "loss": 0.5055, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.503579952267303, |
| "grad_norm": 1.2765814338927353, |
| "learning_rate": 1.7789746292254313e-06, |
| "loss": 0.5158, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.5274463007159904, |
| "grad_norm": 1.3914988398920136, |
| "learning_rate": 1.6224756396189216e-06, |
| "loss": 0.5338, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.5513126491646778, |
| "grad_norm": 1.3639543604430029, |
| "learning_rate": 1.4718392830871192e-06, |
| "loss": 0.5501, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.575178997613365, |
| "grad_norm": 1.4507981866997166, |
| "learning_rate": 1.32732703093025e-06, |
| "loss": 0.525, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5990453460620526, |
| "grad_norm": 1.137016746163519, |
| "learning_rate": 1.1891897243618184e-06, |
| "loss": 0.5624, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.6229116945107398, |
| "grad_norm": 1.2443489508801573, |
| "learning_rate": 1.0576671391038996e-06, |
| "loss": 0.5304, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.6467780429594272, |
| "grad_norm": 1.6061206833052404, |
| "learning_rate": 9.32987569189675e-07, |
| "loss": 0.5236, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.6706443914081146, |
| "grad_norm": 1.3532986340010356, |
| "learning_rate": 8.15367430695636e-07, |
| "loss": 0.5209, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6945107398568018, |
| "grad_norm": 1.4515766409844337, |
| "learning_rate": 7.050108860912752e-07, |
| "loss": 0.5111, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.7183770883054894, |
| "grad_norm": 1.3284962952790051, |
| "learning_rate": 6.021094898583269e-07, |
| "loss": 0.5213, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.7422434367541766, |
| "grad_norm": 1.4635925791592053, |
| "learning_rate": 5.068418559946864e-07, |
| "loss": 0.5311, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.766109785202864, |
| "grad_norm": 1.351280688220892, |
| "learning_rate": 4.193733479801232e-07, |
| "loss": 0.5125, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.7899761336515514, |
| "grad_norm": 1.3180660321477928, |
| "learning_rate": 3.398557917419626e-07, |
| "loss": 0.5108, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.8138424821002386, |
| "grad_norm": 1.4121861566519063, |
| "learning_rate": 2.6842721211895516e-07, |
| "loss": 0.5208, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.837708830548926, |
| "grad_norm": 1.3881888133783364, |
| "learning_rate": 2.0521159328077856e-07, |
| "loss": 0.5137, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.8615751789976134, |
| "grad_norm": 1.5243893621253777, |
| "learning_rate": 1.5031866351901182e-07, |
| "loss": 0.5452, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.8854415274463006, |
| "grad_norm": 1.3779735632404062, |
| "learning_rate": 1.0384370478316919e-07, |
| "loss": 0.5489, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.9093078758949882, |
| "grad_norm": 1.3140536094113324, |
| "learning_rate": 6.58673872923693e-08, |
| "loss": 0.4922, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9331742243436754, |
| "grad_norm": 1.2195541397252074, |
| "learning_rate": 3.645562950973014e-08, |
| "loss": 0.5346, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.9570405727923628, |
| "grad_norm": 1.2374268027315825, |
| "learning_rate": 1.5659483722537117e-08, |
| "loss": 0.475, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.9809069212410502, |
| "grad_norm": 1.4012637588425856, |
| "learning_rate": 3.5150474267992007e-09, |
| "loss": 0.5564, |
| "step": 830 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 838, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 25783465213952.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|