| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9295120061967467, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015491866769945779, |
| "grad_norm": 0.40339195728302, |
| "learning_rate": 9.278350515463919e-07, |
| "loss": 1.8526, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.030983733539891558, |
| "grad_norm": 0.3715074062347412, |
| "learning_rate": 1.9587628865979384e-06, |
| "loss": 1.8835, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.046475600309837335, |
| "grad_norm": 0.3445941209793091, |
| "learning_rate": 2.9896907216494846e-06, |
| "loss": 1.8733, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.061967467079783116, |
| "grad_norm": 0.3444959223270416, |
| "learning_rate": 4.020618556701032e-06, |
| "loss": 1.8952, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07745933384972889, |
| "grad_norm": 0.3230769634246826, |
| "learning_rate": 5.051546391752578e-06, |
| "loss": 1.8588, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09295120061967467, |
| "grad_norm": 0.33751699328422546, |
| "learning_rate": 6.082474226804124e-06, |
| "loss": 1.8307, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10844306738962045, |
| "grad_norm": 0.2963869869709015, |
| "learning_rate": 7.113402061855671e-06, |
| "loss": 1.8419, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12393493415956623, |
| "grad_norm": 0.2550879716873169, |
| "learning_rate": 8.144329896907216e-06, |
| "loss": 1.8281, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.139426800929512, |
| "grad_norm": 0.22610870003700256, |
| "learning_rate": 9.175257731958764e-06, |
| "loss": 1.8005, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15491866769945778, |
| "grad_norm": 0.1930716335773468, |
| "learning_rate": 1.0206185567010309e-05, |
| "loss": 1.7897, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17041053446940357, |
| "grad_norm": 0.18139410018920898, |
| "learning_rate": 1.1237113402061856e-05, |
| "loss": 1.7181, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.18590240123934934, |
| "grad_norm": 0.1586223542690277, |
| "learning_rate": 1.2268041237113405e-05, |
| "loss": 1.7328, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2013942680092951, |
| "grad_norm": 0.2047862708568573, |
| "learning_rate": 1.3298969072164948e-05, |
| "loss": 1.7268, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2168861347792409, |
| "grad_norm": 0.16858699917793274, |
| "learning_rate": 1.4329896907216495e-05, |
| "loss": 1.7029, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.23237800154918667, |
| "grad_norm": 0.15446807444095612, |
| "learning_rate": 1.5360824742268042e-05, |
| "loss": 1.7114, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.24786986831913246, |
| "grad_norm": 0.1550971418619156, |
| "learning_rate": 1.6391752577319588e-05, |
| "loss": 1.7054, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.26336173508907823, |
| "grad_norm": 0.13774091005325317, |
| "learning_rate": 1.7422680412371137e-05, |
| "loss": 1.685, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.278853601859024, |
| "grad_norm": 0.13972033560276031, |
| "learning_rate": 1.8453608247422682e-05, |
| "loss": 1.6774, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.29434546862896976, |
| "grad_norm": 0.14723576605319977, |
| "learning_rate": 1.9484536082474227e-05, |
| "loss": 1.63, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.30983733539891556, |
| "grad_norm": 0.1486756056547165, |
| "learning_rate": 1.9999592986072886e-05, |
| "loss": 1.6429, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.32532920216886135, |
| "grad_norm": 0.1460903435945511, |
| "learning_rate": 1.9996337073445673e-05, |
| "loss": 1.669, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.34082106893880715, |
| "grad_norm": 0.16247884929180145, |
| "learning_rate": 1.9989826308331103e-05, |
| "loss": 1.6572, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3563129357087529, |
| "grad_norm": 0.16369281709194183, |
| "learning_rate": 1.998006281066369e-05, |
| "loss": 1.6545, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3718048024786987, |
| "grad_norm": 0.1450214833021164, |
| "learning_rate": 1.996704975948236e-05, |
| "loss": 1.6762, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3872966692486445, |
| "grad_norm": 0.17881567776203156, |
| "learning_rate": 1.9950791391895335e-05, |
| "loss": 1.6397, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4027885360185902, |
| "grad_norm": 0.15863952040672302, |
| "learning_rate": 1.9931293001700518e-05, |
| "loss": 1.6597, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.418280402788536, |
| "grad_norm": 0.1730642467737198, |
| "learning_rate": 1.990856093766179e-05, |
| "loss": 1.6586, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4337722695584818, |
| "grad_norm": 0.14790953695774078, |
| "learning_rate": 1.988260260144185e-05, |
| "loss": 1.6416, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4492641363284276, |
| "grad_norm": 0.14577385783195496, |
| "learning_rate": 1.9853426445192175e-05, |
| "loss": 1.659, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.46475600309837334, |
| "grad_norm": 0.13978034257888794, |
| "learning_rate": 1.9821041968800982e-05, |
| "loss": 1.6362, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.48024786986831913, |
| "grad_norm": 0.17719446122646332, |
| "learning_rate": 1.9785459716800005e-05, |
| "loss": 1.6648, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4957397366382649, |
| "grad_norm": 0.1415952891111374, |
| "learning_rate": 1.9746691274931168e-05, |
| "loss": 1.6518, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5112316034082107, |
| "grad_norm": 0.1455707997083664, |
| "learning_rate": 1.970474926637418e-05, |
| "loss": 1.6445, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5267234701781565, |
| "grad_norm": 0.19560855627059937, |
| "learning_rate": 1.9659647347636422e-05, |
| "loss": 1.6557, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5422153369481022, |
| "grad_norm": 0.15447896718978882, |
| "learning_rate": 1.961140020410627e-05, |
| "loss": 1.6361, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.557707203718048, |
| "grad_norm": 0.17043063044548035, |
| "learning_rate": 1.9560023545271512e-05, |
| "loss": 1.6289, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5731990704879938, |
| "grad_norm": 0.16939246654510498, |
| "learning_rate": 1.9505534099604245e-05, |
| "loss": 1.6318, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5886909372579395, |
| "grad_norm": 0.1785915642976761, |
| "learning_rate": 1.9447949609114018e-05, |
| "loss": 1.6321, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6041828040278854, |
| "grad_norm": 0.16734299063682556, |
| "learning_rate": 1.938728882357093e-05, |
| "loss": 1.6443, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6196746707978311, |
| "grad_norm": 0.18670211732387543, |
| "learning_rate": 1.932357149440067e-05, |
| "loss": 1.6683, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.635166537567777, |
| "grad_norm": 0.15596874058246613, |
| "learning_rate": 1.925681836825331e-05, |
| "loss": 1.6336, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6506584043377227, |
| "grad_norm": 0.18547730147838593, |
| "learning_rate": 1.9187051180248134e-05, |
| "loss": 1.6374, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6661502711076684, |
| "grad_norm": 0.15439730882644653, |
| "learning_rate": 1.9114292646896574e-05, |
| "loss": 1.6298, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6816421378776143, |
| "grad_norm": 0.15702180564403534, |
| "learning_rate": 1.9038566458705615e-05, |
| "loss": 1.6235, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.69713400464756, |
| "grad_norm": 0.20213943719863892, |
| "learning_rate": 1.895989727246405e-05, |
| "loss": 1.6511, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7126258714175058, |
| "grad_norm": 0.17505770921707153, |
| "learning_rate": 1.8878310703214148e-05, |
| "loss": 1.6385, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7281177381874516, |
| "grad_norm": 0.29876482486724854, |
| "learning_rate": 1.879383331591123e-05, |
| "loss": 1.6508, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7436096049573974, |
| "grad_norm": 0.16168834269046783, |
| "learning_rate": 1.8706492616774043e-05, |
| "loss": 1.6424, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7591014717273431, |
| "grad_norm": 0.1786322444677353, |
| "learning_rate": 1.86163170443286e-05, |
| "loss": 1.6699, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.774593338497289, |
| "grad_norm": 0.16993215680122375, |
| "learning_rate": 1.8523335960148446e-05, |
| "loss": 1.6499, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7900852052672347, |
| "grad_norm": 0.15823154151439667, |
| "learning_rate": 1.8427579639294436e-05, |
| "loss": 1.6227, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8055770720371804, |
| "grad_norm": 0.1717902421951294, |
| "learning_rate": 1.8329079260457e-05, |
| "loss": 1.6216, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8210689388071263, |
| "grad_norm": 0.17375893890857697, |
| "learning_rate": 1.822786689580425e-05, |
| "loss": 1.6264, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.836560805577072, |
| "grad_norm": 0.18277738988399506, |
| "learning_rate": 1.8123975500539114e-05, |
| "loss": 1.6314, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8520526723470179, |
| "grad_norm": 0.16117972135543823, |
| "learning_rate": 1.8017438902168987e-05, |
| "loss": 1.6274, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8675445391169636, |
| "grad_norm": 0.1836647391319275, |
| "learning_rate": 1.7908291789491348e-05, |
| "loss": 1.6163, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8830364058869093, |
| "grad_norm": 0.1835409700870514, |
| "learning_rate": 1.7796569701298906e-05, |
| "loss": 1.624, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8985282726568552, |
| "grad_norm": 0.20427103340625763, |
| "learning_rate": 1.7682309014808043e-05, |
| "loss": 1.6575, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9140201394268009, |
| "grad_norm": 0.18505772948265076, |
| "learning_rate": 1.756554693381419e-05, |
| "loss": 1.6478, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9295120061967467, |
| "grad_norm": 0.16430599987506866, |
| "learning_rate": 1.7446321476578138e-05, |
| "loss": 1.6358, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1935, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.288958696968028e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|