{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9967721110393803,
  "eval_steps": 1000000000,
  "global_step": 386,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.051646223369916075,
      "grad_norm": 2.108860915849221,
      "learning_rate": 5e-06,
      "loss": 0.5468,
      "step": 10
    },
    {
      "epoch": 0.10329244673983215,
      "grad_norm": 1.6310118874003974,
      "learning_rate": 1e-05,
      "loss": 0.4418,
      "step": 20
    },
    {
      "epoch": 0.1549386701097482,
      "grad_norm": 1.3450900424853298,
      "learning_rate": 9.981591817238379e-06,
      "loss": 0.405,
      "step": 30
    },
    {
      "epoch": 0.2065848934796643,
      "grad_norm": 1.3151962345663961,
      "learning_rate": 9.926502813430545e-06,
      "loss": 0.3851,
      "step": 40
    },
    {
      "epoch": 0.2582311168495804,
      "grad_norm": 1.2680899500012033,
      "learning_rate": 9.835138623956603e-06,
      "loss": 0.3907,
      "step": 50
    },
    {
      "epoch": 0.3098773402194964,
      "grad_norm": 1.4083168813617775,
      "learning_rate": 9.70817198829563e-06,
      "loss": 0.3781,
      "step": 60
    },
    {
      "epoch": 0.3615235635894125,
      "grad_norm": 1.3754071945041135,
      "learning_rate": 9.54653779646118e-06,
      "loss": 0.3869,
      "step": 70
    },
    {
      "epoch": 0.4131697869593286,
      "grad_norm": 1.2511484445251255,
      "learning_rate": 9.351426205150778e-06,
      "loss": 0.3756,
      "step": 80
    },
    {
      "epoch": 0.4648160103292447,
      "grad_norm": 1.1024319806871332,
      "learning_rate": 9.124273874297123e-06,
      "loss": 0.3714,
      "step": 90
    },
    {
      "epoch": 0.5164622336991608,
      "grad_norm": 1.1917707780202444,
      "learning_rate": 8.86675338854865e-06,
      "loss": 0.3728,
      "step": 100
    },
    {
      "epoch": 0.5681084570690769,
      "grad_norm": 1.2715677756941004,
      "learning_rate": 8.580760941571968e-06,
      "loss": 0.3672,
      "step": 110
    },
    {
      "epoch": 0.6197546804389928,
      "grad_norm": 1.227519727921758,
      "learning_rate": 8.26840237386003e-06,
      "loss": 0.3788,
      "step": 120
    },
    {
      "epoch": 0.6714009038089089,
      "grad_norm": 1.2634658378672423,
      "learning_rate": 7.93197766685348e-06,
      "loss": 0.3797,
      "step": 130
    },
    {
      "epoch": 0.723047127178825,
      "grad_norm": 1.3757986356984206,
      "learning_rate": 7.5739640075491546e-06,
      "loss": 0.3589,
      "step": 140
    },
    {
      "epoch": 0.7746933505487411,
      "grad_norm": 1.4230554296275335,
      "learning_rate": 7.1969975482957075e-06,
      "loss": 0.3783,
      "step": 150
    },
    {
      "epoch": 0.8263395739186572,
      "grad_norm": 1.1307040899887513,
      "learning_rate": 6.803853996083918e-06,
      "loss": 0.364,
      "step": 160
    },
    {
      "epoch": 0.8779857972885733,
      "grad_norm": 1.1231026642597897,
      "learning_rate": 6.397428174258048e-06,
      "loss": 0.3696,
      "step": 170
    },
    {
      "epoch": 0.9296320206584894,
      "grad_norm": 1.189629737482601,
      "learning_rate": 5.980712707140985e-06,
      "loss": 0.3762,
      "step": 180
    },
    {
      "epoch": 0.9812782440284055,
      "grad_norm": 1.303138113665757,
      "learning_rate": 5.556775984524044e-06,
      "loss": 0.3793,
      "step": 190
    },
    {
      "epoch": 1.0361523563589412,
      "grad_norm": 1.1705355616301976,
      "learning_rate": 5.1287395682749444e-06,
      "loss": 0.3015,
      "step": 200
    },
    {
      "epoch": 1.0877985797288574,
      "grad_norm": 1.058647814135033,
      "learning_rate": 4.699755207425259e-06,
      "loss": 0.2478,
      "step": 210
    },
    {
      "epoch": 1.1394448030987734,
      "grad_norm": 1.0800491931953469,
      "learning_rate": 4.272981630981551e-06,
      "loss": 0.234,
      "step": 220
    },
    {
      "epoch": 1.1910910264686896,
      "grad_norm": 1.0382356810044766,
      "learning_rate": 3.851561289341023e-06,
      "loss": 0.2482,
      "step": 230
    },
    {
      "epoch": 1.2427372498386056,
      "grad_norm": 0.9748763823021784,
      "learning_rate": 3.4385972155710274e-06,
      "loss": 0.2384,
      "step": 240
    },
    {
      "epoch": 1.2943834732085215,
      "grad_norm": 0.9948291834059353,
      "learning_rate": 3.0371301769291417e-06,
      "loss": 0.2297,
      "step": 250
    },
    {
      "epoch": 1.3460296965784377,
      "grad_norm": 1.0505131356235509,
      "learning_rate": 2.6501162848634023e-06,
      "loss": 0.2461,
      "step": 260
    },
    {
      "epoch": 1.3976759199483537,
      "grad_norm": 1.010764453532433,
      "learning_rate": 2.280405228356377e-06,
      "loss": 0.2307,
      "step": 270
    },
    {
      "epoch": 1.44932214331827,
      "grad_norm": 1.0455587580487264,
      "learning_rate": 1.93071929088694e-06,
      "loss": 0.2359,
      "step": 280
    },
    {
      "epoch": 1.500968366688186,
      "grad_norm": 1.0088444060232913,
      "learning_rate": 1.6036333055135345e-06,
      "loss": 0.2442,
      "step": 290
    },
    {
      "epoch": 1.552614590058102,
      "grad_norm": 1.0021021613404428,
      "learning_rate": 1.3015556956751669e-06,
      "loss": 0.2291,
      "step": 300
    },
    {
      "epoch": 1.604260813428018,
      "grad_norm": 1.0533375999671128,
      "learning_rate": 1.0267107413118743e-06,
      "loss": 0.2421,
      "step": 310
    },
    {
      "epoch": 1.655907036797934,
      "grad_norm": 0.9805090445662388,
      "learning_rate": 7.811222008840719e-07,
      "loss": 0.2357,
      "step": 320
    },
    {
      "epoch": 1.7075532601678503,
      "grad_norm": 1.033728352578099,
      "learning_rate": 5.665984098862992e-07,
      "loss": 0.2368,
      "step": 330
    },
    {
      "epoch": 1.7591994835377665,
      "grad_norm": 0.9971901057327158,
      "learning_rate": 3.8471896557912005e-07,
      "loss": 0.2229,
      "step": 340
    },
    {
      "epoch": 1.8108457069076824,
      "grad_norm": 0.9982038746046318,
      "learning_rate": 2.368230959830875e-07,
      "loss": 0.2302,
      "step": 350
    },
    {
      "epoch": 1.8624919302775984,
      "grad_norm": 1.0445799403567448,
      "learning_rate": 1.2399979877708746e-07,
      "loss": 0.2226,
      "step": 360
    },
    {
      "epoch": 1.9141381536475146,
      "grad_norm": 1.0245236117970937,
      "learning_rate": 4.7079822711015296e-08,
      "loss": 0.2361,
      "step": 370
    },
    {
      "epoch": 1.9657843770174306,
      "grad_norm": 0.9047755895156792,
      "learning_rate": 6.629550575847355e-09,
      "loss": 0.2411,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 386,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 110643132760064.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}