| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 77, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 0.48602561891445334, |
| "learning_rate": 2.5e-06, |
| "loss": 0.7993, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.42845219811731217, |
| "learning_rate": 5e-06, |
| "loss": 0.8032, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.4066371158771155, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.7963, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.26711318310119986, |
| "learning_rate": 1e-05, |
| "loss": 0.8103, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 1.3897367945908505, |
| "learning_rate": 1.25e-05, |
| "loss": 0.8032, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 1.4269366780259367, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.798, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.5923338485829923, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 0.7887, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.8579379494013564, |
| "learning_rate": 2e-05, |
| "loss": 0.8084, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.6706584276659255, |
| "learning_rate": 1.9989636736467278e-05, |
| "loss": 0.7901, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.7649795715774722, |
| "learning_rate": 1.9958568425315316e-05, |
| "loss": 0.7889, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7364328736047583, |
| "learning_rate": 1.9906859460363307e-05, |
| "loss": 0.784, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.7141176718724468, |
| "learning_rate": 1.9834617016337424e-05, |
| "loss": 0.7627, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.1818181818181819, |
| "grad_norm": 0.7953686200671357, |
| "learning_rate": 1.9741990826734793e-05, |
| "loss": 0.758, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.6364393746967878, |
| "learning_rate": 1.9629172873477995e-05, |
| "loss": 0.708, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.6030021495227986, |
| "learning_rate": 1.9496396989003195e-05, |
| "loss": 0.7939, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.5810362647653914, |
| "learning_rate": 1.9343938371606714e-05, |
| "loss": 0.7654, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 0.6203815744815387, |
| "learning_rate": 1.917211301505453e-05, |
| "loss": 0.771, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.5879716196958645, |
| "learning_rate": 1.8981277053636963e-05, |
| "loss": 0.7318, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.6018801850092053, |
| "learning_rate": 1.8771826024025944e-05, |
| "loss": 0.7436, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.4972385824840028, |
| "learning_rate": 1.8544194045464888e-05, |
| "loss": 0.7619, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.9090909090909092, |
| "grad_norm": 0.5553723839781752, |
| "learning_rate": 1.8298852919990254e-05, |
| "loss": 0.7115, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5342083205065797, |
| "learning_rate": 1.8036311154549783e-05, |
| "loss": 0.6987, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.090909090909091, |
| "grad_norm": 0.5990214671432684, |
| "learning_rate": 1.77571129070442e-05, |
| "loss": 0.6748, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.4700162800852038, |
| "learning_rate": 1.7461836858476858e-05, |
| "loss": 0.6812, |
| "step": 24 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.5200961637457008, |
| "learning_rate": 1.7151095013548996e-05, |
| "loss": 0.7, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.4862299916604085, |
| "learning_rate": 1.6825531432186545e-05, |
| "loss": 0.6949, |
| "step": 26 |
| }, |
| { |
| "epoch": 2.4545454545454546, |
| "grad_norm": 0.41316591942733305, |
| "learning_rate": 1.648582089462756e-05, |
| "loss": 0.6949, |
| "step": 27 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.4709578767454738, |
| "learning_rate": 1.6132667502837164e-05, |
| "loss": 0.6594, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.6363636363636362, |
| "grad_norm": 0.48229820810287394, |
| "learning_rate": 1.5766803221148676e-05, |
| "loss": 0.6714, |
| "step": 29 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.4116923901481422, |
| "learning_rate": 1.538898635915576e-05, |
| "loss": 0.6412, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.8181818181818183, |
| "grad_norm": 0.4276521237689871, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.6714, |
| "step": 31 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.42376377004663873, |
| "learning_rate": 1.4600650377311523e-05, |
| "loss": 0.6649, |
| "step": 32 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.42170416530211763, |
| "learning_rate": 1.4191765204166643e-05, |
| "loss": 0.6325, |
| "step": 33 |
| }, |
| { |
| "epoch": 3.090909090909091, |
| "grad_norm": 0.4994205555351543, |
| "learning_rate": 1.3774191957526144e-05, |
| "loss": 0.6306, |
| "step": 34 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 0.3720575583925345, |
| "learning_rate": 1.3348796121709862e-05, |
| "loss": 0.629, |
| "step": 35 |
| }, |
| { |
| "epoch": 3.2727272727272725, |
| "grad_norm": 0.42254008410157545, |
| "learning_rate": 1.291645939454825e-05, |
| "loss": 0.6186, |
| "step": 36 |
| }, |
| { |
| "epoch": 3.3636363636363638, |
| "grad_norm": 0.4187949862190593, |
| "learning_rate": 1.2478077859929e-05, |
| "loss": 0.5799, |
| "step": 37 |
| }, |
| { |
| "epoch": 3.4545454545454546, |
| "grad_norm": 0.40239109452440364, |
| "learning_rate": 1.2034560130526341e-05, |
| "loss": 0.593, |
| "step": 38 |
| }, |
| { |
| "epoch": 3.5454545454545454, |
| "grad_norm": 0.3888870447012855, |
| "learning_rate": 1.1586825464562515e-05, |
| "loss": 0.6007, |
| "step": 39 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.3541263376267664, |
| "learning_rate": 1.113580186050475e-05, |
| "loss": 0.6074, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.7272727272727275, |
| "grad_norm": 0.3873515374796265, |
| "learning_rate": 1.0682424133646712e-05, |
| "loss": 0.6112, |
| "step": 41 |
| }, |
| { |
| "epoch": 3.8181818181818183, |
| "grad_norm": 0.32891692102339676, |
| "learning_rate": 1.0227631978561057e-05, |
| "loss": 0.5861, |
| "step": 42 |
| }, |
| { |
| "epoch": 3.909090909090909, |
| "grad_norm": 0.3834043327781268, |
| "learning_rate": 9.772368021438943e-06, |
| "loss": 0.5532, |
| "step": 43 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.3457871211979862, |
| "learning_rate": 9.317575866353293e-06, |
| "loss": 0.5957, |
| "step": 44 |
| }, |
| { |
| "epoch": 4.090909090909091, |
| "grad_norm": 0.4772472337197591, |
| "learning_rate": 8.86419813949525e-06, |
| "loss": 0.5499, |
| "step": 45 |
| }, |
| { |
| "epoch": 4.181818181818182, |
| "grad_norm": 0.4303871977497106, |
| "learning_rate": 8.413174535437486e-06, |
| "loss": 0.5798, |
| "step": 46 |
| }, |
| { |
| "epoch": 4.2727272727272725, |
| "grad_norm": 0.4877767623617989, |
| "learning_rate": 7.965439869473664e-06, |
| "loss": 0.5477, |
| "step": 47 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 0.40496052778460057, |
| "learning_rate": 7.521922140071003e-06, |
| "loss": 0.5199, |
| "step": 48 |
| }, |
| { |
| "epoch": 4.454545454545454, |
| "grad_norm": 0.42511984471480235, |
| "learning_rate": 7.0835406054517505e-06, |
| "loss": 0.5391, |
| "step": 49 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.3209374586138548, |
| "learning_rate": 6.651203878290139e-06, |
| "loss": 0.512, |
| "step": 50 |
| }, |
| { |
| "epoch": 4.636363636363637, |
| "grad_norm": 0.402447394763342, |
| "learning_rate": 6.225808042473857e-06, |
| "loss": 0.5783, |
| "step": 51 |
| }, |
| { |
| "epoch": 4.7272727272727275, |
| "grad_norm": 0.33096891728238476, |
| "learning_rate": 5.8082347958333625e-06, |
| "loss": 0.546, |
| "step": 52 |
| }, |
| { |
| "epoch": 4.818181818181818, |
| "grad_norm": 0.332333342564896, |
| "learning_rate": 5.399349622688479e-06, |
| "loss": 0.5147, |
| "step": 53 |
| }, |
| { |
| "epoch": 4.909090909090909, |
| "grad_norm": 0.35485158911867, |
| "learning_rate": 5.000000000000003e-06, |
| "loss": 0.5149, |
| "step": 54 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.2887915588717827, |
| "learning_rate": 4.611013640844245e-06, |
| "loss": 0.4971, |
| "step": 55 |
| }, |
| { |
| "epoch": 5.090909090909091, |
| "grad_norm": 0.4314750252644176, |
| "learning_rate": 4.2331967788513295e-06, |
| "loss": 0.4778, |
| "step": 56 |
| }, |
| { |
| "epoch": 5.181818181818182, |
| "grad_norm": 0.36711724886491176, |
| "learning_rate": 3.867332497162836e-06, |
| "loss": 0.4921, |
| "step": 57 |
| }, |
| { |
| "epoch": 5.2727272727272725, |
| "grad_norm": 0.2959684186547754, |
| "learning_rate": 3.5141791053724405e-06, |
| "loss": 0.5383, |
| "step": 58 |
| }, |
| { |
| "epoch": 5.363636363636363, |
| "grad_norm": 0.3972005267322283, |
| "learning_rate": 3.174468567813461e-06, |
| "loss": 0.4821, |
| "step": 59 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.325169890742008, |
| "learning_rate": 2.8489049864510053e-06, |
| "loss": 0.4745, |
| "step": 60 |
| }, |
| { |
| "epoch": 5.545454545454545, |
| "grad_norm": 0.2936891811965537, |
| "learning_rate": 2.5381631415231455e-06, |
| "loss": 0.4839, |
| "step": 61 |
| }, |
| { |
| "epoch": 5.636363636363637, |
| "grad_norm": 0.29676712083641144, |
| "learning_rate": 2.2428870929558012e-06, |
| "loss": 0.4567, |
| "step": 62 |
| }, |
| { |
| "epoch": 5.7272727272727275, |
| "grad_norm": 0.33870568632881165, |
| "learning_rate": 1.963688845450218e-06, |
| "loss": 0.4921, |
| "step": 63 |
| }, |
| { |
| "epoch": 5.818181818181818, |
| "grad_norm": 0.29687019866187325, |
| "learning_rate": 1.7011470800097496e-06, |
| "loss": 0.5278, |
| "step": 64 |
| }, |
| { |
| "epoch": 5.909090909090909, |
| "grad_norm": 0.26870283041651666, |
| "learning_rate": 1.4558059545351144e-06, |
| "loss": 0.5204, |
| "step": 65 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.27973517857533825, |
| "learning_rate": 1.2281739759740575e-06, |
| "loss": 0.4379, |
| "step": 66 |
| }, |
| { |
| "epoch": 6.090909090909091, |
| "grad_norm": 0.28840001210298166, |
| "learning_rate": 1.01872294636304e-06, |
| "loss": 0.4902, |
| "step": 67 |
| }, |
| { |
| "epoch": 6.181818181818182, |
| "grad_norm": 0.23572925155964622, |
| "learning_rate": 8.278869849454718e-07, |
| "loss": 0.468, |
| "step": 68 |
| }, |
| { |
| "epoch": 6.2727272727272725, |
| "grad_norm": 0.24216361202149772, |
| "learning_rate": 6.560616283932897e-07, |
| "loss": 0.4709, |
| "step": 69 |
| }, |
| { |
| "epoch": 6.363636363636363, |
| "grad_norm": 0.2406420848581199, |
| "learning_rate": 5.036030109968082e-07, |
| "loss": 0.4825, |
| "step": 70 |
| }, |
| { |
| "epoch": 6.454545454545454, |
| "grad_norm": 0.2331286534805128, |
| "learning_rate": 3.708271265220087e-07, |
| "loss": 0.5122, |
| "step": 71 |
| }, |
| { |
| "epoch": 6.545454545454545, |
| "grad_norm": 0.23179243381130654, |
| "learning_rate": 2.5800917326521013e-07, |
| "loss": 0.4406, |
| "step": 72 |
| }, |
| { |
| "epoch": 6.636363636363637, |
| "grad_norm": 0.244329635687278, |
| "learning_rate": 1.6538298366257975e-07, |
| "loss": 0.412, |
| "step": 73 |
| }, |
| { |
| "epoch": 6.7272727272727275, |
| "grad_norm": 0.2208745914858074, |
| "learning_rate": 9.314053963669245e-08, |
| "loss": 0.4474, |
| "step": 74 |
| }, |
| { |
| "epoch": 6.818181818181818, |
| "grad_norm": 0.23272584882701394, |
| "learning_rate": 4.143157468468717e-08, |
| "loss": 0.4958, |
| "step": 75 |
| }, |
| { |
| "epoch": 6.909090909090909, |
| "grad_norm": 0.22856058181085342, |
| "learning_rate": 1.0363263532724433e-08, |
| "loss": 0.4805, |
| "step": 76 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.20415011684845324, |
| "learning_rate": 0.0, |
| "loss": 0.476, |
| "step": 77 |
| }, |
| { |
| "epoch": 7.0, |
| "step": 77, |
| "total_flos": 725884553134080.0, |
| "train_loss": 0.6159460084004835, |
| "train_runtime": 2666.212, |
| "train_samples_per_second": 2.625, |
| "train_steps_per_second": 0.029 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 77, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 7, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 725884553134080.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|