| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.999163179916318, | |
| "eval_steps": 500, | |
| "global_step": 420, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07140864714086471, | |
| "grad_norm": 77.45325469970703, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 0.6702, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.14281729428172943, | |
| "grad_norm": 59.097469329833984, | |
| "learning_rate": 3.997081220115612e-05, | |
| "loss": 0.5793, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.21422594142259413, | |
| "grad_norm": 52.32804870605469, | |
| "learning_rate": 3.982805660299152e-05, | |
| "loss": 0.5313, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.28563458856345886, | |
| "grad_norm": 42.44932174682617, | |
| "learning_rate": 3.956722125241571e-05, | |
| "loss": 0.4916, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.35704323570432356, | |
| "grad_norm": 44.81378936767578, | |
| "learning_rate": 3.9189859472289956e-05, | |
| "loss": 0.4705, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.42845188284518826, | |
| "grad_norm": 47.092281341552734, | |
| "learning_rate": 3.869821852196291e-05, | |
| "loss": 0.4503, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.499860529986053, | |
| "grad_norm": 49.76475143432617, | |
| "learning_rate": 3.809522621442463e-05, | |
| "loss": 0.4432, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5712691771269177, | |
| "grad_norm": 47.245887756347656, | |
| "learning_rate": 3.738447348063752e-05, | |
| "loss": 0.4199, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6426778242677824, | |
| "grad_norm": 53.50367736816406, | |
| "learning_rate": 3.657019298487685e-05, | |
| "loss": 0.4037, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7140864714086471, | |
| "grad_norm": 45.266780853271484, | |
| "learning_rate": 3.565723391843037e-05, | |
| "loss": 0.3886, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7854951185495118, | |
| "grad_norm": 42.24121856689453, | |
| "learning_rate": 3.465103312176541e-05, | |
| "loss": 0.382, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.8569037656903765, | |
| "grad_norm": 46.39073181152344, | |
| "learning_rate": 3.35575827071361e-05, | |
| "loss": 0.366, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.9283124128312413, | |
| "grad_norm": 44.36848831176758, | |
| "learning_rate": 3.238339437444418e-05, | |
| "loss": 0.3539, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.999721059972106, | |
| "grad_norm": 43.62345504760742, | |
| "learning_rate": 3.113546063285907e-05, | |
| "loss": 0.348, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.0711297071129706, | |
| "grad_norm": 42.84613037109375, | |
| "learning_rate": 2.9821213159129655e-05, | |
| "loss": 0.2955, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.1425383542538354, | |
| "grad_norm": 47.24495315551758, | |
| "learning_rate": 2.8448478540571694e-05, | |
| "loss": 0.2896, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.2139470013947, | |
| "grad_norm": 42.732933044433594, | |
| "learning_rate": 2.70254316662896e-05, | |
| "loss": 0.2889, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.2853556485355648, | |
| "grad_norm": 44.07963180541992, | |
| "learning_rate": 2.5560547044196552e-05, | |
| "loss": 0.288, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.3567642956764296, | |
| "grad_norm": 43.4594612121582, | |
| "learning_rate": 2.4062548333748996e-05, | |
| "loss": 0.2808, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.4281729428172942, | |
| "grad_norm": 42.07709884643555, | |
| "learning_rate": 2.2540356394937577e-05, | |
| "loss": 0.2762, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.499581589958159, | |
| "grad_norm": 41.83521270751953, | |
| "learning_rate": 2.1003036162912327e-05, | |
| "loss": 0.2798, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.5709902370990236, | |
| "grad_norm": 40.76252365112305, | |
| "learning_rate": 1.945974266461355e-05, | |
| "loss": 0.2747, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.6423988842398884, | |
| "grad_norm": 44.36031723022461, | |
| "learning_rate": 1.791966649888943e-05, | |
| "loss": 0.2585, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.7138075313807533, | |
| "grad_norm": 40.18080139160156, | |
| "learning_rate": 1.639197910477628e-05, | |
| "loss": 0.2593, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.7852161785216178, | |
| "grad_norm": 42.134490966796875, | |
| "learning_rate": 1.4885778143879096e-05, | |
| "loss": 0.2474, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.8566248256624824, | |
| "grad_norm": 47.6600456237793, | |
| "learning_rate": 1.3410033322110323e-05, | |
| "loss": 0.2593, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.9280334728033472, | |
| "grad_norm": 43.97560119628906, | |
| "learning_rate": 1.1973532973428536e-05, | |
| "loss": 0.2482, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.999442119944212, | |
| "grad_norm": 41.20925521850586, | |
| "learning_rate": 1.05848317236807e-05, | |
| "loss": 0.2488, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.070850767085077, | |
| "grad_norm": 44.23745346069336, | |
| "learning_rate": 9.25219954621956e-06, | |
| "loss": 0.196, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.1422594142259412, | |
| "grad_norm": 45.39742660522461, | |
| "learning_rate": 7.983572512679384e-06, | |
| "loss": 0.1993, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.213668061366806, | |
| "grad_norm": 43.84931182861328, | |
| "learning_rate": 6.7865055321983754e-06, | |
| "loss": 0.1937, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.285076708507671, | |
| "grad_norm": 44.85445022583008, | |
| "learning_rate": 5.668127360534343e-06, | |
| "loss": 0.191, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.3564853556485357, | |
| "grad_norm": 39.72176742553711, | |
| "learning_rate": 4.635098147002792e-06, | |
| "loss": 0.1931, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.4278940027894, | |
| "grad_norm": 44.86882400512695, | |
| "learning_rate": 3.6935697720532095e-06, | |
| "loss": 0.1827, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.499302649930265, | |
| "grad_norm": 42.152137756347656, | |
| "learning_rate": 2.849149211680693e-06, | |
| "loss": 0.1886, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.5707112970711297, | |
| "grad_norm": 39.35033416748047, | |
| "learning_rate": 2.1068651468445546e-06, | |
| "loss": 0.19, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.6421199442119945, | |
| "grad_norm": 40.978153228759766, | |
| "learning_rate": 1.4711380167411094e-06, | |
| "loss": 0.1912, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.7135285913528593, | |
| "grad_norm": 44.58827209472656, | |
| "learning_rate": 9.45753694268885e-07, | |
| "loss": 0.1828, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.7849372384937237, | |
| "grad_norm": 41.021331787109375, | |
| "learning_rate": 5.338409404537537e-07, | |
| "loss": 0.1808, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.8563458856345885, | |
| "grad_norm": 42.61368179321289, | |
| "learning_rate": 2.3785277209707802e-07, | |
| "loss": 0.1946, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.9277545327754533, | |
| "grad_norm": 39.83855438232422, | |
| "learning_rate": 5.9551853605968044e-08, | |
| "loss": 0.1858, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.999163179916318, | |
| "grad_norm": 39.88948059082031, | |
| "learning_rate": 0.0, | |
| "loss": 0.1823, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.999163179916318, | |
| "step": 420, | |
| "total_flos": 4.006659812993925e+17, | |
| "train_loss": 0.2970780080273038, | |
| "train_runtime": 9700.7388, | |
| "train_samples_per_second": 5.543, | |
| "train_steps_per_second": 0.043 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 420, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.006659812993925e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |