| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 604, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.033112582781456956, | |
| "grad_norm": 2.8630754947662354, | |
| "learning_rate": 8.264462809917356e-07, | |
| "loss": 2.5, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06622516556291391, | |
| "grad_norm": 2.044926643371582, | |
| "learning_rate": 1.6528925619834712e-06, | |
| "loss": 2.5625, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09933774834437085, | |
| "grad_norm": 2.0647263526916504, | |
| "learning_rate": 2.479338842975207e-06, | |
| "loss": 2.475, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13245033112582782, | |
| "grad_norm": 1.9722806215286255, | |
| "learning_rate": 3.3057851239669424e-06, | |
| "loss": 2.375, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16556291390728478, | |
| "grad_norm": 1.7632310390472412, | |
| "learning_rate": 4.132231404958678e-06, | |
| "loss": 2.225, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1986754966887417, | |
| "grad_norm": 2.126565456390381, | |
| "learning_rate": 4.958677685950414e-06, | |
| "loss": 1.9, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23178807947019867, | |
| "grad_norm": 1.3362902402877808, | |
| "learning_rate": 4.980933547537104e-06, | |
| "loss": 1.4625, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.26490066225165565, | |
| "grad_norm": 1.6563653945922852, | |
| "learning_rate": 4.919995460276783e-06, | |
| "loss": 1.25, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2980132450331126, | |
| "grad_norm": 1.2334599494934082, | |
| "learning_rate": 4.81816262909214e-06, | |
| "loss": 1.0375, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.33112582781456956, | |
| "grad_norm": 1.2277352809906006, | |
| "learning_rate": 4.677155895043723e-06, | |
| "loss": 1.05, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36423841059602646, | |
| "grad_norm": 1.204990267753601, | |
| "learning_rate": 4.499358086684381e-06, | |
| "loss": 0.975, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3973509933774834, | |
| "grad_norm": 1.0835106372833252, | |
| "learning_rate": 4.287773753387249e-06, | |
| "loss": 0.8875, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4304635761589404, | |
| "grad_norm": 1.209863305091858, | |
| "learning_rate": 4.045978392408671e-06, | |
| "loss": 0.8938, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.46357615894039733, | |
| "grad_norm": 1.1028498411178589, | |
| "learning_rate": 3.778058027682004e-06, | |
| "loss": 0.9125, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4966887417218543, | |
| "grad_norm": 1.186710238456726, | |
| "learning_rate": 3.488540161381304e-06, | |
| "loss": 0.8625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 1.2400007247924805, | |
| "learning_rate": 3.18231726508275e-06, | |
| "loss": 0.8812, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5629139072847682, | |
| "grad_norm": 1.3776289224624634, | |
| "learning_rate": 2.8645641034226584e-06, | |
| "loss": 0.7625, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5960264900662252, | |
| "grad_norm": 1.7026677131652832, | |
| "learning_rate": 2.5406502873736693e-06, | |
| "loss": 0.8313, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6291390728476821, | |
| "grad_norm": 1.2438005208969116, | |
| "learning_rate": 2.2160495348738127e-06, | |
| "loss": 0.8187, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 1.2019314765930176, | |
| "learning_rate": 1.8962471721846555e-06, | |
| "loss": 0.825, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.695364238410596, | |
| "grad_norm": 1.3416401147842407, | |
| "learning_rate": 1.5866474390840126e-06, | |
| "loss": 0.7875, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7284768211920529, | |
| "grad_norm": 1.214216709136963, | |
| "learning_rate": 1.2924821643137226e-06, | |
| "loss": 0.7625, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7615894039735099, | |
| "grad_norm": 2.329277753829956, | |
| "learning_rate": 1.018722354547402e-06, | |
| "loss": 0.8125, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7947019867549668, | |
| "grad_norm": 1.8247755765914917, | |
| "learning_rate": 7.69994190908499e-07, | |
| "loss": 0.7844, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8278145695364238, | |
| "grad_norm": 1.354269027709961, | |
| "learning_rate": 5.505008525871183e-07, | |
| "loss": 0.8313, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8609271523178808, | |
| "grad_norm": 1.941784143447876, | |
| "learning_rate": 3.639514886337786e-07, | |
| "loss": 0.7844, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8940397350993378, | |
| "grad_norm": 1.4848183393478394, | |
| "learning_rate": 2.1349853821348797e-07, | |
| "loss": 0.8125, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9271523178807947, | |
| "grad_norm": 1.269209384918213, | |
| "learning_rate": 1.0168445852548142e-07, | |
| "loss": 0.8187, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9602649006622517, | |
| "grad_norm": 1.4907937049865723, | |
| "learning_rate": 3.0398760616796306e-08, | |
| "loss": 0.8313, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9933774834437086, | |
| "grad_norm": 1.2604339122772217, | |
| "learning_rate": 8.460791279910064e-10, | |
| "loss": 0.7156, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 604, | |
| "total_flos": 3210595501381632.0, | |
| "train_loss": 1.177695571192053, | |
| "train_runtime": 189.533, | |
| "train_samples_per_second": 12.742, | |
| "train_steps_per_second": 3.187 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 604, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3210595501381632.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |