{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9747899159663865, "eval_steps": 500, "global_step": 29, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03361344537815126, "grad_norm": 4.469196166173266, "learning_rate": 0.0, "loss": 0.889, "step": 1 }, { "epoch": 0.06722689075630252, "grad_norm": 4.365192499514651, "learning_rate": 3.3333333333333333e-06, "loss": 0.9117, "step": 2 }, { "epoch": 0.10084033613445378, "grad_norm": 3.965699785576992, "learning_rate": 6.666666666666667e-06, "loss": 0.878, "step": 3 }, { "epoch": 0.13445378151260504, "grad_norm": 2.2523927807625044, "learning_rate": 1e-05, "loss": 0.7814, "step": 4 }, { "epoch": 0.16806722689075632, "grad_norm": 4.003900590954268, "learning_rate": 9.96354437049027e-06, "loss": 0.859, "step": 5 }, { "epoch": 0.20168067226890757, "grad_norm": 3.9172213327695036, "learning_rate": 9.854709087130261e-06, "loss": 0.7727, "step": 6 }, { "epoch": 0.23529411764705882, "grad_norm": 3.3484290901238447, "learning_rate": 9.675081213427076e-06, "loss": 0.6556, "step": 7 }, { "epoch": 0.2689075630252101, "grad_norm": 3.0412731911621695, "learning_rate": 9.427280128266049e-06, "loss": 0.7545, "step": 8 }, { "epoch": 0.3025210084033613, "grad_norm": 1.824318016038074, "learning_rate": 9.114919329468283e-06, "loss": 0.6364, "step": 9 }, { "epoch": 0.33613445378151263, "grad_norm": 1.4055884613148466, "learning_rate": 8.742553740855507e-06, "loss": 0.6121, "step": 10 }, { "epoch": 0.3697478991596639, "grad_norm": 1.6529134782337978, "learning_rate": 8.315613291203977e-06, "loss": 0.6341, "step": 11 }, { "epoch": 0.40336134453781514, "grad_norm": 1.410865474352178, "learning_rate": 7.84032373365578e-06, "loss": 0.6073, "step": 12 }, { "epoch": 0.4369747899159664, "grad_norm": 1.3475768201990301, "learning_rate": 7.323615860218844e-06, "loss": 0.5754, "step": 13 }, { "epoch": 0.47058823529411764, "grad_norm": 1.231638918744367, "learning_rate": 6.773024435212678e-06, "loss": 0.6227, "step": 14 }, { "epoch": 0.5042016806722689, "grad_norm": 1.3031603882175071, "learning_rate": 6.1965783214377895e-06, "loss": 0.7014, "step": 15 }, { "epoch": 0.5378151260504201, "grad_norm": 1.289342432305298, "learning_rate": 5.6026834012766155e-06, "loss": 0.5801, "step": 16 }, { "epoch": 0.5714285714285714, "grad_norm": 1.297660868574315, "learning_rate": 5e-06, "loss": 0.5476, "step": 17 }, { "epoch": 0.6050420168067226, "grad_norm": 1.155602860047339, "learning_rate": 4.397316598723385e-06, "loss": 0.6054, "step": 18 }, { "epoch": 0.6386554621848739, "grad_norm": 1.1265443534540045, "learning_rate": 3.803421678562213e-06, "loss": 0.575, "step": 19 }, { "epoch": 0.6722689075630253, "grad_norm": 1.1202857464472886, "learning_rate": 3.226975564787322e-06, "loss": 0.5674, "step": 20 }, { "epoch": 0.7058823529411765, "grad_norm": 1.2002904858114178, "learning_rate": 2.6763841397811576e-06, "loss": 0.6012, "step": 21 }, { "epoch": 0.7394957983193278, "grad_norm": 1.0836740860048584, "learning_rate": 2.159676266344222e-06, "loss": 0.5655, "step": 22 }, { "epoch": 0.773109243697479, "grad_norm": 1.0370486449910274, "learning_rate": 1.6843867087960252e-06, "loss": 0.5576, "step": 23 }, { "epoch": 0.8067226890756303, "grad_norm": 0.972526640027234, "learning_rate": 1.257446259144494e-06, "loss": 0.5328, "step": 24 }, { "epoch": 0.8403361344537815, "grad_norm": 1.047736836683075, "learning_rate": 8.850806705317183e-07, "loss": 0.5591, "step": 25 }, { "epoch": 0.8739495798319328, "grad_norm": 1.0682305380809587, "learning_rate": 5.727198717339511e-07, "loss": 0.5739, "step": 26 }, { "epoch": 0.907563025210084, "grad_norm": 1.0211661595553563, "learning_rate": 3.2491878657292643e-07, "loss": 0.5883, "step": 27 }, { "epoch": 0.9411764705882353, "grad_norm": 0.8985589898304817, "learning_rate": 1.4529091286973994e-07, "loss": 0.5539, "step": 28 }, { "epoch": 0.9747899159663865, "grad_norm": 0.97842043850681, "learning_rate": 3.645562950973014e-08, "loss": 0.6418, "step": 29 }, { "epoch": 0.9747899159663865, "step": 29, "total_flos": 3973377679360.0, "train_loss": 0.6531299044345987, "train_runtime": 453.7926, "train_samples_per_second": 2.098, "train_steps_per_second": 0.064 } ], "logging_steps": 1, "max_steps": 29, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3973377679360.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }