| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9747899159663865, | |
| "eval_steps": 500, | |
| "global_step": 29, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03361344537815126, | |
| "grad_norm": 4.469196166173266, | |
| "learning_rate": 0.0, | |
| "loss": 0.889, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.06722689075630252, | |
| "grad_norm": 4.365192499514651, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.9117, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.10084033613445378, | |
| "grad_norm": 3.965699785576992, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.878, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.13445378151260504, | |
| "grad_norm": 2.2523927807625044, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7814, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.16806722689075632, | |
| "grad_norm": 4.003900590954268, | |
| "learning_rate": 9.96354437049027e-06, | |
| "loss": 0.859, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.20168067226890757, | |
| "grad_norm": 3.9172213327695036, | |
| "learning_rate": 9.854709087130261e-06, | |
| "loss": 0.7727, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 3.3484290901238447, | |
| "learning_rate": 9.675081213427076e-06, | |
| "loss": 0.6556, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.2689075630252101, | |
| "grad_norm": 3.0412731911621695, | |
| "learning_rate": 9.427280128266049e-06, | |
| "loss": 0.7545, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.3025210084033613, | |
| "grad_norm": 1.824318016038074, | |
| "learning_rate": 9.114919329468283e-06, | |
| "loss": 0.6364, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.33613445378151263, | |
| "grad_norm": 1.4055884613148466, | |
| "learning_rate": 8.742553740855507e-06, | |
| "loss": 0.6121, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.3697478991596639, | |
| "grad_norm": 1.6529134782337978, | |
| "learning_rate": 8.315613291203977e-06, | |
| "loss": 0.6341, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.40336134453781514, | |
| "grad_norm": 1.410865474352178, | |
| "learning_rate": 7.84032373365578e-06, | |
| "loss": 0.6073, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.4369747899159664, | |
| "grad_norm": 1.3475768201990301, | |
| "learning_rate": 7.323615860218844e-06, | |
| "loss": 0.5754, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 1.231638918744367, | |
| "learning_rate": 6.773024435212678e-06, | |
| "loss": 0.6227, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.5042016806722689, | |
| "grad_norm": 1.3031603882175071, | |
| "learning_rate": 6.1965783214377895e-06, | |
| "loss": 0.7014, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.5378151260504201, | |
| "grad_norm": 1.289342432305298, | |
| "learning_rate": 5.6026834012766155e-06, | |
| "loss": 0.5801, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 1.297660868574315, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5476, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.6050420168067226, | |
| "grad_norm": 1.155602860047339, | |
| "learning_rate": 4.397316598723385e-06, | |
| "loss": 0.6054, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.6386554621848739, | |
| "grad_norm": 1.1265443534540045, | |
| "learning_rate": 3.803421678562213e-06, | |
| "loss": 0.575, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.6722689075630253, | |
| "grad_norm": 1.1202857464472886, | |
| "learning_rate": 3.226975564787322e-06, | |
| "loss": 0.5674, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 1.2002904858114178, | |
| "learning_rate": 2.6763841397811576e-06, | |
| "loss": 0.6012, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.7394957983193278, | |
| "grad_norm": 1.0836740860048584, | |
| "learning_rate": 2.159676266344222e-06, | |
| "loss": 0.5655, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.773109243697479, | |
| "grad_norm": 1.0370486449910274, | |
| "learning_rate": 1.6843867087960252e-06, | |
| "loss": 0.5576, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.8067226890756303, | |
| "grad_norm": 0.972526640027234, | |
| "learning_rate": 1.257446259144494e-06, | |
| "loss": 0.5328, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 1.047736836683075, | |
| "learning_rate": 8.850806705317183e-07, | |
| "loss": 0.5591, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.8739495798319328, | |
| "grad_norm": 1.0682305380809587, | |
| "learning_rate": 5.727198717339511e-07, | |
| "loss": 0.5739, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.907563025210084, | |
| "grad_norm": 1.0211661595553563, | |
| "learning_rate": 3.2491878657292643e-07, | |
| "loss": 0.5883, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.8985589898304817, | |
| "learning_rate": 1.4529091286973994e-07, | |
| "loss": 0.5539, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.9747899159663865, | |
| "grad_norm": 0.97842043850681, | |
| "learning_rate": 3.645562950973014e-08, | |
| "loss": 0.6418, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.9747899159663865, | |
| "step": 29, | |
| "total_flos": 3973377679360.0, | |
| "train_loss": 0.6531299044345987, | |
| "train_runtime": 453.7926, | |
| "train_samples_per_second": 2.098, | |
| "train_steps_per_second": 0.064 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 29, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3973377679360.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |