{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 6705, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.1185682326621924, "grad_norm": 1.7562521696090698, "learning_rate": 4.627889634601044e-05, "loss": 1.7047, "step": 500 }, { "epoch": 2.237136465324385, "grad_norm": 2.1150481700897217, "learning_rate": 4.25503355704698e-05, "loss": 1.3195, "step": 1000 }, { "epoch": 3.3557046979865772, "grad_norm": 1.773620843887329, "learning_rate": 3.882177479492916e-05, "loss": 1.1938, "step": 1500 }, { "epoch": 4.47427293064877, "grad_norm": 2.1066176891326904, "learning_rate": 3.509321401938851e-05, "loss": 1.1122, "step": 2000 }, { "epoch": 5.592841163310962, "grad_norm": 1.8713332414627075, "learning_rate": 3.1364653243847876e-05, "loss": 1.0513, "step": 2500 }, { "epoch": 6.7114093959731544, "grad_norm": 1.7295632362365723, "learning_rate": 2.7636092468307233e-05, "loss": 1.0046, "step": 3000 }, { "epoch": 7.829977628635347, "grad_norm": 1.7196978330612183, "learning_rate": 2.3907531692766593e-05, "loss": 0.9671, "step": 3500 }, { "epoch": 8.94854586129754, "grad_norm": 1.7857871055603027, "learning_rate": 2.017897091722595e-05, "loss": 0.9349, "step": 4000 }, { "epoch": 10.06711409395973, "grad_norm": 1.7896051406860352, "learning_rate": 1.645041014168531e-05, "loss": 0.9086, "step": 4500 }, { "epoch": 11.185682326621924, "grad_norm": 2.3136727809906006, "learning_rate": 1.272184936614467e-05, "loss": 0.8844, "step": 5000 }, { "epoch": 12.304250559284116, "grad_norm": 2.1995062828063965, "learning_rate": 8.993288590604027e-06, "loss": 0.877, "step": 5500 }, { "epoch": 13.422818791946309, "grad_norm": 1.8003933429718018, "learning_rate": 5.264727815063386e-06, "loss": 0.8575, "step": 6000 }, { "epoch": 14.5413870246085, "grad_norm": 1.9917985200881958, "learning_rate": 1.5361670395227442e-06, "loss": 0.8476, "step": 6500 }, { "epoch": 15.0, "step": 6705, "total_flos": 4785148574908416.0, "train_loss": 1.044801017653133, "train_runtime": 1030.3618, "train_samples_per_second": 52.059, "train_steps_per_second": 6.507 } ], "logging_steps": 500, "max_steps": 6705, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4785148574908416.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }