{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 500,
  "global_step": 6705,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.1185682326621924,
      "grad_norm": 1.7562521696090698,
      "learning_rate": 4.627889634601044e-05,
      "loss": 1.7047,
      "step": 500
    },
    {
      "epoch": 2.237136465324385,
      "grad_norm": 2.1150481700897217,
      "learning_rate": 4.25503355704698e-05,
      "loss": 1.3195,
      "step": 1000
    },
    {
      "epoch": 3.3557046979865772,
      "grad_norm": 1.773620843887329,
      "learning_rate": 3.882177479492916e-05,
      "loss": 1.1938,
      "step": 1500
    },
    {
      "epoch": 4.47427293064877,
      "grad_norm": 2.1066176891326904,
      "learning_rate": 3.509321401938851e-05,
      "loss": 1.1122,
      "step": 2000
    },
    {
      "epoch": 5.592841163310962,
      "grad_norm": 1.8713332414627075,
      "learning_rate": 3.1364653243847876e-05,
      "loss": 1.0513,
      "step": 2500
    },
    {
      "epoch": 6.7114093959731544,
      "grad_norm": 1.7295632362365723,
      "learning_rate": 2.7636092468307233e-05,
      "loss": 1.0046,
      "step": 3000
    },
    {
      "epoch": 7.829977628635347,
      "grad_norm": 1.7196978330612183,
      "learning_rate": 2.3907531692766593e-05,
      "loss": 0.9671,
      "step": 3500
    },
    {
      "epoch": 8.94854586129754,
      "grad_norm": 1.7857871055603027,
      "learning_rate": 2.017897091722595e-05,
      "loss": 0.9349,
      "step": 4000
    },
    {
      "epoch": 10.06711409395973,
      "grad_norm": 1.7896051406860352,
      "learning_rate": 1.645041014168531e-05,
      "loss": 0.9086,
      "step": 4500
    },
    {
      "epoch": 11.185682326621924,
      "grad_norm": 2.3136727809906006,
      "learning_rate": 1.272184936614467e-05,
      "loss": 0.8844,
      "step": 5000
    },
    {
      "epoch": 12.304250559284116,
      "grad_norm": 2.1995062828063965,
      "learning_rate": 8.993288590604027e-06,
      "loss": 0.877,
      "step": 5500
    },
    {
      "epoch": 13.422818791946309,
      "grad_norm": 1.8003933429718018,
      "learning_rate": 5.264727815063386e-06,
      "loss": 0.8575,
      "step": 6000
    },
    {
      "epoch": 14.5413870246085,
      "grad_norm": 1.9917985200881958,
      "learning_rate": 1.5361670395227442e-06,
      "loss": 0.8476,
      "step": 6500
    },
    {
      "epoch": 15.0,
      "step": 6705,
      "total_flos": 4785148574908416.0,
      "train_loss": 1.044801017653133,
      "train_runtime": 1030.3618,
      "train_samples_per_second": 52.059,
      "train_steps_per_second": 6.507
    }
  ],
  "logging_steps": 500,
  "max_steps": 6705,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4785148574908416.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}