{ "best_global_step": 200, "best_metric": 1.0, "best_model_checkpoint": "./models/punctuation/encoder_model/checkpoint-200", "epoch": 1.8752941176470588, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09411764705882353, "grad_norm": 0.36188849806785583, "learning_rate": 8.181818181818183e-05, "loss": 1.1219, "step": 10 }, { "epoch": 0.18823529411764706, "grad_norm": 0.2508435845375061, "learning_rate": 0.00017272727272727275, "loss": 0.9281, "step": 20 }, { "epoch": 0.2823529411764706, "grad_norm": 0.2157556265592575, "learning_rate": 0.00019934477790194445, "loss": 0.852, "step": 30 }, { "epoch": 0.3764705882352941, "grad_norm": 0.25683870911598206, "learning_rate": 0.0001961561797682962, "loss": 0.844, "step": 40 }, { "epoch": 0.47058823529411764, "grad_norm": 0.26627957820892334, "learning_rate": 0.00019039892931234435, "loss": 0.8168, "step": 50 }, { "epoch": 0.47058823529411764, "eval_loss": 0.8057724237442017, "eval_model_preparation_time": 0.0129, "eval_runtime": 3.824, "eval_samples_per_second": 26.15, "eval_steps_per_second": 13.075, "step": 50 }, { "epoch": 0.5647058823529412, "grad_norm": 0.228170707821846, "learning_rate": 0.00018222682189897752, "loss": 0.8023, "step": 60 }, { "epoch": 0.6588235294117647, "grad_norm": 0.26517826318740845, "learning_rate": 0.0001718581617779698, "loss": 0.7858, "step": 70 }, { "epoch": 0.7529411764705882, "grad_norm": 0.22760385274887085, "learning_rate": 0.00015956993044924334, "loss": 0.7632, "step": 80 }, { "epoch": 0.8470588235294118, "grad_norm": 0.2228352576494217, "learning_rate": 0.00014569038756304207, "loss": 0.7788, "step": 90 }, { "epoch": 0.9411764705882353, "grad_norm": 0.25218382477760315, "learning_rate": 0.00013059030200965536, "loss": 0.7637, "step": 100 }, { "epoch": 0.9411764705882353, "eval_loss": 0.753893256187439, "eval_model_preparation_time": 0.0129, "eval_runtime": 3.8121, "eval_samples_per_second": 26.232, "eval_steps_per_second": 13.116, "step": 100 }, { "epoch": 1.0282352941176471, "grad_norm": 0.23036813735961914, "learning_rate": 0.00011467304744553618, "loss": 0.7508, "step": 110 }, { "epoch": 1.1223529411764706, "grad_norm": 0.25190669298171997, "learning_rate": 9.836382683735132e-05, "loss": 0.6933, "step": 120 }, { "epoch": 1.2164705882352942, "grad_norm": 0.27167296409606934, "learning_rate": 8.209831387233676e-05, "loss": 0.6799, "step": 130 }, { "epoch": 1.3105882352941176, "grad_norm": 0.29528751969337463, "learning_rate": 6.6311014660778e-05, "loss": 0.6377, "step": 140 }, { "epoch": 1.4047058823529412, "grad_norm": 0.2657102346420288, "learning_rate": 5.142366062836599e-05, "loss": 0.6636, "step": 150 }, { "epoch": 1.4047058823529412, "eval_loss": 0.7372229099273682, "eval_model_preparation_time": 0.0129, "eval_runtime": 3.8052, "eval_samples_per_second": 26.28, "eval_steps_per_second": 13.14, "step": 150 }, { "epoch": 1.4988235294117647, "grad_norm": 0.27975210547447205, "learning_rate": 3.783394266299228e-05, "loss": 0.6498, "step": 160 }, { "epoch": 1.592941176470588, "grad_norm": 0.2573317587375641, "learning_rate": 2.5904887464504114e-05, "loss": 0.6343, "step": 170 }, { "epoch": 1.6870588235294117, "grad_norm": 0.2996138036251068, "learning_rate": 1.595515989055618e-05, "loss": 0.6505, "step": 180 }, { "epoch": 1.7811764705882354, "grad_norm": 0.2739143669605255, "learning_rate": 8.250550355250875e-06, "loss": 0.6494, "step": 190 }, { "epoch": 1.8752941176470588, "grad_norm": 0.3023104965686798, "learning_rate": 2.996874680545603e-06, "loss": 0.6241, "step": 200 }, { "epoch": 1.8752941176470588, "eval_loss": 0.7298660278320312, "eval_model_preparation_time": 0.0129, "eval_runtime": 3.8044, "eval_samples_per_second": 26.286, "eval_steps_per_second": 13.143, "step": 200 } ], "logging_steps": 10, "max_steps": 214, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.9025710777769984e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }