{ "best_metric": 2.3113090991973877, "best_model_checkpoint": "epochmetrics/task-embedder/checkpoint-1245", "epoch": 15.0, "eval_steps": 1, "global_step": 1245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.888462066650391, "learning_rate": 4.666666666666667e-05, "loss": 5.6652, "step": 83 }, { "epoch": 1.0, "eval_accuracy": 0.36489910020092603, "eval_loss": 4.285048484802246, "eval_runtime": 9.4538, "eval_samples_per_second": 17.348, "eval_steps_per_second": 2.221, "step": 83 }, { "epoch": 2.0, "grad_norm": 7.599656581878662, "learning_rate": 4.3333333333333334e-05, "loss": 3.9443, "step": 166 }, { "epoch": 2.0, "eval_accuracy": 0.4281380027739251, "eval_loss": 3.5406606197357178, "eval_runtime": 9.4735, "eval_samples_per_second": 17.311, "eval_steps_per_second": 2.217, "step": 166 }, { "epoch": 3.0, "grad_norm": 7.9922637939453125, "learning_rate": 4e-05, "loss": 3.3575, "step": 249 }, { "epoch": 3.0, "eval_accuracy": 0.4709654748308882, "eval_loss": 3.109172821044922, "eval_runtime": 10.2035, "eval_samples_per_second": 16.073, "eval_steps_per_second": 2.058, "step": 249 }, { "epoch": 4.0, "grad_norm": 7.56485652923584, "learning_rate": 3.6666666666666666e-05, "loss": 3.084, "step": 332 }, { "epoch": 4.0, "eval_accuracy": 0.4962124510230736, "eval_loss": 2.8743200302124023, "eval_runtime": 9.3602, "eval_samples_per_second": 17.521, "eval_steps_per_second": 2.244, "step": 332 }, { "epoch": 5.0, "grad_norm": 7.547119617462158, "learning_rate": 3.3333333333333335e-05, "loss": 2.8764, "step": 415 }, { "epoch": 5.0, "eval_accuracy": 0.5210972307154713, "eval_loss": 2.7019972801208496, "eval_runtime": 10.277, "eval_samples_per_second": 15.958, "eval_steps_per_second": 2.043, "step": 415 }, { "epoch": 6.0, "grad_norm": 6.975924968719482, "learning_rate": 3e-05, "loss": 2.7367, "step": 498 }, { "epoch": 6.0, "eval_accuracy": 0.5187872505830526, "eval_loss": 2.669877767562866, "eval_runtime": 3.8263, "eval_samples_per_second": 42.861, "eval_steps_per_second": 5.488, "step": 498 }, { "epoch": 7.0, "grad_norm": 7.427117347717285, "learning_rate": 2.6666666666666667e-05, "loss": 2.6275, "step": 581 }, { "epoch": 7.0, "eval_accuracy": 0.5403870967741935, "eval_loss": 2.5638315677642822, "eval_runtime": 3.776, "eval_samples_per_second": 43.433, "eval_steps_per_second": 5.562, "step": 581 }, { "epoch": 8.0, "grad_norm": 7.4915266036987305, "learning_rate": 2.3333333333333336e-05, "loss": 2.5257, "step": 664 }, { "epoch": 8.0, "eval_accuracy": 0.5430055462628752, "eval_loss": 2.5348384380340576, "eval_runtime": 3.924, "eval_samples_per_second": 41.794, "eval_steps_per_second": 5.352, "step": 664 }, { "epoch": 9.0, "grad_norm": 7.47868013381958, "learning_rate": 2e-05, "loss": 2.4742, "step": 747 }, { "epoch": 9.0, "eval_accuracy": 0.5590811583839829, "eval_loss": 2.4301819801330566, "eval_runtime": 3.7824, "eval_samples_per_second": 43.359, "eval_steps_per_second": 5.552, "step": 747 }, { "epoch": 10.0, "grad_norm": 7.228312015533447, "learning_rate": 1.6666666666666667e-05, "loss": 2.4238, "step": 830 }, { "epoch": 10.0, "eval_accuracy": 0.5576721426074799, "eval_loss": 2.4159433841705322, "eval_runtime": 3.7919, "eval_samples_per_second": 43.251, "eval_steps_per_second": 5.538, "step": 830 }, { "epoch": 11.0, "grad_norm": 7.564913272857666, "learning_rate": 1.3333333333333333e-05, "loss": 2.3516, "step": 913 }, { "epoch": 11.0, "eval_accuracy": 0.5740578439964943, "eval_loss": 2.3461461067199707, "eval_runtime": 3.8232, "eval_samples_per_second": 42.896, "eval_steps_per_second": 5.493, "step": 913 }, { "epoch": 12.0, "grad_norm": 7.104005336761475, "learning_rate": 1e-05, "loss": 2.3115, "step": 996 }, { "epoch": 12.0, "eval_accuracy": 0.572778166550035, "eval_loss": 2.329103469848633, "eval_runtime": 3.8201, "eval_samples_per_second": 42.93, "eval_steps_per_second": 5.497, "step": 996 }, { "epoch": 13.0, "grad_norm": 7.211333751678467, "learning_rate": 6.666666666666667e-06, "loss": 2.29, "step": 1079 }, { "epoch": 13.0, "eval_accuracy": 0.5698073370282396, "eval_loss": 2.3577311038970947, "eval_runtime": 3.8954, "eval_samples_per_second": 42.101, "eval_steps_per_second": 5.391, "step": 1079 }, { "epoch": 14.0, "grad_norm": 7.1609063148498535, "learning_rate": 3.3333333333333333e-06, "loss": 2.2412, "step": 1162 }, { "epoch": 14.0, "eval_accuracy": 0.5673802421477452, "eval_loss": 2.347292423248291, "eval_runtime": 3.8169, "eval_samples_per_second": 42.967, "eval_steps_per_second": 5.502, "step": 1162 }, { "epoch": 15.0, "grad_norm": 6.575444221496582, "learning_rate": 0.0, "loss": 2.245, "step": 1245 }, { "epoch": 15.0, "eval_accuracy": 0.5719677022994558, "eval_loss": 2.3113090991973877, "eval_runtime": 3.8203, "eval_samples_per_second": 42.928, "eval_steps_per_second": 5.497, "step": 1245 }, { "epoch": 15.0, "step": 1245, "total_flos": 2605727798784000.0, "train_loss": 2.876972158654148, "train_runtime": 3151.4946, "train_samples_per_second": 3.141, "train_steps_per_second": 0.395 } ], "logging_steps": 1, "max_steps": 1245, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 1, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2605727798784000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }