| { | |
| "best_metric": 2.3113090991973877, | |
| "best_model_checkpoint": "epochmetrics/task-embedder/checkpoint-1245", | |
| "epoch": 15.0, | |
| "eval_steps": 1, | |
| "global_step": 1245, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 6.888462066650391, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 5.6652, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.36489910020092603, | |
| "eval_loss": 4.285048484802246, | |
| "eval_runtime": 9.4538, | |
| "eval_samples_per_second": 17.348, | |
| "eval_steps_per_second": 2.221, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 7.599656581878662, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 3.9443, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.4281380027739251, | |
| "eval_loss": 3.5406606197357178, | |
| "eval_runtime": 9.4735, | |
| "eval_samples_per_second": 17.311, | |
| "eval_steps_per_second": 2.217, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 7.9922637939453125, | |
| "learning_rate": 4e-05, | |
| "loss": 3.3575, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.4709654748308882, | |
| "eval_loss": 3.109172821044922, | |
| "eval_runtime": 10.2035, | |
| "eval_samples_per_second": 16.073, | |
| "eval_steps_per_second": 2.058, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 7.56485652923584, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 3.084, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.4962124510230736, | |
| "eval_loss": 2.8743200302124023, | |
| "eval_runtime": 9.3602, | |
| "eval_samples_per_second": 17.521, | |
| "eval_steps_per_second": 2.244, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 7.547119617462158, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 2.8764, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5210972307154713, | |
| "eval_loss": 2.7019972801208496, | |
| "eval_runtime": 10.277, | |
| "eval_samples_per_second": 15.958, | |
| "eval_steps_per_second": 2.043, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 6.975924968719482, | |
| "learning_rate": 3e-05, | |
| "loss": 2.7367, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5187872505830526, | |
| "eval_loss": 2.669877767562866, | |
| "eval_runtime": 3.8263, | |
| "eval_samples_per_second": 42.861, | |
| "eval_steps_per_second": 5.488, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 7.427117347717285, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 2.6275, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.5403870967741935, | |
| "eval_loss": 2.5638315677642822, | |
| "eval_runtime": 3.776, | |
| "eval_samples_per_second": 43.433, | |
| "eval_steps_per_second": 5.562, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 7.4915266036987305, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 2.5257, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5430055462628752, | |
| "eval_loss": 2.5348384380340576, | |
| "eval_runtime": 3.924, | |
| "eval_samples_per_second": 41.794, | |
| "eval_steps_per_second": 5.352, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 7.47868013381958, | |
| "learning_rate": 2e-05, | |
| "loss": 2.4742, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5590811583839829, | |
| "eval_loss": 2.4301819801330566, | |
| "eval_runtime": 3.7824, | |
| "eval_samples_per_second": 43.359, | |
| "eval_steps_per_second": 5.552, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 7.228312015533447, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 2.4238, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5576721426074799, | |
| "eval_loss": 2.4159433841705322, | |
| "eval_runtime": 3.7919, | |
| "eval_samples_per_second": 43.251, | |
| "eval_steps_per_second": 5.538, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 7.564913272857666, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 2.3516, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5740578439964943, | |
| "eval_loss": 2.3461461067199707, | |
| "eval_runtime": 3.8232, | |
| "eval_samples_per_second": 42.896, | |
| "eval_steps_per_second": 5.493, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 7.104005336761475, | |
| "learning_rate": 1e-05, | |
| "loss": 2.3115, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.572778166550035, | |
| "eval_loss": 2.329103469848633, | |
| "eval_runtime": 3.8201, | |
| "eval_samples_per_second": 42.93, | |
| "eval_steps_per_second": 5.497, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 7.211333751678467, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2.29, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5698073370282396, | |
| "eval_loss": 2.3577311038970947, | |
| "eval_runtime": 3.8954, | |
| "eval_samples_per_second": 42.101, | |
| "eval_steps_per_second": 5.391, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 7.1609063148498535, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 2.2412, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5673802421477452, | |
| "eval_loss": 2.347292423248291, | |
| "eval_runtime": 3.8169, | |
| "eval_samples_per_second": 42.967, | |
| "eval_steps_per_second": 5.502, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 6.575444221496582, | |
| "learning_rate": 0.0, | |
| "loss": 2.245, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.5719677022994558, | |
| "eval_loss": 2.3113090991973877, | |
| "eval_runtime": 3.8203, | |
| "eval_samples_per_second": 42.928, | |
| "eval_steps_per_second": 5.497, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "step": 1245, | |
| "total_flos": 2605727798784000.0, | |
| "train_loss": 2.876972158654148, | |
| "train_runtime": 3151.4946, | |
| "train_samples_per_second": 3.141, | |
| "train_steps_per_second": 0.395 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1245, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 1, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2605727798784000.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |