{ "best_global_step": 252, "best_metric": 0.7365710735321045, "best_model_checkpoint": "./multitask_model2/checkpoint-252", "epoch": 12.0, "eval_steps": 500, "global_step": 252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7021834254264832, "learning_rate": 1.904761904761905e-05, "loss": 0.7665, "step": 21 }, { "epoch": 1.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.755448579788208, "eval_runtime": 0.4506, "eval_samples_per_second": 312.894, "eval_steps_per_second": 11.096, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.680282711982727, "step": 21 }, { "epoch": 2.0, "grad_norm": 0.5450627207756042, "learning_rate": 1.804761904761905e-05, "loss": 0.7636, "step": 42 }, { "epoch": 2.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.7531750202178955, "eval_runtime": 0.3837, "eval_samples_per_second": 367.514, "eval_steps_per_second": 13.032, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.678588330745697, "step": 42 }, { "epoch": 3.0, "grad_norm": 0.9124093651771545, "learning_rate": 1.704761904761905e-05, "loss": 0.7657, "step": 63 }, { "epoch": 3.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.7538319826126099, "eval_runtime": 0.4379, "eval_samples_per_second": 321.999, "eval_steps_per_second": 11.418, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.6760473847389221, "step": 63 }, { "epoch": 4.0, "grad_norm": 0.8770959377288818, "learning_rate": 1.604761904761905e-05, "loss": 0.7601, "step": 84 }, { "epoch": 4.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.7534122467041016, "eval_runtime": 0.4598, "eval_samples_per_second": 306.631, "eval_steps_per_second": 10.873, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.6773362159729004, "step": 84 }, { "epoch": 5.0, "grad_norm": 1.3157461881637573, "learning_rate": 1.5047619047619049e-05, "loss": 0.7586, "step": 105 }, { "epoch": 5.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.7500014305114746, "eval_runtime": 0.4435, "eval_samples_per_second": 317.923, "eval_steps_per_second": 11.274, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.6725690960884094, "step": 105 }, { "epoch": 6.0, "grad_norm": 1.533734679222107, "learning_rate": 1.4047619047619048e-05, "loss": 0.7467, "step": 126 }, { "epoch": 6.0, "eval_dis_accuracy": 0.6666666666666666, "eval_dis_f1": 0.14545454545454545, "eval_dis_precision": 0.6666666666666666, "eval_dis_recall": 0.08163265306122448, "eval_loss": 0.745786190032959, "eval_runtime": 0.466, "eval_samples_per_second": 302.556, "eval_steps_per_second": 10.729, "eval_target_accuracy": 0.5886524822695035, "eval_target_f1": 0.38603603603603603, "eval_target_precision": 0.46376811594202894, "eval_target_recall": 0.49686716791979946, "eval_target_soft_ce": 0.6714953184127808, "step": 126 }, { "epoch": 7.0, "grad_norm": 2.0812835693359375, "learning_rate": 1.304761904761905e-05, "loss": 0.7387, "step": 147 }, { "epoch": 7.0, "eval_dis_accuracy": 0.6524822695035462, "eval_dis_f1": 0.0, "eval_dis_precision": 0.0, "eval_dis_recall": 0.0, "eval_loss": 0.7480353116989136, "eval_runtime": 0.4471, "eval_samples_per_second": 315.367, "eval_steps_per_second": 11.183, "eval_target_accuracy": 0.5957446808510638, "eval_target_f1": 0.37333333333333335, "eval_target_precision": 0.2978723404255319, "eval_target_recall": 0.5, "eval_target_soft_ce": 0.6714373826980591, "step": 147 }, { "epoch": 8.0, "grad_norm": 4.809630870819092, "learning_rate": 1.2047619047619049e-05, "loss": 0.74, "step": 168 }, { "epoch": 8.0, "eval_dis_accuracy": 0.5177304964539007, "eval_dis_f1": 0.40350877192982454, "eval_dis_precision": 0.35384615384615387, "eval_dis_recall": 0.46938775510204084, "eval_loss": 0.7411231994628906, "eval_runtime": 0.4289, "eval_samples_per_second": 328.731, "eval_steps_per_second": 11.657, "eval_target_accuracy": 0.574468085106383, "eval_target_f1": 0.4683257918552036, "eval_target_precision": 0.5142857142857142, "eval_target_recall": 0.5075187969924811, "eval_target_soft_ce": 0.6614766120910645, "step": 168 }, { "epoch": 9.0, "grad_norm": 7.420032024383545, "learning_rate": 1.104761904761905e-05, "loss": 0.7288, "step": 189 }, { "epoch": 9.0, "eval_dis_accuracy": 0.6382978723404256, "eval_dis_f1": 0.2153846153846154, "eval_dis_precision": 0.4375, "eval_dis_recall": 0.14285714285714285, "eval_loss": 0.7409353852272034, "eval_runtime": 0.4589, "eval_samples_per_second": 307.224, "eval_steps_per_second": 10.894, "eval_target_accuracy": 0.5886524822695035, "eval_target_f1": 0.44892183288409704, "eval_target_precision": 0.5315504807692308, "eval_target_recall": 0.5109649122807017, "eval_target_soft_ce": 0.6628619432449341, "step": 189 }, { "epoch": 10.0, "grad_norm": 3.0795819759368896, "learning_rate": 1.0047619047619048e-05, "loss": 0.7314, "step": 210 }, { "epoch": 10.0, "eval_dis_accuracy": 0.5886524822695035, "eval_dis_f1": 0.21621621621621623, "eval_dis_precision": 0.32, "eval_dis_recall": 0.16326530612244897, "eval_loss": 0.743432879447937, "eval_runtime": 0.429, "eval_samples_per_second": 328.682, "eval_steps_per_second": 11.655, "eval_target_accuracy": 0.5886524822695035, "eval_target_f1": 0.48604826546003016, "eval_target_precision": 0.5422619047619047, "eval_target_recall": 0.5222431077694236, "eval_target_soft_ce": 0.6649101972579956, "step": 210 }, { "epoch": 11.0, "grad_norm": 5.447195053100586, "learning_rate": 9.047619047619049e-06, "loss": 0.7192, "step": 231 }, { "epoch": 11.0, "eval_dis_accuracy": 0.6382978723404256, "eval_dis_f1": 0.23880597014925373, "eval_dis_precision": 0.4444444444444444, "eval_dis_recall": 0.16326530612244897, "eval_loss": 0.7414493560791016, "eval_runtime": 0.4442, "eval_samples_per_second": 317.394, "eval_steps_per_second": 11.255, "eval_target_accuracy": 0.6099290780141844, "eval_target_f1": 0.4723412941416616, "eval_target_precision": 0.5978682170542635, "eval_target_recall": 0.531641604010025, "eval_target_soft_ce": 0.6644006371498108, "step": 231 }, { "epoch": 12.0, "grad_norm": 2.391383647918701, "learning_rate": 8.047619047619048e-06, "loss": 0.7218, "step": 252 }, { "epoch": 12.0, "eval_dis_accuracy": 0.5957446808510638, "eval_dis_f1": 0.27848101265822783, "eval_dis_precision": 0.36666666666666664, "eval_dis_recall": 0.22448979591836735, "eval_loss": 0.7365710735321045, "eval_runtime": 0.4606, "eval_samples_per_second": 306.148, "eval_steps_per_second": 10.856, "eval_target_accuracy": 0.6028368794326241, "eval_target_f1": 0.487006237006237, "eval_target_precision": 0.571157495256167, "eval_target_recall": 0.531328320802005, "eval_target_soft_ce": 0.6578279137611389, "step": 252 } ], "logging_steps": 500, "max_steps": 420, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }