{ "best_global_step": 328, "best_metric": 0.6435705423355103, "best_model_checkpoint": "./britllm_cefr_welsh_allData_DA/fold_3/checkpoint-328", "epoch": 4.0, "eval_steps": 500, "global_step": 328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 288.0, "learning_rate": 3.0142372881355933e-05, "loss": 23.3143, "step": 82 }, { "epoch": 1.0, "eval_A1_f1": 0.7387755102040816, "eval_A1_precision": 0.6094276094276094, "eval_A1_recall": 0.9378238341968912, "eval_A2_f1": 0.6645569620253164, "eval_A2_precision": 0.6104651162790697, "eval_A2_recall": 0.7291666666666666, "eval_B1_f1": 0.5294117647058824, "eval_B1_precision": 0.6101694915254238, "eval_B1_recall": 0.4675324675324675, "eval_B2_f1": 0.6, "eval_B2_precision": 0.8780487804878049, "eval_B2_recall": 0.45569620253164556, "eval_accuracy": 0.6605222734254992, "eval_loss": 1.0833252668380737, "eval_runtime": 10.1627, "eval_samples_per_second": 64.058, "eval_steps_per_second": 21.353, "eval_weighted_f1": 0.6470730904506701, "eval_weighted_precision": 0.70753781441902, "eval_weighted_recall": 0.6605222734254992, "step": 82 }, { "epoch": 2.0, "grad_norm": 44.5, "learning_rate": 2.013559322033898e-05, "loss": 12.8134, "step": 164 }, { "epoch": 2.0, "eval_A1_f1": 0.8218390804597702, "eval_A1_precision": 0.9225806451612903, "eval_A1_recall": 0.7409326424870466, "eval_A2_f1": 0.7515527950310559, "eval_A2_precision": 0.6797752808988764, "eval_A2_recall": 0.8402777777777778, "eval_B1_f1": 0.5970149253731343, "eval_B1_precision": 0.7017543859649122, "eval_B1_recall": 0.5194805194805194, "eval_B2_f1": 0.8152610441767069, "eval_B2_precision": 0.7777777777777778, "eval_B2_recall": 0.8565400843881856, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.8353127241134644, "eval_runtime": 9.9698, "eval_samples_per_second": 65.297, "eval_steps_per_second": 21.766, "eval_weighted_f1": 0.7773050103484156, "eval_weighted_precision": 0.7900370599357893, "eval_weighted_recall": 0.7788018433179723, "step": 164 }, { "epoch": 3.0, "grad_norm": 120.0, "learning_rate": 1.0128813559322034e-05, "loss": 7.4136, "step": 246 }, { "epoch": 3.0, "eval_A1_f1": 0.9072681704260651, "eval_A1_precision": 0.8786407766990292, "eval_A1_recall": 0.9378238341968912, "eval_A2_f1": 0.8243243243243243, "eval_A2_precision": 0.8026315789473685, "eval_A2_recall": 0.8472222222222222, "eval_B1_f1": 0.7913669064748201, "eval_B1_precision": 0.8870967741935484, "eval_B1_recall": 0.7142857142857143, "eval_B2_f1": 0.8547008547008547, "eval_B2_precision": 0.8658008658008658, "eval_B2_recall": 0.8438818565400844, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.6585471034049988, "eval_runtime": 9.9471, "eval_samples_per_second": 65.446, "eval_steps_per_second": 21.815, "eval_weighted_f1": 0.8560749830377833, "eval_weighted_precision": 0.8581534164040586, "eval_weighted_recall": 0.8571428571428571, "step": 246 }, { "epoch": 4.0, "grad_norm": 11.0, "learning_rate": 1.2203389830508474e-07, "loss": 5.9978, "step": 328 }, { "epoch": 4.0, "eval_A1_f1": 0.9090909090909091, "eval_A1_precision": 0.8866995073891626, "eval_A1_recall": 0.9326424870466321, "eval_A2_f1": 0.8178694158075601, "eval_A2_precision": 0.8095238095238095, "eval_A2_recall": 0.8263888888888888, "eval_B1_f1": 0.8, "eval_B1_precision": 0.8529411764705882, "eval_B1_recall": 0.7532467532467533, "eval_B2_f1": 0.8595744680851064, "eval_B2_precision": 0.8669527896995708, "eval_B2_recall": 0.8523206751054853, "eval_accuracy": 0.858678955453149, "eval_loss": 0.6435705423355103, "eval_runtime": 10.0772, "eval_samples_per_second": 64.602, "eval_steps_per_second": 21.534, "eval_weighted_f1": 0.8579829343579175, "eval_weighted_precision": 0.8584465671959608, "eval_weighted_recall": 0.858678955453149, "step": 328 }, { "epoch": 4.0, "step": 328, "total_flos": 5835085400309760.0, "train_loss": 12.384773626560118, "train_runtime": 1108.4363, "train_samples_per_second": 9.397, "train_steps_per_second": 0.296 }, { "epoch": 4.0, "eval_A1_f1": 0.9090909090909091, "eval_A1_precision": 0.8866995073891626, "eval_A1_recall": 0.9326424870466321, "eval_A2_f1": 0.8178694158075601, "eval_A2_precision": 0.8095238095238095, "eval_A2_recall": 0.8263888888888888, "eval_B1_f1": 0.8, "eval_B1_precision": 0.8529411764705882, "eval_B1_recall": 0.7532467532467533, "eval_B2_f1": 0.8595744680851064, "eval_B2_precision": 0.8669527896995708, "eval_B2_recall": 0.8523206751054853, "eval_accuracy": 0.858678955453149, "eval_loss": 0.6436282992362976, "eval_runtime": 10.0424, "eval_samples_per_second": 64.825, "eval_steps_per_second": 21.608, "eval_weighted_f1": 0.8579829343579175, "eval_weighted_precision": 0.8584465671959608, "eval_weighted_recall": 0.858678955453149, "step": 328 } ], "logging_steps": 500, "max_steps": 328, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5835085400309760.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }