{ "best_global_step": 600, "best_metric": 0.9695240197651653, "best_model_checkpoint": "taskA-codebert-base-focal/checkpoint-600", "epoch": 0.14680694886224616, "eval_steps": 200, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002446782481037436, "grad_norm": 1.0, "learning_rate": 3.6734693877551025e-07, "loss": 1.1949, "step": 10 }, { "epoch": 0.004893564962074872, "grad_norm": 0.9999998807907104, "learning_rate": 7.755102040816327e-07, "loss": 1.1644, "step": 20 }, { "epoch": 0.007340347443112307, "grad_norm": 0.9999999403953552, "learning_rate": 1.1836734693877552e-06, "loss": 0.96, "step": 30 }, { "epoch": 0.009787129924149743, "grad_norm": 1.0, "learning_rate": 1.5918367346938775e-06, "loss": 0.7557, "step": 40 }, { "epoch": 0.012233912405187179, "grad_norm": 1.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.5786, "step": 50 }, { "epoch": 0.014680694886224614, "grad_norm": 0.9999999403953552, "learning_rate": 2.4081632653061225e-06, "loss": 0.4813, "step": 60 }, { "epoch": 0.01712747736726205, "grad_norm": 0.9999999403953552, "learning_rate": 2.816326530612245e-06, "loss": 0.4477, "step": 70 }, { "epoch": 0.019574259848299486, "grad_norm": 1.0, "learning_rate": 3.2244897959183672e-06, "loss": 0.4254, "step": 80 }, { "epoch": 0.022021042329336923, "grad_norm": 1.0, "learning_rate": 3.6326530612244903e-06, "loss": 0.4017, "step": 90 }, { "epoch": 0.024467824810374357, "grad_norm": 0.9999999403953552, "learning_rate": 4.040816326530612e-06, "loss": 0.379, "step": 100 }, { "epoch": 0.026914607291411794, "grad_norm": 1.0000001192092896, "learning_rate": 4.448979591836735e-06, "loss": 0.3749, "step": 110 }, { "epoch": 0.029361389772449228, "grad_norm": 1.0, "learning_rate": 4.857142857142858e-06, "loss": 0.3536, "step": 120 }, { "epoch": 0.031808172253486665, "grad_norm": 0.9999999403953552, "learning_rate": 5.26530612244898e-06, "loss": 0.336, "step": 130 }, { "epoch": 0.0342549547345241, "grad_norm": 1.0000001192092896, "learning_rate": 5.673469387755103e-06, "loss": 0.3261, "step": 140 }, { "epoch": 0.03670173721556154, "grad_norm": 0.9999999403953552, "learning_rate": 6.0816326530612245e-06, "loss": 0.3134, "step": 150 }, { "epoch": 0.03914851969659897, "grad_norm": 1.0, "learning_rate": 6.489795918367348e-06, "loss": 0.308, "step": 160 }, { "epoch": 0.041595302177636406, "grad_norm": 1.0, "learning_rate": 6.8979591836734705e-06, "loss": 0.2961, "step": 170 }, { "epoch": 0.04404208465867385, "grad_norm": 1.0, "learning_rate": 7.306122448979592e-06, "loss": 0.2998, "step": 180 }, { "epoch": 0.04648886713971128, "grad_norm": 1.0, "learning_rate": 7.714285714285716e-06, "loss": 0.2895, "step": 190 }, { "epoch": 0.048935649620748714, "grad_norm": 1.0, "learning_rate": 8.122448979591837e-06, "loss": 0.2773, "step": 200 }, { "epoch": 0.048935649620748714, "eval_accuracy": 0.81444, "eval_f1_weighted": 0.8120194753530078, "eval_loss": 0.10672978311777115, "eval_macro_f1": 0.8128217919062601, "eval_precision": 0.8426688032270206, "eval_recall": 0.81444, "eval_runtime": 1609.4299, "eval_samples_per_second": 62.134, "eval_steps_per_second": 0.243, "step": 200 }, { "epoch": 0.051382432101786155, "grad_norm": 1.0, "learning_rate": 8.530612244897961e-06, "loss": 0.2658, "step": 210 }, { "epoch": 0.05382921458282359, "grad_norm": 1.0000001192092896, "learning_rate": 8.938775510204082e-06, "loss": 0.2574, "step": 220 }, { "epoch": 0.05627599706386102, "grad_norm": 1.0, "learning_rate": 9.346938775510204e-06, "loss": 0.2633, "step": 230 }, { "epoch": 0.058722779544898455, "grad_norm": 1.0, "learning_rate": 9.755102040816327e-06, "loss": 0.2503, "step": 240 }, { "epoch": 0.061169562025935896, "grad_norm": 1.0, "learning_rate": 1.016326530612245e-05, "loss": 0.2359, "step": 250 }, { "epoch": 0.06361634450697333, "grad_norm": 0.9999999403953552, "learning_rate": 1.0571428571428572e-05, "loss": 0.2327, "step": 260 }, { "epoch": 0.06606312698801077, "grad_norm": 1.0, "learning_rate": 1.0979591836734695e-05, "loss": 0.2305, "step": 270 }, { "epoch": 0.0685099094690482, "grad_norm": 1.0, "learning_rate": 1.1387755102040819e-05, "loss": 0.2284, "step": 280 }, { "epoch": 0.07095669195008564, "grad_norm": 0.9999999403953552, "learning_rate": 1.179591836734694e-05, "loss": 0.226, "step": 290 }, { "epoch": 0.07340347443112308, "grad_norm": 0.9999999403953552, "learning_rate": 1.2204081632653062e-05, "loss": 0.2225, "step": 300 }, { "epoch": 0.0758502569121605, "grad_norm": 1.0, "learning_rate": 1.2612244897959185e-05, "loss": 0.2156, "step": 310 }, { "epoch": 0.07829703939319795, "grad_norm": 1.0, "learning_rate": 1.3020408163265308e-05, "loss": 0.2033, "step": 320 }, { "epoch": 0.08074382187423539, "grad_norm": 0.9999999403953552, "learning_rate": 1.3428571428571429e-05, "loss": 0.2021, "step": 330 }, { "epoch": 0.08319060435527281, "grad_norm": 1.0, "learning_rate": 1.3836734693877551e-05, "loss": 0.1902, "step": 340 }, { "epoch": 0.08563738683631025, "grad_norm": 1.0, "learning_rate": 1.4244897959183674e-05, "loss": 0.1975, "step": 350 }, { "epoch": 0.0880841693173477, "grad_norm": 1.0, "learning_rate": 1.4653061224489798e-05, "loss": 0.1921, "step": 360 }, { "epoch": 0.09053095179838512, "grad_norm": 0.9999999403953552, "learning_rate": 1.506122448979592e-05, "loss": 0.2039, "step": 370 }, { "epoch": 0.09297773427942256, "grad_norm": 1.0000001192092896, "learning_rate": 1.546938775510204e-05, "loss": 0.202, "step": 380 }, { "epoch": 0.09542451676046, "grad_norm": 0.9999999403953552, "learning_rate": 1.5877551020408162e-05, "loss": 0.1885, "step": 390 }, { "epoch": 0.09787129924149743, "grad_norm": 0.9999998807907104, "learning_rate": 1.6285714285714287e-05, "loss": 0.1818, "step": 400 }, { "epoch": 0.09787129924149743, "eval_accuracy": 0.9495, "eval_f1_weighted": 0.9495267406510758, "eval_loss": 0.039080820977687836, "eval_macro_f1": 0.9494696691689186, "eval_precision": 0.9504394545408126, "eval_recall": 0.9495, "eval_runtime": 1609.6857, "eval_samples_per_second": 62.124, "eval_steps_per_second": 0.243, "step": 400 }, { "epoch": 0.10031808172253487, "grad_norm": 1.0, "learning_rate": 1.669387755102041e-05, "loss": 0.1813, "step": 410 }, { "epoch": 0.10276486420357231, "grad_norm": 0.9999999403953552, "learning_rate": 1.7102040816326532e-05, "loss": 0.1758, "step": 420 }, { "epoch": 0.10521164668460974, "grad_norm": 1.0000001192092896, "learning_rate": 1.7510204081632653e-05, "loss": 0.1778, "step": 430 }, { "epoch": 0.10765842916564718, "grad_norm": 0.9999998807907104, "learning_rate": 1.7918367346938777e-05, "loss": 0.1681, "step": 440 }, { "epoch": 0.1101052116466846, "grad_norm": 0.9999998807907104, "learning_rate": 1.8326530612244898e-05, "loss": 0.1715, "step": 450 }, { "epoch": 0.11255199412772204, "grad_norm": 1.0, "learning_rate": 1.8734693877551022e-05, "loss": 0.1773, "step": 460 }, { "epoch": 0.11499877660875948, "grad_norm": 1.0, "learning_rate": 1.9142857142857146e-05, "loss": 0.1859, "step": 470 }, { "epoch": 0.11744555908979691, "grad_norm": 1.0, "learning_rate": 1.9551020408163267e-05, "loss": 0.173, "step": 480 }, { "epoch": 0.11989234157083435, "grad_norm": 1.0, "learning_rate": 1.9959183673469388e-05, "loss": 0.1753, "step": 490 }, { "epoch": 0.12233912405187179, "grad_norm": 0.9999999403953552, "learning_rate": 1.9998764266160687e-05, "loss": 0.1712, "step": 500 }, { "epoch": 0.12478590653290922, "grad_norm": 1.0, "learning_rate": 1.9994492985725524e-05, "loss": 0.1644, "step": 510 }, { "epoch": 0.12723268901394666, "grad_norm": 0.9999998807907104, "learning_rate": 1.9987172205655365e-05, "loss": 0.1585, "step": 520 }, { "epoch": 0.1296794714949841, "grad_norm": 1.0000001192092896, "learning_rate": 1.9976804159651927e-05, "loss": 0.1625, "step": 530 }, { "epoch": 0.13212625397602154, "grad_norm": 1.0, "learning_rate": 1.9963392011192586e-05, "loss": 0.1656, "step": 540 }, { "epoch": 0.13457303645705898, "grad_norm": 0.9999999403953552, "learning_rate": 1.994693985256516e-05, "loss": 0.1528, "step": 550 }, { "epoch": 0.1370198189380964, "grad_norm": 0.9999999403953552, "learning_rate": 1.9927452703619262e-05, "loss": 0.1547, "step": 560 }, { "epoch": 0.13946660141913383, "grad_norm": 1.0, "learning_rate": 1.9904936510234648e-05, "loss": 0.1685, "step": 570 }, { "epoch": 0.14191338390017127, "grad_norm": 0.9999999403953552, "learning_rate": 1.987939814250705e-05, "loss": 0.1568, "step": 580 }, { "epoch": 0.14436016638120872, "grad_norm": 1.0, "learning_rate": 1.985084539265195e-05, "loss": 0.1589, "step": 590 }, { "epoch": 0.14680694886224616, "grad_norm": 1.0, "learning_rate": 1.9819286972627066e-05, "loss": 0.1617, "step": 600 }, { "epoch": 0.14680694886224616, "eval_accuracy": 0.96956, "eval_f1_weighted": 0.9695722935959534, "eval_loss": 0.02569369599223137, "eval_macro_f1": 0.9695240197651653, "eval_precision": 0.9698519718113257, "eval_recall": 0.96956, "eval_runtime": 1608.5431, "eval_samples_per_second": 62.168, "eval_steps_per_second": 0.243, "step": 600 } ], "logging_steps": 10, "max_steps": 4087, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0299639947264e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }