{ "best_global_step": 3654, "best_metric": 0.25175856147050574, "best_model_checkpoint": "outputs/textcnn/checkpoint-3654", "epoch": 42.0, "eval_steps": 500, "global_step": 3654, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.989197731018066, "learning_rate": 3.44e-06, "loss": 2.0625, "step": 87 }, { "epoch": 1.0, "eval_accuracy": 0.31486880466472306, "eval_loss": 1.9201620817184448, "eval_macro_f1": 0.08069587306875443, "eval_runtime": 0.0822, "eval_samples_per_second": 8343.34, "eval_steps_per_second": 133.785, "step": 87 }, { "epoch": 2.0, "grad_norm": 4.812881946563721, "learning_rate": 6.92e-06, "loss": 2.0517, "step": 174 }, { "epoch": 2.0, "eval_accuracy": 0.3163265306122449, "eval_loss": 1.867976188659668, "eval_macro_f1": 0.08271289631865439, "eval_runtime": 0.0798, "eval_samples_per_second": 8598.327, "eval_steps_per_second": 137.874, "step": 174 }, { "epoch": 3.0, "grad_norm": 4.3668341636657715, "learning_rate": 1.04e-05, "loss": 1.9731, "step": 261 }, { "epoch": 3.0, "eval_accuracy": 0.31924198250728864, "eval_loss": 1.8050793409347534, "eval_macro_f1": 0.09255094257382587, "eval_runtime": 0.0765, "eval_samples_per_second": 8966.769, "eval_steps_per_second": 143.782, "step": 261 }, { "epoch": 4.0, "grad_norm": 4.521817207336426, "learning_rate": 1.3880000000000001e-05, "loss": 1.9355, "step": 348 }, { "epoch": 4.0, "eval_accuracy": 0.3206997084548105, "eval_loss": 1.760697841644287, "eval_macro_f1": 0.09828636171767466, "eval_runtime": 0.0776, "eval_samples_per_second": 8839.5, "eval_steps_per_second": 141.741, "step": 348 }, { "epoch": 5.0, "grad_norm": 4.453028678894043, "learning_rate": 1.736e-05, "loss": 1.9005, "step": 435 }, { "epoch": 5.0, "eval_accuracy": 0.32653061224489793, "eval_loss": 1.7365907430648804, "eval_macro_f1": 0.11368274326806725, "eval_runtime": 0.0784, "eval_samples_per_second": 8750.81, "eval_steps_per_second": 140.319, "step": 435 }, { "epoch": 6.0, "grad_norm": 4.236560344696045, "learning_rate": 1.999967634800249e-05, "loss": 1.8734, "step": 522 }, { "epoch": 6.0, "eval_accuracy": 0.33527696793002915, "eval_loss": 1.7214981317520142, "eval_macro_f1": 0.12326578903926995, "eval_runtime": 0.0768, "eval_samples_per_second": 8932.057, "eval_steps_per_second": 143.225, "step": 522 }, { "epoch": 7.0, "grad_norm": 4.54931640625, "learning_rate": 1.999144090999249e-05, "loss": 1.8459, "step": 609 }, { "epoch": 7.0, "eval_accuracy": 0.33819241982507287, "eval_loss": 1.7085658311843872, "eval_macro_f1": 0.12473908000560015, "eval_runtime": 0.0772, "eval_samples_per_second": 8891.235, "eval_steps_per_second": 142.571, "step": 609 }, { "epoch": 8.0, "grad_norm": 4.285991191864014, "learning_rate": 1.9972106098590665e-05, "loss": 1.8271, "step": 696 }, { "epoch": 8.0, "eval_accuracy": 0.3469387755102041, "eval_loss": 1.6989842653274536, "eval_macro_f1": 0.1374280909101405, "eval_runtime": 0.0776, "eval_samples_per_second": 8844.853, "eval_steps_per_second": 141.827, "step": 696 }, { "epoch": 9.0, "grad_norm": 5.04291296005249, "learning_rate": 1.994169339261005e-05, "loss": 1.8219, "step": 783 }, { "epoch": 9.0, "eval_accuracy": 0.3498542274052478, "eval_loss": 1.6909065246582031, "eval_macro_f1": 0.14476905523124012, "eval_runtime": 0.0766, "eval_samples_per_second": 8950.395, "eval_steps_per_second": 143.519, "step": 783 }, { "epoch": 10.0, "grad_norm": 3.8076608180999756, "learning_rate": 1.990023657716558e-05, "loss": 1.8039, "step": 870 }, { "epoch": 10.0, "eval_accuracy": 0.3469387755102041, "eval_loss": 1.6828982830047607, "eval_macro_f1": 0.14793175460560187, "eval_runtime": 0.0764, "eval_samples_per_second": 8983.678, "eval_steps_per_second": 144.053, "step": 870 }, { "epoch": 11.0, "grad_norm": 3.9515891075134277, "learning_rate": 1.9847781706142608e-05, "loss": 1.7898, "step": 957 }, { "epoch": 11.0, "eval_accuracy": 0.35131195335276966, "eval_loss": 1.675271987915039, "eval_macro_f1": 0.15154267292502702, "eval_runtime": 0.0776, "eval_samples_per_second": 8837.816, "eval_steps_per_second": 141.714, "step": 957 }, { "epoch": 12.0, "grad_norm": 3.946139097213745, "learning_rate": 1.978438705103621e-05, "loss": 1.7634, "step": 1044 }, { "epoch": 12.0, "eval_accuracy": 0.36151603498542273, "eval_loss": 1.6689125299453735, "eval_macro_f1": 0.16042602782078802, "eval_runtime": 0.0767, "eval_samples_per_second": 8948.781, "eval_steps_per_second": 143.494, "step": 1044 }, { "epoch": 13.0, "grad_norm": 3.8360438346862793, "learning_rate": 1.9710123036218044e-05, "loss": 1.7572, "step": 1131 }, { "epoch": 13.0, "eval_accuracy": 0.37026239067055394, "eval_loss": 1.6614633798599243, "eval_macro_f1": 0.17028534014340227, "eval_runtime": 0.0767, "eval_samples_per_second": 8943.635, "eval_steps_per_second": 143.411, "step": 1131 }, { "epoch": 14.0, "grad_norm": 3.794384479522705, "learning_rate": 1.962507216070276e-05, "loss": 1.7411, "step": 1218 }, { "epoch": 14.0, "eval_accuracy": 0.36151603498542273, "eval_loss": 1.6555291414260864, "eval_macro_f1": 0.17229172694357175, "eval_runtime": 0.0764, "eval_samples_per_second": 8976.952, "eval_steps_per_second": 143.945, "step": 1218 }, { "epoch": 15.0, "grad_norm": 3.697802782058716, "learning_rate": 1.9529328906500833e-05, "loss": 1.7355, "step": 1305 }, { "epoch": 15.0, "eval_accuracy": 0.36443148688046645, "eval_loss": 1.6497727632522583, "eval_macro_f1": 0.17077990977186067, "eval_runtime": 0.0762, "eval_samples_per_second": 8997.528, "eval_steps_per_second": 144.275, "step": 1305 }, { "epoch": 16.0, "grad_norm": 4.296336650848389, "learning_rate": 1.9422999633659592e-05, "loss": 1.7163, "step": 1392 }, { "epoch": 16.0, "eval_accuracy": 0.3717201166180758, "eval_loss": 1.6435818672180176, "eval_macro_f1": 0.1808240545174343, "eval_runtime": 0.0787, "eval_samples_per_second": 8721.685, "eval_steps_per_second": 139.852, "step": 1392 }, { "epoch": 17.0, "grad_norm": 4.240530490875244, "learning_rate": 1.9306202462109128e-05, "loss": 1.6979, "step": 1479 }, { "epoch": 17.0, "eval_accuracy": 0.3760932944606414, "eval_loss": 1.6384371519088745, "eval_macro_f1": 0.18768397854098065, "eval_runtime": 0.0781, "eval_samples_per_second": 8785.572, "eval_steps_per_second": 140.877, "step": 1479 }, { "epoch": 18.0, "grad_norm": 4.70124626159668, "learning_rate": 1.9179067140444246e-05, "loss": 1.7027, "step": 1566 }, { "epoch": 18.0, "eval_accuracy": 0.37317784256559766, "eval_loss": 1.6329833269119263, "eval_macro_f1": 0.1832569421283258, "eval_runtime": 0.0773, "eval_samples_per_second": 8871.989, "eval_steps_per_second": 142.262, "step": 1566 }, { "epoch": 19.0, "grad_norm": 4.254021167755127, "learning_rate": 1.9041734901788285e-05, "loss": 1.6776, "step": 1653 }, { "epoch": 19.0, "eval_accuracy": 0.3760932944606414, "eval_loss": 1.6269856691360474, "eval_macro_f1": 0.18890492604023376, "eval_runtime": 0.0793, "eval_samples_per_second": 8655.516, "eval_steps_per_second": 138.791, "step": 1653 }, { "epoch": 20.0, "grad_norm": 3.7426421642303467, "learning_rate": 1.8894358306898934e-05, "loss": 1.6651, "step": 1740 }, { "epoch": 20.0, "eval_accuracy": 0.37900874635568516, "eval_loss": 1.621616005897522, "eval_macro_f1": 0.1934068278580951, "eval_runtime": 0.0784, "eval_samples_per_second": 8746.314, "eval_steps_per_second": 140.247, "step": 1740 }, { "epoch": 21.0, "grad_norm": 4.341787338256836, "learning_rate": 1.8737101074690274e-05, "loss": 1.6694, "step": 1827 }, { "epoch": 21.0, "eval_accuracy": 0.38338192419825073, "eval_loss": 1.617226243019104, "eval_macro_f1": 0.19340109033111008, "eval_runtime": 0.0763, "eval_samples_per_second": 8984.969, "eval_steps_per_second": 144.074, "step": 1827 }, { "epoch": 22.0, "grad_norm": 4.18576717376709, "learning_rate": 1.8570137900359382e-05, "loss": 1.6561, "step": 1914 }, { "epoch": 22.0, "eval_accuracy": 0.38338192419825073, "eval_loss": 1.6133029460906982, "eval_macro_f1": 0.19942474851997433, "eval_runtime": 0.0768, "eval_samples_per_second": 8932.362, "eval_steps_per_second": 143.23, "step": 1914 }, { "epoch": 23.0, "grad_norm": 4.433280944824219, "learning_rate": 1.8393654261319504e-05, "loss": 1.6456, "step": 2001 }, { "epoch": 23.0, "eval_accuracy": 0.3877551020408163, "eval_loss": 1.6075658798217773, "eval_macro_f1": 0.2021179986320824, "eval_runtime": 0.0779, "eval_samples_per_second": 8804.876, "eval_steps_per_second": 141.186, "step": 2001 }, { "epoch": 24.0, "grad_norm": 3.650712490081787, "learning_rate": 1.8207846211155388e-05, "loss": 1.6412, "step": 2088 }, { "epoch": 24.0, "eval_accuracy": 0.39212827988338195, "eval_loss": 1.6046576499938965, "eval_macro_f1": 0.20558065728483735, "eval_runtime": 0.0777, "eval_samples_per_second": 8832.472, "eval_steps_per_second": 141.629, "step": 2088 }, { "epoch": 25.0, "grad_norm": 3.7270474433898926, "learning_rate": 1.8012920161829693e-05, "loss": 1.6369, "step": 2175 }, { "epoch": 25.0, "eval_accuracy": 0.39504373177842567, "eval_loss": 1.6002745628356934, "eval_macro_f1": 0.2085329794328549, "eval_runtime": 0.0763, "eval_samples_per_second": 8986.091, "eval_steps_per_second": 144.092, "step": 2175 }, { "epoch": 26.0, "grad_norm": 3.5878360271453857, "learning_rate": 1.7809092654382368e-05, "loss": 1.6141, "step": 2262 }, { "epoch": 26.0, "eval_accuracy": 0.39941690962099125, "eval_loss": 1.5953983068466187, "eval_macro_f1": 0.21136043336239665, "eval_runtime": 0.0767, "eval_samples_per_second": 8941.745, "eval_steps_per_second": 143.381, "step": 2262 }, { "epoch": 27.0, "grad_norm": 3.669312000274658, "learning_rate": 1.7596590118377787e-05, "loss": 1.5989, "step": 2349 }, { "epoch": 27.0, "eval_accuracy": 0.40233236151603496, "eval_loss": 1.5911133289337158, "eval_macro_f1": 0.21358021621926357, "eval_runtime": 0.0772, "eval_samples_per_second": 8884.756, "eval_steps_per_second": 142.467, "step": 2349 }, { "epoch": 28.0, "grad_norm": 3.686958074569702, "learning_rate": 1.7375648620366817e-05, "loss": 1.6096, "step": 2436 }, { "epoch": 28.0, "eval_accuracy": 0.40524781341107874, "eval_loss": 1.5873298645019531, "eval_macro_f1": 0.214485741970254, "eval_runtime": 0.08, "eval_samples_per_second": 8573.194, "eval_steps_per_second": 137.471, "step": 2436 }, { "epoch": 29.0, "grad_norm": 3.553083896636963, "learning_rate": 1.7146513601643282e-05, "loss": 1.6039, "step": 2523 }, { "epoch": 29.0, "eval_accuracy": 0.4067055393586006, "eval_loss": 1.584189534187317, "eval_macro_f1": 0.21667857809163207, "eval_runtime": 0.0762, "eval_samples_per_second": 9000.399, "eval_steps_per_second": 144.321, "step": 2523 }, { "epoch": 30.0, "grad_norm": 3.9078423976898193, "learning_rate": 1.6909439605586156e-05, "loss": 1.5928, "step": 2610 }, { "epoch": 30.0, "eval_accuracy": 0.40816326530612246, "eval_loss": 1.579264521598816, "eval_macro_f1": 0.21831730879606145, "eval_runtime": 0.0775, "eval_samples_per_second": 8854.652, "eval_steps_per_second": 141.984, "step": 2610 }, { "epoch": 31.0, "grad_norm": 3.7723805904388428, "learning_rate": 1.6664689994890307e-05, "loss": 1.5824, "step": 2697 }, { "epoch": 31.0, "eval_accuracy": 0.40816326530612246, "eval_loss": 1.5762993097305298, "eval_macro_f1": 0.21682052505544805, "eval_runtime": 0.0776, "eval_samples_per_second": 8835.265, "eval_steps_per_second": 141.673, "step": 2697 }, { "epoch": 32.0, "grad_norm": 4.051678657531738, "learning_rate": 1.641253665900002e-05, "loss": 1.5877, "step": 2784 }, { "epoch": 32.0, "eval_accuracy": 0.41545189504373176, "eval_loss": 1.5732570886611938, "eval_macro_f1": 0.2262251950436546, "eval_runtime": 0.0769, "eval_samples_per_second": 8918.878, "eval_steps_per_second": 143.014, "step": 2784 }, { "epoch": 33.0, "grad_norm": 3.396827459335327, "learning_rate": 1.6153259712070225e-05, "loss": 1.5722, "step": 2871 }, { "epoch": 33.0, "eval_accuracy": 0.4110787172011662, "eval_loss": 1.5706168413162231, "eval_macro_f1": 0.22060087456248262, "eval_runtime": 0.0769, "eval_samples_per_second": 8923.138, "eval_steps_per_second": 143.082, "step": 2871 }, { "epoch": 34.0, "grad_norm": 3.510072708129883, "learning_rate": 1.5887147181791e-05, "loss": 1.5649, "step": 2958 }, { "epoch": 34.0, "eval_accuracy": 0.41690962099125367, "eval_loss": 1.5673753023147583, "eval_macro_f1": 0.2265284337566022, "eval_runtime": 0.0781, "eval_samples_per_second": 8778.335, "eval_steps_per_second": 140.76, "step": 2958 }, { "epoch": 35.0, "grad_norm": 3.531944513320923, "learning_rate": 1.5614494689421032e-05, "loss": 1.5662, "step": 3045 }, { "epoch": 35.0, "eval_accuracy": 0.4227405247813411, "eval_loss": 1.5635616779327393, "eval_macro_f1": 0.23237846476317717, "eval_runtime": 0.0769, "eval_samples_per_second": 8921.948, "eval_steps_per_second": 143.063, "step": 3045 }, { "epoch": 36.0, "grad_norm": 3.724010944366455, "learning_rate": 1.533560512138543e-05, "loss": 1.5545, "step": 3132 }, { "epoch": 36.0, "eval_accuracy": 0.42419825072886297, "eval_loss": 1.5617172718048096, "eval_macro_f1": 0.23396270153240778, "eval_runtime": 0.0766, "eval_samples_per_second": 8960.737, "eval_steps_per_second": 143.685, "step": 3132 }, { "epoch": 37.0, "grad_norm": 3.6395723819732666, "learning_rate": 1.5050788292802812e-05, "loss": 1.5416, "step": 3219 }, { "epoch": 37.0, "eval_accuracy": 0.43440233236151604, "eval_loss": 1.5581672191619873, "eval_macro_f1": 0.24389742844346657, "eval_runtime": 0.0768, "eval_samples_per_second": 8935.719, "eval_steps_per_second": 143.284, "step": 3219 }, { "epoch": 38.0, "grad_norm": 4.1144866943359375, "learning_rate": 1.4760360603315362e-05, "loss": 1.5351, "step": 3306 }, { "epoch": 38.0, "eval_accuracy": 0.4329446064139942, "eval_loss": 1.55453622341156, "eval_macro_f1": 0.23991444298311637, "eval_runtime": 0.0769, "eval_samples_per_second": 8923.913, "eval_steps_per_second": 143.095, "step": 3306 }, { "epoch": 39.0, "grad_norm": 3.656245708465576, "learning_rate": 1.4464644685604184e-05, "loss": 1.5424, "step": 3393 }, { "epoch": 39.0, "eval_accuracy": 0.4329446064139942, "eval_loss": 1.5531222820281982, "eval_macro_f1": 0.24107844449857171, "eval_runtime": 0.0777, "eval_samples_per_second": 8833.123, "eval_steps_per_second": 141.639, "step": 3393 }, { "epoch": 40.0, "grad_norm": 3.530606746673584, "learning_rate": 1.41639690469805e-05, "loss": 1.5232, "step": 3480 }, { "epoch": 40.0, "eval_accuracy": 0.4329446064139942, "eval_loss": 1.5503716468811035, "eval_macro_f1": 0.24171834592844124, "eval_runtime": 0.0767, "eval_samples_per_second": 8939.189, "eval_steps_per_second": 143.34, "step": 3480 }, { "epoch": 41.0, "grad_norm": 3.4572715759277344, "learning_rate": 1.3858667704450763e-05, "loss": 1.5277, "step": 3567 }, { "epoch": 41.0, "eval_accuracy": 0.43440233236151604, "eval_loss": 1.5470139980316162, "eval_macro_f1": 0.24209898836089624, "eval_runtime": 0.0769, "eval_samples_per_second": 8921.395, "eval_steps_per_second": 143.054, "step": 3567 }, { "epoch": 42.0, "grad_norm": 4.217586517333984, "learning_rate": 1.3549079813661123e-05, "loss": 1.5112, "step": 3654 }, { "epoch": 42.0, "eval_accuracy": 0.4446064139941691, "eval_loss": 1.5440438985824585, "eval_macro_f1": 0.25175856147050574, "eval_runtime": 0.0767, "eval_samples_per_second": 8948.113, "eval_steps_per_second": 143.483, "step": 3654 } ], "logging_steps": 87, "max_steps": 8700, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }