| { |
| "best_global_step": 3654, |
| "best_metric": 0.25175856147050574, |
| "best_model_checkpoint": "outputs/textcnn/checkpoint-3654", |
| "epoch": 42.0, |
| "eval_steps": 500, |
| "global_step": 3654, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.989197731018066, |
| "learning_rate": 3.44e-06, |
| "loss": 2.0625, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.31486880466472306, |
| "eval_loss": 1.9201620817184448, |
| "eval_macro_f1": 0.08069587306875443, |
| "eval_runtime": 0.0822, |
| "eval_samples_per_second": 8343.34, |
| "eval_steps_per_second": 133.785, |
| "step": 87 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.812881946563721, |
| "learning_rate": 6.92e-06, |
| "loss": 2.0517, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.3163265306122449, |
| "eval_loss": 1.867976188659668, |
| "eval_macro_f1": 0.08271289631865439, |
| "eval_runtime": 0.0798, |
| "eval_samples_per_second": 8598.327, |
| "eval_steps_per_second": 137.874, |
| "step": 174 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 4.3668341636657715, |
| "learning_rate": 1.04e-05, |
| "loss": 1.9731, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.31924198250728864, |
| "eval_loss": 1.8050793409347534, |
| "eval_macro_f1": 0.09255094257382587, |
| "eval_runtime": 0.0765, |
| "eval_samples_per_second": 8966.769, |
| "eval_steps_per_second": 143.782, |
| "step": 261 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 4.521817207336426, |
| "learning_rate": 1.3880000000000001e-05, |
| "loss": 1.9355, |
| "step": 348 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.3206997084548105, |
| "eval_loss": 1.760697841644287, |
| "eval_macro_f1": 0.09828636171767466, |
| "eval_runtime": 0.0776, |
| "eval_samples_per_second": 8839.5, |
| "eval_steps_per_second": 141.741, |
| "step": 348 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 4.453028678894043, |
| "learning_rate": 1.736e-05, |
| "loss": 1.9005, |
| "step": 435 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.32653061224489793, |
| "eval_loss": 1.7365907430648804, |
| "eval_macro_f1": 0.11368274326806725, |
| "eval_runtime": 0.0784, |
| "eval_samples_per_second": 8750.81, |
| "eval_steps_per_second": 140.319, |
| "step": 435 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 4.236560344696045, |
| "learning_rate": 1.999967634800249e-05, |
| "loss": 1.8734, |
| "step": 522 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.33527696793002915, |
| "eval_loss": 1.7214981317520142, |
| "eval_macro_f1": 0.12326578903926995, |
| "eval_runtime": 0.0768, |
| "eval_samples_per_second": 8932.057, |
| "eval_steps_per_second": 143.225, |
| "step": 522 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 4.54931640625, |
| "learning_rate": 1.999144090999249e-05, |
| "loss": 1.8459, |
| "step": 609 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.33819241982507287, |
| "eval_loss": 1.7085658311843872, |
| "eval_macro_f1": 0.12473908000560015, |
| "eval_runtime": 0.0772, |
| "eval_samples_per_second": 8891.235, |
| "eval_steps_per_second": 142.571, |
| "step": 609 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 4.285991191864014, |
| "learning_rate": 1.9972106098590665e-05, |
| "loss": 1.8271, |
| "step": 696 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.3469387755102041, |
| "eval_loss": 1.6989842653274536, |
| "eval_macro_f1": 0.1374280909101405, |
| "eval_runtime": 0.0776, |
| "eval_samples_per_second": 8844.853, |
| "eval_steps_per_second": 141.827, |
| "step": 696 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 5.04291296005249, |
| "learning_rate": 1.994169339261005e-05, |
| "loss": 1.8219, |
| "step": 783 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.3498542274052478, |
| "eval_loss": 1.6909065246582031, |
| "eval_macro_f1": 0.14476905523124012, |
| "eval_runtime": 0.0766, |
| "eval_samples_per_second": 8950.395, |
| "eval_steps_per_second": 143.519, |
| "step": 783 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 3.8076608180999756, |
| "learning_rate": 1.990023657716558e-05, |
| "loss": 1.8039, |
| "step": 870 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.3469387755102041, |
| "eval_loss": 1.6828982830047607, |
| "eval_macro_f1": 0.14793175460560187, |
| "eval_runtime": 0.0764, |
| "eval_samples_per_second": 8983.678, |
| "eval_steps_per_second": 144.053, |
| "step": 870 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 3.9515891075134277, |
| "learning_rate": 1.9847781706142608e-05, |
| "loss": 1.7898, |
| "step": 957 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.35131195335276966, |
| "eval_loss": 1.675271987915039, |
| "eval_macro_f1": 0.15154267292502702, |
| "eval_runtime": 0.0776, |
| "eval_samples_per_second": 8837.816, |
| "eval_steps_per_second": 141.714, |
| "step": 957 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 3.946139097213745, |
| "learning_rate": 1.978438705103621e-05, |
| "loss": 1.7634, |
| "step": 1044 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.36151603498542273, |
| "eval_loss": 1.6689125299453735, |
| "eval_macro_f1": 0.16042602782078802, |
| "eval_runtime": 0.0767, |
| "eval_samples_per_second": 8948.781, |
| "eval_steps_per_second": 143.494, |
| "step": 1044 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 3.8360438346862793, |
| "learning_rate": 1.9710123036218044e-05, |
| "loss": 1.7572, |
| "step": 1131 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.37026239067055394, |
| "eval_loss": 1.6614633798599243, |
| "eval_macro_f1": 0.17028534014340227, |
| "eval_runtime": 0.0767, |
| "eval_samples_per_second": 8943.635, |
| "eval_steps_per_second": 143.411, |
| "step": 1131 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 3.794384479522705, |
| "learning_rate": 1.962507216070276e-05, |
| "loss": 1.7411, |
| "step": 1218 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.36151603498542273, |
| "eval_loss": 1.6555291414260864, |
| "eval_macro_f1": 0.17229172694357175, |
| "eval_runtime": 0.0764, |
| "eval_samples_per_second": 8976.952, |
| "eval_steps_per_second": 143.945, |
| "step": 1218 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 3.697802782058716, |
| "learning_rate": 1.9529328906500833e-05, |
| "loss": 1.7355, |
| "step": 1305 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.36443148688046645, |
| "eval_loss": 1.6497727632522583, |
| "eval_macro_f1": 0.17077990977186067, |
| "eval_runtime": 0.0762, |
| "eval_samples_per_second": 8997.528, |
| "eval_steps_per_second": 144.275, |
| "step": 1305 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 4.296336650848389, |
| "learning_rate": 1.9422999633659592e-05, |
| "loss": 1.7163, |
| "step": 1392 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.3717201166180758, |
| "eval_loss": 1.6435818672180176, |
| "eval_macro_f1": 0.1808240545174343, |
| "eval_runtime": 0.0787, |
| "eval_samples_per_second": 8721.685, |
| "eval_steps_per_second": 139.852, |
| "step": 1392 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 4.240530490875244, |
| "learning_rate": 1.9306202462109128e-05, |
| "loss": 1.6979, |
| "step": 1479 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.3760932944606414, |
| "eval_loss": 1.6384371519088745, |
| "eval_macro_f1": 0.18768397854098065, |
| "eval_runtime": 0.0781, |
| "eval_samples_per_second": 8785.572, |
| "eval_steps_per_second": 140.877, |
| "step": 1479 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 4.70124626159668, |
| "learning_rate": 1.9179067140444246e-05, |
| "loss": 1.7027, |
| "step": 1566 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.37317784256559766, |
| "eval_loss": 1.6329833269119263, |
| "eval_macro_f1": 0.1832569421283258, |
| "eval_runtime": 0.0773, |
| "eval_samples_per_second": 8871.989, |
| "eval_steps_per_second": 142.262, |
| "step": 1566 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 4.254021167755127, |
| "learning_rate": 1.9041734901788285e-05, |
| "loss": 1.6776, |
| "step": 1653 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.3760932944606414, |
| "eval_loss": 1.6269856691360474, |
| "eval_macro_f1": 0.18890492604023376, |
| "eval_runtime": 0.0793, |
| "eval_samples_per_second": 8655.516, |
| "eval_steps_per_second": 138.791, |
| "step": 1653 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.7426421642303467, |
| "learning_rate": 1.8894358306898934e-05, |
| "loss": 1.6651, |
| "step": 1740 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.37900874635568516, |
| "eval_loss": 1.621616005897522, |
| "eval_macro_f1": 0.1934068278580951, |
| "eval_runtime": 0.0784, |
| "eval_samples_per_second": 8746.314, |
| "eval_steps_per_second": 140.247, |
| "step": 1740 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 4.341787338256836, |
| "learning_rate": 1.8737101074690274e-05, |
| "loss": 1.6694, |
| "step": 1827 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.38338192419825073, |
| "eval_loss": 1.617226243019104, |
| "eval_macro_f1": 0.19340109033111008, |
| "eval_runtime": 0.0763, |
| "eval_samples_per_second": 8984.969, |
| "eval_steps_per_second": 144.074, |
| "step": 1827 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 4.18576717376709, |
| "learning_rate": 1.8570137900359382e-05, |
| "loss": 1.6561, |
| "step": 1914 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.38338192419825073, |
| "eval_loss": 1.6133029460906982, |
| "eval_macro_f1": 0.19942474851997433, |
| "eval_runtime": 0.0768, |
| "eval_samples_per_second": 8932.362, |
| "eval_steps_per_second": 143.23, |
| "step": 1914 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 4.433280944824219, |
| "learning_rate": 1.8393654261319504e-05, |
| "loss": 1.6456, |
| "step": 2001 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.3877551020408163, |
| "eval_loss": 1.6075658798217773, |
| "eval_macro_f1": 0.2021179986320824, |
| "eval_runtime": 0.0779, |
| "eval_samples_per_second": 8804.876, |
| "eval_steps_per_second": 141.186, |
| "step": 2001 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 3.650712490081787, |
| "learning_rate": 1.8207846211155388e-05, |
| "loss": 1.6412, |
| "step": 2088 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.39212827988338195, |
| "eval_loss": 1.6046576499938965, |
| "eval_macro_f1": 0.20558065728483735, |
| "eval_runtime": 0.0777, |
| "eval_samples_per_second": 8832.472, |
| "eval_steps_per_second": 141.629, |
| "step": 2088 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 3.7270474433898926, |
| "learning_rate": 1.8012920161829693e-05, |
| "loss": 1.6369, |
| "step": 2175 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.39504373177842567, |
| "eval_loss": 1.6002745628356934, |
| "eval_macro_f1": 0.2085329794328549, |
| "eval_runtime": 0.0763, |
| "eval_samples_per_second": 8986.091, |
| "eval_steps_per_second": 144.092, |
| "step": 2175 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 3.5878360271453857, |
| "learning_rate": 1.7809092654382368e-05, |
| "loss": 1.6141, |
| "step": 2262 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.39941690962099125, |
| "eval_loss": 1.5953983068466187, |
| "eval_macro_f1": 0.21136043336239665, |
| "eval_runtime": 0.0767, |
| "eval_samples_per_second": 8941.745, |
| "eval_steps_per_second": 143.381, |
| "step": 2262 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 3.669312000274658, |
| "learning_rate": 1.7596590118377787e-05, |
| "loss": 1.5989, |
| "step": 2349 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.40233236151603496, |
| "eval_loss": 1.5911133289337158, |
| "eval_macro_f1": 0.21358021621926357, |
| "eval_runtime": 0.0772, |
| "eval_samples_per_second": 8884.756, |
| "eval_steps_per_second": 142.467, |
| "step": 2349 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 3.686958074569702, |
| "learning_rate": 1.7375648620366817e-05, |
| "loss": 1.6096, |
| "step": 2436 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.40524781341107874, |
| "eval_loss": 1.5873298645019531, |
| "eval_macro_f1": 0.214485741970254, |
| "eval_runtime": 0.08, |
| "eval_samples_per_second": 8573.194, |
| "eval_steps_per_second": 137.471, |
| "step": 2436 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 3.553083896636963, |
| "learning_rate": 1.7146513601643282e-05, |
| "loss": 1.6039, |
| "step": 2523 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.4067055393586006, |
| "eval_loss": 1.584189534187317, |
| "eval_macro_f1": 0.21667857809163207, |
| "eval_runtime": 0.0762, |
| "eval_samples_per_second": 9000.399, |
| "eval_steps_per_second": 144.321, |
| "step": 2523 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 3.9078423976898193, |
| "learning_rate": 1.6909439605586156e-05, |
| "loss": 1.5928, |
| "step": 2610 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.40816326530612246, |
| "eval_loss": 1.579264521598816, |
| "eval_macro_f1": 0.21831730879606145, |
| "eval_runtime": 0.0775, |
| "eval_samples_per_second": 8854.652, |
| "eval_steps_per_second": 141.984, |
| "step": 2610 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 3.7723805904388428, |
| "learning_rate": 1.6664689994890307e-05, |
| "loss": 1.5824, |
| "step": 2697 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.40816326530612246, |
| "eval_loss": 1.5762993097305298, |
| "eval_macro_f1": 0.21682052505544805, |
| "eval_runtime": 0.0776, |
| "eval_samples_per_second": 8835.265, |
| "eval_steps_per_second": 141.673, |
| "step": 2697 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 4.051678657531738, |
| "learning_rate": 1.641253665900002e-05, |
| "loss": 1.5877, |
| "step": 2784 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.41545189504373176, |
| "eval_loss": 1.5732570886611938, |
| "eval_macro_f1": 0.2262251950436546, |
| "eval_runtime": 0.0769, |
| "eval_samples_per_second": 8918.878, |
| "eval_steps_per_second": 143.014, |
| "step": 2784 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 3.396827459335327, |
| "learning_rate": 1.6153259712070225e-05, |
| "loss": 1.5722, |
| "step": 2871 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.4110787172011662, |
| "eval_loss": 1.5706168413162231, |
| "eval_macro_f1": 0.22060087456248262, |
| "eval_runtime": 0.0769, |
| "eval_samples_per_second": 8923.138, |
| "eval_steps_per_second": 143.082, |
| "step": 2871 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 3.510072708129883, |
| "learning_rate": 1.5887147181791e-05, |
| "loss": 1.5649, |
| "step": 2958 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.41690962099125367, |
| "eval_loss": 1.5673753023147583, |
| "eval_macro_f1": 0.2265284337566022, |
| "eval_runtime": 0.0781, |
| "eval_samples_per_second": 8778.335, |
| "eval_steps_per_second": 140.76, |
| "step": 2958 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 3.531944513320923, |
| "learning_rate": 1.5614494689421032e-05, |
| "loss": 1.5662, |
| "step": 3045 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.4227405247813411, |
| "eval_loss": 1.5635616779327393, |
| "eval_macro_f1": 0.23237846476317717, |
| "eval_runtime": 0.0769, |
| "eval_samples_per_second": 8921.948, |
| "eval_steps_per_second": 143.063, |
| "step": 3045 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 3.724010944366455, |
| "learning_rate": 1.533560512138543e-05, |
| "loss": 1.5545, |
| "step": 3132 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.42419825072886297, |
| "eval_loss": 1.5617172718048096, |
| "eval_macro_f1": 0.23396270153240778, |
| "eval_runtime": 0.0766, |
| "eval_samples_per_second": 8960.737, |
| "eval_steps_per_second": 143.685, |
| "step": 3132 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 3.6395723819732666, |
| "learning_rate": 1.5050788292802812e-05, |
| "loss": 1.5416, |
| "step": 3219 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.43440233236151604, |
| "eval_loss": 1.5581672191619873, |
| "eval_macro_f1": 0.24389742844346657, |
| "eval_runtime": 0.0768, |
| "eval_samples_per_second": 8935.719, |
| "eval_steps_per_second": 143.284, |
| "step": 3219 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 4.1144866943359375, |
| "learning_rate": 1.4760360603315362e-05, |
| "loss": 1.5351, |
| "step": 3306 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.4329446064139942, |
| "eval_loss": 1.55453622341156, |
| "eval_macro_f1": 0.23991444298311637, |
| "eval_runtime": 0.0769, |
| "eval_samples_per_second": 8923.913, |
| "eval_steps_per_second": 143.095, |
| "step": 3306 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 3.656245708465576, |
| "learning_rate": 1.4464644685604184e-05, |
| "loss": 1.5424, |
| "step": 3393 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.4329446064139942, |
| "eval_loss": 1.5531222820281982, |
| "eval_macro_f1": 0.24107844449857171, |
| "eval_runtime": 0.0777, |
| "eval_samples_per_second": 8833.123, |
| "eval_steps_per_second": 141.639, |
| "step": 3393 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 3.530606746673584, |
| "learning_rate": 1.41639690469805e-05, |
| "loss": 1.5232, |
| "step": 3480 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.4329446064139942, |
| "eval_loss": 1.5503716468811035, |
| "eval_macro_f1": 0.24171834592844124, |
| "eval_runtime": 0.0767, |
| "eval_samples_per_second": 8939.189, |
| "eval_steps_per_second": 143.34, |
| "step": 3480 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 3.4572715759277344, |
| "learning_rate": 1.3858667704450763e-05, |
| "loss": 1.5277, |
| "step": 3567 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.43440233236151604, |
| "eval_loss": 1.5470139980316162, |
| "eval_macro_f1": 0.24209898836089624, |
| "eval_runtime": 0.0769, |
| "eval_samples_per_second": 8921.395, |
| "eval_steps_per_second": 143.054, |
| "step": 3567 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 4.217586517333984, |
| "learning_rate": 1.3549079813661123e-05, |
| "loss": 1.5112, |
| "step": 3654 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.4446064139941691, |
| "eval_loss": 1.5440438985824585, |
| "eval_macro_f1": 0.25175856147050574, |
| "eval_runtime": 0.0767, |
| "eval_samples_per_second": 8948.113, |
| "eval_steps_per_second": 143.483, |
| "step": 3654 |
| } |
| ], |
| "logging_steps": 87, |
| "max_steps": 8700, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|