| { | |
| "best_metric": 0.8249799337347952, | |
| "best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-9/checkpoint-3976", | |
| "epoch": 56.0, | |
| "global_step": 3976, | |
| "is_hyper_param_search": true, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.24943208694458008, | |
| "eval_macro_f1": 0.08911588063155029, | |
| "eval_macro_precision": 0.1020147123407993, | |
| "eval_macro_recall": 0.08017933766090879, | |
| "eval_micro_f1": 0.27627302275189597, | |
| "eval_micro_precision": 0.7750759878419453, | |
| "eval_micro_recall": 0.16809492419248517, | |
| "eval_runtime": 2.9775, | |
| "eval_samples_per_second": 324.431, | |
| "eval_steps_per_second": 20.487, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.1698431819677353, | |
| "eval_macro_f1": 0.30238618603356054, | |
| "eval_macro_precision": 0.4320649543604988, | |
| "eval_macro_recall": 0.2644492382273683, | |
| "eval_micro_f1": 0.6521739130434782, | |
| "eval_micro_precision": 0.8376421923474664, | |
| "eval_micro_recall": 0.5339485827290705, | |
| "eval_runtime": 2.979, | |
| "eval_samples_per_second": 324.273, | |
| "eval_steps_per_second": 20.477, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.14047418534755707, | |
| "eval_macro_f1": 0.40223830727079396, | |
| "eval_macro_precision": 0.4784721691006365, | |
| "eval_macro_recall": 0.3687052580973402, | |
| "eval_micro_f1": 0.739880059970015, | |
| "eval_micro_precision": 0.8575152041702867, | |
| "eval_micro_recall": 0.6506262359920897, | |
| "eval_runtime": 2.9805, | |
| "eval_samples_per_second": 324.11, | |
| "eval_steps_per_second": 20.467, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 3.279287453609026e-05, | |
| "loss": 0.2244, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.1187577173113823, | |
| "eval_macro_f1": 0.5347249220382584, | |
| "eval_macro_precision": 0.7327975628305986, | |
| "eval_macro_recall": 0.4754068941604508, | |
| "eval_micro_f1": 0.7880299251870324, | |
| "eval_micro_precision": 0.8573643410852713, | |
| "eval_micro_recall": 0.7290705339485827, | |
| "eval_runtime": 2.9805, | |
| "eval_samples_per_second": 324.108, | |
| "eval_steps_per_second": 20.466, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.11074026674032211, | |
| "eval_macro_f1": 0.5991127961103198, | |
| "eval_macro_precision": 0.6991665233729463, | |
| "eval_macro_recall": 0.5596673950826421, | |
| "eval_micro_f1": 0.8128196385952949, | |
| "eval_micro_precision": 0.8418079096045198, | |
| "eval_micro_recall": 0.7857613711272248, | |
| "eval_runtime": 2.9796, | |
| "eval_samples_per_second": 324.206, | |
| "eval_steps_per_second": 20.473, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.10078531503677368, | |
| "eval_macro_f1": 0.6568773778388772, | |
| "eval_macro_precision": 0.700413763066982, | |
| "eval_macro_recall": 0.6283428000904666, | |
| "eval_micro_f1": 0.8347529812606473, | |
| "eval_micro_precision": 0.8638928067700987, | |
| "eval_micro_recall": 0.8075148319050758, | |
| "eval_runtime": 2.979, | |
| "eval_samples_per_second": 324.274, | |
| "eval_steps_per_second": 20.477, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.10221733897924423, | |
| "eval_macro_f1": 0.6758575377881516, | |
| "eval_macro_precision": 0.838910780569426, | |
| "eval_macro_recall": 0.6238243641780066, | |
| "eval_micro_f1": 0.8337912087912088, | |
| "eval_micro_precision": 0.8702508960573476, | |
| "eval_micro_recall": 0.8002636783124588, | |
| "eval_runtime": 2.9795, | |
| "eval_samples_per_second": 324.21, | |
| "eval_steps_per_second": 20.473, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 3.0308565859113728e-05, | |
| "loss": 0.0513, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.10313227772712708, | |
| "eval_macro_f1": 0.7298003500123689, | |
| "eval_macro_precision": 0.8118751777216693, | |
| "eval_macro_recall": 0.6944187826598622, | |
| "eval_micro_f1": 0.8354006034193765, | |
| "eval_micro_precision": 0.849931787175989, | |
| "eval_micro_recall": 0.8213579433091628, | |
| "eval_runtime": 2.9811, | |
| "eval_samples_per_second": 324.042, | |
| "eval_steps_per_second": 20.462, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.10032625496387482, | |
| "eval_macro_f1": 0.7681297243773157, | |
| "eval_macro_precision": 0.8958394795684446, | |
| "eval_macro_recall": 0.7124194145895655, | |
| "eval_micro_f1": 0.8443093549476527, | |
| "eval_micro_precision": 0.8656509695290858, | |
| "eval_micro_recall": 0.8239947264337508, | |
| "eval_runtime": 2.9813, | |
| "eval_samples_per_second": 324.018, | |
| "eval_steps_per_second": 20.461, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.1044757142663002, | |
| "eval_macro_f1": 0.7829375079467304, | |
| "eval_macro_precision": 0.8858384241353942, | |
| "eval_macro_recall": 0.7367403001149204, | |
| "eval_micro_f1": 0.8420698924731183, | |
| "eval_micro_precision": 0.8588074023303632, | |
| "eval_micro_recall": 0.8259723137771918, | |
| "eval_runtime": 2.9861, | |
| "eval_samples_per_second": 323.499, | |
| "eval_steps_per_second": 20.428, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 21.13, | |
| "learning_rate": 2.7824257182137193e-05, | |
| "loss": 0.0183, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.1039622500538826, | |
| "eval_macro_f1": 0.7902798824417182, | |
| "eval_macro_precision": 0.89276275853935, | |
| "eval_macro_recall": 0.7403232660636272, | |
| "eval_micro_f1": 0.8479512360311547, | |
| "eval_micro_precision": 0.871866295264624, | |
| "eval_micro_recall": 0.8253131179960448, | |
| "eval_runtime": 2.977, | |
| "eval_samples_per_second": 324.483, | |
| "eval_steps_per_second": 20.49, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.10449391603469849, | |
| "eval_macro_f1": 0.7973957256263516, | |
| "eval_macro_precision": 0.8897797609904068, | |
| "eval_macro_recall": 0.7482788788828134, | |
| "eval_micro_f1": 0.8530297957817209, | |
| "eval_micro_precision": 0.8666666666666667, | |
| "eval_micro_recall": 0.8398154251812788, | |
| "eval_runtime": 2.982, | |
| "eval_samples_per_second": 323.947, | |
| "eval_steps_per_second": 20.456, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.10751193016767502, | |
| "eval_macro_f1": 0.815099576727704, | |
| "eval_macro_precision": 0.8596170620799346, | |
| "eval_macro_recall": 0.7893940744657464, | |
| "eval_micro_f1": 0.8517287234042553, | |
| "eval_micro_precision": 0.8591549295774648, | |
| "eval_micro_recall": 0.8444297956493079, | |
| "eval_runtime": 2.9874, | |
| "eval_samples_per_second": 323.362, | |
| "eval_steps_per_second": 20.419, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.10826986283063889, | |
| "eval_macro_f1": 0.8122978188941863, | |
| "eval_macro_precision": 0.8818206343623207, | |
| "eval_macro_recall": 0.7768327155632533, | |
| "eval_micro_f1": 0.8507362784471219, | |
| "eval_micro_precision": 0.8640380693405847, | |
| "eval_micro_recall": 0.8378378378378378, | |
| "eval_runtime": 2.9805, | |
| "eval_samples_per_second": 324.106, | |
| "eval_steps_per_second": 20.466, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 28.17, | |
| "learning_rate": 2.5339948505160657e-05, | |
| "loss": 0.0094, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.11172767728567123, | |
| "eval_macro_f1": 0.7952069303877679, | |
| "eval_macro_precision": 0.8834731091800803, | |
| "eval_macro_recall": 0.7589521997225694, | |
| "eval_micro_f1": 0.8510210913960494, | |
| "eval_micro_precision": 0.8646258503401361, | |
| "eval_micro_recall": 0.8378378378378378, | |
| "eval_runtime": 2.9811, | |
| "eval_samples_per_second": 324.039, | |
| "eval_steps_per_second": 20.462, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.12214264273643494, | |
| "eval_macro_f1": 0.784963786280585, | |
| "eval_macro_precision": 0.8399655989881138, | |
| "eval_macro_recall": 0.7539663033116887, | |
| "eval_micro_f1": 0.8395881766854865, | |
| "eval_micro_precision": 0.8460508701472557, | |
| "eval_micro_recall": 0.8332234673698088, | |
| "eval_runtime": 2.9802, | |
| "eval_samples_per_second": 324.138, | |
| "eval_steps_per_second": 20.468, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.10992709547281265, | |
| "eval_macro_f1": 0.818123937895116, | |
| "eval_macro_precision": 0.846307748666965, | |
| "eval_macro_recall": 0.7959472696549352, | |
| "eval_micro_f1": 0.8540829986613119, | |
| "eval_micro_precision": 0.86743711760707, | |
| "eval_micro_recall": 0.8411338167435728, | |
| "eval_runtime": 2.9817, | |
| "eval_samples_per_second": 323.976, | |
| "eval_steps_per_second": 20.458, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 2.2855639828184125e-05, | |
| "loss": 0.0088, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.11282340437173843, | |
| "eval_macro_f1": 0.810588172484549, | |
| "eval_macro_precision": 0.8881718400115531, | |
| "eval_macro_recall": 0.7660828954423657, | |
| "eval_micro_f1": 0.8533783783783784, | |
| "eval_micro_precision": 0.8752598752598753, | |
| "eval_micro_recall": 0.8325642715886619, | |
| "eval_runtime": 2.9823, | |
| "eval_samples_per_second": 323.913, | |
| "eval_steps_per_second": 20.454, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.11364943534135818, | |
| "eval_macro_f1": 0.8086979072156089, | |
| "eval_macro_precision": 0.874192444614366, | |
| "eval_macro_recall": 0.7687841160082525, | |
| "eval_micro_f1": 0.855510752688172, | |
| "eval_micro_precision": 0.8725154215215901, | |
| "eval_micro_recall": 0.8391562294001318, | |
| "eval_runtime": 2.9856, | |
| "eval_samples_per_second": 323.552, | |
| "eval_steps_per_second": 20.431, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.11377756297588348, | |
| "eval_macro_f1": 0.8074550758048753, | |
| "eval_macro_precision": 0.8601506214626871, | |
| "eval_macro_recall": 0.7772364003410493, | |
| "eval_micro_f1": 0.8550483172275909, | |
| "eval_micro_precision": 0.8645552560646901, | |
| "eval_micro_recall": 0.8457481872116018, | |
| "eval_runtime": 2.987, | |
| "eval_samples_per_second": 323.399, | |
| "eval_steps_per_second": 20.422, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.11456111818552017, | |
| "eval_macro_f1": 0.8197602680311404, | |
| "eval_macro_precision": 0.8652772327919502, | |
| "eval_macro_recall": 0.7923572373451824, | |
| "eval_micro_f1": 0.8584748584748586, | |
| "eval_micro_precision": 0.8674293405114402, | |
| "eval_micro_recall": 0.8497033618984838, | |
| "eval_runtime": 2.9793, | |
| "eval_samples_per_second": 324.237, | |
| "eval_steps_per_second": 20.475, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 42.25, | |
| "learning_rate": 2.0371331151207586e-05, | |
| "loss": 0.0049, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.11773423105478287, | |
| "eval_macro_f1": 0.8086306686692242, | |
| "eval_macro_precision": 0.8642281309223148, | |
| "eval_macro_recall": 0.7764573931394076, | |
| "eval_micro_f1": 0.8553291012362179, | |
| "eval_micro_precision": 0.8672086720867209, | |
| "eval_micro_recall": 0.8437705998681608, | |
| "eval_runtime": 2.9813, | |
| "eval_samples_per_second": 324.023, | |
| "eval_steps_per_second": 20.461, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.11780666559934616, | |
| "eval_macro_f1": 0.8101599105843645, | |
| "eval_macro_precision": 0.8691080726361069, | |
| "eval_macro_recall": 0.7762766074908475, | |
| "eval_micro_f1": 0.8565723793677205, | |
| "eval_micro_precision": 0.8649193548387096, | |
| "eval_micro_recall": 0.8483849703361899, | |
| "eval_runtime": 2.9804, | |
| "eval_samples_per_second": 324.123, | |
| "eval_steps_per_second": 20.467, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.11932362616062164, | |
| "eval_macro_f1": 0.8165194442314286, | |
| "eval_macro_precision": 0.8648115942931565, | |
| "eval_macro_recall": 0.7864788866212832, | |
| "eval_micro_f1": 0.8535201868535202, | |
| "eval_micro_precision": 0.8641891891891892, | |
| "eval_micro_recall": 0.8431114040870138, | |
| "eval_runtime": 2.985, | |
| "eval_samples_per_second": 323.616, | |
| "eval_steps_per_second": 20.435, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 49.3, | |
| "learning_rate": 1.7887022474231054e-05, | |
| "loss": 0.0034, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.12221735715866089, | |
| "eval_macro_f1": 0.8141089592504215, | |
| "eval_macro_precision": 0.8615361853719665, | |
| "eval_macro_recall": 0.7844581807191786, | |
| "eval_micro_f1": 0.8523714094856378, | |
| "eval_micro_precision": 0.8639133378469871, | |
| "eval_micro_recall": 0.8411338167435728, | |
| "eval_runtime": 2.9803, | |
| "eval_samples_per_second": 324.125, | |
| "eval_steps_per_second": 20.468, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 0.12201466411352158, | |
| "eval_macro_f1": 0.8215839258353351, | |
| "eval_macro_precision": 0.8618774470352207, | |
| "eval_macro_recall": 0.795741448912531, | |
| "eval_micro_f1": 0.8534223706176962, | |
| "eval_micro_precision": 0.86468200270636, | |
| "eval_micro_recall": 0.8424522083058669, | |
| "eval_runtime": 2.9796, | |
| "eval_samples_per_second": 324.208, | |
| "eval_steps_per_second": 20.473, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 0.12538054585456848, | |
| "eval_macro_f1": 0.8134875725433206, | |
| "eval_macro_precision": 0.8710944940417797, | |
| "eval_macro_recall": 0.7763605326378977, | |
| "eval_micro_f1": 0.85425782564793, | |
| "eval_micro_precision": 0.8727647867950481, | |
| "eval_micro_recall": 0.8365194462755439, | |
| "eval_runtime": 2.9841, | |
| "eval_samples_per_second": 323.717, | |
| "eval_steps_per_second": 20.442, | |
| "step": 3834 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 0.12492760270833969, | |
| "eval_macro_f1": 0.8249799337347952, | |
| "eval_macro_precision": 0.8672879358583392, | |
| "eval_macro_recall": 0.7974535529440212, | |
| "eval_micro_f1": 0.8574290484140235, | |
| "eval_micro_precision": 0.8687415426251691, | |
| "eval_micro_recall": 0.8464073829927489, | |
| "eval_runtime": 2.9821, | |
| "eval_samples_per_second": 323.938, | |
| "eval_steps_per_second": 20.456, | |
| "step": 3976 | |
| } | |
| ], | |
| "max_steps": 7100, | |
| "num_train_epochs": 100, | |
| "total_flos": 3.752304368656253e+16, | |
| "trial_name": null, | |
| "trial_params": { | |
| "adam_epsilon": 2.4799103776060603e-09, | |
| "learning_rate": 3.4268553890214325e-05, | |
| "per_device_eval_batch_size": 16, | |
| "per_device_train_batch_size": 32, | |
| "seed": 326, | |
| "warmup_steps": 203, | |
| "weight_decay": 2.8436289860950645e-08 | |
| } | |
| } | |