{ "best_global_step": 7245, "best_metric": 0.8153345585743379, "best_model_checkpoint": "outputs/runs/bartpho/checkpoint-7245", "epoch": 23.0, "eval_steps": 500, "global_step": 7245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9968253968253968, "grad_norm": 5.682558536529541, "learning_rate": 4.999432333543028e-06, "loss": 0.6382, "step": 314 }, { "epoch": 1.0, "eval_accuracy": 0.6618786257685761, "eval_f1": 0.5275329196417486, "eval_loss": 0.623261570930481, "eval_precision": 0.7377902962250189, "eval_recall": 0.5717004519029796, "eval_runtime": 1.4418, "eval_samples_per_second": 902.363, "eval_steps_per_second": 28.437, "step": 315 }, { "epoch": 1.9936507936507937, "grad_norm": 1.4926543235778809, "learning_rate": 4.996525669931999e-06, "loss": 0.5683, "step": 628 }, { "epoch": 2.0, "eval_accuracy": 0.6871157119533122, "eval_f1": 0.6270341888525142, "eval_loss": 0.5687424540519714, "eval_precision": 0.6851236426126874, "eval_recall": 0.6290258325510966, "eval_runtime": 1.4493, "eval_samples_per_second": 897.649, "eval_steps_per_second": 28.289, "step": 630 }, { "epoch": 2.9904761904761905, "grad_norm": 6.10944128036499, "learning_rate": 4.991155236893945e-06, "loss": 0.5534, "step": 942 }, { "epoch": 3.0, "eval_accuracy": 0.6981102581026157, "eval_f1": 0.6686886646196348, "eval_loss": 0.5649789571762085, "eval_precision": 0.6820856346718416, "eval_recall": 0.6647145253749493, "eval_runtime": 1.4482, "eval_samples_per_second": 898.381, "eval_steps_per_second": 28.312, "step": 945 }, { "epoch": 3.9873015873015873, "grad_norm": 3.255537509918213, "learning_rate": 4.983326334397891e-06, "loss": 0.5436, "step": 1256 }, { "epoch": 4.0, "eval_accuracy": 0.697224441588217, "eval_f1": 0.6146617747343325, "eval_loss": 0.5817087888717651, "eval_precision": 0.7360948991758735, "eval_recall": 0.625299233298471, "eval_runtime": 1.4497, "eval_samples_per_second": 897.453, "eval_steps_per_second": 28.283, "step": 1260 }, { "epoch": 4.984126984126984, "grad_norm": 4.936647891998291, "learning_rate": 4.97304668862541e-06, "loss": 0.5387, "step": 1570 }, { "epoch": 5.0, "eval_accuracy": 0.6994824052523708, "eval_f1": 0.6780463785641409, "eval_loss": 0.5464158654212952, "eval_precision": 0.6826491609942078, "eval_recall": 0.6754857450135558, "eval_runtime": 1.4501, "eval_samples_per_second": 897.181, "eval_steps_per_second": 28.274, "step": 1575 }, { "epoch": 5.980952380952381, "grad_norm": 5.54253625869751, "learning_rate": 4.9603264443458e-06, "loss": 0.5347, "step": 1884 }, { "epoch": 6.0, "eval_accuracy": 0.703234098725119, "eval_f1": 0.658548053122289, "eval_loss": 0.5504359602928162, "eval_precision": 0.6971966581407725, "eval_recall": 0.6552219050544602, "eval_runtime": 1.4502, "eval_samples_per_second": 897.134, "eval_steps_per_second": 28.272, "step": 1890 }, { "epoch": 6.977777777777778, "grad_norm": 2.980400800704956, "learning_rate": 4.945178154904432e-06, "loss": 0.5295, "step": 2198 }, { "epoch": 7.0, "eval_accuracy": 0.7049188869976031, "eval_f1": 0.6712744945089447, "eval_loss": 0.5498743057250977, "eval_precision": 0.6921020382053092, "eval_recall": 0.6667597786262732, "eval_runtime": 1.451, "eval_samples_per_second": 896.602, "eval_steps_per_second": 28.256, "step": 2205 }, { "epoch": 7.974603174603175, "grad_norm": 1.8561100959777832, "learning_rate": 4.92761676983411e-06, "loss": 0.526, "step": 2512 }, { "epoch": 8.0, "eval_accuracy": 0.7058568103657901, "eval_f1": 0.662676465048434, "eval_loss": 0.5571920871734619, "eval_precision": 0.6997671412894451, "eval_recall": 0.6589087948228982, "eval_runtime": 1.4516, "eval_samples_per_second": 896.228, "eval_steps_per_second": 28.244, "step": 2520 }, { "epoch": 8.971428571428572, "grad_norm": 3.5451998710632324, "learning_rate": 4.9076596201016856e-06, "loss": 0.523, "step": 2826 }, { "epoch": 9.0, "eval_accuracy": 0.7018619515753639, "eval_f1": 0.625374057862767, "eval_loss": 0.5914136171340942, "eval_precision": 0.7358493036678011, "eval_recall": 0.6327511530635204, "eval_runtime": 1.4534, "eval_samples_per_second": 895.149, "eval_steps_per_second": 28.21, "step": 2835 }, { "epoch": 9.968253968253968, "grad_norm": 5.0408406257629395, "learning_rate": 4.88532640100449e-06, "loss": 0.5171, "step": 3140 }, { "epoch": 10.0, "eval_accuracy": 0.7077847639559524, "eval_f1": 0.6786121509955985, "eval_loss": 0.5660568475723267, "eval_precision": 0.6936028147526183, "eval_recall": 0.6741239127178265, "eval_runtime": 1.4513, "eval_samples_per_second": 896.413, "eval_steps_per_second": 28.25, "step": 3150 }, { "epoch": 10.965079365079365, "grad_norm": 9.36005973815918, "learning_rate": 4.860639152733449e-06, "loss": 0.5147, "step": 3454 }, { "epoch": 11.0, "eval_accuracy": 0.701914058429152, "eval_f1": 0.6364582078339008, "eval_loss": 0.5687407851219177, "eval_precision": 0.7180110728497148, "eval_recall": 0.6392313775049545, "eval_runtime": 1.4526, "eval_samples_per_second": 895.624, "eval_steps_per_second": 28.225, "step": 3465 }, { "epoch": 11.961904761904762, "grad_norm": 3.487974166870117, "learning_rate": 4.833622238622079e-06, "loss": 0.5095, "step": 3768 }, { "epoch": 12.0, "eval_accuracy": 0.7085316288602495, "eval_f1": 0.6626885625948004, "eval_loss": 0.5603615045547485, "eval_precision": 0.7060931620291221, "eval_recall": 0.6591844125186699, "eval_runtime": 1.4523, "eval_samples_per_second": 895.797, "eval_steps_per_second": 28.23, "step": 3780 }, { "epoch": 12.958730158730159, "grad_norm": 5.526001930236816, "learning_rate": 4.804302321102816e-06, "loss": 0.508, "step": 4082 }, { "epoch": 13.0, "eval_accuracy": 0.7037377983117379, "eval_f1": 0.6584786752528541, "eval_loss": 0.5470253229141235, "eval_precision": 0.6984109009927764, "eval_recall": 0.6552206326603638, "eval_runtime": 1.4537, "eval_samples_per_second": 894.952, "eval_steps_per_second": 28.204, "step": 4095 }, { "epoch": 13.955555555555556, "grad_norm": 7.560145378112793, "learning_rate": 4.772708335394416e-06, "loss": 0.5029, "step": 4396 }, { "epoch": 14.0, "eval_accuracy": 0.713568624726439, "eval_f1": 0.6795572312733509, "eval_loss": 0.5456140637397766, "eval_precision": 0.7034572312241041, "eval_recall": 0.6744872926065051, "eval_runtime": 1.4534, "eval_samples_per_second": 895.17, "eval_steps_per_second": 28.211, "step": 4410 }, { "epoch": 14.952380952380953, "grad_norm": 5.061767578125, "learning_rate": 4.738871460946384e-06, "loss": 0.4971, "step": 4710 }, { "epoch": 15.0, "eval_accuracy": 0.7147323444610414, "eval_f1": 0.6712151153220013, "eval_loss": 0.5506519675254822, "eval_precision": 0.7131289966118773, "eval_recall": 0.66683731470523, "eval_runtime": 1.4555, "eval_samples_per_second": 893.846, "eval_steps_per_second": 28.169, "step": 4725 }, { "epoch": 15.94920634920635, "grad_norm": 3.6310696601867676, "learning_rate": 4.702825090668624e-06, "loss": 0.4883, "step": 5024 }, { "epoch": 16.0, "eval_accuracy": 0.7275853683954563, "eval_f1": 0.6800593771748749, "eval_loss": 0.5249310731887817, "eval_precision": 0.7385652261130415, "eval_recall": 0.6753129875296177, "eval_runtime": 1.4568, "eval_samples_per_second": 893.074, "eval_steps_per_second": 28.145, "step": 5040 }, { "epoch": 16.946031746031746, "grad_norm": 3.5576395988464355, "learning_rate": 4.664604797976672e-06, "loss": 0.4785, "step": 5338 }, { "epoch": 17.0, "eval_accuracy": 0.7470733317122312, "eval_f1": 0.7220592428721022, "eval_loss": 0.5220229625701904, "eval_precision": 0.7393366406970809, "eval_recall": 0.7157473225861406, "eval_runtime": 1.4557, "eval_samples_per_second": 893.714, "eval_steps_per_second": 28.165, "step": 5355 }, { "epoch": 17.942857142857143, "grad_norm": 3.7962605953216553, "learning_rate": 4.6242483016850204e-06, "loss": 0.4601, "step": 5652 }, { "epoch": 18.0, "eval_accuracy": 0.7800743391114044, "eval_f1": 0.7542114810572249, "eval_loss": 0.47945165634155273, "eval_precision": 0.7845629818690574, "eval_recall": 0.7447658710776668, "eval_runtime": 1.4542, "eval_samples_per_second": 894.647, "eval_steps_per_second": 28.194, "step": 5670 }, { "epoch": 18.93968253968254, "grad_norm": 6.58827543258667, "learning_rate": 4.581795428783211e-06, "loss": 0.438, "step": 5966 }, { "epoch": 19.0, "eval_accuracy": 0.8036266370236566, "eval_f1": 0.7794844647965122, "eval_loss": 0.45785731077194214, "eval_precision": 0.8159505749196594, "eval_recall": 0.7680218463402948, "eval_runtime": 1.4553, "eval_samples_per_second": 893.958, "eval_steps_per_second": 28.172, "step": 5985 }, { "epoch": 19.936507936507937, "grad_norm": 5.3035383224487305, "learning_rate": 4.537288075131401e-06, "loss": 0.4133, "step": 6280 }, { "epoch": 20.0, "eval_accuracy": 0.8041129676590127, "eval_f1": 0.797301642675663, "eval_loss": 0.4323329031467438, "eval_precision": 0.7942179843107702, "eval_recall": 0.8029752318176708, "eval_runtime": 1.452, "eval_samples_per_second": 895.983, "eval_steps_per_second": 28.236, "step": 6300 }, { "epoch": 20.933333333333334, "grad_norm": 5.589924335479736, "learning_rate": 4.490770164114206e-06, "loss": 0.3947, "step": 6594 }, { "epoch": 21.0, "eval_accuracy": 0.8210650640914302, "eval_f1": 0.8050070699973129, "eval_loss": 0.41711243987083435, "eval_precision": 0.821898032840692, "eval_recall": 0.7966116925782902, "eval_runtime": 1.4546, "eval_samples_per_second": 894.434, "eval_steps_per_second": 28.187, "step": 6615 }, { "epoch": 21.93015873015873, "grad_norm": 4.1770501136779785, "learning_rate": 4.442287603293625e-06, "loss": 0.3787, "step": 6908 }, { "epoch": 22.0, "eval_accuracy": 0.8151943585646299, "eval_f1": 0.807236518842082, "eval_loss": 0.4175125062465668, "eval_precision": 0.8051094915340469, "eval_recall": 0.8099929034678388, "eval_runtime": 1.4549, "eval_samples_per_second": 894.205, "eval_steps_per_second": 28.18, "step": 6930 }, { "epoch": 22.926984126984127, "grad_norm": 6.486811637878418, "learning_rate": 4.391888239103818e-06, "loss": 0.367, "step": 7222 }, { "epoch": 23.0, "eval_accuracy": 0.8269878764720187, "eval_f1": 0.8153345585743379, "eval_loss": 0.4084073305130005, "eval_precision": 0.8211330177764262, "eval_recall": 0.8111882260202181, "eval_runtime": 1.4534, "eval_samples_per_second": 895.129, "eval_steps_per_second": 28.209, "step": 7245 } ], "logging_steps": 314, "max_steps": 31500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }