| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "global_step": 1424, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9297752808988766e-05, | |
| "loss": 2.3997, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.6560856864654333, | |
| "eval_f1": 0.2732362821948488, | |
| "eval_loss": 2.096482753753662, | |
| "eval_precision": 0.5434298440979956, | |
| "eval_recall": 0.1824981301421092, | |
| "eval_runtime": 2.8754, | |
| "eval_samples_per_second": 123.811, | |
| "eval_steps_per_second": 30.953, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.859550561797753e-05, | |
| "loss": 1.9889, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.6641674780915288, | |
| "eval_f1": 0.23144399303321833, | |
| "eval_loss": 1.841402530670166, | |
| "eval_precision": 0.35626471686727634, | |
| "eval_recall": 0.201949860724234, | |
| "eval_runtime": 3.0688, | |
| "eval_samples_per_second": 116.007, | |
| "eval_steps_per_second": 29.002, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.7893258426966292e-05, | |
| "loss": 1.7798, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.678286270691334, | |
| "eval_f1": 0.30889901953592575, | |
| "eval_loss": 1.7000586986541748, | |
| "eval_precision": 0.46442073397410666, | |
| "eval_recall": 0.30013927576601673, | |
| "eval_runtime": 3.0638, | |
| "eval_samples_per_second": 116.196, | |
| "eval_steps_per_second": 29.049, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.7191011235955056e-05, | |
| "loss": 1.6758, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.6851022395326193, | |
| "eval_f1": 0.4112622913384574, | |
| "eval_loss": 1.5615102052688599, | |
| "eval_precision": 0.5168803704334585, | |
| "eval_recall": 0.46160794941282746, | |
| "eval_runtime": 3.0543, | |
| "eval_samples_per_second": 116.558, | |
| "eval_steps_per_second": 29.139, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.648876404494382e-05, | |
| "loss": 1.5373, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.7037974683544304, | |
| "eval_f1": 0.4145163524364727, | |
| "eval_loss": 1.5320079326629639, | |
| "eval_precision": 0.4973354384146672, | |
| "eval_recall": 0.4218608852755194, | |
| "eval_runtime": 3.106, | |
| "eval_samples_per_second": 114.618, | |
| "eval_steps_per_second": 28.654, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5786516853932585e-05, | |
| "loss": 1.5237, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.6741966893865629, | |
| "eval_f1": 0.4215682986210665, | |
| "eval_loss": 1.436463713645935, | |
| "eval_precision": 0.4890496360861274, | |
| "eval_recall": 0.5289735099337748, | |
| "eval_runtime": 3.3008, | |
| "eval_samples_per_second": 107.852, | |
| "eval_steps_per_second": 26.963, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.508426966292135e-05, | |
| "loss": 1.4836, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.7168451801363194, | |
| "eval_f1": 0.4672566121066839, | |
| "eval_loss": 1.3614311218261719, | |
| "eval_precision": 0.48757191926720095, | |
| "eval_recall": 0.49944812362030905, | |
| "eval_runtime": 3.0542, | |
| "eval_samples_per_second": 116.561, | |
| "eval_steps_per_second": 29.14, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.4382022471910113e-05, | |
| "loss": 1.2214, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 0.6756572541382668, | |
| "eval_f1": 0.46131327709157666, | |
| "eval_loss": 1.359670877456665, | |
| "eval_precision": 0.446314989587013, | |
| "eval_recall": 0.5764348785871964, | |
| "eval_runtime": 3.229, | |
| "eval_samples_per_second": 110.251, | |
| "eval_steps_per_second": 27.563, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.3679775280898877e-05, | |
| "loss": 1.1844, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_accuracy": 0.6653359298928919, | |
| "eval_f1": 0.47653942777539116, | |
| "eval_loss": 1.3439290523529053, | |
| "eval_precision": 0.4441930828791319, | |
| "eval_recall": 0.5929911699779249, | |
| "eval_runtime": 3.2134, | |
| "eval_samples_per_second": 110.785, | |
| "eval_steps_per_second": 27.696, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.2977528089887642e-05, | |
| "loss": 1.2122, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 0.7283349561830574, | |
| "eval_f1": 0.49999878403851467, | |
| "eval_loss": 1.279309630393982, | |
| "eval_precision": 0.5213615961013602, | |
| "eval_recall": 0.5292494481236203, | |
| "eval_runtime": 3.3746, | |
| "eval_samples_per_second": 105.495, | |
| "eval_steps_per_second": 26.374, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.2275280898876405e-05, | |
| "loss": 1.1619, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.7230769230769231, | |
| "eval_f1": 0.4887836337789245, | |
| "eval_loss": 1.3024553060531616, | |
| "eval_precision": 0.5019528496497354, | |
| "eval_recall": 0.5135209713024282, | |
| "eval_runtime": 3.4149, | |
| "eval_samples_per_second": 104.248, | |
| "eval_steps_per_second": 26.062, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.157303370786517e-05, | |
| "loss": 1.0476, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_accuracy": 0.7195715676728335, | |
| "eval_f1": 0.5047823749536667, | |
| "eval_loss": 1.2519958019256592, | |
| "eval_precision": 0.4975374148688318, | |
| "eval_recall": 0.5524282560706402, | |
| "eval_runtime": 3.0914, | |
| "eval_samples_per_second": 115.16, | |
| "eval_steps_per_second": 28.79, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.0870786516853932e-05, | |
| "loss": 1.1271, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_accuracy": 0.7352482960077896, | |
| "eval_f1": 0.5283455171867876, | |
| "eval_loss": 1.233576774597168, | |
| "eval_precision": 0.5226202101961099, | |
| "eval_recall": 0.5400110375275938, | |
| "eval_runtime": 2.9844, | |
| "eval_samples_per_second": 119.289, | |
| "eval_steps_per_second": 29.822, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.0168539325842697e-05, | |
| "loss": 1.065, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.7147030185004869, | |
| "eval_f1": 0.526531014037714, | |
| "eval_loss": 1.2058743238449097, | |
| "eval_precision": 0.4963744012861858, | |
| "eval_recall": 0.5905077262693157, | |
| "eval_runtime": 2.9747, | |
| "eval_samples_per_second": 119.675, | |
| "eval_steps_per_second": 29.919, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 9.466292134831461e-06, | |
| "loss": 0.9841, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_accuracy": 0.7189873417721518, | |
| "eval_f1": 0.5335199131439152, | |
| "eval_loss": 1.2104876041412354, | |
| "eval_precision": 0.497289342239156, | |
| "eval_recall": 0.6012693156732892, | |
| "eval_runtime": 2.9664, | |
| "eval_samples_per_second": 120.012, | |
| "eval_steps_per_second": 30.003, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.764044943820226e-06, | |
| "loss": 0.8402, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_accuracy": 0.7241480038948394, | |
| "eval_f1": 0.5318695086433913, | |
| "eval_loss": 1.2313593626022339, | |
| "eval_precision": 0.49658531359956104, | |
| "eval_recall": 0.5869205298013245, | |
| "eval_runtime": 3.0358, | |
| "eval_samples_per_second": 117.269, | |
| "eval_steps_per_second": 29.317, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 8.06179775280899e-06, | |
| "loss": 0.8774, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_accuracy": 0.7349561830574489, | |
| "eval_f1": 0.5437299456127834, | |
| "eval_loss": 1.232950210571289, | |
| "eval_precision": 0.5219091313104574, | |
| "eval_recall": 0.5800220750551877, | |
| "eval_runtime": 2.9613, | |
| "eval_samples_per_second": 120.219, | |
| "eval_steps_per_second": 30.055, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.359550561797754e-06, | |
| "loss": 0.8705, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_accuracy": 0.736222005842259, | |
| "eval_f1": 0.5357813426509713, | |
| "eval_loss": 1.23160719871521, | |
| "eval_precision": 0.5233646751377105, | |
| "eval_recall": 0.5629139072847682, | |
| "eval_runtime": 2.9653, | |
| "eval_samples_per_second": 120.057, | |
| "eval_steps_per_second": 30.014, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.6573033707865175e-06, | |
| "loss": 0.8113, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 0.7201557935735151, | |
| "eval_f1": 0.537801212068117, | |
| "eval_loss": 1.2007496356964111, | |
| "eval_precision": 0.4931233755174561, | |
| "eval_recall": 0.6128587196467992, | |
| "eval_runtime": 2.9774, | |
| "eval_samples_per_second": 119.567, | |
| "eval_steps_per_second": 29.892, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 5.955056179775281e-06, | |
| "loss": 0.838, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_accuracy": 0.7356377799415774, | |
| "eval_f1": 0.5536859104767312, | |
| "eval_loss": 1.2019802331924438, | |
| "eval_precision": 0.5212984586666464, | |
| "eval_recall": 0.6026490066225165, | |
| "eval_runtime": 2.9737, | |
| "eval_samples_per_second": 119.715, | |
| "eval_steps_per_second": 29.929, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 5.252808988764046e-06, | |
| "loss": 0.8193, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_accuracy": 0.7207400194741966, | |
| "eval_f1": 0.5384993944708429, | |
| "eval_loss": 1.1946783065795898, | |
| "eval_precision": 0.5114682731180041, | |
| "eval_recall": 0.6004415011037527, | |
| "eval_runtime": 2.9881, | |
| "eval_samples_per_second": 119.139, | |
| "eval_steps_per_second": 29.785, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 4.550561797752809e-06, | |
| "loss": 0.7141, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "eval_accuracy": 0.7366114897760467, | |
| "eval_f1": 0.5502255793929967, | |
| "eval_loss": 1.2296370267868042, | |
| "eval_precision": 0.5259775856114555, | |
| "eval_recall": 0.5963024282560706, | |
| "eval_runtime": 3.0151, | |
| "eval_samples_per_second": 118.071, | |
| "eval_steps_per_second": 29.518, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.848314606741573e-06, | |
| "loss": 0.6905, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_accuracy": 0.7345666991236611, | |
| "eval_f1": 0.5571197569439074, | |
| "eval_loss": 1.2248950004577637, | |
| "eval_precision": 0.5197886755754393, | |
| "eval_recall": 0.6109271523178808, | |
| "eval_runtime": 3.0012, | |
| "eval_samples_per_second": 118.617, | |
| "eval_steps_per_second": 29.654, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 3.146067415730337e-06, | |
| "loss": 0.6951, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "eval_accuracy": 0.7339824732229796, | |
| "eval_f1": 0.5589116173956266, | |
| "eval_loss": 1.229973316192627, | |
| "eval_precision": 0.5182250638435781, | |
| "eval_recall": 0.6103752759381899, | |
| "eval_runtime": 2.9574, | |
| "eval_samples_per_second": 120.376, | |
| "eval_steps_per_second": 30.094, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4438202247191012e-06, | |
| "loss": 0.6387, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "eval_accuracy": 0.7345666991236611, | |
| "eval_f1": 0.5571000509958351, | |
| "eval_loss": 1.2234023809432983, | |
| "eval_precision": 0.5270142770488044, | |
| "eval_recall": 0.6084437086092715, | |
| "eval_runtime": 2.9722, | |
| "eval_samples_per_second": 119.776, | |
| "eval_steps_per_second": 29.944, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 1.7415730337078653e-06, | |
| "loss": 0.6742, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "eval_accuracy": 0.7401168451801363, | |
| "eval_f1": 0.5594133884864436, | |
| "eval_loss": 1.2352250814437866, | |
| "eval_precision": 0.5322645232058062, | |
| "eval_recall": 0.5965783664459161, | |
| "eval_runtime": 2.9614, | |
| "eval_samples_per_second": 120.215, | |
| "eval_steps_per_second": 30.054, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.0393258426966294e-06, | |
| "loss": 0.6565, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.7366114897760467, | |
| "eval_f1": 0.5585737547750994, | |
| "eval_loss": 1.2389932870864868, | |
| "eval_precision": 0.5244359046194068, | |
| "eval_recall": 0.6026490066225165, | |
| "eval_runtime": 2.9582, | |
| "eval_samples_per_second": 120.344, | |
| "eval_steps_per_second": 30.086, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.3707865168539325e-07, | |
| "loss": 0.675, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "eval_accuracy": 0.7382667964946446, | |
| "eval_f1": 0.5614680191047273, | |
| "eval_loss": 1.2423402070999146, | |
| "eval_precision": 0.5289138247855666, | |
| "eval_recall": 0.6056843267108167, | |
| "eval_runtime": 2.9515, | |
| "eval_samples_per_second": 120.618, | |
| "eval_steps_per_second": 30.154, | |
| "step": 1400 | |
| } | |
| ], | |
| "max_steps": 1424, | |
| "num_train_epochs": 4, | |
| "total_flos": 133980467748696.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |