| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 4290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5700756907463074, |
| "learning_rate": 4.8333333333333334e-05, |
| "loss": 0.2272, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9655276022321366, |
| "eval_f1": 0.19408766564729868, |
| "eval_loss": 0.1547357589006424, |
| "eval_precision": 0.28717948717948716, |
| "eval_recall": 0.14657428791377983, |
| "eval_runtime": 21.666, |
| "eval_samples_per_second": 105.095, |
| "eval_steps_per_second": 6.6, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.2222177982330322, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.1075, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.980311653089746, |
| "eval_f1": 0.43984323005250314, |
| "eval_loss": 0.08582812547683716, |
| "eval_precision": 0.42316448491747294, |
| "eval_recall": 0.45789068514241726, |
| "eval_runtime": 22.7729, |
| "eval_samples_per_second": 99.987, |
| "eval_steps_per_second": 6.279, |
| "step": 286 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.7700103521347046, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0709, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9860521108635791, |
| "eval_f1": 0.5721064728998294, |
| "eval_loss": 0.06103431060910225, |
| "eval_precision": 0.5517734553775744, |
| "eval_recall": 0.5939953810623556, |
| "eval_runtime": 21.6691, |
| "eval_samples_per_second": 105.081, |
| "eval_steps_per_second": 6.599, |
| "step": 429 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9487684369087219, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.0491, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9917055920044754, |
| "eval_f1": 0.708815028901734, |
| "eval_loss": 0.03741060197353363, |
| "eval_precision": 0.6678012253233492, |
| "eval_recall": 0.7551963048498845, |
| "eval_runtime": 21.671, |
| "eval_samples_per_second": 105.071, |
| "eval_steps_per_second": 6.599, |
| "step": 572 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.4067820310592651, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.0356, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9932296598225281, |
| "eval_f1": 0.7549240715681852, |
| "eval_loss": 0.02916356734931469, |
| "eval_precision": 0.7376230351109152, |
| "eval_recall": 0.7730561970746729, |
| "eval_runtime": 21.6799, |
| "eval_samples_per_second": 105.028, |
| "eval_steps_per_second": 6.596, |
| "step": 715 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.1893436908721924, |
| "learning_rate": 4e-05, |
| "loss": 0.0278, |
| "step": 858 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9959575633100934, |
| "eval_f1": 0.845959595959596, |
| "eval_loss": 0.017735477536916733, |
| "eval_precision": 0.8171904146936433, |
| "eval_recall": 0.8768283294842186, |
| "eval_runtime": 21.6891, |
| "eval_samples_per_second": 104.984, |
| "eval_steps_per_second": 6.593, |
| "step": 858 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.641640305519104, |
| "learning_rate": 3.8333333333333334e-05, |
| "loss": 0.0191, |
| "step": 1001 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9963252372584175, |
| "eval_f1": 0.8722375176724457, |
| "eval_loss": 0.01618688926100731, |
| "eval_precision": 0.8440380184331797, |
| "eval_recall": 0.9023864511162433, |
| "eval_runtime": 21.6805, |
| "eval_samples_per_second": 105.025, |
| "eval_steps_per_second": 6.596, |
| "step": 1001 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 1.4300249814987183, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.0152, |
| "step": 1144 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9976338402357857, |
| "eval_f1": 0.9126764771793531, |
| "eval_loss": 0.010939965024590492, |
| "eval_precision": 0.8863900174114916, |
| "eval_recall": 0.9405696689761355, |
| "eval_runtime": 21.6911, |
| "eval_samples_per_second": 104.974, |
| "eval_steps_per_second": 6.593, |
| "step": 1144 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.18491381406784058, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0112, |
| "step": 1287 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9980944210420196, |
| "eval_f1": 0.9343241011784132, |
| "eval_loss": 0.008244872093200684, |
| "eval_precision": 0.9115407147041593, |
| "eval_recall": 0.9582755966127791, |
| "eval_runtime": 21.705, |
| "eval_samples_per_second": 104.907, |
| "eval_steps_per_second": 6.588, |
| "step": 1287 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.6724687218666077, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0094, |
| "step": 1430 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9983059323993888, |
| "eval_f1": 0.9467041739527104, |
| "eval_loss": 0.006875106133520603, |
| "eval_precision": 0.9351156503454491, |
| "eval_recall": 0.9585835257890685, |
| "eval_runtime": 23.9754, |
| "eval_samples_per_second": 94.972, |
| "eval_steps_per_second": 5.964, |
| "step": 1430 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.32412150502204895, |
| "learning_rate": 3.1666666666666666e-05, |
| "loss": 0.0078, |
| "step": 1573 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9987605829806496, |
| "eval_f1": 0.9648733450677279, |
| "eval_loss": 0.005323363002389669, |
| "eval_precision": 0.9592209373097992, |
| "eval_recall": 0.9705927636643572, |
| "eval_runtime": 21.9432, |
| "eval_samples_per_second": 103.768, |
| "eval_steps_per_second": 6.517, |
| "step": 1573 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.09945496916770935, |
| "learning_rate": 3e-05, |
| "loss": 0.006, |
| "step": 1716 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9989266292798927, |
| "eval_f1": 0.9725165054506372, |
| "eval_loss": 0.004307625815272331, |
| "eval_precision": 0.9698361659776451, |
| "eval_recall": 0.975211701308699, |
| "eval_runtime": 21.7315, |
| "eval_samples_per_second": 104.779, |
| "eval_steps_per_second": 6.58, |
| "step": 1716 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.7686914205551147, |
| "learning_rate": 2.8333333333333335e-05, |
| "loss": 0.0054, |
| "step": 1859 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9990333733294062, |
| "eval_f1": 0.974905993400353, |
| "eval_loss": 0.003870479529723525, |
| "eval_precision": 0.9718482252141983, |
| "eval_recall": 0.9779830638953041, |
| "eval_runtime": 21.705, |
| "eval_samples_per_second": 104.907, |
| "eval_steps_per_second": 6.588, |
| "step": 1859 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.1096508800983429, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.0045, |
| "step": 2002 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9990887220958206, |
| "eval_f1": 0.9804735547355474, |
| "eval_loss": 0.004123942460864782, |
| "eval_precision": 0.9791186857055121, |
| "eval_recall": 0.9818321785989222, |
| "eval_runtime": 21.7016, |
| "eval_samples_per_second": 104.923, |
| "eval_steps_per_second": 6.589, |
| "step": 2002 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.12223342061042786, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0047, |
| "step": 2145 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9989997687212261, |
| "eval_f1": 0.9775668019294081, |
| "eval_loss": 0.003942839801311493, |
| "eval_precision": 0.9722814498933902, |
| "eval_recall": 0.9829099307159354, |
| "eval_runtime": 21.705, |
| "eval_samples_per_second": 104.907, |
| "eval_steps_per_second": 6.588, |
| "step": 2145 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.16396062076091766, |
| "learning_rate": 2.3333333333333336e-05, |
| "loss": 0.004, |
| "step": 2288 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.999011629171172, |
| "eval_f1": 0.9795793499043978, |
| "eval_loss": 0.003892383072525263, |
| "eval_precision": 0.9732522796352584, |
| "eval_recall": 0.9859892224788299, |
| "eval_runtime": 21.7402, |
| "eval_samples_per_second": 104.737, |
| "eval_steps_per_second": 6.578, |
| "step": 2288 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.8400848507881165, |
| "learning_rate": 2.1666666666666667e-05, |
| "loss": 0.0034, |
| "step": 2431 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9992231405285412, |
| "eval_f1": 0.9822594270793333, |
| "eval_loss": 0.002873779507353902, |
| "eval_precision": 0.9799264480539381, |
| "eval_recall": 0.9846035411855273, |
| "eval_runtime": 21.7422, |
| "eval_samples_per_second": 104.727, |
| "eval_steps_per_second": 6.577, |
| "step": 2431 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.33868208527565, |
| "learning_rate": 2e-05, |
| "loss": 0.0032, |
| "step": 2574 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9992666288450096, |
| "eval_f1": 0.9836191059399878, |
| "eval_loss": 0.002709166845306754, |
| "eval_precision": 0.9780788552291064, |
| "eval_recall": 0.9892224788298691, |
| "eval_runtime": 21.8511, |
| "eval_samples_per_second": 104.205, |
| "eval_steps_per_second": 6.544, |
| "step": 2574 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.061278849840164185, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.0027, |
| "step": 2717 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9992626753616943, |
| "eval_f1": 0.9852375826541596, |
| "eval_loss": 0.002752807689830661, |
| "eval_precision": 0.9840270311780065, |
| "eval_recall": 0.9864511162432641, |
| "eval_runtime": 21.8998, |
| "eval_samples_per_second": 103.974, |
| "eval_steps_per_second": 6.53, |
| "step": 2717 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.0640825405716896, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.0025, |
| "step": 2860 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9992943032282168, |
| "eval_f1": 0.985518351084208, |
| "eval_loss": 0.002415427938103676, |
| "eval_precision": 0.9809334960341671, |
| "eval_recall": 0.9901462663587375, |
| "eval_runtime": 21.8257, |
| "eval_samples_per_second": 104.326, |
| "eval_steps_per_second": 6.552, |
| "step": 2860 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.006860875524580479, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0021, |
| "step": 3003 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.999310117161478, |
| "eval_f1": 0.9845776106805801, |
| "eval_loss": 0.0023553830105811357, |
| "eval_precision": 0.9813398592841848, |
| "eval_recall": 0.9878367975365666, |
| "eval_runtime": 22.397, |
| "eval_samples_per_second": 101.665, |
| "eval_steps_per_second": 6.385, |
| "step": 3003 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.02819938398897648, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0021, |
| "step": 3146 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.9992883730032438, |
| "eval_f1": 0.9852579852579851, |
| "eval_loss": 0.002376557793468237, |
| "eval_precision": 0.982692602236177, |
| "eval_recall": 0.9878367975365666, |
| "eval_runtime": 22.5006, |
| "eval_samples_per_second": 101.197, |
| "eval_steps_per_second": 6.355, |
| "step": 3146 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.0058466424234211445, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.0021, |
| "step": 3289 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.9992725590699826, |
| "eval_f1": 0.986691142726021, |
| "eval_loss": 0.0024565632920712233, |
| "eval_precision": 0.9803921568627451, |
| "eval_recall": 0.9930715935334873, |
| "eval_runtime": 22.3944, |
| "eval_samples_per_second": 101.677, |
| "eval_steps_per_second": 6.386, |
| "step": 3289 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.2694251537322998, |
| "learning_rate": 1e-05, |
| "loss": 0.0022, |
| "step": 3432 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.9993081404198204, |
| "eval_f1": 0.986832031848109, |
| "eval_loss": 0.00239759124815464, |
| "eval_precision": 0.981422262829298, |
| "eval_recall": 0.9923017705927637, |
| "eval_runtime": 22.816, |
| "eval_samples_per_second": 99.798, |
| "eval_steps_per_second": 6.268, |
| "step": 3432 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.03165869787335396, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0019, |
| "step": 3575 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.999310117161478, |
| "eval_f1": 0.9854316822573225, |
| "eval_loss": 0.0023246784694492817, |
| "eval_precision": 0.9815182526347945, |
| "eval_recall": 0.9893764434180139, |
| "eval_runtime": 24.0045, |
| "eval_samples_per_second": 94.857, |
| "eval_steps_per_second": 5.957, |
| "step": 3575 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.30673667788505554, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.0019, |
| "step": 3718 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.999310117161478, |
| "eval_f1": 0.9859003831417625, |
| "eval_loss": 0.0022403087932616472, |
| "eval_precision": 0.9813882532418001, |
| "eval_recall": 0.9904541955350269, |
| "eval_runtime": 22.0046, |
| "eval_samples_per_second": 103.478, |
| "eval_steps_per_second": 6.499, |
| "step": 3718 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 0.0877186506986618, |
| "learning_rate": 5e-06, |
| "loss": 0.0018, |
| "step": 3861 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.999310117161478, |
| "eval_f1": 0.986613631148168, |
| "eval_loss": 0.0022849994711577892, |
| "eval_precision": 0.98038917604135, |
| "eval_recall": 0.9929176289453425, |
| "eval_runtime": 22.0794, |
| "eval_samples_per_second": 103.128, |
| "eval_steps_per_second": 6.477, |
| "step": 3861 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.01399229560047388, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0017, |
| "step": 4004 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.9993140706447934, |
| "eval_f1": 0.9865890106521572, |
| "eval_loss": 0.0022412315011024475, |
| "eval_precision": 0.9821483063777846, |
| "eval_recall": 0.9910700538876058, |
| "eval_runtime": 21.9558, |
| "eval_samples_per_second": 103.709, |
| "eval_steps_per_second": 6.513, |
| "step": 4004 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 0.31231626868247986, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0017, |
| "step": 4147 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.9993180241281087, |
| "eval_f1": 0.9872236248183, |
| "eval_loss": 0.0022601259406656027, |
| "eval_precision": 0.9811435523114356, |
| "eval_recall": 0.9933795227097767, |
| "eval_runtime": 21.674, |
| "eval_samples_per_second": 105.057, |
| "eval_steps_per_second": 6.598, |
| "step": 4147 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.4514252543449402, |
| "learning_rate": 0.0, |
| "loss": 0.0017, |
| "step": 4290 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.999316047386451, |
| "eval_f1": 0.9862301101591188, |
| "eval_loss": 0.0022356677800416946, |
| "eval_precision": 0.9800821043028737, |
| "eval_recall": 0.9924557351809083, |
| "eval_runtime": 21.6641, |
| "eval_samples_per_second": 105.105, |
| "eval_steps_per_second": 6.601, |
| "step": 4290 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 4290, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.785257029315584e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|