| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 20.148401260375977, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.3808, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8646616541353384, | |
| "eval_f1": 0.8186363636363636, | |
| "eval_loss": 0.379351943731308, | |
| "eval_precision": 0.8736897274633124, | |
| "eval_recall": 0.7917348608837971, | |
| "eval_runtime": 1.6409, | |
| "eval_samples_per_second": 243.152, | |
| "eval_steps_per_second": 30.47, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 21.554689407348633, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.221, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8721804511278195, | |
| "eval_f1": 0.8395201930584144, | |
| "eval_loss": 0.2850644886493683, | |
| "eval_precision": 0.8562091503267973, | |
| "eval_recall": 0.8270594653573378, | |
| "eval_runtime": 1.6504, | |
| "eval_samples_per_second": 241.765, | |
| "eval_steps_per_second": 30.296, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.09420396387577057, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.1363, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8717238211879976, | |
| "eval_loss": 0.38322028517723083, | |
| "eval_precision": 0.8757194133300328, | |
| "eval_recall": 0.8680214584469903, | |
| "eval_runtime": 1.6524, | |
| "eval_samples_per_second": 241.46, | |
| "eval_steps_per_second": 30.258, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 7.119666576385498, | |
| "learning_rate": 4e-05, | |
| "loss": 0.099, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8717112228173498, | |
| "eval_loss": 0.4968295693397522, | |
| "eval_precision": 0.8869295958279009, | |
| "eval_recall": 0.8597926895799237, | |
| "eval_runtime": 1.6515, | |
| "eval_samples_per_second": 241.593, | |
| "eval_steps_per_second": 30.275, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.1547642946243286, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0702, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8377439939939939, | |
| "eval_loss": 0.5204734802246094, | |
| "eval_precision": 0.8503401360544218, | |
| "eval_recall": 0.8277868703400618, | |
| "eval_runtime": 1.6524, | |
| "eval_samples_per_second": 241.469, | |
| "eval_steps_per_second": 30.259, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.08600271493196487, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0469, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.87468671679198, | |
| "eval_f1": 0.8448388501742161, | |
| "eval_loss": 0.5740100741386414, | |
| "eval_precision": 0.8551721930610677, | |
| "eval_recall": 0.8363338788870704, | |
| "eval_runtime": 1.6555, | |
| "eval_samples_per_second": 241.009, | |
| "eval_steps_per_second": 30.202, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.024254148826003075, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0328, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8629480286738351, | |
| "eval_loss": 0.6011895537376404, | |
| "eval_precision": 0.8580770590314599, | |
| "eval_recall": 0.8684306237497728, | |
| "eval_runtime": 1.6578, | |
| "eval_samples_per_second": 240.677, | |
| "eval_steps_per_second": 30.16, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.03784336522221565, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0284, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8737897035111135, | |
| "eval_loss": 0.5402500033378601, | |
| "eval_precision": 0.8812047813777917, | |
| "eval_recall": 0.8672940534642661, | |
| "eval_runtime": 1.6746, | |
| "eval_samples_per_second": 238.262, | |
| "eval_steps_per_second": 29.857, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.014071076177060604, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.019, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8727838950061173, | |
| "eval_loss": 0.5908846259117126, | |
| "eval_precision": 0.8656898656898657, | |
| "eval_recall": 0.8812511365702855, | |
| "eval_runtime": 1.6539, | |
| "eval_samples_per_second": 241.244, | |
| "eval_steps_per_second": 30.231, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.037436336278915405, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.016, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.8521068445832446, | |
| "eval_loss": 0.8931390047073364, | |
| "eval_precision": 0.8693800752624282, | |
| "eval_recall": 0.8391525731951264, | |
| "eval_runtime": 1.6526, | |
| "eval_samples_per_second": 241.431, | |
| "eval_steps_per_second": 30.254, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.01795610599219799, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.0167, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8972431077694235, | |
| "eval_f1": 0.8751002084335417, | |
| "eval_loss": 0.6617795825004578, | |
| "eval_precision": 0.8780701754385964, | |
| "eval_recall": 0.8722949627204946, | |
| "eval_runtime": 1.6571, | |
| "eval_samples_per_second": 240.783, | |
| "eval_steps_per_second": 30.173, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.007873360067605972, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0168, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8811928811928812, | |
| "eval_loss": 0.7512642741203308, | |
| "eval_precision": 0.8842105263157894, | |
| "eval_recall": 0.878341516639389, | |
| "eval_runtime": 1.6587, | |
| "eval_samples_per_second": 240.556, | |
| "eval_steps_per_second": 30.145, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0045745461247861385, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.0064, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.8778322106552358, | |
| "eval_loss": 0.751264750957489, | |
| "eval_precision": 0.8818924438393465, | |
| "eval_recall": 0.8740680123658847, | |
| "eval_runtime": 1.6656, | |
| "eval_samples_per_second": 239.555, | |
| "eval_steps_per_second": 30.019, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.002741220872849226, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0078, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8703663593044124, | |
| "eval_loss": 0.8151593208312988, | |
| "eval_precision": 0.8789149003479912, | |
| "eval_recall": 0.8630205491907619, | |
| "eval_runtime": 1.6585, | |
| "eval_samples_per_second": 240.577, | |
| "eval_steps_per_second": 30.147, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.004927061963826418, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0064, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.879667048676036, | |
| "eval_loss": 0.7460238337516785, | |
| "eval_precision": 0.8778361344537815, | |
| "eval_recall": 0.8815693762502272, | |
| "eval_runtime": 1.6712, | |
| "eval_samples_per_second": 238.744, | |
| "eval_steps_per_second": 29.918, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0015839393017813563, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0055, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8683279483657071, | |
| "eval_loss": 0.8232345581054688, | |
| "eval_precision": 0.873366724738676, | |
| "eval_recall": 0.863747954173486, | |
| "eval_runtime": 1.6703, | |
| "eval_samples_per_second": 238.876, | |
| "eval_steps_per_second": 29.934, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.0020133736543357372, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.006, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8717238211879976, | |
| "eval_loss": 0.8420803546905518, | |
| "eval_precision": 0.8757194133300328, | |
| "eval_recall": 0.8680214584469903, | |
| "eval_runtime": 1.6698, | |
| "eval_samples_per_second": 238.949, | |
| "eval_steps_per_second": 29.943, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0020168637856841087, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0052, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8872180451127819, | |
| "eval_f1": 0.8649563392675828, | |
| "eval_loss": 0.8441980481147766, | |
| "eval_precision": 0.8623655913978494, | |
| "eval_recall": 0.8677032187670486, | |
| "eval_runtime": 1.6705, | |
| "eval_samples_per_second": 238.849, | |
| "eval_steps_per_second": 29.931, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.0013460558839142323, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0035, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8663031558425733, | |
| "eval_loss": 0.8841463923454285, | |
| "eval_precision": 0.8682026944274341, | |
| "eval_recall": 0.8644753591562102, | |
| "eval_runtime": 1.6699, | |
| "eval_samples_per_second": 238.937, | |
| "eval_steps_per_second": 29.942, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0018115871353074908, | |
| "learning_rate": 0.0, | |
| "loss": 0.0013, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8690075356742023, | |
| "eval_loss": 0.8886067867279053, | |
| "eval_precision": 0.8719298245614036, | |
| "eval_recall": 0.8662484088016003, | |
| "eval_runtime": 1.6565, | |
| "eval_samples_per_second": 240.872, | |
| "eval_steps_per_second": 30.184, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7584162436176000.0, | |
| "train_loss": 0.05631163200271911, | |
| "train_runtime": 865.617, | |
| "train_samples_per_second": 84.056, | |
| "train_steps_per_second": 2.819 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7584162436176000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |