{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 4290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7125847339630127, "learning_rate": 4.8333333333333334e-05, "loss": 0.2965, "step": 143 }, { "epoch": 1.0, "eval_accuracy": 0.9559476075153481, "eval_f1": 0.0, "eval_loss": 0.27843931317329407, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 24.1849, "eval_samples_per_second": 94.15, "eval_steps_per_second": 5.913, "step": 143 }, { "epoch": 2.0, "grad_norm": 3.002716064453125, "learning_rate": 4.666666666666667e-05, "loss": 0.2066, "step": 286 }, { "epoch": 2.0, "eval_accuracy": 0.960383493145901, "eval_f1": 0.06783493499152063, "eval_loss": 0.20140178501605988, "eval_precision": 0.1901743264659271, "eval_recall": 0.0412796697626419, "eval_runtime": 24.1804, "eval_samples_per_second": 94.167, "eval_steps_per_second": 5.914, "step": 286 }, { "epoch": 3.0, "grad_norm": 1.8616505861282349, "learning_rate": 4.5e-05, "loss": 0.1505, "step": 429 }, { "epoch": 3.0, "eval_accuracy": 0.968307364681969, "eval_f1": 0.2345959595959596, "eval_loss": 0.14462772011756897, "eval_precision": 0.260991712319146, "eval_recall": 0.21304896227496847, "eval_runtime": 24.263, "eval_samples_per_second": 93.846, "eval_steps_per_second": 5.894, "step": 429 }, { "epoch": 4.0, "grad_norm": 1.670642375946045, "learning_rate": 4.3333333333333334e-05, "loss": 0.1091, "step": 572 }, { "epoch": 4.0, "eval_accuracy": 0.9783935879216193, "eval_f1": 0.3906774174563004, "eval_loss": 0.09360472857952118, "eval_precision": 0.40133010882708586, "eval_recall": 0.38057562206169016, "eval_runtime": 24.3034, "eval_samples_per_second": 93.691, "eval_steps_per_second": 5.884, "step": 572 }, { "epoch": 5.0, "grad_norm": 3.672482967376709, "learning_rate": 4.166666666666667e-05, "loss": 0.0836, "step": 715 }, { "epoch": 5.0, "eval_accuracy": 0.9833169648393345, "eval_f1": 0.5013577732518669, "eval_loss": 0.07448223978281021, "eval_precision": 0.4948073701842546, "eval_recall": 0.5080839353285174, "eval_runtime": 24.3059, "eval_samples_per_second": 93.681, "eval_steps_per_second": 5.883, "step": 715 }, { "epoch": 6.0, "grad_norm": 1.8329507112503052, "learning_rate": 4e-05, "loss": 0.0659, "step": 858 }, { "epoch": 6.0, "eval_accuracy": 0.9875376130887135, "eval_f1": 0.6012593016599884, "eval_loss": 0.05498756095767021, "eval_precision": 0.600297176820208, "eval_recall": 0.602224515537209, "eval_runtime": 24.3002, "eval_samples_per_second": 93.703, "eval_steps_per_second": 5.885, "step": 858 }, { "epoch": 7.0, "grad_norm": 0.9239581823348999, "learning_rate": 3.8333333333333334e-05, "loss": 0.0527, "step": 1001 }, { "epoch": 7.0, "eval_accuracy": 0.9897184951628606, "eval_f1": 0.6688816308142809, "eval_loss": 0.04328082129359245, "eval_precision": 0.6571871196193427, "eval_recall": 0.6809998853342507, "eval_runtime": 24.2837, "eval_samples_per_second": 93.767, "eval_steps_per_second": 5.889, "step": 1001 }, { "epoch": 8.0, "grad_norm": 0.2763989269733429, "learning_rate": 3.6666666666666666e-05, "loss": 0.0431, "step": 1144 }, { "epoch": 8.0, "eval_accuracy": 0.9925051778131597, "eval_f1": 0.735930735930736, "eval_loss": 0.03102906234562397, "eval_precision": 0.7311827956989247, "eval_recall": 0.7407407407407407, "eval_runtime": 24.3205, "eval_samples_per_second": 93.625, "eval_steps_per_second": 5.88, "step": 1144 }, { "epoch": 9.0, "grad_norm": 0.40178802609443665, "learning_rate": 3.5e-05, "loss": 0.0335, "step": 1287 }, { "epoch": 9.0, "eval_accuracy": 0.9941914415345428, "eval_f1": 0.794018817204301, "eval_loss": 0.024718057364225388, "eval_precision": 0.7760262725779967, "eval_recall": 0.8128654970760234, "eval_runtime": 24.3191, "eval_samples_per_second": 93.63, "eval_steps_per_second": 5.88, "step": 1287 }, { "epoch": 10.0, "grad_norm": 1.2496235370635986, "learning_rate": 3.3333333333333335e-05, "loss": 0.0268, "step": 1430 }, { "epoch": 10.0, "eval_accuracy": 0.9952833079847366, "eval_f1": 0.833757277711831, "eval_loss": 0.019720738753676414, "eval_precision": 0.8221850613154961, "eval_recall": 0.8456599013874556, "eval_runtime": 24.5248, "eval_samples_per_second": 92.845, "eval_steps_per_second": 5.831, "step": 1430 }, { "epoch": 11.0, "grad_norm": 1.4848353862762451, "learning_rate": 3.1666666666666666e-05, "loss": 0.0229, "step": 1573 }, { "epoch": 11.0, "eval_accuracy": 0.996325284975718, "eval_f1": 0.8729726664398321, "eval_loss": 0.015156798996031284, "eval_precision": 0.8635700661954449, "eval_recall": 0.8825822726751519, "eval_runtime": 24.3164, "eval_samples_per_second": 93.64, "eval_steps_per_second": 5.881, "step": 1573 }, { "epoch": 12.0, "grad_norm": 0.7821120023727417, "learning_rate": 3e-05, "loss": 0.0174, "step": 1716 }, { "epoch": 12.0, "eval_accuracy": 0.9967543343249458, "eval_f1": 0.898472596585804, "eval_loss": 0.01352603081613779, "eval_precision": 0.8803785627819962, "eval_recall": 0.9173259947253756, "eval_runtime": 24.33, "eval_samples_per_second": 93.588, "eval_steps_per_second": 5.878, "step": 1716 }, { "epoch": 13.0, "grad_norm": 5.437506675720215, "learning_rate": 2.8333333333333335e-05, "loss": 0.0156, "step": 1859 }, { "epoch": 13.0, "eval_accuracy": 0.9975953280659567, "eval_f1": 0.9254986645450929, "eval_loss": 0.009106193669140339, "eval_precision": 0.9174177557458315, "eval_recall": 0.9337231968810916, "eval_runtime": 24.313, "eval_samples_per_second": 93.654, "eval_steps_per_second": 5.882, "step": 1859 }, { "epoch": 14.0, "grad_norm": 0.3559066355228424, "learning_rate": 2.6666666666666667e-05, "loss": 0.0126, "step": 2002 }, { "epoch": 14.0, "eval_accuracy": 0.9979673608903701, "eval_f1": 0.9396295449348272, "eval_loss": 0.007928353734314442, "eval_precision": 0.936951316839585, "eval_recall": 0.942323128081642, "eval_runtime": 24.3095, "eval_samples_per_second": 93.667, "eval_steps_per_second": 5.882, "step": 2002 }, { "epoch": 15.0, "grad_norm": 0.4652678966522217, "learning_rate": 2.5e-05, "loss": 0.0108, "step": 2145 }, { "epoch": 15.0, "eval_accuracy": 0.998198277815868, "eval_f1": 0.9444254640701515, "eval_loss": 0.006473761051893234, "eval_precision": 0.938016061531501, "eval_recall": 0.9509230592821925, "eval_runtime": 24.3088, "eval_samples_per_second": 93.67, "eval_steps_per_second": 5.883, "step": 2145 }, { "epoch": 16.0, "grad_norm": 1.0989309549331665, "learning_rate": 2.3333333333333336e-05, "loss": 0.0088, "step": 2288 }, { "epoch": 16.0, "eval_accuracy": 0.998377879869033, "eval_f1": 0.9585400627316796, "eval_loss": 0.006167967803776264, "eval_precision": 0.9534830950760155, "eval_recall": 0.963650957459007, "eval_runtime": 24.3119, "eval_samples_per_second": 93.658, "eval_steps_per_second": 5.882, "step": 2288 }, { "epoch": 17.0, "grad_norm": 0.5213710069656372, "learning_rate": 2.1666666666666667e-05, "loss": 0.0088, "step": 2431 }, { "epoch": 17.0, "eval_accuracy": 0.9980870955924801, "eval_f1": 0.950275002806151, "eval_loss": 0.006758366245776415, "eval_precision": 0.930636473562713, "eval_recall": 0.9707602339181286, "eval_runtime": 24.3235, "eval_samples_per_second": 93.613, "eval_steps_per_second": 5.879, "step": 2431 }, { "epoch": 18.0, "grad_norm": 0.7017818689346313, "learning_rate": 2e-05, "loss": 0.0074, "step": 2574 }, { "epoch": 18.0, "eval_accuracy": 0.9985745868796424, "eval_f1": 0.9625844546641685, "eval_loss": 0.0046825287863612175, "eval_precision": 0.9533288349077823, "eval_recall": 0.972021557160876, "eval_runtime": 24.3136, "eval_samples_per_second": 93.651, "eval_steps_per_second": 5.881, "step": 2574 }, { "epoch": 19.0, "grad_norm": 0.434883713722229, "learning_rate": 1.8333333333333333e-05, "loss": 0.0064, "step": 2717 }, { "epoch": 19.0, "eval_accuracy": 0.998613073033892, "eval_f1": 0.9669478003191248, "eval_loss": 0.0045896186493337154, "eval_precision": 0.9611419508326725, "eval_recall": 0.9728242174062608, "eval_runtime": 24.326, "eval_samples_per_second": 93.604, "eval_steps_per_second": 5.878, "step": 2717 }, { "epoch": 20.0, "grad_norm": 0.06784375011920929, "learning_rate": 1.6666666666666667e-05, "loss": 0.0061, "step": 2860 }, { "epoch": 20.0, "eval_accuracy": 0.9986572608406231, "eval_f1": 0.9679410252014401, "eval_loss": 0.004400221165269613, "eval_precision": 0.9647983595352017, "eval_recall": 0.9711042311661506, "eval_runtime": 24.7155, "eval_samples_per_second": 92.128, "eval_steps_per_second": 5.786, "step": 2860 }, { "epoch": 21.0, "grad_norm": 1.0147221088409424, "learning_rate": 1.5e-05, "loss": 0.0056, "step": 3003 }, { "epoch": 21.0, "eval_accuracy": 0.9989024318973246, "eval_f1": 0.9738651994497937, "eval_loss": 0.003714313032105565, "eval_precision": 0.9735304228257133, "eval_recall": 0.9742002063983488, "eval_runtime": 24.5602, "eval_samples_per_second": 92.711, "eval_steps_per_second": 5.822, "step": 3003 }, { "epoch": 22.0, "grad_norm": 0.09583359956741333, "learning_rate": 1.3333333333333333e-05, "loss": 0.0048, "step": 3146 }, { "epoch": 22.0, "eval_accuracy": 0.9988710728086768, "eval_f1": 0.9748474305595163, "eval_loss": 0.003601672360673547, "eval_precision": 0.9698138901497957, "eval_recall": 0.9799334938653824, "eval_runtime": 24.3093, "eval_samples_per_second": 93.668, "eval_steps_per_second": 5.883, "step": 3146 }, { "epoch": 23.0, "grad_norm": 0.09160174429416656, "learning_rate": 1.1666666666666668e-05, "loss": 0.0046, "step": 3289 }, { "epoch": 23.0, "eval_accuracy": 0.9989608738352593, "eval_f1": 0.9748989697763105, "eval_loss": 0.0032980283722281456, "eval_precision": 0.9679023508137432, "eval_recall": 0.9819974773535145, "eval_runtime": 24.3088, "eval_samples_per_second": 93.67, "eval_steps_per_second": 5.883, "step": 3289 }, { "epoch": 24.0, "grad_norm": 0.23728908598423004, "learning_rate": 1e-05, "loss": 0.0041, "step": 3432 }, { "epoch": 24.0, "eval_accuracy": 0.9989323655728521, "eval_f1": 0.9761823361823362, "eval_loss": 0.0033988505601882935, "eval_precision": 0.9702118020160834, "eval_recall": 0.9822268088521958, "eval_runtime": 24.2751, "eval_samples_per_second": 93.8, "eval_steps_per_second": 5.891, "step": 3432 }, { "epoch": 25.0, "grad_norm": 0.29415127635002136, "learning_rate": 8.333333333333334e-06, "loss": 0.0038, "step": 3575 }, { "epoch": 25.0, "eval_accuracy": 0.9989822550320646, "eval_f1": 0.9797035347776512, "eval_loss": 0.0032090507447719574, "eval_precision": 0.9742601201950335, "eval_recall": 0.9852081183350533, "eval_runtime": 24.269, "eval_samples_per_second": 93.823, "eval_steps_per_second": 5.892, "step": 3575 }, { "epoch": 26.0, "grad_norm": 0.5604017972946167, "learning_rate": 6.666666666666667e-06, "loss": 0.0036, "step": 3718 }, { "epoch": 26.0, "eval_accuracy": 0.9990150395338329, "eval_f1": 0.979810653587316, "eval_loss": 0.0030433752108365297, "eval_precision": 0.9746964711222058, "eval_recall": 0.984978786836372, "eval_runtime": 24.3179, "eval_samples_per_second": 93.635, "eval_steps_per_second": 5.88, "step": 3718 }, { "epoch": 27.0, "grad_norm": 0.12406046688556671, "learning_rate": 5e-06, "loss": 0.0035, "step": 3861 }, { "epoch": 27.0, "eval_accuracy": 0.9989965091632682, "eval_f1": 0.9798382503702017, "eval_loss": 0.003074992448091507, "eval_precision": 0.9734072649089057, "eval_recall": 0.98635477582846, "eval_runtime": 24.3593, "eval_samples_per_second": 93.476, "eval_steps_per_second": 5.87, "step": 3861 }, { "epoch": 28.0, "grad_norm": 0.4010084569454193, "learning_rate": 3.3333333333333333e-06, "loss": 0.0033, "step": 4004 }, { "epoch": 28.0, "eval_accuracy": 0.999039271556879, "eval_f1": 0.9810675182481752, "eval_loss": 0.002982645994052291, "eval_precision": 0.9758366420873511, "eval_recall": 0.98635477582846, "eval_runtime": 24.2989, "eval_samples_per_second": 93.708, "eval_steps_per_second": 5.885, "step": 4004 }, { "epoch": 29.0, "grad_norm": 0.22495581209659576, "learning_rate": 1.6666666666666667e-06, "loss": 0.0031, "step": 4147 }, { "epoch": 29.0, "eval_accuracy": 0.9990335699043975, "eval_f1": 0.9815342528211557, "eval_loss": 0.0029558425303548574, "eval_precision": 0.9757507082152974, "eval_recall": 0.9873867675725261, "eval_runtime": 24.3323, "eval_samples_per_second": 93.579, "eval_steps_per_second": 5.877, "step": 4147 }, { "epoch": 30.0, "grad_norm": 0.4037317931652069, "learning_rate": 0.0, "loss": 0.0031, "step": 4290 }, { "epoch": 30.0, "eval_accuracy": 0.999039271556879, "eval_f1": 0.982488163824083, "eval_loss": 0.002941250102594495, "eval_precision": 0.9775255391600454, "eval_recall": 0.9875014333218668, "eval_runtime": 24.2995, "eval_samples_per_second": 93.706, "eval_steps_per_second": 5.885, "step": 4290 } ], "logging_steps": 500, "max_steps": 4290, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.785257029315584e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }