| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 4290, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7125847339630127, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "loss": 0.2965, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9559476075153481, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.27843931317329407, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 24.1849, | |
| "eval_samples_per_second": 94.15, | |
| "eval_steps_per_second": 5.913, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.002716064453125, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.2066, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.960383493145901, | |
| "eval_f1": 0.06783493499152063, | |
| "eval_loss": 0.20140178501605988, | |
| "eval_precision": 0.1901743264659271, | |
| "eval_recall": 0.0412796697626419, | |
| "eval_runtime": 24.1804, | |
| "eval_samples_per_second": 94.167, | |
| "eval_steps_per_second": 5.914, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.8616505861282349, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.1505, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.968307364681969, | |
| "eval_f1": 0.2345959595959596, | |
| "eval_loss": 0.14462772011756897, | |
| "eval_precision": 0.260991712319146, | |
| "eval_recall": 0.21304896227496847, | |
| "eval_runtime": 24.263, | |
| "eval_samples_per_second": 93.846, | |
| "eval_steps_per_second": 5.894, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.670642375946045, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.1091, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9783935879216193, | |
| "eval_f1": 0.3906774174563004, | |
| "eval_loss": 0.09360472857952118, | |
| "eval_precision": 0.40133010882708586, | |
| "eval_recall": 0.38057562206169016, | |
| "eval_runtime": 24.3034, | |
| "eval_samples_per_second": 93.691, | |
| "eval_steps_per_second": 5.884, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 3.672482967376709, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.0836, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9833169648393345, | |
| "eval_f1": 0.5013577732518669, | |
| "eval_loss": 0.07448223978281021, | |
| "eval_precision": 0.4948073701842546, | |
| "eval_recall": 0.5080839353285174, | |
| "eval_runtime": 24.3059, | |
| "eval_samples_per_second": 93.681, | |
| "eval_steps_per_second": 5.883, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.8329507112503052, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0659, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9875376130887135, | |
| "eval_f1": 0.6012593016599884, | |
| "eval_loss": 0.05498756095767021, | |
| "eval_precision": 0.600297176820208, | |
| "eval_recall": 0.602224515537209, | |
| "eval_runtime": 24.3002, | |
| "eval_samples_per_second": 93.703, | |
| "eval_steps_per_second": 5.885, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.9239581823348999, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "loss": 0.0527, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9897184951628606, | |
| "eval_f1": 0.6688816308142809, | |
| "eval_loss": 0.04328082129359245, | |
| "eval_precision": 0.6571871196193427, | |
| "eval_recall": 0.6809998853342507, | |
| "eval_runtime": 24.2837, | |
| "eval_samples_per_second": 93.767, | |
| "eval_steps_per_second": 5.889, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.2763989269733429, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.0431, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9925051778131597, | |
| "eval_f1": 0.735930735930736, | |
| "eval_loss": 0.03102906234562397, | |
| "eval_precision": 0.7311827956989247, | |
| "eval_recall": 0.7407407407407407, | |
| "eval_runtime": 24.3205, | |
| "eval_samples_per_second": 93.625, | |
| "eval_steps_per_second": 5.88, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.40178802609443665, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0335, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9941914415345428, | |
| "eval_f1": 0.794018817204301, | |
| "eval_loss": 0.024718057364225388, | |
| "eval_precision": 0.7760262725779967, | |
| "eval_recall": 0.8128654970760234, | |
| "eval_runtime": 24.3191, | |
| "eval_samples_per_second": 93.63, | |
| "eval_steps_per_second": 5.88, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.2496235370635986, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0268, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9952833079847366, | |
| "eval_f1": 0.833757277711831, | |
| "eval_loss": 0.019720738753676414, | |
| "eval_precision": 0.8221850613154961, | |
| "eval_recall": 0.8456599013874556, | |
| "eval_runtime": 24.5248, | |
| "eval_samples_per_second": 92.845, | |
| "eval_steps_per_second": 5.831, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 1.4848353862762451, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "loss": 0.0229, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.996325284975718, | |
| "eval_f1": 0.8729726664398321, | |
| "eval_loss": 0.015156798996031284, | |
| "eval_precision": 0.8635700661954449, | |
| "eval_recall": 0.8825822726751519, | |
| "eval_runtime": 24.3164, | |
| "eval_samples_per_second": 93.64, | |
| "eval_steps_per_second": 5.881, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.7821120023727417, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0174, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9967543343249458, | |
| "eval_f1": 0.898472596585804, | |
| "eval_loss": 0.01352603081613779, | |
| "eval_precision": 0.8803785627819962, | |
| "eval_recall": 0.9173259947253756, | |
| "eval_runtime": 24.33, | |
| "eval_samples_per_second": 93.588, | |
| "eval_steps_per_second": 5.878, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 5.437506675720215, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "loss": 0.0156, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9975953280659567, | |
| "eval_f1": 0.9254986645450929, | |
| "eval_loss": 0.009106193669140339, | |
| "eval_precision": 0.9174177557458315, | |
| "eval_recall": 0.9337231968810916, | |
| "eval_runtime": 24.313, | |
| "eval_samples_per_second": 93.654, | |
| "eval_steps_per_second": 5.882, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.3559066355228424, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.0126, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9979673608903701, | |
| "eval_f1": 0.9396295449348272, | |
| "eval_loss": 0.007928353734314442, | |
| "eval_precision": 0.936951316839585, | |
| "eval_recall": 0.942323128081642, | |
| "eval_runtime": 24.3095, | |
| "eval_samples_per_second": 93.667, | |
| "eval_steps_per_second": 5.882, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.4652678966522217, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0108, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.998198277815868, | |
| "eval_f1": 0.9444254640701515, | |
| "eval_loss": 0.006473761051893234, | |
| "eval_precision": 0.938016061531501, | |
| "eval_recall": 0.9509230592821925, | |
| "eval_runtime": 24.3088, | |
| "eval_samples_per_second": 93.67, | |
| "eval_steps_per_second": 5.883, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.0989309549331665, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.0088, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.998377879869033, | |
| "eval_f1": 0.9585400627316796, | |
| "eval_loss": 0.006167967803776264, | |
| "eval_precision": 0.9534830950760155, | |
| "eval_recall": 0.963650957459007, | |
| "eval_runtime": 24.3119, | |
| "eval_samples_per_second": 93.658, | |
| "eval_steps_per_second": 5.882, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.5213710069656372, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.0088, | |
| "step": 2431 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9980870955924801, | |
| "eval_f1": 0.950275002806151, | |
| "eval_loss": 0.006758366245776415, | |
| "eval_precision": 0.930636473562713, | |
| "eval_recall": 0.9707602339181286, | |
| "eval_runtime": 24.3235, | |
| "eval_samples_per_second": 93.613, | |
| "eval_steps_per_second": 5.879, | |
| "step": 2431 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.7017818689346313, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0074, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9985745868796424, | |
| "eval_f1": 0.9625844546641685, | |
| "eval_loss": 0.0046825287863612175, | |
| "eval_precision": 0.9533288349077823, | |
| "eval_recall": 0.972021557160876, | |
| "eval_runtime": 24.3136, | |
| "eval_samples_per_second": 93.651, | |
| "eval_steps_per_second": 5.881, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.434883713722229, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.0064, | |
| "step": 2717 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.998613073033892, | |
| "eval_f1": 0.9669478003191248, | |
| "eval_loss": 0.0045896186493337154, | |
| "eval_precision": 0.9611419508326725, | |
| "eval_recall": 0.9728242174062608, | |
| "eval_runtime": 24.326, | |
| "eval_samples_per_second": 93.604, | |
| "eval_steps_per_second": 5.878, | |
| "step": 2717 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.06784375011920929, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0061, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9986572608406231, | |
| "eval_f1": 0.9679410252014401, | |
| "eval_loss": 0.004400221165269613, | |
| "eval_precision": 0.9647983595352017, | |
| "eval_recall": 0.9711042311661506, | |
| "eval_runtime": 24.7155, | |
| "eval_samples_per_second": 92.128, | |
| "eval_steps_per_second": 5.786, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 1.0147221088409424, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0056, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.9989024318973246, | |
| "eval_f1": 0.9738651994497937, | |
| "eval_loss": 0.003714313032105565, | |
| "eval_precision": 0.9735304228257133, | |
| "eval_recall": 0.9742002063983488, | |
| "eval_runtime": 24.5602, | |
| "eval_samples_per_second": 92.711, | |
| "eval_steps_per_second": 5.822, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.09583359956741333, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0048, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.9988710728086768, | |
| "eval_f1": 0.9748474305595163, | |
| "eval_loss": 0.003601672360673547, | |
| "eval_precision": 0.9698138901497957, | |
| "eval_recall": 0.9799334938653824, | |
| "eval_runtime": 24.3093, | |
| "eval_samples_per_second": 93.668, | |
| "eval_steps_per_second": 5.883, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.09160174429416656, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.0046, | |
| "step": 3289 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.9989608738352593, | |
| "eval_f1": 0.9748989697763105, | |
| "eval_loss": 0.0032980283722281456, | |
| "eval_precision": 0.9679023508137432, | |
| "eval_recall": 0.9819974773535145, | |
| "eval_runtime": 24.3088, | |
| "eval_samples_per_second": 93.67, | |
| "eval_steps_per_second": 5.883, | |
| "step": 3289 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.23728908598423004, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0041, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.9989323655728521, | |
| "eval_f1": 0.9761823361823362, | |
| "eval_loss": 0.0033988505601882935, | |
| "eval_precision": 0.9702118020160834, | |
| "eval_recall": 0.9822268088521958, | |
| "eval_runtime": 24.2751, | |
| "eval_samples_per_second": 93.8, | |
| "eval_steps_per_second": 5.891, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.29415127635002136, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0038, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.9989822550320646, | |
| "eval_f1": 0.9797035347776512, | |
| "eval_loss": 0.0032090507447719574, | |
| "eval_precision": 0.9742601201950335, | |
| "eval_recall": 0.9852081183350533, | |
| "eval_runtime": 24.269, | |
| "eval_samples_per_second": 93.823, | |
| "eval_steps_per_second": 5.892, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.5604017972946167, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0036, | |
| "step": 3718 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.9990150395338329, | |
| "eval_f1": 0.979810653587316, | |
| "eval_loss": 0.0030433752108365297, | |
| "eval_precision": 0.9746964711222058, | |
| "eval_recall": 0.984978786836372, | |
| "eval_runtime": 24.3179, | |
| "eval_samples_per_second": 93.635, | |
| "eval_steps_per_second": 5.88, | |
| "step": 3718 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.12406046688556671, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0035, | |
| "step": 3861 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.9989965091632682, | |
| "eval_f1": 0.9798382503702017, | |
| "eval_loss": 0.003074992448091507, | |
| "eval_precision": 0.9734072649089057, | |
| "eval_recall": 0.98635477582846, | |
| "eval_runtime": 24.3593, | |
| "eval_samples_per_second": 93.476, | |
| "eval_steps_per_second": 5.87, | |
| "step": 3861 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.4010084569454193, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0033, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.999039271556879, | |
| "eval_f1": 0.9810675182481752, | |
| "eval_loss": 0.002982645994052291, | |
| "eval_precision": 0.9758366420873511, | |
| "eval_recall": 0.98635477582846, | |
| "eval_runtime": 24.2989, | |
| "eval_samples_per_second": 93.708, | |
| "eval_steps_per_second": 5.885, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.22495581209659576, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0031, | |
| "step": 4147 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.9990335699043975, | |
| "eval_f1": 0.9815342528211557, | |
| "eval_loss": 0.0029558425303548574, | |
| "eval_precision": 0.9757507082152974, | |
| "eval_recall": 0.9873867675725261, | |
| "eval_runtime": 24.3323, | |
| "eval_samples_per_second": 93.579, | |
| "eval_steps_per_second": 5.877, | |
| "step": 4147 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.4037317931652069, | |
| "learning_rate": 0.0, | |
| "loss": 0.0031, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.999039271556879, | |
| "eval_f1": 0.982488163824083, | |
| "eval_loss": 0.002941250102594495, | |
| "eval_precision": 0.9775255391600454, | |
| "eval_recall": 0.9875014333218668, | |
| "eval_runtime": 24.2995, | |
| "eval_samples_per_second": 93.706, | |
| "eval_steps_per_second": 5.885, | |
| "step": 4290 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 4290, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.785257029315584e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |