{ "best_global_step": 2142, "best_metric": 0.8572526627868673, "best_model_checkpoint": "./data/ynat-model/checkpoint-2142", "epoch": 3.0, "eval_steps": 500, "global_step": 2142, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0700280112044818, "grad_norm": 4.37371301651001, "learning_rate": 4.88562091503268e-05, "loss": 0.368, "step": 50 }, { "epoch": 0.1400560224089636, "grad_norm": 2.0869579315185547, "learning_rate": 4.7689075630252105e-05, "loss": 0.3318, "step": 100 }, { "epoch": 0.21008403361344538, "grad_norm": 5.720030784606934, "learning_rate": 4.6521942110177405e-05, "loss": 0.3172, "step": 150 }, { "epoch": 0.2801120448179272, "grad_norm": 11.471839904785156, "learning_rate": 4.535480859010271e-05, "loss": 0.2934, "step": 200 }, { "epoch": 0.35014005602240894, "grad_norm": 3.376629114151001, "learning_rate": 4.418767507002801e-05, "loss": 0.3005, "step": 250 }, { "epoch": 0.42016806722689076, "grad_norm": 4.57407808303833, "learning_rate": 4.302054154995332e-05, "loss": 0.2652, "step": 300 }, { "epoch": 0.49019607843137253, "grad_norm": 4.265451431274414, "learning_rate": 4.185340802987862e-05, "loss": 0.2649, "step": 350 }, { "epoch": 0.5602240896358543, "grad_norm": 4.1893086433410645, "learning_rate": 4.068627450980392e-05, "loss": 0.2446, "step": 400 }, { "epoch": 0.6302521008403361, "grad_norm": 2.6485085487365723, "learning_rate": 3.951914098972922e-05, "loss": 0.2269, "step": 450 }, { "epoch": 0.7002801120448179, "grad_norm": 3.4019579887390137, "learning_rate": 3.8352007469654535e-05, "loss": 0.2324, "step": 500 }, { "epoch": 0.7703081232492998, "grad_norm": 1.2434275150299072, "learning_rate": 3.7184873949579834e-05, "loss": 0.2329, "step": 550 }, { "epoch": 0.8403361344537815, "grad_norm": 5.956329345703125, "learning_rate": 3.6017740429505134e-05, "loss": 0.207, "step": 600 }, { "epoch": 0.9103641456582633, "grad_norm": 2.210266351699829, "learning_rate": 3.485060690943044e-05, "loss": 0.2278, "step": 650 }, { "epoch": 0.9803921568627451, "grad_norm": 3.8561065196990967, "learning_rate": 3.3683473389355746e-05, "loss": 0.2255, "step": 700 }, { "epoch": 1.0, "eval_accuracy": 0.8529702426704733, "eval_f1": 0.8516564005044691, "eval_loss": 0.4749363362789154, "eval_precision": 0.8525157577020369, "eval_recall": 0.8535400561743023, "eval_runtime": 12.9676, "eval_samples_per_second": 702.29, "eval_steps_per_second": 11.028, "step": 714 }, { "epoch": 1.050420168067227, "grad_norm": 2.6144492626190186, "learning_rate": 3.2516339869281046e-05, "loss": 0.2636, "step": 750 }, { "epoch": 1.1204481792717087, "grad_norm": 4.134031772613525, "learning_rate": 3.134920634920635e-05, "loss": 0.2475, "step": 800 }, { "epoch": 1.1904761904761905, "grad_norm": 4.822914123535156, "learning_rate": 3.018207282913165e-05, "loss": 0.2398, "step": 850 }, { "epoch": 1.2605042016806722, "grad_norm": 4.078502655029297, "learning_rate": 2.901493930905696e-05, "loss": 0.2369, "step": 900 }, { "epoch": 1.330532212885154, "grad_norm": 5.511151313781738, "learning_rate": 2.784780578898226e-05, "loss": 0.2519, "step": 950 }, { "epoch": 1.4005602240896358, "grad_norm": 14.258254051208496, "learning_rate": 2.6680672268907564e-05, "loss": 0.2603, "step": 1000 }, { "epoch": 1.4705882352941178, "grad_norm": 37.48504638671875, "learning_rate": 2.5513538748832866e-05, "loss": 0.2329, "step": 1050 }, { "epoch": 1.5406162464985993, "grad_norm": 9.023612976074219, "learning_rate": 2.434640522875817e-05, "loss": 0.2524, "step": 1100 }, { "epoch": 1.6106442577030813, "grad_norm": 4.550325870513916, "learning_rate": 2.3179271708683476e-05, "loss": 0.2249, "step": 1150 }, { "epoch": 1.680672268907563, "grad_norm": 5.294163227081299, "learning_rate": 2.201213818860878e-05, "loss": 0.229, "step": 1200 }, { "epoch": 1.7507002801120448, "grad_norm": 5.975066661834717, "learning_rate": 2.084500466853408e-05, "loss": 0.2346, "step": 1250 }, { "epoch": 1.8207282913165266, "grad_norm": 9.633162498474121, "learning_rate": 1.9677871148459384e-05, "loss": 0.2388, "step": 1300 }, { "epoch": 1.8907563025210083, "grad_norm": 2.6880970001220703, "learning_rate": 1.851073762838469e-05, "loss": 0.2372, "step": 1350 }, { "epoch": 1.9607843137254903, "grad_norm": 5.544015884399414, "learning_rate": 1.734360410830999e-05, "loss": 0.2296, "step": 1400 }, { "epoch": 2.0, "eval_accuracy": 0.855825189414736, "eval_f1": 0.8561722862205893, "eval_loss": 0.44257503747940063, "eval_precision": 0.8489764689633209, "eval_recall": 0.865244799193242, "eval_runtime": 13.04, "eval_samples_per_second": 698.387, "eval_steps_per_second": 10.966, "step": 1428 }, { "epoch": 2.030812324929972, "grad_norm": 3.5343873500823975, "learning_rate": 1.6176470588235296e-05, "loss": 0.204, "step": 1450 }, { "epoch": 2.100840336134454, "grad_norm": 2.5762808322906494, "learning_rate": 1.5009337068160597e-05, "loss": 0.182, "step": 1500 }, { "epoch": 2.1708683473389354, "grad_norm": 2.4014556407928467, "learning_rate": 1.3842203548085902e-05, "loss": 0.1659, "step": 1550 }, { "epoch": 2.2408963585434174, "grad_norm": 5.196502685546875, "learning_rate": 1.2675070028011205e-05, "loss": 0.1721, "step": 1600 }, { "epoch": 2.310924369747899, "grad_norm": 2.636826992034912, "learning_rate": 1.1507936507936508e-05, "loss": 0.1781, "step": 1650 }, { "epoch": 2.380952380952381, "grad_norm": 3.6006743907928467, "learning_rate": 1.0340802987861812e-05, "loss": 0.1788, "step": 1700 }, { "epoch": 2.450980392156863, "grad_norm": 3.009672164916992, "learning_rate": 9.173669467787115e-06, "loss": 0.1574, "step": 1750 }, { "epoch": 2.5210084033613445, "grad_norm": 2.854926586151123, "learning_rate": 8.006535947712418e-06, "loss": 0.1511, "step": 1800 }, { "epoch": 2.5910364145658265, "grad_norm": 5.09183406829834, "learning_rate": 6.839402427637723e-06, "loss": 0.1993, "step": 1850 }, { "epoch": 2.661064425770308, "grad_norm": 1.9752312898635864, "learning_rate": 5.672268907563025e-06, "loss": 0.1701, "step": 1900 }, { "epoch": 2.73109243697479, "grad_norm": 3.669840097427368, "learning_rate": 4.505135387488329e-06, "loss": 0.1698, "step": 1950 }, { "epoch": 2.8011204481792715, "grad_norm": 2.8419978618621826, "learning_rate": 3.338001867413632e-06, "loss": 0.1637, "step": 2000 }, { "epoch": 2.8711484593837535, "grad_norm": 3.0742175579071045, "learning_rate": 2.1708683473389355e-06, "loss": 0.1591, "step": 2050 }, { "epoch": 2.9411764705882355, "grad_norm": 3.2953155040740967, "learning_rate": 1.0037348272642392e-06, "loss": 0.1686, "step": 2100 }, { "epoch": 3.0, "eval_accuracy": 0.8572526627868673, "eval_f1": 0.8569137587108026, "eval_loss": 0.4840508997440338, "eval_precision": 0.848038822955364, "eval_recall": 0.8670435119879699, "eval_runtime": 12.9882, "eval_samples_per_second": 701.177, "eval_steps_per_second": 11.01, "step": 2142 } ], "logging_steps": 50, "max_steps": 2142, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1616745095802300.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }