| { | |
| "best_global_step": 2142, | |
| "best_metric": 0.8572526627868673, | |
| "best_model_checkpoint": "./data/ynat-model/checkpoint-2142", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2142, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0700280112044818, | |
| "grad_norm": 4.37371301651001, | |
| "learning_rate": 4.88562091503268e-05, | |
| "loss": 0.368, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1400560224089636, | |
| "grad_norm": 2.0869579315185547, | |
| "learning_rate": 4.7689075630252105e-05, | |
| "loss": 0.3318, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21008403361344538, | |
| "grad_norm": 5.720030784606934, | |
| "learning_rate": 4.6521942110177405e-05, | |
| "loss": 0.3172, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2801120448179272, | |
| "grad_norm": 11.471839904785156, | |
| "learning_rate": 4.535480859010271e-05, | |
| "loss": 0.2934, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.35014005602240894, | |
| "grad_norm": 3.376629114151001, | |
| "learning_rate": 4.418767507002801e-05, | |
| "loss": 0.3005, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.42016806722689076, | |
| "grad_norm": 4.57407808303833, | |
| "learning_rate": 4.302054154995332e-05, | |
| "loss": 0.2652, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 4.265451431274414, | |
| "learning_rate": 4.185340802987862e-05, | |
| "loss": 0.2649, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5602240896358543, | |
| "grad_norm": 4.1893086433410645, | |
| "learning_rate": 4.068627450980392e-05, | |
| "loss": 0.2446, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6302521008403361, | |
| "grad_norm": 2.6485085487365723, | |
| "learning_rate": 3.951914098972922e-05, | |
| "loss": 0.2269, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7002801120448179, | |
| "grad_norm": 3.4019579887390137, | |
| "learning_rate": 3.8352007469654535e-05, | |
| "loss": 0.2324, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7703081232492998, | |
| "grad_norm": 1.2434275150299072, | |
| "learning_rate": 3.7184873949579834e-05, | |
| "loss": 0.2329, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 5.956329345703125, | |
| "learning_rate": 3.6017740429505134e-05, | |
| "loss": 0.207, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9103641456582633, | |
| "grad_norm": 2.210266351699829, | |
| "learning_rate": 3.485060690943044e-05, | |
| "loss": 0.2278, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 3.8561065196990967, | |
| "learning_rate": 3.3683473389355746e-05, | |
| "loss": 0.2255, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8529702426704733, | |
| "eval_f1": 0.8516564005044691, | |
| "eval_loss": 0.4749363362789154, | |
| "eval_precision": 0.8525157577020369, | |
| "eval_recall": 0.8535400561743023, | |
| "eval_runtime": 12.9676, | |
| "eval_samples_per_second": 702.29, | |
| "eval_steps_per_second": 11.028, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.050420168067227, | |
| "grad_norm": 2.6144492626190186, | |
| "learning_rate": 3.2516339869281046e-05, | |
| "loss": 0.2636, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1204481792717087, | |
| "grad_norm": 4.134031772613525, | |
| "learning_rate": 3.134920634920635e-05, | |
| "loss": 0.2475, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1904761904761905, | |
| "grad_norm": 4.822914123535156, | |
| "learning_rate": 3.018207282913165e-05, | |
| "loss": 0.2398, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2605042016806722, | |
| "grad_norm": 4.078502655029297, | |
| "learning_rate": 2.901493930905696e-05, | |
| "loss": 0.2369, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.330532212885154, | |
| "grad_norm": 5.511151313781738, | |
| "learning_rate": 2.784780578898226e-05, | |
| "loss": 0.2519, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4005602240896358, | |
| "grad_norm": 14.258254051208496, | |
| "learning_rate": 2.6680672268907564e-05, | |
| "loss": 0.2603, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 37.48504638671875, | |
| "learning_rate": 2.5513538748832866e-05, | |
| "loss": 0.2329, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5406162464985993, | |
| "grad_norm": 9.023612976074219, | |
| "learning_rate": 2.434640522875817e-05, | |
| "loss": 0.2524, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6106442577030813, | |
| "grad_norm": 4.550325870513916, | |
| "learning_rate": 2.3179271708683476e-05, | |
| "loss": 0.2249, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.680672268907563, | |
| "grad_norm": 5.294163227081299, | |
| "learning_rate": 2.201213818860878e-05, | |
| "loss": 0.229, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7507002801120448, | |
| "grad_norm": 5.975066661834717, | |
| "learning_rate": 2.084500466853408e-05, | |
| "loss": 0.2346, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8207282913165266, | |
| "grad_norm": 9.633162498474121, | |
| "learning_rate": 1.9677871148459384e-05, | |
| "loss": 0.2388, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8907563025210083, | |
| "grad_norm": 2.6880970001220703, | |
| "learning_rate": 1.851073762838469e-05, | |
| "loss": 0.2372, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 5.544015884399414, | |
| "learning_rate": 1.734360410830999e-05, | |
| "loss": 0.2296, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.855825189414736, | |
| "eval_f1": 0.8561722862205893, | |
| "eval_loss": 0.44257503747940063, | |
| "eval_precision": 0.8489764689633209, | |
| "eval_recall": 0.865244799193242, | |
| "eval_runtime": 13.04, | |
| "eval_samples_per_second": 698.387, | |
| "eval_steps_per_second": 10.966, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.030812324929972, | |
| "grad_norm": 3.5343873500823975, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 0.204, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.100840336134454, | |
| "grad_norm": 2.5762808322906494, | |
| "learning_rate": 1.5009337068160597e-05, | |
| "loss": 0.182, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.1708683473389354, | |
| "grad_norm": 2.4014556407928467, | |
| "learning_rate": 1.3842203548085902e-05, | |
| "loss": 0.1659, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.2408963585434174, | |
| "grad_norm": 5.196502685546875, | |
| "learning_rate": 1.2675070028011205e-05, | |
| "loss": 0.1721, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.310924369747899, | |
| "grad_norm": 2.636826992034912, | |
| "learning_rate": 1.1507936507936508e-05, | |
| "loss": 0.1781, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 3.6006743907928467, | |
| "learning_rate": 1.0340802987861812e-05, | |
| "loss": 0.1788, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.450980392156863, | |
| "grad_norm": 3.009672164916992, | |
| "learning_rate": 9.173669467787115e-06, | |
| "loss": 0.1574, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.5210084033613445, | |
| "grad_norm": 2.854926586151123, | |
| "learning_rate": 8.006535947712418e-06, | |
| "loss": 0.1511, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.5910364145658265, | |
| "grad_norm": 5.09183406829834, | |
| "learning_rate": 6.839402427637723e-06, | |
| "loss": 0.1993, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.661064425770308, | |
| "grad_norm": 1.9752312898635864, | |
| "learning_rate": 5.672268907563025e-06, | |
| "loss": 0.1701, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.73109243697479, | |
| "grad_norm": 3.669840097427368, | |
| "learning_rate": 4.505135387488329e-06, | |
| "loss": 0.1698, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.8011204481792715, | |
| "grad_norm": 2.8419978618621826, | |
| "learning_rate": 3.338001867413632e-06, | |
| "loss": 0.1637, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.8711484593837535, | |
| "grad_norm": 3.0742175579071045, | |
| "learning_rate": 2.1708683473389355e-06, | |
| "loss": 0.1591, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 3.2953155040740967, | |
| "learning_rate": 1.0037348272642392e-06, | |
| "loss": 0.1686, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8572526627868673, | |
| "eval_f1": 0.8569137587108026, | |
| "eval_loss": 0.4840508997440338, | |
| "eval_precision": 0.848038822955364, | |
| "eval_recall": 0.8670435119879699, | |
| "eval_runtime": 12.9882, | |
| "eval_samples_per_second": 701.177, | |
| "eval_steps_per_second": 11.01, | |
| "step": 2142 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2142, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1616745095802300.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |