{
  "best_metric": 0.5740740740740741,
  "best_model_checkpoint": "./results/Vit-CBIS/checkpoint-330",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 495,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06060606060606061,
      "grad_norm": 0.9134721755981445,
      "learning_rate": 2.9393939393939394e-05,
      "loss": 0.6735,
      "step": 10
    },
    {
      "epoch": 0.12121212121212122,
      "grad_norm": 0.41935786604881287,
      "learning_rate": 2.8787878787878788e-05,
      "loss": 0.7149,
      "step": 20
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 2.244313955307007,
      "learning_rate": 2.8181818181818185e-05,
      "loss": 0.6885,
      "step": 30
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 0.9235630035400391,
      "learning_rate": 2.7575757575757578e-05,
      "loss": 0.6889,
      "step": 40
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 0.14859230816364288,
      "learning_rate": 2.696969696969697e-05,
      "loss": 0.697,
      "step": 50
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 1.2009950876235962,
      "learning_rate": 2.6363636363636365e-05,
      "loss": 0.6882,
      "step": 60
    },
    {
      "epoch": 0.42424242424242425,
      "grad_norm": 0.5788372159004211,
      "learning_rate": 2.575757575757576e-05,
      "loss": 0.6862,
      "step": 70
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 0.324853777885437,
      "learning_rate": 2.5151515151515152e-05,
      "loss": 0.6898,
      "step": 80
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.8690597414970398,
      "learning_rate": 2.454545454545455e-05,
      "loss": 0.6813,
      "step": 90
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 0.9127840995788574,
      "learning_rate": 2.3939393939393942e-05,
      "loss": 0.7099,
      "step": 100
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.8866621851921082,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.6999,
      "step": 110
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.705461323261261,
      "learning_rate": 2.272727272727273e-05,
      "loss": 0.6943,
      "step": 120
    },
    {
      "epoch": 0.7878787878787878,
      "grad_norm": 1.2404519319534302,
      "learning_rate": 2.212121212121212e-05,
      "loss": 0.6866,
      "step": 130
    },
    {
      "epoch": 0.8484848484848485,
      "grad_norm": 0.7197526693344116,
      "learning_rate": 2.1515151515151513e-05,
      "loss": 0.6921,
      "step": 140
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.5975974798202515,
      "learning_rate": 2.090909090909091e-05,
      "loss": 0.6876,
      "step": 150
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 0.814386248588562,
      "learning_rate": 2.0303030303030303e-05,
      "loss": 0.7022,
      "step": 160
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4470899470899471,
      "eval_loss": 0.6970731616020203,
      "eval_runtime": 35.3817,
      "eval_samples_per_second": 10.683,
      "eval_steps_per_second": 1.357,
      "step": 165
    },
    {
      "epoch": 1.0303030303030303,
      "grad_norm": 0.6936383843421936,
      "learning_rate": 1.9696969696969697e-05,
      "loss": 0.6918,
      "step": 170
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 0.5651612281799316,
      "learning_rate": 1.909090909090909e-05,
      "loss": 0.6903,
      "step": 180
    },
    {
      "epoch": 1.1515151515151516,
      "grad_norm": 0.1796492338180542,
      "learning_rate": 1.8484848484848484e-05,
      "loss": 0.7095,
      "step": 190
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 0.6916122436523438,
      "learning_rate": 1.7878787878787877e-05,
      "loss": 0.6983,
      "step": 200
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 1.9430723190307617,
      "learning_rate": 1.7272727272727274e-05,
      "loss": 0.6975,
      "step": 210
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.5348889827728271,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.6886,
      "step": 220
    },
    {
      "epoch": 1.393939393939394,
      "grad_norm": 0.644706130027771,
      "learning_rate": 1.606060606060606e-05,
      "loss": 0.6841,
      "step": 230
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 2.1170523166656494,
      "learning_rate": 1.5454545454545454e-05,
      "loss": 0.7109,
      "step": 240
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 0.6115465760231018,
      "learning_rate": 1.484848484848485e-05,
      "loss": 0.6882,
      "step": 250
    },
    {
      "epoch": 1.5757575757575757,
      "grad_norm": 0.8241686820983887,
      "learning_rate": 1.4242424242424243e-05,
      "loss": 0.6981,
      "step": 260
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 1.836000680923462,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 0.6897,
      "step": 270
    },
    {
      "epoch": 1.696969696969697,
      "grad_norm": 0.6261163949966431,
      "learning_rate": 1.3030303030303032e-05,
      "loss": 0.6932,
      "step": 280
    },
    {
      "epoch": 1.7575757575757576,
      "grad_norm": 0.7731136679649353,
      "learning_rate": 1.2424242424242425e-05,
      "loss": 0.6859,
      "step": 290
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.28496983647346497,
      "learning_rate": 1.1818181818181819e-05,
      "loss": 0.6845,
      "step": 300
    },
    {
      "epoch": 1.878787878787879,
      "grad_norm": 0.30313462018966675,
      "learning_rate": 1.1212121212121212e-05,
      "loss": 0.6861,
      "step": 310
    },
    {
      "epoch": 1.9393939393939394,
      "grad_norm": 0.7996814846992493,
      "learning_rate": 1.0606060606060606e-05,
      "loss": 0.6988,
      "step": 320
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.020075798034668,
      "learning_rate": 9.999999999999999e-06,
      "loss": 0.6895,
      "step": 330
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5740740740740741,
      "eval_loss": 0.6877079606056213,
      "eval_runtime": 35.5495,
      "eval_samples_per_second": 10.633,
      "eval_steps_per_second": 1.35,
      "step": 330
    },
    {
      "epoch": 2.0606060606060606,
      "grad_norm": 0.3215593695640564,
      "learning_rate": 9.393939393939394e-06,
      "loss": 0.7026,
      "step": 340
    },
    {
      "epoch": 2.121212121212121,
      "grad_norm": 0.6477654576301575,
      "learning_rate": 8.787878787878788e-06,
      "loss": 0.6873,
      "step": 350
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 0.27149632573127747,
      "learning_rate": 8.181818181818181e-06,
      "loss": 0.6823,
      "step": 360
    },
    {
      "epoch": 2.242424242424242,
      "grad_norm": 0.7159335017204285,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 0.7014,
      "step": 370
    },
    {
      "epoch": 2.303030303030303,
      "grad_norm": 0.2240850031375885,
      "learning_rate": 6.96969696969697e-06,
      "loss": 0.6903,
      "step": 380
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 1.4085216522216797,
      "learning_rate": 6.363636363636364e-06,
      "loss": 0.7011,
      "step": 390
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 0.6638109087944031,
      "learning_rate": 5.757575757575758e-06,
      "loss": 0.6842,
      "step": 400
    },
    {
      "epoch": 2.484848484848485,
      "grad_norm": 0.7225008606910706,
      "learning_rate": 5.151515151515151e-06,
      "loss": 0.6887,
      "step": 410
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 0.23257039487361908,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.6993,
      "step": 420
    },
    {
      "epoch": 2.606060606060606,
      "grad_norm": 0.1906505525112152,
      "learning_rate": 3.93939393939394e-06,
      "loss": 0.6972,
      "step": 430
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.487804651260376,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.6879,
      "step": 440
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.8791880011558533,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 0.6917,
      "step": 450
    },
    {
      "epoch": 2.787878787878788,
      "grad_norm": 0.1857946664094925,
      "learning_rate": 2.121212121212121e-06,
      "loss": 0.6911,
      "step": 460
    },
    {
      "epoch": 2.8484848484848486,
      "grad_norm": 1.3687998056411743,
      "learning_rate": 1.5151515151515152e-06,
      "loss": 0.6889,
      "step": 470
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 0.6968662738800049,
      "learning_rate": 9.090909090909091e-07,
      "loss": 0.6969,
      "step": 480
    },
    {
      "epoch": 2.9696969696969697,
      "grad_norm": 0.19741572439670563,
      "learning_rate": 3.0303030303030305e-07,
      "loss": 0.6969,
      "step": 490
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5264550264550265,
      "eval_loss": 0.6918376684188843,
      "eval_runtime": 35.4925,
      "eval_samples_per_second": 10.65,
      "eval_steps_per_second": 1.352,
      "step": 495
    },
    {
      "epoch": 3.0,
      "step": 495,
      "total_flos": 3.064033269360968e+17,
      "train_loss": 0.6929814497629802,
      "train_runtime": 561.8993,
      "train_samples_per_second": 7.037,
      "train_steps_per_second": 0.881
    }
  ],
  "logging_steps": 10,
  "max_steps": 495,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.064033269360968e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}