{
  "best_global_step": 300,
  "best_metric": 0.991578947368421,
  "best_model_checkpoint": "./vit-tom-jerry-model/checkpoint-300",
  "epoch": 2.158273381294964,
  "eval_steps": 100,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03597122302158273,
      "grad_norm": 0.00730751920491457,
      "learning_rate": 0.00019870503597122302,
      "loss": 0.0425,
      "step": 10
    },
    {
      "epoch": 0.07194244604316546,
      "grad_norm": 0.005511483643203974,
      "learning_rate": 0.00019726618705035972,
      "loss": 0.0008,
      "step": 20
    },
    {
      "epoch": 0.1079136690647482,
      "grad_norm": 0.004138847813010216,
      "learning_rate": 0.00019582733812949641,
      "loss": 0.0006,
      "step": 30
    },
    {
      "epoch": 0.14388489208633093,
      "grad_norm": 0.003660564310848713,
      "learning_rate": 0.00019453237410071942,
      "loss": 0.025,
      "step": 40
    },
    {
      "epoch": 0.17985611510791366,
      "grad_norm": 0.002617336343973875,
      "learning_rate": 0.00019309352517985612,
      "loss": 0.0005,
      "step": 50
    },
    {
      "epoch": 0.2158273381294964,
      "grad_norm": 20.51651382446289,
      "learning_rate": 0.0001916546762589928,
      "loss": 0.0691,
      "step": 60
    },
    {
      "epoch": 0.2517985611510791,
      "grad_norm": 1.180568814277649,
      "learning_rate": 0.0001902158273381295,
      "loss": 0.3193,
      "step": 70
    },
    {
      "epoch": 0.28776978417266186,
      "grad_norm": 0.008585783652961254,
      "learning_rate": 0.00018892086330935253,
      "loss": 0.0207,
      "step": 80
    },
    {
      "epoch": 0.3237410071942446,
      "grad_norm": 0.009863360784947872,
      "learning_rate": 0.00018748201438848923,
      "loss": 0.105,
      "step": 90
    },
    {
      "epoch": 0.3597122302158273,
      "grad_norm": 0.12301458418369293,
      "learning_rate": 0.0001860431654676259,
      "loss": 0.0808,
      "step": 100
    },
    {
      "epoch": 0.3597122302158273,
      "eval_accuracy": 0.9705263157894737,
      "eval_f1": 0.9693514490081853,
      "eval_loss": 0.1167958602309227,
      "eval_precision": 0.9646464646464646,
      "eval_recall": 0.9759450171821306,
      "eval_runtime": 5.9396,
      "eval_samples_per_second": 79.972,
      "eval_steps_per_second": 10.102,
      "step": 100
    },
    {
      "epoch": 0.39568345323741005,
      "grad_norm": 0.0395534411072731,
      "learning_rate": 0.0001846043165467626,
      "loss": 0.0784,
      "step": 110
    },
    {
      "epoch": 0.4316546762589928,
      "grad_norm": 0.007597455754876137,
      "learning_rate": 0.0001831654676258993,
      "loss": 0.1622,
      "step": 120
    },
    {
      "epoch": 0.4676258992805755,
      "grad_norm": 0.008104625158011913,
      "learning_rate": 0.000181726618705036,
      "loss": 0.0327,
      "step": 130
    },
    {
      "epoch": 0.5035971223021583,
      "grad_norm": 0.012004414573311806,
      "learning_rate": 0.0001802877697841727,
      "loss": 0.0899,
      "step": 140
    },
    {
      "epoch": 0.539568345323741,
      "grad_norm": 0.023231692612171173,
      "learning_rate": 0.00017884892086330936,
      "loss": 0.153,
      "step": 150
    },
    {
      "epoch": 0.5755395683453237,
      "grad_norm": 0.04413440078496933,
      "learning_rate": 0.00017741007194244606,
      "loss": 0.1071,
      "step": 160
    },
    {
      "epoch": 0.6115107913669064,
      "grad_norm": 0.032176461070775986,
      "learning_rate": 0.00017597122302158273,
      "loss": 0.0832,
      "step": 170
    },
    {
      "epoch": 0.6474820143884892,
      "grad_norm": 0.01023666001856327,
      "learning_rate": 0.00017453237410071943,
      "loss": 0.0437,
      "step": 180
    },
    {
      "epoch": 0.6834532374100719,
      "grad_norm": 1.4527229070663452,
      "learning_rate": 0.00017309352517985612,
      "loss": 0.0117,
      "step": 190
    },
    {
      "epoch": 0.7194244604316546,
      "grad_norm": 0.05816151946783066,
      "learning_rate": 0.0001716546762589928,
      "loss": 0.212,
      "step": 200
    },
    {
      "epoch": 0.7194244604316546,
      "eval_accuracy": 0.9705263157894737,
      "eval_f1": 0.9691306446821153,
      "eval_loss": 0.12086797505617142,
      "eval_precision": 0.9666666666666666,
      "eval_recall": 0.9719483041984163,
      "eval_runtime": 5.9309,
      "eval_samples_per_second": 80.089,
      "eval_steps_per_second": 10.116,
      "step": 200
    },
    {
      "epoch": 0.7553956834532374,
      "grad_norm": 0.013493811711668968,
      "learning_rate": 0.00017021582733812952,
      "loss": 0.1312,
      "step": 210
    },
    {
      "epoch": 0.7913669064748201,
      "grad_norm": 0.01136984582990408,
      "learning_rate": 0.0001687769784172662,
      "loss": 0.0076,
      "step": 220
    },
    {
      "epoch": 0.8273381294964028,
      "grad_norm": 0.010255228728055954,
      "learning_rate": 0.0001673381294964029,
      "loss": 0.0016,
      "step": 230
    },
    {
      "epoch": 0.8633093525179856,
      "grad_norm": 0.006183688063174486,
      "learning_rate": 0.0001658992805755396,
      "loss": 0.0021,
      "step": 240
    },
    {
      "epoch": 0.8992805755395683,
      "grad_norm": 0.006337467581033707,
      "learning_rate": 0.00016446043165467626,
      "loss": 0.055,
      "step": 250
    },
    {
      "epoch": 0.935251798561151,
      "grad_norm": 0.005062475800514221,
      "learning_rate": 0.00016302158273381296,
      "loss": 0.0747,
      "step": 260
    },
    {
      "epoch": 0.9712230215827338,
      "grad_norm": 0.02078184485435486,
      "learning_rate": 0.00016158273381294963,
      "loss": 0.0664,
      "step": 270
    },
    {
      "epoch": 1.0071942446043165,
      "grad_norm": 0.008888340555131435,
      "learning_rate": 0.00016014388489208632,
      "loss": 0.0019,
      "step": 280
    },
    {
      "epoch": 1.0431654676258992,
      "grad_norm": 0.008093398064374924,
      "learning_rate": 0.00015870503597122305,
      "loss": 0.0011,
      "step": 290
    },
    {
      "epoch": 1.079136690647482,
      "grad_norm": 0.004925600253045559,
      "learning_rate": 0.00015726618705035972,
      "loss": 0.0008,
      "step": 300
    },
    {
      "epoch": 1.079136690647482,
      "eval_accuracy": 0.991578947368421,
      "eval_f1": 0.9911287912744658,
      "eval_loss": 0.0403330959379673,
      "eval_precision": 0.9911287912744658,
      "eval_recall": 0.9911287912744658,
      "eval_runtime": 6.5468,
      "eval_samples_per_second": 72.555,
      "eval_steps_per_second": 9.165,
      "step": 300
    },
    {
      "epoch": 1.1151079136690647,
      "grad_norm": 0.0044704582542181015,
      "learning_rate": 0.00015582733812949642,
      "loss": 0.0005,
      "step": 310
    },
    {
      "epoch": 1.1510791366906474,
      "grad_norm": 0.005276895128190517,
      "learning_rate": 0.0001543884892086331,
      "loss": 0.0006,
      "step": 320
    },
    {
      "epoch": 1.1870503597122302,
      "grad_norm": 0.004525118041783571,
      "learning_rate": 0.0001529496402877698,
      "loss": 0.0261,
      "step": 330
    },
    {
      "epoch": 1.223021582733813,
      "grad_norm": 0.1290452778339386,
      "learning_rate": 0.00015151079136690649,
      "loss": 0.0008,
      "step": 340
    },
    {
      "epoch": 1.2589928057553956,
      "grad_norm": 0.09148821979761124,
      "learning_rate": 0.00015007194244604316,
      "loss": 0.0008,
      "step": 350
    },
    {
      "epoch": 1.2949640287769784,
      "grad_norm": 0.0038220672868192196,
      "learning_rate": 0.00014863309352517985,
      "loss": 0.0008,
      "step": 360
    },
    {
      "epoch": 1.330935251798561,
      "grad_norm": 0.0043488466180861,
      "learning_rate": 0.00014719424460431655,
      "loss": 0.0536,
      "step": 370
    },
    {
      "epoch": 1.3669064748201438,
      "grad_norm": 0.007121060974895954,
      "learning_rate": 0.00014575539568345325,
      "loss": 0.01,
      "step": 380
    },
    {
      "epoch": 1.4028776978417266,
      "grad_norm": 0.0033652805723249912,
      "learning_rate": 0.00014431654676258995,
      "loss": 0.0023,
      "step": 390
    },
    {
      "epoch": 1.4388489208633093,
      "grad_norm": 9.669677734375,
      "learning_rate": 0.00014287769784172662,
      "loss": 0.0041,
      "step": 400
    },
    {
      "epoch": 1.4388489208633093,
      "eval_accuracy": 0.9894736842105263,
      "eval_f1": 0.9889220062596495,
      "eval_loss": 0.04642796143889427,
      "eval_precision": 0.9884436160298229,
      "eval_recall": 0.9894105782160466,
      "eval_runtime": 6.793,
      "eval_samples_per_second": 69.925,
      "eval_steps_per_second": 8.833,
      "step": 400
    },
    {
      "epoch": 1.474820143884892,
      "grad_norm": 0.003309508552774787,
      "learning_rate": 0.00014143884892086332,
      "loss": 0.0004,
      "step": 410
    },
    {
      "epoch": 1.5107913669064748,
      "grad_norm": 0.0035187567118555307,
      "learning_rate": 0.00014,
      "loss": 0.0005,
      "step": 420
    },
    {
      "epoch": 1.5467625899280577,
      "grad_norm": 0.0034841764718294144,
      "learning_rate": 0.00013856115107913669,
      "loss": 0.0005,
      "step": 430
    },
    {
      "epoch": 1.5827338129496402,
      "grad_norm": 0.04978319630026817,
      "learning_rate": 0.00013712230215827338,
      "loss": 0.0006,
      "step": 440
    },
    {
      "epoch": 1.6187050359712232,
      "grad_norm": 11.15152645111084,
      "learning_rate": 0.00013568345323741008,
      "loss": 0.0305,
      "step": 450
    },
    {
      "epoch": 1.6546762589928057,
      "grad_norm": 0.0030082084704190493,
      "learning_rate": 0.00013424460431654678,
      "loss": 0.0003,
      "step": 460
    },
    {
      "epoch": 1.6906474820143886,
      "grad_norm": 0.002781275659799576,
      "learning_rate": 0.00013280575539568345,
      "loss": 0.0003,
      "step": 470
    },
    {
      "epoch": 1.7266187050359711,
      "grad_norm": 0.0027191194240003824,
      "learning_rate": 0.00013136690647482015,
      "loss": 0.0005,
      "step": 480
    },
    {
      "epoch": 1.762589928057554,
      "grad_norm": 0.0029242518357932568,
      "learning_rate": 0.00012992805755395685,
      "loss": 0.0494,
      "step": 490
    },
    {
      "epoch": 1.7985611510791366,
      "grad_norm": 0.0035123827401548624,
      "learning_rate": 0.00012848920863309352,
      "loss": 0.0004,
      "step": 500
    },
    {
      "epoch": 1.7985611510791366,
      "eval_accuracy": 0.968421052631579,
      "eval_f1": 0.9671339412977595,
      "eval_loss": 0.1312914788722992,
      "eval_precision": 0.962668443925063,
      "eval_recall": 0.9732276258777828,
      "eval_runtime": 6.6073,
      "eval_samples_per_second": 71.89,
      "eval_steps_per_second": 9.081,
      "step": 500
    },
    {
      "epoch": 1.8345323741007196,
      "grad_norm": 0.0036016402300447226,
      "learning_rate": 0.00012705035971223022,
      "loss": 0.0997,
      "step": 510
    },
    {
      "epoch": 1.870503597122302,
      "grad_norm": 0.004752186127007008,
      "learning_rate": 0.0001256115107913669,
      "loss": 0.0297,
      "step": 520
    },
    {
      "epoch": 1.906474820143885,
      "grad_norm": 0.005383871030062437,
      "learning_rate": 0.0001241726618705036,
      "loss": 0.0007,
      "step": 530
    },
    {
      "epoch": 1.9424460431654675,
      "grad_norm": 0.0036646986845880747,
      "learning_rate": 0.0001227338129496403,
      "loss": 0.004,
      "step": 540
    },
    {
      "epoch": 1.9784172661870505,
      "grad_norm": 0.00526112737134099,
      "learning_rate": 0.00012129496402877698,
      "loss": 0.0652,
      "step": 550
    },
    {
      "epoch": 2.014388489208633,
      "grad_norm": 0.004464145749807358,
      "learning_rate": 0.00011985611510791368,
      "loss": 0.0006,
      "step": 560
    },
    {
      "epoch": 2.050359712230216,
      "grad_norm": 0.006042866501957178,
      "learning_rate": 0.00011841726618705036,
      "loss": 0.0007,
      "step": 570
    },
    {
      "epoch": 2.0863309352517985,
      "grad_norm": 0.1046941950917244,
      "learning_rate": 0.00011697841726618706,
      "loss": 0.001,
      "step": 580
    },
    {
      "epoch": 2.1223021582733814,
      "grad_norm": 0.0033078379929065704,
      "learning_rate": 0.00011553956834532376,
      "loss": 0.0004,
      "step": 590
    },
    {
      "epoch": 2.158273381294964,
      "grad_norm": 0.016841504722833633,
      "learning_rate": 0.00011410071942446043,
      "loss": 0.0005,
      "step": 600
    },
    {
      "epoch": 2.158273381294964,
      "eval_accuracy": 0.9810526315789474,
      "eval_f1": 0.9802102573360923,
      "eval_loss": 0.08548920601606369,
      "eval_precision": 0.9766839378238341,
      "eval_recall": 0.9845360824742269,
      "eval_runtime": 6.6607,
      "eval_samples_per_second": 71.314,
      "eval_steps_per_second": 9.008,
      "step": 600
    },
    {
      "epoch": 2.158273381294964,
      "step": 600,
      "total_flos": 3.711866302538957e+17,
      "train_loss": 0.03943447194062173,
      "train_runtime": 362.7821,
      "train_samples_per_second": 30.583,
      "train_steps_per_second": 3.832
    }
  ],
  "logging_steps": 10,
  "max_steps": 1390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.711866302538957e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}