{
  "best_metric": 0.875,
  "best_model_checkpoint": "videomae_v1_rwf-2000/checkpoint-254",
  "epoch": 5.041831357048748,
  "eval_steps": 500,
  "global_step": 508,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004940711462450593,
      "grad_norm": 12.13394832611084,
      "learning_rate": 1.990118577075099e-05,
      "loss": 6.5747,
      "step": 10
    },
    {
      "epoch": 0.009881422924901186,
      "grad_norm": 11.626209259033203,
      "learning_rate": 1.9802371541501976e-05,
      "loss": 4.4884,
      "step": 20
    },
    {
      "epoch": 0.014822134387351778,
      "grad_norm": 10.6156644821167,
      "learning_rate": 1.9703557312252965e-05,
      "loss": 3.1466,
      "step": 30
    },
    {
      "epoch": 0.019762845849802372,
      "grad_norm": 9.512907028198242,
      "learning_rate": 1.9604743083003953e-05,
      "loss": 2.3691,
      "step": 40
    },
    {
      "epoch": 0.024703557312252964,
      "grad_norm": 9.6091947555542,
      "learning_rate": 1.9505928853754942e-05,
      "loss": 2.0846,
      "step": 50
    },
    {
      "epoch": 0.029644268774703556,
      "grad_norm": 9.88412094116211,
      "learning_rate": 1.940711462450593e-05,
      "loss": 1.8647,
      "step": 60
    },
    {
      "epoch": 0.03458498023715415,
      "grad_norm": 8.997875213623047,
      "learning_rate": 1.930830039525692e-05,
      "loss": 1.6523,
      "step": 70
    },
    {
      "epoch": 0.039525691699604744,
      "grad_norm": 13.033836364746094,
      "learning_rate": 1.920948616600791e-05,
      "loss": 1.4589,
      "step": 80
    },
    {
      "epoch": 0.041501976284584984,
      "eval_accuracy": 0.7,
      "eval_f1": 0.6992481203007519,
      "eval_loss": 1.434981107711792,
      "eval_precision": 0.702020202020202,
      "eval_runtime": 83.5513,
      "eval_samples_per_second": 1.915,
      "eval_steps_per_second": 0.323,
      "step": 84
    },
    {
      "epoch": 1.002635046113307,
      "grad_norm": 7.341766357421875,
      "learning_rate": 1.9110671936758897e-05,
      "loss": 1.3173,
      "step": 90
    },
    {
      "epoch": 1.0075757575757576,
      "grad_norm": 9.149868965148926,
      "learning_rate": 1.9011857707509883e-05,
      "loss": 1.3309,
      "step": 100
    },
    {
      "epoch": 1.0125164690382082,
      "grad_norm": 6.38141393661499,
      "learning_rate": 1.891304347826087e-05,
      "loss": 1.067,
      "step": 110
    },
    {
      "epoch": 1.0174571805006587,
      "grad_norm": 7.950096130371094,
      "learning_rate": 1.881422924901186e-05,
      "loss": 0.9792,
      "step": 120
    },
    {
      "epoch": 1.0223978919631094,
      "grad_norm": 9.717514991760254,
      "learning_rate": 1.871541501976285e-05,
      "loss": 1.0362,
      "step": 130
    },
    {
      "epoch": 1.02733860342556,
      "grad_norm": 6.719062328338623,
      "learning_rate": 1.8616600790513834e-05,
      "loss": 0.8712,
      "step": 140
    },
    {
      "epoch": 1.0322793148880105,
      "grad_norm": 6.9779510498046875,
      "learning_rate": 1.8517786561264823e-05,
      "loss": 0.8797,
      "step": 150
    },
    {
      "epoch": 1.0372200263504612,
      "grad_norm": 8.313773155212402,
      "learning_rate": 1.8418972332015812e-05,
      "loss": 0.7575,
      "step": 160
    },
    {
      "epoch": 1.0416666666666667,
      "eval_accuracy": 0.78125,
      "eval_f1": 0.7738196211478655,
      "eval_loss": 0.8168264627456665,
      "eval_precision": 0.8237992444684294,
      "eval_runtime": 82.516,
      "eval_samples_per_second": 1.939,
      "eval_steps_per_second": 0.327,
      "step": 169
    },
    {
      "epoch": 2.0003293807641636,
      "grad_norm": 8.686193466186523,
      "learning_rate": 1.83201581027668e-05,
      "loss": 0.8684,
      "step": 170
    },
    {
      "epoch": 2.005270092226614,
      "grad_norm": 6.766555309295654,
      "learning_rate": 1.8221343873517786e-05,
      "loss": 0.8991,
      "step": 180
    },
    {
      "epoch": 2.0102108036890645,
      "grad_norm": 7.252024173736572,
      "learning_rate": 1.8122529644268775e-05,
      "loss": 0.6937,
      "step": 190
    },
    {
      "epoch": 2.015151515151515,
      "grad_norm": 8.432581901550293,
      "learning_rate": 1.8023715415019763e-05,
      "loss": 0.6598,
      "step": 200
    },
    {
      "epoch": 2.020092226613966,
      "grad_norm": 8.45376205444336,
      "learning_rate": 1.7924901185770752e-05,
      "loss": 0.5522,
      "step": 210
    },
    {
      "epoch": 2.0250329380764165,
      "grad_norm": 12.200201034545898,
      "learning_rate": 1.782608695652174e-05,
      "loss": 0.66,
      "step": 220
    },
    {
      "epoch": 2.0299736495388667,
      "grad_norm": 7.500545501708984,
      "learning_rate": 1.772727272727273e-05,
      "loss": 0.5698,
      "step": 230
    },
    {
      "epoch": 2.0349143610013174,
      "grad_norm": 5.759820461273193,
      "learning_rate": 1.762845849802372e-05,
      "loss": 0.494,
      "step": 240
    },
    {
      "epoch": 2.039855072463768,
      "grad_norm": 6.814697265625,
      "learning_rate": 1.7529644268774707e-05,
      "loss": 0.6272,
      "step": 250
    },
    {
      "epoch": 2.0418313570487485,
      "eval_accuracy": 0.875,
      "eval_f1": 0.8740355849472525,
      "eval_loss": 0.5021969079971313,
      "eval_precision": 0.8868471953578337,
      "eval_runtime": 80.0519,
      "eval_samples_per_second": 1.999,
      "eval_steps_per_second": 0.337,
      "step": 254
    },
    {
      "epoch": 3.0029644268774702,
      "grad_norm": 4.731899738311768,
      "learning_rate": 1.7430830039525693e-05,
      "loss": 0.4245,
      "step": 260
    },
    {
      "epoch": 3.007905138339921,
      "grad_norm": 15.187234878540039,
      "learning_rate": 1.733201581027668e-05,
      "loss": 0.6465,
      "step": 270
    },
    {
      "epoch": 3.0128458498023716,
      "grad_norm": 8.394930839538574,
      "learning_rate": 1.723320158102767e-05,
      "loss": 0.3855,
      "step": 280
    },
    {
      "epoch": 3.0177865612648223,
      "grad_norm": 13.709633827209473,
      "learning_rate": 1.713438735177866e-05,
      "loss": 0.2623,
      "step": 290
    },
    {
      "epoch": 3.022727272727273,
      "grad_norm": 13.21254825592041,
      "learning_rate": 1.7035573122529644e-05,
      "loss": 0.566,
      "step": 300
    },
    {
      "epoch": 3.027667984189723,
      "grad_norm": 17.71151351928711,
      "learning_rate": 1.6936758893280633e-05,
      "loss": 0.3897,
      "step": 310
    },
    {
      "epoch": 3.032608695652174,
      "grad_norm": 9.521211624145508,
      "learning_rate": 1.6837944664031622e-05,
      "loss": 0.405,
      "step": 320
    },
    {
      "epoch": 3.0375494071146245,
      "grad_norm": 10.889870643615723,
      "learning_rate": 1.673913043478261e-05,
      "loss": 0.4146,
      "step": 330
    },
    {
      "epoch": 3.041501976284585,
      "eval_accuracy": 0.86875,
      "eval_f1": 0.8686217008797653,
      "eval_loss": 0.4903731942176819,
      "eval_precision": 0.8701960784313727,
      "eval_runtime": 76.9154,
      "eval_samples_per_second": 2.08,
      "eval_steps_per_second": 0.351,
      "step": 338
    },
    {
      "epoch": 4.000658761528327,
      "grad_norm": 19.58037567138672,
      "learning_rate": 1.6640316205533596e-05,
      "loss": 0.3677,
      "step": 340
    },
    {
      "epoch": 4.005599472990777,
      "grad_norm": 11.428667068481445,
      "learning_rate": 1.6541501976284585e-05,
      "loss": 0.2998,
      "step": 350
    },
    {
      "epoch": 4.010540184453228,
      "grad_norm": 17.140522003173828,
      "learning_rate": 1.6442687747035574e-05,
      "loss": 0.404,
      "step": 360
    },
    {
      "epoch": 4.015480895915679,
      "grad_norm": 13.671965599060059,
      "learning_rate": 1.6343873517786562e-05,
      "loss": 0.3015,
      "step": 370
    },
    {
      "epoch": 4.020421607378129,
      "grad_norm": 20.311363220214844,
      "learning_rate": 1.624505928853755e-05,
      "loss": 0.3093,
      "step": 380
    },
    {
      "epoch": 4.02536231884058,
      "grad_norm": 18.359455108642578,
      "learning_rate": 1.614624505928854e-05,
      "loss": 0.5012,
      "step": 390
    },
    {
      "epoch": 4.03030303030303,
      "grad_norm": 11.816179275512695,
      "learning_rate": 1.604743083003953e-05,
      "loss": 0.3386,
      "step": 400
    },
    {
      "epoch": 4.0352437417654805,
      "grad_norm": 20.613216400146484,
      "learning_rate": 1.5948616600790514e-05,
      "loss": 0.3747,
      "step": 410
    },
    {
      "epoch": 4.040184453227932,
      "grad_norm": 1.1029576063156128,
      "learning_rate": 1.5849802371541503e-05,
      "loss": 0.2696,
      "step": 420
    },
    {
      "epoch": 4.041666666666667,
      "eval_accuracy": 0.85,
      "eval_f1": 0.85,
      "eval_loss": 0.47424378991127014,
      "eval_precision": 0.85,
      "eval_runtime": 80.2032,
      "eval_samples_per_second": 1.995,
      "eval_steps_per_second": 0.337,
      "step": 423
    },
    {
      "epoch": 5.003293807641634,
      "grad_norm": 24.193086624145508,
      "learning_rate": 1.575098814229249e-05,
      "loss": 0.2924,
      "step": 430
    },
    {
      "epoch": 5.008234519104084,
      "grad_norm": 13.23393440246582,
      "learning_rate": 1.565217391304348e-05,
      "loss": 0.3003,
      "step": 440
    },
    {
      "epoch": 5.013175230566535,
      "grad_norm": 6.054862022399902,
      "learning_rate": 1.5553359683794466e-05,
      "loss": 0.2834,
      "step": 450
    },
    {
      "epoch": 5.018115942028985,
      "grad_norm": 5.4301042556762695,
      "learning_rate": 1.5454545454545454e-05,
      "loss": 0.2696,
      "step": 460
    },
    {
      "epoch": 5.0230566534914365,
      "grad_norm": 7.172131061553955,
      "learning_rate": 1.5355731225296443e-05,
      "loss": 0.2452,
      "step": 470
    },
    {
      "epoch": 5.027997364953887,
      "grad_norm": 24.706287384033203,
      "learning_rate": 1.5256916996047434e-05,
      "loss": 0.2949,
      "step": 480
    },
    {
      "epoch": 5.032938076416337,
      "grad_norm": 37.07192611694336,
      "learning_rate": 1.5158102766798419e-05,
      "loss": 0.3073,
      "step": 490
    },
    {
      "epoch": 5.037878787878788,
      "grad_norm": 16.38689422607422,
      "learning_rate": 1.5059288537549408e-05,
      "loss": 0.3349,
      "step": 500
    },
    {
      "epoch": 5.041831357048748,
      "eval_accuracy": 0.875,
      "eval_f1": 0.8748239712095135,
      "eval_loss": 0.4332951009273529,
      "eval_precision": 0.8771213073538654,
      "eval_runtime": 79.3756,
      "eval_samples_per_second": 2.016,
      "eval_steps_per_second": 0.34,
      "step": 508
    },
    {
      "epoch": 5.041831357048748,
      "step": 508,
      "total_flos": 1.1541819109070832e+19,
      "train_loss": 0.9273178417851605,
      "train_runtime": 7038.3754,
      "train_samples_per_second": 5.176,
      "train_steps_per_second": 0.288
    },
    {
      "epoch": 5.041831357048748,
      "eval_accuracy": 0.845,
      "eval_f1": 0.8447195246413834,
      "eval_loss": 0.5974410772323608,
      "eval_precision": 0.8475107652791416,
      "eval_runtime": 395.5147,
      "eval_samples_per_second": 2.023,
      "eval_steps_per_second": 0.339,
      "step": 508
    }
  ],
  "logging_steps": 10,
  "max_steps": 2024,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1541819109070832e+19,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}