{ "best_metric": 0.875, "best_model_checkpoint": "videomae_v1_rwf-2000/checkpoint-254", "epoch": 5.041831357048748, "eval_steps": 500, "global_step": 508, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004940711462450593, "grad_norm": 12.13394832611084, "learning_rate": 1.990118577075099e-05, "loss": 6.5747, "step": 10 }, { "epoch": 0.009881422924901186, "grad_norm": 11.626209259033203, "learning_rate": 1.9802371541501976e-05, "loss": 4.4884, "step": 20 }, { "epoch": 0.014822134387351778, "grad_norm": 10.6156644821167, "learning_rate": 1.9703557312252965e-05, "loss": 3.1466, "step": 30 }, { "epoch": 0.019762845849802372, "grad_norm": 9.512907028198242, "learning_rate": 1.9604743083003953e-05, "loss": 2.3691, "step": 40 }, { "epoch": 0.024703557312252964, "grad_norm": 9.6091947555542, "learning_rate": 1.9505928853754942e-05, "loss": 2.0846, "step": 50 }, { "epoch": 0.029644268774703556, "grad_norm": 9.88412094116211, "learning_rate": 1.940711462450593e-05, "loss": 1.8647, "step": 60 }, { "epoch": 0.03458498023715415, "grad_norm": 8.997875213623047, "learning_rate": 1.930830039525692e-05, "loss": 1.6523, "step": 70 }, { "epoch": 0.039525691699604744, "grad_norm": 13.033836364746094, "learning_rate": 1.920948616600791e-05, "loss": 1.4589, "step": 80 }, { "epoch": 0.041501976284584984, "eval_accuracy": 0.7, "eval_f1": 0.6992481203007519, "eval_loss": 1.434981107711792, "eval_precision": 0.702020202020202, "eval_runtime": 83.5513, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.323, "step": 84 }, { "epoch": 1.002635046113307, "grad_norm": 7.341766357421875, "learning_rate": 1.9110671936758897e-05, "loss": 1.3173, "step": 90 }, { "epoch": 1.0075757575757576, "grad_norm": 9.149868965148926, "learning_rate": 1.9011857707509883e-05, "loss": 1.3309, "step": 100 }, { "epoch": 1.0125164690382082, "grad_norm": 6.38141393661499, "learning_rate": 1.891304347826087e-05, "loss": 1.067, "step": 110 }, { "epoch": 1.0174571805006587, "grad_norm": 7.950096130371094, "learning_rate": 1.881422924901186e-05, "loss": 0.9792, "step": 120 }, { "epoch": 1.0223978919631094, "grad_norm": 9.717514991760254, "learning_rate": 1.871541501976285e-05, "loss": 1.0362, "step": 130 }, { "epoch": 1.02733860342556, "grad_norm": 6.719062328338623, "learning_rate": 1.8616600790513834e-05, "loss": 0.8712, "step": 140 }, { "epoch": 1.0322793148880105, "grad_norm": 6.9779510498046875, "learning_rate": 1.8517786561264823e-05, "loss": 0.8797, "step": 150 }, { "epoch": 1.0372200263504612, "grad_norm": 8.313773155212402, "learning_rate": 1.8418972332015812e-05, "loss": 0.7575, "step": 160 }, { "epoch": 1.0416666666666667, "eval_accuracy": 0.78125, "eval_f1": 0.7738196211478655, "eval_loss": 0.8168264627456665, "eval_precision": 0.8237992444684294, "eval_runtime": 82.516, "eval_samples_per_second": 1.939, "eval_steps_per_second": 0.327, "step": 169 }, { "epoch": 2.0003293807641636, "grad_norm": 8.686193466186523, "learning_rate": 1.83201581027668e-05, "loss": 0.8684, "step": 170 }, { "epoch": 2.005270092226614, "grad_norm": 6.766555309295654, "learning_rate": 1.8221343873517786e-05, "loss": 0.8991, "step": 180 }, { "epoch": 2.0102108036890645, "grad_norm": 7.252024173736572, "learning_rate": 1.8122529644268775e-05, "loss": 0.6937, "step": 190 }, { "epoch": 2.015151515151515, "grad_norm": 8.432581901550293, "learning_rate": 1.8023715415019763e-05, "loss": 0.6598, "step": 200 }, { "epoch": 2.020092226613966, "grad_norm": 8.45376205444336, "learning_rate": 1.7924901185770752e-05, "loss": 0.5522, "step": 210 }, { "epoch": 2.0250329380764165, "grad_norm": 12.200201034545898, "learning_rate": 1.782608695652174e-05, "loss": 0.66, "step": 220 }, { "epoch": 2.0299736495388667, "grad_norm": 7.500545501708984, "learning_rate": 1.772727272727273e-05, "loss": 0.5698, "step": 230 }, { "epoch": 2.0349143610013174, "grad_norm": 5.759820461273193, "learning_rate": 1.762845849802372e-05, "loss": 0.494, "step": 240 }, { "epoch": 2.039855072463768, "grad_norm": 6.814697265625, "learning_rate": 1.7529644268774707e-05, "loss": 0.6272, "step": 250 }, { "epoch": 2.0418313570487485, "eval_accuracy": 0.875, "eval_f1": 0.8740355849472525, "eval_loss": 0.5021969079971313, "eval_precision": 0.8868471953578337, "eval_runtime": 80.0519, "eval_samples_per_second": 1.999, "eval_steps_per_second": 0.337, "step": 254 }, { "epoch": 3.0029644268774702, "grad_norm": 4.731899738311768, "learning_rate": 1.7430830039525693e-05, "loss": 0.4245, "step": 260 }, { "epoch": 3.007905138339921, "grad_norm": 15.187234878540039, "learning_rate": 1.733201581027668e-05, "loss": 0.6465, "step": 270 }, { "epoch": 3.0128458498023716, "grad_norm": 8.394930839538574, "learning_rate": 1.723320158102767e-05, "loss": 0.3855, "step": 280 }, { "epoch": 3.0177865612648223, "grad_norm": 13.709633827209473, "learning_rate": 1.713438735177866e-05, "loss": 0.2623, "step": 290 }, { "epoch": 3.022727272727273, "grad_norm": 13.21254825592041, "learning_rate": 1.7035573122529644e-05, "loss": 0.566, "step": 300 }, { "epoch": 3.027667984189723, "grad_norm": 17.71151351928711, "learning_rate": 1.6936758893280633e-05, "loss": 0.3897, "step": 310 }, { "epoch": 3.032608695652174, "grad_norm": 9.521211624145508, "learning_rate": 1.6837944664031622e-05, "loss": 0.405, "step": 320 }, { "epoch": 3.0375494071146245, "grad_norm": 10.889870643615723, "learning_rate": 1.673913043478261e-05, "loss": 0.4146, "step": 330 }, { "epoch": 3.041501976284585, "eval_accuracy": 0.86875, "eval_f1": 0.8686217008797653, "eval_loss": 0.4903731942176819, "eval_precision": 0.8701960784313727, "eval_runtime": 76.9154, "eval_samples_per_second": 2.08, "eval_steps_per_second": 0.351, "step": 338 }, { "epoch": 4.000658761528327, "grad_norm": 19.58037567138672, "learning_rate": 1.6640316205533596e-05, "loss": 0.3677, "step": 340 }, { "epoch": 4.005599472990777, "grad_norm": 11.428667068481445, "learning_rate": 1.6541501976284585e-05, "loss": 0.2998, "step": 350 }, { "epoch": 4.010540184453228, "grad_norm": 17.140522003173828, "learning_rate": 1.6442687747035574e-05, "loss": 0.404, "step": 360 }, { "epoch": 4.015480895915679, "grad_norm": 13.671965599060059, "learning_rate": 1.6343873517786562e-05, "loss": 0.3015, "step": 370 }, { "epoch": 4.020421607378129, "grad_norm": 20.311363220214844, "learning_rate": 1.624505928853755e-05, "loss": 0.3093, "step": 380 }, { "epoch": 4.02536231884058, "grad_norm": 18.359455108642578, "learning_rate": 1.614624505928854e-05, "loss": 0.5012, "step": 390 }, { "epoch": 4.03030303030303, "grad_norm": 11.816179275512695, "learning_rate": 1.604743083003953e-05, "loss": 0.3386, "step": 400 }, { "epoch": 4.0352437417654805, "grad_norm": 20.613216400146484, "learning_rate": 1.5948616600790514e-05, "loss": 0.3747, "step": 410 }, { "epoch": 4.040184453227932, "grad_norm": 1.1029576063156128, "learning_rate": 1.5849802371541503e-05, "loss": 0.2696, "step": 420 }, { "epoch": 4.041666666666667, "eval_accuracy": 0.85, "eval_f1": 0.85, "eval_loss": 0.47424378991127014, "eval_precision": 0.85, "eval_runtime": 80.2032, "eval_samples_per_second": 1.995, "eval_steps_per_second": 0.337, "step": 423 }, { "epoch": 5.003293807641634, "grad_norm": 24.193086624145508, "learning_rate": 1.575098814229249e-05, "loss": 0.2924, "step": 430 }, { "epoch": 5.008234519104084, "grad_norm": 13.23393440246582, "learning_rate": 1.565217391304348e-05, "loss": 0.3003, "step": 440 }, { "epoch": 5.013175230566535, "grad_norm": 6.054862022399902, "learning_rate": 1.5553359683794466e-05, "loss": 0.2834, "step": 450 }, { "epoch": 5.018115942028985, "grad_norm": 5.4301042556762695, "learning_rate": 1.5454545454545454e-05, "loss": 0.2696, "step": 460 }, { "epoch": 5.0230566534914365, "grad_norm": 7.172131061553955, "learning_rate": 1.5355731225296443e-05, "loss": 0.2452, "step": 470 }, { "epoch": 5.027997364953887, "grad_norm": 24.706287384033203, "learning_rate": 1.5256916996047434e-05, "loss": 0.2949, "step": 480 }, { "epoch": 5.032938076416337, "grad_norm": 37.07192611694336, "learning_rate": 1.5158102766798419e-05, "loss": 0.3073, "step": 490 }, { "epoch": 5.037878787878788, "grad_norm": 16.38689422607422, "learning_rate": 1.5059288537549408e-05, "loss": 0.3349, "step": 500 }, { "epoch": 5.041831357048748, "eval_accuracy": 0.875, "eval_f1": 0.8748239712095135, "eval_loss": 0.4332951009273529, "eval_precision": 0.8771213073538654, "eval_runtime": 79.3756, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.34, "step": 508 }, { "epoch": 5.041831357048748, "step": 508, "total_flos": 1.1541819109070832e+19, "train_loss": 0.9273178417851605, "train_runtime": 7038.3754, "train_samples_per_second": 5.176, "train_steps_per_second": 0.288 }, { "epoch": 5.041831357048748, "eval_accuracy": 0.845, "eval_f1": 0.8447195246413834, "eval_loss": 0.5974410772323608, "eval_precision": 0.8475107652791416, "eval_runtime": 395.5147, "eval_samples_per_second": 2.023, "eval_steps_per_second": 0.339, "step": 508 } ], "logging_steps": 10, "max_steps": 2024, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1541819109070832e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }