| { | |
| "best_metric": 0.7916666666666666, | |
| "best_model_checkpoint": "Human-action-convnext/checkpoint-234", | |
| "epoch": 2.9714285714285715, | |
| "eval_steps": 500, | |
| "global_step": 234, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 2.9202792644500732, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 0.985, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.8895421028137207, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.9998, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 3.1696712970733643, | |
| "learning_rate": 4.8571428571428576e-05, | |
| "loss": 0.9553, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 3.572809934616089, | |
| "learning_rate": 4.6190476190476194e-05, | |
| "loss": 0.9258, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 4.679945945739746, | |
| "learning_rate": 4.380952380952381e-05, | |
| "loss": 0.9274, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 5.121299743652344, | |
| "learning_rate": 4.1428571428571437e-05, | |
| "loss": 0.8875, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 4.941247940063477, | |
| "learning_rate": 3.904761904761905e-05, | |
| "loss": 0.9287, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.7726190476190476, | |
| "eval_loss": 0.809448778629303, | |
| "eval_runtime": 31.5426, | |
| "eval_samples_per_second": 79.892, | |
| "eval_steps_per_second": 2.505, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 3.490706205368042, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.8684, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 3.2836825847625732, | |
| "learning_rate": 3.428571428571429e-05, | |
| "loss": 0.8442, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 3.463827610015869, | |
| "learning_rate": 3.19047619047619e-05, | |
| "loss": 0.8199, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 4.105288982391357, | |
| "learning_rate": 2.9523809523809526e-05, | |
| "loss": 0.7896, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 3.594646692276001, | |
| "learning_rate": 2.714285714285714e-05, | |
| "loss": 0.8322, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 4.090346813201904, | |
| "learning_rate": 2.4761904761904762e-05, | |
| "loss": 0.7476, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 4.468383312225342, | |
| "learning_rate": 2.2380952380952384e-05, | |
| "loss": 0.7725, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 4.0550055503845215, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8263, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_accuracy": 0.7876984126984127, | |
| "eval_loss": 0.7395461201667786, | |
| "eval_runtime": 32.7967, | |
| "eval_samples_per_second": 76.837, | |
| "eval_steps_per_second": 2.409, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 3.7557804584503174, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 0.7784, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 3.753110647201538, | |
| "learning_rate": 1.5238095238095241e-05, | |
| "loss": 0.7744, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 3.3746166229248047, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 0.7588, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 3.5075440406799316, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 0.7543, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 3.45804762840271, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 0.7179, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 4.391757965087891, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.665, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 3.0836093425750732, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.7787, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 4.827792644500732, | |
| "learning_rate": 9.523809523809526e-07, | |
| "loss": 0.7472, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.7916666666666666, | |
| "eval_loss": 0.7180711627006531, | |
| "eval_runtime": 32.3605, | |
| "eval_samples_per_second": 77.873, | |
| "eval_steps_per_second": 2.441, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "step": 234, | |
| "total_flos": 7.528934622159176e+17, | |
| "train_loss": 0.8272387634994637, | |
| "train_runtime": 368.3522, | |
| "train_samples_per_second": 82.095, | |
| "train_steps_per_second": 0.635 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 234, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 7.528934622159176e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |