{ "best_global_step": 5040, "best_metric": 0.8438760541734693, "best_model_checkpoint": "outputs/runs/vit5/checkpoint-5040", "epoch": 16.0, "eval_steps": 500, "global_step": 5040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9968253968253968, "grad_norm": 48.960147857666016, "learning_rate": 4.999432333543028e-06, "loss": 4.2164, "step": 314 }, { "epoch": 1.0, "eval_accuracy": 0.8120646345156866, "eval_f1": 0.7880041194112739, "eval_loss": 1.4588117599487305, "eval_precision": 0.8369479599718508, "eval_recall": 0.7755522799379979, "eval_runtime": 1.628, "eval_samples_per_second": 799.123, "eval_steps_per_second": 25.184, "step": 315 }, { "epoch": 1.9936507936507937, "grad_norm": 15.666463851928711, "learning_rate": 4.996525669931999e-06, "loss": 1.5636, "step": 628 }, { "epoch": 2.0, "eval_accuracy": 0.8272790436657106, "eval_f1": 0.8200906148872792, "eval_loss": 0.8009101748466492, "eval_precision": 0.819129062168523, "eval_recall": 0.8211574663523349, "eval_runtime": 1.6386, "eval_samples_per_second": 793.961, "eval_steps_per_second": 25.021, "step": 630 }, { "epoch": 2.9904761904761905, "grad_norm": 19.285402297973633, "learning_rate": 4.991155236893945e-06, "loss": 1.1621, "step": 942 }, { "epoch": 3.0, "eval_accuracy": 0.8486576045322549, "eval_f1": 0.8387492829457615, "eval_loss": 0.6665186285972595, "eval_precision": 0.8476389137544842, "eval_recall": 0.833091047050099, "eval_runtime": 1.6403, "eval_samples_per_second": 793.151, "eval_steps_per_second": 24.996, "step": 945 }, { "epoch": 3.9873015873015873, "grad_norm": 28.716716766357422, "learning_rate": 4.983326334397891e-06, "loss": 0.9673, "step": 1256 }, { "epoch": 4.0, "eval_accuracy": 0.8452548502609966, "eval_f1": 0.8306632859859202, "eval_loss": 0.7017992734909058, "eval_precision": 0.8573660674461923, "eval_recall": 0.8196344002399345, "eval_runtime": 1.6405, "eval_samples_per_second": 793.027, "eval_steps_per_second": 24.992, "step": 1260 }, { "epoch": 4.984126984126984, "grad_norm": 62.80952072143555, "learning_rate": 4.97304668862541e-06, "loss": 0.8578, "step": 1570 }, { "epoch": 5.0, "eval_accuracy": 0.8483012951844792, "eval_f1": 0.8374094515073132, "eval_loss": 0.5731419920921326, "eval_precision": 0.8497690158969053, "eval_recall": 0.8303468421765257, "eval_runtime": 1.6424, "eval_samples_per_second": 792.155, "eval_steps_per_second": 24.964, "step": 1575 }, { "epoch": 5.980952380952381, "grad_norm": 28.359893798828125, "learning_rate": 4.9603264443458e-06, "loss": 0.7528, "step": 1884 }, { "epoch": 6.0, "eval_accuracy": 0.8424043754787907, "eval_f1": 0.8368542522225871, "eval_loss": 0.5212520360946655, "eval_precision": 0.8343696637314615, "eval_recall": 0.8403125137303477, "eval_runtime": 1.6431, "eval_samples_per_second": 791.777, "eval_steps_per_second": 24.952, "step": 1890 }, { "epoch": 6.977777777777778, "grad_norm": 13.184873580932617, "learning_rate": 4.945178154904432e-06, "loss": 0.6746, "step": 2198 }, { "epoch": 7.0, "eval_accuracy": 0.8452726657283853, "eval_f1": 0.8388474476297365, "eval_loss": 0.5077288746833801, "eval_precision": 0.8378019445498359, "eval_recall": 0.8400070136197805, "eval_runtime": 1.6424, "eval_samples_per_second": 792.11, "eval_steps_per_second": 24.963, "step": 2205 }, { "epoch": 7.974603174603175, "grad_norm": 9.050921440124512, "learning_rate": 4.92761676983411e-06, "loss": 0.6276, "step": 2512 }, { "epoch": 8.0, "eval_accuracy": 0.8498868717820812, "eval_f1": 0.8402758437579932, "eval_loss": 0.48333004117012024, "eval_precision": 0.8484423552268903, "eval_recall": 0.8349357523808886, "eval_runtime": 1.6444, "eval_samples_per_second": 791.192, "eval_steps_per_second": 24.934, "step": 2520 }, { "epoch": 8.971428571428572, "grad_norm": 14.836031913757324, "learning_rate": 4.9076596201016856e-06, "loss": 0.591, "step": 2826 }, { "epoch": 9.0, "eval_accuracy": 0.8510092462275748, "eval_f1": 0.8380468647887628, "eval_loss": 0.5170930624008179, "eval_precision": 0.8600643338742522, "eval_recall": 0.8279538920826988, "eval_runtime": 1.6449, "eval_samples_per_second": 790.913, "eval_steps_per_second": 24.925, "step": 2835 }, { "epoch": 9.968253968253968, "grad_norm": 17.490633010864258, "learning_rate": 4.88532640100449e-06, "loss": 0.5411, "step": 3140 }, { "epoch": 10.0, "eval_accuracy": 0.840872245283355, "eval_f1": 0.8357643191911723, "eval_loss": 0.4666968882083893, "eval_precision": 0.8328248424035201, "eval_recall": 0.8404652389103109, "eval_runtime": 1.6447, "eval_samples_per_second": 791.007, "eval_steps_per_second": 24.928, "step": 3150 }, { "epoch": 10.965079365079365, "grad_norm": 27.608245849609375, "learning_rate": 4.860639152733449e-06, "loss": 0.5193, "step": 3454 }, { "epoch": 11.0, "eval_accuracy": 0.8485863426626997, "eval_f1": 0.8406162312164841, "eval_loss": 0.4396151602268219, "eval_precision": 0.8434539842909232, "eval_recall": 0.8382779460247154, "eval_runtime": 1.6435, "eval_samples_per_second": 791.592, "eval_steps_per_second": 24.946, "step": 3465 }, { "epoch": 11.961904761904762, "grad_norm": 7.845660209655762, "learning_rate": 4.833622238622079e-06, "loss": 0.4943, "step": 3768 }, { "epoch": 12.0, "eval_accuracy": 0.8496374552386382, "eval_f1": 0.842659244060233, "eval_loss": 0.43954724073410034, "eval_precision": 0.843136531563339, "eval_recall": 0.8422001823877059, "eval_runtime": 1.6428, "eval_samples_per_second": 791.964, "eval_steps_per_second": 24.958, "step": 3780 }, { "epoch": 12.958730158730159, "grad_norm": 9.281864166259766, "learning_rate": 4.804302321102816e-06, "loss": 0.4715, "step": 4082 }, { "epoch": 13.0, "eval_accuracy": 0.8486576045322549, "eval_f1": 0.8384831543048918, "eval_loss": 0.478777676820755, "eval_precision": 0.8483098559263628, "eval_recall": 0.8324188900247866, "eval_runtime": 1.6428, "eval_samples_per_second": 791.931, "eval_steps_per_second": 24.957, "step": 4095 }, { "epoch": 13.955555555555556, "grad_norm": 21.69331169128418, "learning_rate": 4.772708335394416e-06, "loss": 0.4482, "step": 4396 }, { "epoch": 14.0, "eval_accuracy": 0.8356344978710516, "eval_f1": 0.8313378359536021, "eval_loss": 0.49439841508865356, "eval_precision": 0.8281393298498705, "eval_recall": 0.8386230277449718, "eval_runtime": 1.6423, "eval_samples_per_second": 792.192, "eval_steps_per_second": 24.965, "step": 4410 }, { "epoch": 14.952380952380953, "grad_norm": 11.402544021606445, "learning_rate": 4.738871460946384e-06, "loss": 0.4305, "step": 4710 }, { "epoch": 15.0, "eval_accuracy": 0.8504569667385223, "eval_f1": 0.8397878915405506, "eval_loss": 0.4550122618675232, "eval_precision": 0.8519608079867367, "eval_recall": 0.8327656095361328, "eval_runtime": 1.6443, "eval_samples_per_second": 791.219, "eval_steps_per_second": 24.935, "step": 4725 }, { "epoch": 15.94920634920635, "grad_norm": 9.356096267700195, "learning_rate": 4.702825090668624e-06, "loss": 0.4115, "step": 5024 }, { "epoch": 16.0, "eval_accuracy": 0.8531114713794516, "eval_f1": 0.8438760541734693, "eval_loss": 0.4762667417526245, "eval_precision": 0.8515227136302977, "eval_recall": 0.8387574009086176, "eval_runtime": 1.6403, "eval_samples_per_second": 793.152, "eval_steps_per_second": 24.996, "step": 5040 } ], "logging_steps": 314, "max_steps": 31500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5258284886974464.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }