{ "best_global_step": 350, "best_metric": 0.19462016224861145, "best_model_checkpoint": "./vit-stroke-detector/checkpoint-350", "epoch": 3.0, "eval_steps": 50, "global_step": 375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 0.828596830368042, "learning_rate": 0.0001952, "loss": 0.6849, "step": 10 }, { "epoch": 0.16, "grad_norm": 1.6220552921295166, "learning_rate": 0.00018986666666666668, "loss": 0.6632, "step": 20 }, { "epoch": 0.24, "grad_norm": 1.931017279624939, "learning_rate": 0.00018453333333333334, "loss": 0.6737, "step": 30 }, { "epoch": 0.32, "grad_norm": 0.5325367450714111, "learning_rate": 0.00017920000000000002, "loss": 0.6441, "step": 40 }, { "epoch": 0.4, "grad_norm": 0.20815162360668182, "learning_rate": 0.00017386666666666667, "loss": 0.6959, "step": 50 }, { "epoch": 0.4, "eval_accuracy": 0.6007984031936128, "eval_loss": 0.6659401059150696, "eval_runtime": 12.6028, "eval_samples_per_second": 39.753, "eval_steps_per_second": 4.999, "step": 50 }, { "epoch": 0.48, "grad_norm": 0.6144075393676758, "learning_rate": 0.00016853333333333336, "loss": 0.6542, "step": 60 }, { "epoch": 0.56, "grad_norm": 0.3622696101665497, "learning_rate": 0.0001632, "loss": 0.6336, "step": 70 }, { "epoch": 0.64, "grad_norm": 1.7294650077819824, "learning_rate": 0.00015786666666666666, "loss": 0.6705, "step": 80 }, { "epoch": 0.72, "grad_norm": 1.2596920728683472, "learning_rate": 0.00015253333333333335, "loss": 0.6144, "step": 90 }, { "epoch": 0.8, "grad_norm": 2.432494640350342, "learning_rate": 0.0001472, "loss": 0.6038, "step": 100 }, { "epoch": 0.8, "eval_accuracy": 0.624750499001996, "eval_loss": 0.6349905133247375, "eval_runtime": 4.8798, "eval_samples_per_second": 102.668, "eval_steps_per_second": 12.91, "step": 100 }, { "epoch": 0.88, "grad_norm": 0.8303850293159485, "learning_rate": 0.00014186666666666668, "loss": 0.5625, "step": 110 }, { "epoch": 0.96, "grad_norm": 1.7314858436584473, "learning_rate": 0.00013653333333333334, "loss": 0.4656, "step": 120 }, { "epoch": 1.04, "grad_norm": 2.2635459899902344, "learning_rate": 0.00013120000000000002, "loss": 0.401, "step": 130 }, { "epoch": 1.12, "grad_norm": 1.9528553485870361, "learning_rate": 0.00012586666666666667, "loss": 0.5874, "step": 140 }, { "epoch": 1.2, "grad_norm": 2.2214958667755127, "learning_rate": 0.00012053333333333334, "loss": 0.4041, "step": 150 }, { "epoch": 1.2, "eval_accuracy": 0.7904191616766467, "eval_loss": 0.5236000418663025, "eval_runtime": 6.2426, "eval_samples_per_second": 80.255, "eval_steps_per_second": 10.092, "step": 150 }, { "epoch": 1.28, "grad_norm": 1.578341007232666, "learning_rate": 0.0001152, "loss": 0.4256, "step": 160 }, { "epoch": 1.3599999999999999, "grad_norm": 0.521945059299469, "learning_rate": 0.00010986666666666668, "loss": 0.3168, "step": 170 }, { "epoch": 1.44, "grad_norm": 1.1707364320755005, "learning_rate": 0.00010453333333333333, "loss": 0.4316, "step": 180 }, { "epoch": 1.52, "grad_norm": 1.4519349336624146, "learning_rate": 9.92e-05, "loss": 0.2944, "step": 190 }, { "epoch": 1.6, "grad_norm": 1.0237702131271362, "learning_rate": 9.386666666666667e-05, "loss": 0.3243, "step": 200 }, { "epoch": 1.6, "eval_accuracy": 0.874251497005988, "eval_loss": 0.3280556797981262, "eval_runtime": 4.9813, "eval_samples_per_second": 100.577, "eval_steps_per_second": 12.647, "step": 200 }, { "epoch": 1.6800000000000002, "grad_norm": 4.305712699890137, "learning_rate": 8.853333333333333e-05, "loss": 0.3118, "step": 210 }, { "epoch": 1.76, "grad_norm": 1.6635700464248657, "learning_rate": 8.32e-05, "loss": 0.2534, "step": 220 }, { "epoch": 1.8399999999999999, "grad_norm": 1.5301051139831543, "learning_rate": 7.786666666666667e-05, "loss": 0.2702, "step": 230 }, { "epoch": 1.92, "grad_norm": 0.7002310156822205, "learning_rate": 7.253333333333334e-05, "loss": 0.1926, "step": 240 }, { "epoch": 2.0, "grad_norm": 0.4949961304664612, "learning_rate": 6.720000000000001e-05, "loss": 0.2041, "step": 250 }, { "epoch": 2.0, "eval_accuracy": 0.8922155688622755, "eval_loss": 0.28237032890319824, "eval_runtime": 4.9486, "eval_samples_per_second": 101.242, "eval_steps_per_second": 12.731, "step": 250 }, { "epoch": 2.08, "grad_norm": 13.5546236038208, "learning_rate": 6.186666666666668e-05, "loss": 0.173, "step": 260 }, { "epoch": 2.16, "grad_norm": 7.708662986755371, "learning_rate": 5.6533333333333336e-05, "loss": 0.1206, "step": 270 }, { "epoch": 2.24, "grad_norm": 0.44631195068359375, "learning_rate": 5.1200000000000004e-05, "loss": 0.1615, "step": 280 }, { "epoch": 2.32, "grad_norm": 1.9430702924728394, "learning_rate": 4.5866666666666666e-05, "loss": 0.1196, "step": 290 }, { "epoch": 2.4, "grad_norm": 2.0821869373321533, "learning_rate": 4.0533333333333334e-05, "loss": 0.117, "step": 300 }, { "epoch": 2.4, "eval_accuracy": 0.9161676646706587, "eval_loss": 0.22871683537960052, "eval_runtime": 4.8973, "eval_samples_per_second": 102.302, "eval_steps_per_second": 12.864, "step": 300 }, { "epoch": 2.48, "grad_norm": 9.634779930114746, "learning_rate": 3.52e-05, "loss": 0.0606, "step": 310 }, { "epoch": 2.56, "grad_norm": 6.319236755371094, "learning_rate": 2.986666666666667e-05, "loss": 0.0774, "step": 320 }, { "epoch": 2.64, "grad_norm": 9.121068000793457, "learning_rate": 2.4533333333333334e-05, "loss": 0.06, "step": 330 }, { "epoch": 2.7199999999999998, "grad_norm": 3.4029245376586914, "learning_rate": 1.9200000000000003e-05, "loss": 0.0997, "step": 340 }, { "epoch": 2.8, "grad_norm": 0.8972381353378296, "learning_rate": 1.3866666666666667e-05, "loss": 0.0353, "step": 350 }, { "epoch": 2.8, "eval_accuracy": 0.9401197604790419, "eval_loss": 0.19462016224861145, "eval_runtime": 5.5189, "eval_samples_per_second": 90.78, "eval_steps_per_second": 11.415, "step": 350 }, { "epoch": 2.88, "grad_norm": 0.1985252946615219, "learning_rate": 8.533333333333334e-06, "loss": 0.0833, "step": 360 }, { "epoch": 2.96, "grad_norm": 0.129100501537323, "learning_rate": 3.2000000000000003e-06, "loss": 0.0775, "step": 370 }, { "epoch": 3.0, "step": 375, "total_flos": 4.64951937687552e+17, "train_loss": 0.3516346867879232, "train_runtime": 232.2268, "train_samples_per_second": 25.837, "train_steps_per_second": 1.615 } ], "logging_steps": 10, "max_steps": 375, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.64951937687552e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }