{ "best_metric": 0.9675, "best_model_checkpoint": "Wound-classification/checkpoint-6000", "epoch": 31.0, "eval_steps": 500, "global_step": 6200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.2730700969696045, "learning_rate": 0.0001957070707070707, "loss": 1.2288, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.775, "eval_loss": 0.7021821737289429, "eval_runtime": 7.7413, "eval_samples_per_second": 51.671, "eval_steps_per_second": 12.918, "step": 200 }, { "epoch": 2.0, "grad_norm": 0.5704953670501709, "learning_rate": 0.00018939393939393942, "loss": 0.6343, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.7875, "eval_loss": 0.6516726613044739, "eval_runtime": 7.6496, "eval_samples_per_second": 52.29, "eval_steps_per_second": 13.073, "step": 400 }, { "epoch": 3.0, "grad_norm": 6.674056053161621, "learning_rate": 0.0001830808080808081, "loss": 0.5022, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.73, "eval_loss": 0.8923927545547485, "eval_runtime": 7.8144, "eval_samples_per_second": 51.187, "eval_steps_per_second": 12.797, "step": 600 }, { "epoch": 4.0, "grad_norm": 3.7206950187683105, "learning_rate": 0.0001767676767676768, "loss": 0.5071, "step": 800 }, { "epoch": 4.0, "eval_accuracy": 0.8475, "eval_loss": 0.46834319829940796, "eval_runtime": 7.622, "eval_samples_per_second": 52.48, "eval_steps_per_second": 13.12, "step": 800 }, { "epoch": 5.0, "grad_norm": 1.3697607517242432, "learning_rate": 0.00017045454545454547, "loss": 0.3484, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.8675, "eval_loss": 0.44843006134033203, "eval_runtime": 7.8641, "eval_samples_per_second": 50.864, "eval_steps_per_second": 12.716, "step": 1000 }, { "epoch": 6.0, "grad_norm": 4.369117259979248, "learning_rate": 0.00016414141414141414, "loss": 0.3452, "step": 1200 }, { "epoch": 6.0, "eval_accuracy": 0.8925, "eval_loss": 0.37597933411598206, "eval_runtime": 7.6457, "eval_samples_per_second": 52.317, "eval_steps_per_second": 13.079, "step": 1200 }, { "epoch": 7.0, "grad_norm": 7.87866735458374, "learning_rate": 0.00015782828282828284, "loss": 0.2461, "step": 1400 }, { "epoch": 7.0, "eval_accuracy": 0.83, "eval_loss": 0.5486177206039429, "eval_runtime": 7.9064, "eval_samples_per_second": 50.592, "eval_steps_per_second": 12.648, "step": 1400 }, { "epoch": 8.0, "grad_norm": 9.048334121704102, "learning_rate": 0.00015151515151515152, "loss": 0.2389, "step": 1600 }, { "epoch": 8.0, "eval_accuracy": 0.895, "eval_loss": 0.40351569652557373, "eval_runtime": 7.8304, "eval_samples_per_second": 51.083, "eval_steps_per_second": 12.771, "step": 1600 }, { "epoch": 9.0, "grad_norm": 2.520663022994995, "learning_rate": 0.00014520202020202022, "loss": 0.2272, "step": 1800 }, { "epoch": 9.0, "eval_accuracy": 0.8875, "eval_loss": 0.4761699438095093, "eval_runtime": 7.8414, "eval_samples_per_second": 51.011, "eval_steps_per_second": 12.753, "step": 1800 }, { "epoch": 10.0, "grad_norm": 0.026233581826090813, "learning_rate": 0.0001388888888888889, "loss": 0.2008, "step": 2000 }, { "epoch": 10.0, "eval_accuracy": 0.905, "eval_loss": 0.35686829686164856, "eval_runtime": 7.6276, "eval_samples_per_second": 52.441, "eval_steps_per_second": 13.11, "step": 2000 }, { "epoch": 11.0, "grad_norm": 0.08601810783147812, "learning_rate": 0.00013257575757575756, "loss": 0.2173, "step": 2200 }, { "epoch": 11.0, "eval_accuracy": 0.9075, "eval_loss": 0.41256073117256165, "eval_runtime": 7.8809, "eval_samples_per_second": 50.756, "eval_steps_per_second": 12.689, "step": 2200 }, { "epoch": 12.0, "grad_norm": 0.09381477534770966, "learning_rate": 0.00012626262626262626, "loss": 0.182, "step": 2400 }, { "epoch": 12.0, "eval_accuracy": 0.9125, "eval_loss": 0.3593887984752655, "eval_runtime": 7.8239, "eval_samples_per_second": 51.125, "eval_steps_per_second": 12.781, "step": 2400 }, { "epoch": 13.0, "grad_norm": 23.23141098022461, "learning_rate": 0.00011994949494949495, "loss": 0.1622, "step": 2600 }, { "epoch": 13.0, "eval_accuracy": 0.89, "eval_loss": 0.3750649392604828, "eval_runtime": 7.7578, "eval_samples_per_second": 51.561, "eval_steps_per_second": 12.89, "step": 2600 }, { "epoch": 14.0, "grad_norm": 0.19918544590473175, "learning_rate": 0.00011363636363636365, "loss": 0.1573, "step": 2800 }, { "epoch": 14.0, "eval_accuracy": 0.92, "eval_loss": 0.33412453532218933, "eval_runtime": 7.6756, "eval_samples_per_second": 52.113, "eval_steps_per_second": 13.028, "step": 2800 }, { "epoch": 15.0, "grad_norm": 5.899987697601318, "learning_rate": 0.00010732323232323234, "loss": 0.148, "step": 3000 }, { "epoch": 15.0, "eval_accuracy": 0.935, "eval_loss": 0.2885863184928894, "eval_runtime": 7.6591, "eval_samples_per_second": 52.226, "eval_steps_per_second": 13.056, "step": 3000 }, { "epoch": 16.0, "grad_norm": 3.5471909046173096, "learning_rate": 0.00010101010101010102, "loss": 0.1234, "step": 3200 }, { "epoch": 16.0, "eval_accuracy": 0.9075, "eval_loss": 0.3466532230377197, "eval_runtime": 7.8092, "eval_samples_per_second": 51.222, "eval_steps_per_second": 12.805, "step": 3200 }, { "epoch": 17.0, "grad_norm": 9.151985168457031, "learning_rate": 9.469696969696971e-05, "loss": 0.1254, "step": 3400 }, { "epoch": 17.0, "eval_accuracy": 0.925, "eval_loss": 0.33074432611465454, "eval_runtime": 7.9693, "eval_samples_per_second": 50.193, "eval_steps_per_second": 12.548, "step": 3400 }, { "epoch": 18.0, "grad_norm": 0.017998667433857918, "learning_rate": 8.83838383838384e-05, "loss": 0.1104, "step": 3600 }, { "epoch": 18.0, "eval_accuracy": 0.9375, "eval_loss": 0.2563402056694031, "eval_runtime": 7.8711, "eval_samples_per_second": 50.819, "eval_steps_per_second": 12.705, "step": 3600 }, { "epoch": 19.0, "grad_norm": 0.07069282978773117, "learning_rate": 8.207070707070707e-05, "loss": 0.1017, "step": 3800 }, { "epoch": 19.0, "eval_accuracy": 0.9525, "eval_loss": 0.23432372510433197, "eval_runtime": 8.1296, "eval_samples_per_second": 49.203, "eval_steps_per_second": 12.301, "step": 3800 }, { "epoch": 20.0, "grad_norm": 0.00819435901939869, "learning_rate": 7.575757575757576e-05, "loss": 0.0912, "step": 4000 }, { "epoch": 20.0, "eval_accuracy": 0.925, "eval_loss": 0.2641538679599762, "eval_runtime": 7.8726, "eval_samples_per_second": 50.809, "eval_steps_per_second": 12.702, "step": 4000 }, { "epoch": 21.0, "grad_norm": 0.1772022545337677, "learning_rate": 6.944444444444444e-05, "loss": 0.093, "step": 4200 }, { "epoch": 21.0, "eval_accuracy": 0.9375, "eval_loss": 0.2690765857696533, "eval_runtime": 7.671, "eval_samples_per_second": 52.144, "eval_steps_per_second": 13.036, "step": 4200 }, { "epoch": 22.0, "grad_norm": 6.940694332122803, "learning_rate": 6.313131313131313e-05, "loss": 0.1264, "step": 4400 }, { "epoch": 22.0, "eval_accuracy": 0.9625, "eval_loss": 0.18011243641376495, "eval_runtime": 8.061, "eval_samples_per_second": 49.622, "eval_steps_per_second": 12.405, "step": 4400 }, { "epoch": 23.0, "grad_norm": 0.007489996496587992, "learning_rate": 5.6818181818181825e-05, "loss": 0.0951, "step": 4600 }, { "epoch": 23.0, "eval_accuracy": 0.93, "eval_loss": 0.3289620280265808, "eval_runtime": 7.6993, "eval_samples_per_second": 51.953, "eval_steps_per_second": 12.988, "step": 4600 }, { "epoch": 24.0, "grad_norm": 0.008240079507231712, "learning_rate": 5.050505050505051e-05, "loss": 0.0793, "step": 4800 }, { "epoch": 24.0, "eval_accuracy": 0.935, "eval_loss": 0.30677276849746704, "eval_runtime": 8.0535, "eval_samples_per_second": 49.668, "eval_steps_per_second": 12.417, "step": 4800 }, { "epoch": 25.0, "grad_norm": 0.06566061079502106, "learning_rate": 4.41919191919192e-05, "loss": 0.0489, "step": 5000 }, { "epoch": 25.0, "eval_accuracy": 0.9575, "eval_loss": 0.2502322494983673, "eval_runtime": 7.9899, "eval_samples_per_second": 50.063, "eval_steps_per_second": 12.516, "step": 5000 }, { "epoch": 26.0, "grad_norm": 0.006019990891218185, "learning_rate": 3.787878787878788e-05, "loss": 0.0824, "step": 5200 }, { "epoch": 26.0, "eval_accuracy": 0.9425, "eval_loss": 0.27752983570098877, "eval_runtime": 7.7875, "eval_samples_per_second": 51.364, "eval_steps_per_second": 12.841, "step": 5200 }, { "epoch": 27.0, "grad_norm": 0.006204684264957905, "learning_rate": 3.1565656565656566e-05, "loss": 0.0784, "step": 5400 }, { "epoch": 27.0, "eval_accuracy": 0.9525, "eval_loss": 0.21709877252578735, "eval_runtime": 7.8619, "eval_samples_per_second": 50.878, "eval_steps_per_second": 12.72, "step": 5400 }, { "epoch": 28.0, "grad_norm": 0.436471164226532, "learning_rate": 2.5252525252525256e-05, "loss": 0.0463, "step": 5600 }, { "epoch": 28.0, "eval_accuracy": 0.96, "eval_loss": 0.22668342292308807, "eval_runtime": 7.8277, "eval_samples_per_second": 51.101, "eval_steps_per_second": 12.775, "step": 5600 }, { "epoch": 29.0, "grad_norm": 0.006457165349274874, "learning_rate": 1.893939393939394e-05, "loss": 0.0553, "step": 5800 }, { "epoch": 29.0, "eval_accuracy": 0.96, "eval_loss": 0.20785684883594513, "eval_runtime": 7.5766, "eval_samples_per_second": 52.794, "eval_steps_per_second": 13.199, "step": 5800 }, { "epoch": 30.0, "grad_norm": 5.3711652755737305, "learning_rate": 1.2626262626262628e-05, "loss": 0.0247, "step": 6000 }, { "epoch": 30.0, "eval_accuracy": 0.9675, "eval_loss": 0.1625244915485382, "eval_runtime": 7.7936, "eval_samples_per_second": 51.324, "eval_steps_per_second": 12.831, "step": 6000 }, { "epoch": 31.0, "grad_norm": 0.718065619468689, "learning_rate": 6.313131313131314e-06, "loss": 0.0356, "step": 6200 }, { "epoch": 31.0, "eval_accuracy": 0.9575, "eval_loss": 0.2557324469089508, "eval_runtime": 7.6107, "eval_samples_per_second": 52.557, "eval_steps_per_second": 13.139, "step": 6200 } ], "logging_steps": 500, "max_steps": 6400, "num_input_tokens_seen": 0, "num_train_epochs": 32, "save_steps": 500, "total_flos": 3.843878276844749e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }