{
  "best_metric": 0.9675,
  "best_model_checkpoint": "Wound-classification/checkpoint-6000",
  "epoch": 31.0,
  "eval_steps": 500,
  "global_step": 6200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.2730700969696045,
      "learning_rate": 0.0001957070707070707,
      "loss": 1.2288,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.775,
      "eval_loss": 0.7021821737289429,
      "eval_runtime": 7.7413,
      "eval_samples_per_second": 51.671,
      "eval_steps_per_second": 12.918,
      "step": 200
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.5704953670501709,
      "learning_rate": 0.00018939393939393942,
      "loss": 0.6343,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7875,
      "eval_loss": 0.6516726613044739,
      "eval_runtime": 7.6496,
      "eval_samples_per_second": 52.29,
      "eval_steps_per_second": 13.073,
      "step": 400
    },
    {
      "epoch": 3.0,
      "grad_norm": 6.674056053161621,
      "learning_rate": 0.0001830808080808081,
      "loss": 0.5022,
      "step": 600
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.73,
      "eval_loss": 0.8923927545547485,
      "eval_runtime": 7.8144,
      "eval_samples_per_second": 51.187,
      "eval_steps_per_second": 12.797,
      "step": 600
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.7206950187683105,
      "learning_rate": 0.0001767676767676768,
      "loss": 0.5071,
      "step": 800
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8475,
      "eval_loss": 0.46834319829940796,
      "eval_runtime": 7.622,
      "eval_samples_per_second": 52.48,
      "eval_steps_per_second": 13.12,
      "step": 800
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.3697607517242432,
      "learning_rate": 0.00017045454545454547,
      "loss": 0.3484,
      "step": 1000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8675,
      "eval_loss": 0.44843006134033203,
      "eval_runtime": 7.8641,
      "eval_samples_per_second": 50.864,
      "eval_steps_per_second": 12.716,
      "step": 1000
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.369117259979248,
      "learning_rate": 0.00016414141414141414,
      "loss": 0.3452,
      "step": 1200
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8925,
      "eval_loss": 0.37597933411598206,
      "eval_runtime": 7.6457,
      "eval_samples_per_second": 52.317,
      "eval_steps_per_second": 13.079,
      "step": 1200
    },
    {
      "epoch": 7.0,
      "grad_norm": 7.87866735458374,
      "learning_rate": 0.00015782828282828284,
      "loss": 0.2461,
      "step": 1400
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.83,
      "eval_loss": 0.5486177206039429,
      "eval_runtime": 7.9064,
      "eval_samples_per_second": 50.592,
      "eval_steps_per_second": 12.648,
      "step": 1400
    },
    {
      "epoch": 8.0,
      "grad_norm": 9.048334121704102,
      "learning_rate": 0.00015151515151515152,
      "loss": 0.2389,
      "step": 1600
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.895,
      "eval_loss": 0.40351569652557373,
      "eval_runtime": 7.8304,
      "eval_samples_per_second": 51.083,
      "eval_steps_per_second": 12.771,
      "step": 1600
    },
    {
      "epoch": 9.0,
      "grad_norm": 2.520663022994995,
      "learning_rate": 0.00014520202020202022,
      "loss": 0.2272,
      "step": 1800
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8875,
      "eval_loss": 0.4761699438095093,
      "eval_runtime": 7.8414,
      "eval_samples_per_second": 51.011,
      "eval_steps_per_second": 12.753,
      "step": 1800
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.026233581826090813,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.2008,
      "step": 2000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.905,
      "eval_loss": 0.35686829686164856,
      "eval_runtime": 7.6276,
      "eval_samples_per_second": 52.441,
      "eval_steps_per_second": 13.11,
      "step": 2000
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.08601810783147812,
      "learning_rate": 0.00013257575757575756,
      "loss": 0.2173,
      "step": 2200
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9075,
      "eval_loss": 0.41256073117256165,
      "eval_runtime": 7.8809,
      "eval_samples_per_second": 50.756,
      "eval_steps_per_second": 12.689,
      "step": 2200
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.09381477534770966,
      "learning_rate": 0.00012626262626262626,
      "loss": 0.182,
      "step": 2400
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9125,
      "eval_loss": 0.3593887984752655,
      "eval_runtime": 7.8239,
      "eval_samples_per_second": 51.125,
      "eval_steps_per_second": 12.781,
      "step": 2400
    },
    {
      "epoch": 13.0,
      "grad_norm": 23.23141098022461,
      "learning_rate": 0.00011994949494949495,
      "loss": 0.1622,
      "step": 2600
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.89,
      "eval_loss": 0.3750649392604828,
      "eval_runtime": 7.7578,
      "eval_samples_per_second": 51.561,
      "eval_steps_per_second": 12.89,
      "step": 2600
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.19918544590473175,
      "learning_rate": 0.00011363636363636365,
      "loss": 0.1573,
      "step": 2800
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.92,
      "eval_loss": 0.33412453532218933,
      "eval_runtime": 7.6756,
      "eval_samples_per_second": 52.113,
      "eval_steps_per_second": 13.028,
      "step": 2800
    },
    {
      "epoch": 15.0,
      "grad_norm": 5.899987697601318,
      "learning_rate": 0.00010732323232323234,
      "loss": 0.148,
      "step": 3000
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.935,
      "eval_loss": 0.2885863184928894,
      "eval_runtime": 7.6591,
      "eval_samples_per_second": 52.226,
      "eval_steps_per_second": 13.056,
      "step": 3000
    },
    {
      "epoch": 16.0,
      "grad_norm": 3.5471909046173096,
      "learning_rate": 0.00010101010101010102,
      "loss": 0.1234,
      "step": 3200
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9075,
      "eval_loss": 0.3466532230377197,
      "eval_runtime": 7.8092,
      "eval_samples_per_second": 51.222,
      "eval_steps_per_second": 12.805,
      "step": 3200
    },
    {
      "epoch": 17.0,
      "grad_norm": 9.151985168457031,
      "learning_rate": 9.469696969696971e-05,
      "loss": 0.1254,
      "step": 3400
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.925,
      "eval_loss": 0.33074432611465454,
      "eval_runtime": 7.9693,
      "eval_samples_per_second": 50.193,
      "eval_steps_per_second": 12.548,
      "step": 3400
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.017998667433857918,
      "learning_rate": 8.83838383838384e-05,
      "loss": 0.1104,
      "step": 3600
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9375,
      "eval_loss": 0.2563402056694031,
      "eval_runtime": 7.8711,
      "eval_samples_per_second": 50.819,
      "eval_steps_per_second": 12.705,
      "step": 3600
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.07069282978773117,
      "learning_rate": 8.207070707070707e-05,
      "loss": 0.1017,
      "step": 3800
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9525,
      "eval_loss": 0.23432372510433197,
      "eval_runtime": 8.1296,
      "eval_samples_per_second": 49.203,
      "eval_steps_per_second": 12.301,
      "step": 3800
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.00819435901939869,
      "learning_rate": 7.575757575757576e-05,
      "loss": 0.0912,
      "step": 4000
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.925,
      "eval_loss": 0.2641538679599762,
      "eval_runtime": 7.8726,
      "eval_samples_per_second": 50.809,
      "eval_steps_per_second": 12.702,
      "step": 4000
    },
    {
      "epoch": 21.0,
      "grad_norm": 0.1772022545337677,
      "learning_rate": 6.944444444444444e-05,
      "loss": 0.093,
      "step": 4200
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.9375,
      "eval_loss": 0.2690765857696533,
      "eval_runtime": 7.671,
      "eval_samples_per_second": 52.144,
      "eval_steps_per_second": 13.036,
      "step": 4200
    },
    {
      "epoch": 22.0,
      "grad_norm": 6.940694332122803,
      "learning_rate": 6.313131313131313e-05,
      "loss": 0.1264,
      "step": 4400
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.9625,
      "eval_loss": 0.18011243641376495,
      "eval_runtime": 8.061,
      "eval_samples_per_second": 49.622,
      "eval_steps_per_second": 12.405,
      "step": 4400
    },
    {
      "epoch": 23.0,
      "grad_norm": 0.007489996496587992,
      "learning_rate": 5.6818181818181825e-05,
      "loss": 0.0951,
      "step": 4600
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.93,
      "eval_loss": 0.3289620280265808,
      "eval_runtime": 7.6993,
      "eval_samples_per_second": 51.953,
      "eval_steps_per_second": 12.988,
      "step": 4600
    },
    {
      "epoch": 24.0,
      "grad_norm": 0.008240079507231712,
      "learning_rate": 5.050505050505051e-05,
      "loss": 0.0793,
      "step": 4800
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.935,
      "eval_loss": 0.30677276849746704,
      "eval_runtime": 8.0535,
      "eval_samples_per_second": 49.668,
      "eval_steps_per_second": 12.417,
      "step": 4800
    },
    {
      "epoch": 25.0,
      "grad_norm": 0.06566061079502106,
      "learning_rate": 4.41919191919192e-05,
      "loss": 0.0489,
      "step": 5000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.9575,
      "eval_loss": 0.2502322494983673,
      "eval_runtime": 7.9899,
      "eval_samples_per_second": 50.063,
      "eval_steps_per_second": 12.516,
      "step": 5000
    },
    {
      "epoch": 26.0,
      "grad_norm": 0.006019990891218185,
      "learning_rate": 3.787878787878788e-05,
      "loss": 0.0824,
      "step": 5200
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.9425,
      "eval_loss": 0.27752983570098877,
      "eval_runtime": 7.7875,
      "eval_samples_per_second": 51.364,
      "eval_steps_per_second": 12.841,
      "step": 5200
    },
    {
      "epoch": 27.0,
      "grad_norm": 0.006204684264957905,
      "learning_rate": 3.1565656565656566e-05,
      "loss": 0.0784,
      "step": 5400
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.9525,
      "eval_loss": 0.21709877252578735,
      "eval_runtime": 7.8619,
      "eval_samples_per_second": 50.878,
      "eval_steps_per_second": 12.72,
      "step": 5400
    },
    {
      "epoch": 28.0,
      "grad_norm": 0.436471164226532,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 0.0463,
      "step": 5600
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.96,
      "eval_loss": 0.22668342292308807,
      "eval_runtime": 7.8277,
      "eval_samples_per_second": 51.101,
      "eval_steps_per_second": 12.775,
      "step": 5600
    },
    {
      "epoch": 29.0,
      "grad_norm": 0.006457165349274874,
      "learning_rate": 1.893939393939394e-05,
      "loss": 0.0553,
      "step": 5800
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.96,
      "eval_loss": 0.20785684883594513,
      "eval_runtime": 7.5766,
      "eval_samples_per_second": 52.794,
      "eval_steps_per_second": 13.199,
      "step": 5800
    },
    {
      "epoch": 30.0,
      "grad_norm": 5.3711652755737305,
      "learning_rate": 1.2626262626262628e-05,
      "loss": 0.0247,
      "step": 6000
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.9675,
      "eval_loss": 0.1625244915485382,
      "eval_runtime": 7.7936,
      "eval_samples_per_second": 51.324,
      "eval_steps_per_second": 12.831,
      "step": 6000
    },
    {
      "epoch": 31.0,
      "grad_norm": 0.718065619468689,
      "learning_rate": 6.313131313131314e-06,
      "loss": 0.0356,
      "step": 6200
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.9575,
      "eval_loss": 0.2557324469089508,
      "eval_runtime": 7.6107,
      "eval_samples_per_second": 52.557,
      "eval_steps_per_second": 13.139,
      "step": 6200
    }
  ],
  "logging_steps": 500,
  "max_steps": 6400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 32,
  "save_steps": 500,
  "total_flos": 3.843878276844749e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}