{
  "best_metric": 0.010909353382885456,
  "best_model_checkpoint": "emigomez/vit-cropped-faces/checkpoint-200",
  "epoch": 20.0,
  "eval_steps": 100,
  "global_step": 640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3125,
      "grad_norm": 1.6401095390319824,
      "learning_rate": 0.000196875,
      "loss": 0.692,
      "step": 10
    },
    {
      "epoch": 0.625,
      "grad_norm": 1.1231616735458374,
      "learning_rate": 0.00019375000000000002,
      "loss": 0.2743,
      "step": 20
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.7256351709365845,
      "learning_rate": 0.000190625,
      "loss": 0.0795,
      "step": 30
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.17406541109085083,
      "learning_rate": 0.0001875,
      "loss": 0.0414,
      "step": 40
    },
    {
      "epoch": 1.5625,
      "grad_norm": 6.439935684204102,
      "learning_rate": 0.000184375,
      "loss": 0.0465,
      "step": 50
    },
    {
      "epoch": 1.875,
      "grad_norm": 2.296980857849121,
      "learning_rate": 0.00018125000000000001,
      "loss": 0.0526,
      "step": 60
    },
    {
      "epoch": 2.1875,
      "grad_norm": 0.09786612540483475,
      "learning_rate": 0.000178125,
      "loss": 0.0372,
      "step": 70
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.07251214236021042,
      "learning_rate": 0.000175,
      "loss": 0.0158,
      "step": 80
    },
    {
      "epoch": 2.8125,
      "grad_norm": 0.06504912674427032,
      "learning_rate": 0.00017187500000000002,
      "loss": 0.1044,
      "step": 90
    },
    {
      "epoch": 3.125,
      "grad_norm": 0.08557004481554031,
      "learning_rate": 0.00016875,
      "loss": 0.0254,
      "step": 100
    },
    {
      "epoch": 3.125,
      "eval_accuracy": 1.0,
      "eval_loss": 0.013552392832934856,
      "eval_runtime": 1.5523,
      "eval_samples_per_second": 36.077,
      "eval_steps_per_second": 4.51,
      "step": 100
    },
    {
      "epoch": 3.4375,
      "grad_norm": 0.06062212213873863,
      "learning_rate": 0.000165625,
      "loss": 0.0122,
      "step": 110
    },
    {
      "epoch": 3.75,
      "grad_norm": 0.050220146775245667,
      "learning_rate": 0.00016250000000000002,
      "loss": 0.0098,
      "step": 120
    },
    {
      "epoch": 4.0625,
      "grad_norm": 0.04843802750110626,
      "learning_rate": 0.000159375,
      "loss": 0.0091,
      "step": 130
    },
    {
      "epoch": 4.375,
      "grad_norm": 0.04158018156886101,
      "learning_rate": 0.00015625,
      "loss": 0.0081,
      "step": 140
    },
    {
      "epoch": 4.6875,
      "grad_norm": 0.03899329528212547,
      "learning_rate": 0.000153125,
      "loss": 0.0075,
      "step": 150
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.05264412984251976,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.0069,
      "step": 160
    },
    {
      "epoch": 5.3125,
      "grad_norm": 0.036546722054481506,
      "learning_rate": 0.000146875,
      "loss": 0.0064,
      "step": 170
    },
    {
      "epoch": 5.625,
      "grad_norm": 0.035343725234270096,
      "learning_rate": 0.00014375,
      "loss": 0.006,
      "step": 180
    },
    {
      "epoch": 5.9375,
      "grad_norm": 0.03636905550956726,
      "learning_rate": 0.00014062500000000002,
      "loss": 0.0056,
      "step": 190
    },
    {
      "epoch": 6.25,
      "grad_norm": 0.033068690448999405,
      "learning_rate": 0.0001375,
      "loss": 0.0053,
      "step": 200
    },
    {
      "epoch": 6.25,
      "eval_accuracy": 1.0,
      "eval_loss": 0.010909353382885456,
      "eval_runtime": 1.5116,
      "eval_samples_per_second": 37.048,
      "eval_steps_per_second": 4.631,
      "step": 200
    },
    {
      "epoch": 6.5625,
      "grad_norm": 0.027593158185482025,
      "learning_rate": 0.000134375,
      "loss": 0.0049,
      "step": 210
    },
    {
      "epoch": 6.875,
      "grad_norm": 0.02486700937151909,
      "learning_rate": 0.00013125000000000002,
      "loss": 0.0047,
      "step": 220
    },
    {
      "epoch": 7.1875,
      "grad_norm": 0.025286702439188957,
      "learning_rate": 0.000128125,
      "loss": 0.0045,
      "step": 230
    },
    {
      "epoch": 7.5,
      "grad_norm": 0.02420409955084324,
      "learning_rate": 0.000125,
      "loss": 0.0042,
      "step": 240
    },
    {
      "epoch": 7.8125,
      "grad_norm": 0.02255989797413349,
      "learning_rate": 0.00012187500000000001,
      "loss": 0.0041,
      "step": 250
    },
    {
      "epoch": 8.125,
      "grad_norm": 0.02108701318502426,
      "learning_rate": 0.00011875,
      "loss": 0.0038,
      "step": 260
    },
    {
      "epoch": 8.4375,
      "grad_norm": 0.020098580047488213,
      "learning_rate": 0.000115625,
      "loss": 0.0037,
      "step": 270
    },
    {
      "epoch": 8.75,
      "grad_norm": 0.019474145025014877,
      "learning_rate": 0.00011250000000000001,
      "loss": 0.0036,
      "step": 280
    },
    {
      "epoch": 9.0625,
      "grad_norm": 0.018637843430042267,
      "learning_rate": 0.000109375,
      "loss": 0.0035,
      "step": 290
    },
    {
      "epoch": 9.375,
      "grad_norm": 0.018013661727309227,
      "learning_rate": 0.00010625000000000001,
      "loss": 0.0033,
      "step": 300
    },
    {
      "epoch": 9.375,
      "eval_accuracy": 1.0,
      "eval_loss": 0.013854571618139744,
      "eval_runtime": 1.54,
      "eval_samples_per_second": 36.363,
      "eval_steps_per_second": 4.545,
      "step": 300
    },
    {
      "epoch": 9.6875,
      "grad_norm": 0.01775149628520012,
      "learning_rate": 0.000103125,
      "loss": 0.0032,
      "step": 310
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.01715986803174019,
      "learning_rate": 0.0001,
      "loss": 0.0031,
      "step": 320
    },
    {
      "epoch": 10.3125,
      "grad_norm": 0.016615115106105804,
      "learning_rate": 9.687500000000001e-05,
      "loss": 0.003,
      "step": 330
    },
    {
      "epoch": 10.625,
      "grad_norm": 0.016151374205946922,
      "learning_rate": 9.375e-05,
      "loss": 0.0029,
      "step": 340
    },
    {
      "epoch": 10.9375,
      "grad_norm": 0.01536835078150034,
      "learning_rate": 9.062500000000001e-05,
      "loss": 0.0028,
      "step": 350
    },
    {
      "epoch": 11.25,
      "grad_norm": 0.014975355938076973,
      "learning_rate": 8.75e-05,
      "loss": 0.0027,
      "step": 360
    },
    {
      "epoch": 11.5625,
      "grad_norm": 0.014888996258378029,
      "learning_rate": 8.4375e-05,
      "loss": 0.0027,
      "step": 370
    },
    {
      "epoch": 11.875,
      "grad_norm": 0.014734145253896713,
      "learning_rate": 8.125000000000001e-05,
      "loss": 0.0026,
      "step": 380
    },
    {
      "epoch": 12.1875,
      "grad_norm": 0.014760036021471024,
      "learning_rate": 7.8125e-05,
      "loss": 0.0025,
      "step": 390
    },
    {
      "epoch": 12.5,
      "grad_norm": 0.013639590702950954,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.0025,
      "step": 400
    },
    {
      "epoch": 12.5,
      "eval_accuracy": 1.0,
      "eval_loss": 0.012760639190673828,
      "eval_runtime": 1.5727,
      "eval_samples_per_second": 35.608,
      "eval_steps_per_second": 4.451,
      "step": 400
    },
    {
      "epoch": 12.8125,
      "grad_norm": 0.013434410095214844,
      "learning_rate": 7.1875e-05,
      "loss": 0.0024,
      "step": 410
    },
    {
      "epoch": 13.125,
      "grad_norm": 0.013357667252421379,
      "learning_rate": 6.875e-05,
      "loss": 0.0024,
      "step": 420
    },
    {
      "epoch": 13.4375,
      "grad_norm": 0.01299892459064722,
      "learning_rate": 6.562500000000001e-05,
      "loss": 0.0023,
      "step": 430
    },
    {
      "epoch": 13.75,
      "grad_norm": 0.01252039521932602,
      "learning_rate": 6.25e-05,
      "loss": 0.0023,
      "step": 440
    },
    {
      "epoch": 14.0625,
      "grad_norm": 0.012350697070360184,
      "learning_rate": 5.9375e-05,
      "loss": 0.0022,
      "step": 450
    },
    {
      "epoch": 14.375,
      "grad_norm": 0.012504469603300095,
      "learning_rate": 5.6250000000000005e-05,
      "loss": 0.0022,
      "step": 460
    },
    {
      "epoch": 14.6875,
      "grad_norm": 0.01206807978451252,
      "learning_rate": 5.3125000000000004e-05,
      "loss": 0.0022,
      "step": 470
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.013769702985882759,
      "learning_rate": 5e-05,
      "loss": 0.0021,
      "step": 480
    },
    {
      "epoch": 15.3125,
      "grad_norm": 0.01167486421763897,
      "learning_rate": 4.6875e-05,
      "loss": 0.0021,
      "step": 490
    },
    {
      "epoch": 15.625,
      "grad_norm": 0.011528071016073227,
      "learning_rate": 4.375e-05,
      "loss": 0.0021,
      "step": 500
    },
    {
      "epoch": 15.625,
      "eval_accuracy": 1.0,
      "eval_loss": 0.012214643880724907,
      "eval_runtime": 1.5413,
      "eval_samples_per_second": 36.334,
      "eval_steps_per_second": 4.542,
      "step": 500
    },
    {
      "epoch": 15.9375,
      "grad_norm": 0.011460980400443077,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 0.002,
      "step": 510
    },
    {
      "epoch": 16.25,
      "grad_norm": 0.011635087430477142,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.002,
      "step": 520
    },
    {
      "epoch": 16.5625,
      "grad_norm": 0.012200203724205494,
      "learning_rate": 3.4375e-05,
      "loss": 0.002,
      "step": 530
    },
    {
      "epoch": 16.875,
      "grad_norm": 0.011691519059240818,
      "learning_rate": 3.125e-05,
      "loss": 0.002,
      "step": 540
    },
    {
      "epoch": 17.1875,
      "grad_norm": 0.010916861705482006,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.002,
      "step": 550
    },
    {
      "epoch": 17.5,
      "grad_norm": 0.010833012871444225,
      "learning_rate": 2.5e-05,
      "loss": 0.0019,
      "step": 560
    },
    {
      "epoch": 17.8125,
      "grad_norm": 0.010946971364319324,
      "learning_rate": 2.1875e-05,
      "loss": 0.0019,
      "step": 570
    },
    {
      "epoch": 18.125,
      "grad_norm": 0.01121476013213396,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.0019,
      "step": 580
    },
    {
      "epoch": 18.4375,
      "grad_norm": 0.010572544299066067,
      "learning_rate": 1.5625e-05,
      "loss": 0.0019,
      "step": 590
    },
    {
      "epoch": 18.75,
      "grad_norm": 0.01052548922598362,
      "learning_rate": 1.25e-05,
      "loss": 0.0019,
      "step": 600
    },
    {
      "epoch": 18.75,
      "eval_accuracy": 1.0,
      "eval_loss": 0.011984586715698242,
      "eval_runtime": 1.5235,
      "eval_samples_per_second": 36.758,
      "eval_steps_per_second": 4.595,
      "step": 600
    },
    {
      "epoch": 19.0625,
      "grad_norm": 0.011718451976776123,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.0019,
      "step": 610
    },
    {
      "epoch": 19.375,
      "grad_norm": 0.011368807405233383,
      "learning_rate": 6.25e-06,
      "loss": 0.0019,
      "step": 620
    },
    {
      "epoch": 19.6875,
      "grad_norm": 0.01071301568299532,
      "learning_rate": 3.125e-06,
      "loss": 0.0019,
      "step": 630
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.011912377551198006,
      "learning_rate": 0.0,
      "loss": 0.0019,
      "step": 640
    },
    {
      "epoch": 20.0,
      "step": 640,
      "total_flos": 7.764766951908557e+17,
      "train_loss": 0.024429786371183582,
      "train_runtime": 337.6815,
      "train_samples_per_second": 29.673,
      "train_steps_per_second": 1.895
    }
  ],
  "logging_steps": 10,
  "max_steps": 640,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.764766951908557e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}