{
  "best_metric": 0.8716852010265184,
  "best_model_checkpoint": "skincare-detection/checkpoint-553",
  "epoch": 11.902439024390244,
  "eval_steps": 500,
  "global_step": 732,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.52,
      "grad_norm": 1.0143417119979858,
      "learning_rate": 8.64864864864865e-05,
      "loss": 1.3961,
      "step": 32
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.7724550898203593,
      "eval_loss": 0.5629431009292603,
      "eval_runtime": 12.1216,
      "eval_samples_per_second": 96.439,
      "eval_steps_per_second": 3.052,
      "step": 61
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.6657726764678955,
      "learning_rate": 0.000172972972972973,
      "loss": 0.6454,
      "step": 64
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.6649633049964905,
      "learning_rate": 0.0001933130699088146,
      "loss": 0.4982,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8434559452523525,
      "eval_loss": 0.3991105854511261,
      "eval_runtime": 12.1709,
      "eval_samples_per_second": 96.049,
      "eval_steps_per_second": 3.04,
      "step": 123
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.7564620971679688,
      "learning_rate": 0.00018358662613981763,
      "loss": 0.4536,
      "step": 128
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.6977857351303101,
      "learning_rate": 0.00017386018237082067,
      "loss": 0.3563,
      "step": 160
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.8272027373823782,
      "eval_loss": 0.43296942114830017,
      "eval_runtime": 12.3181,
      "eval_samples_per_second": 94.901,
      "eval_steps_per_second": 3.004,
      "step": 184
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.7642468214035034,
      "learning_rate": 0.0001641337386018237,
      "loss": 0.3169,
      "step": 192
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.9004422426223755,
      "learning_rate": 0.00015440729483282676,
      "loss": 0.2314,
      "step": 224
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8554319931565441,
      "eval_loss": 0.39688870310783386,
      "eval_runtime": 12.3055,
      "eval_samples_per_second": 94.998,
      "eval_steps_per_second": 3.007,
      "step": 246
    },
    {
      "epoch": 4.16,
      "grad_norm": 0.9273125529289246,
      "learning_rate": 0.0001446808510638298,
      "loss": 0.2055,
      "step": 256
    },
    {
      "epoch": 4.68,
      "grad_norm": 0.6541422009468079,
      "learning_rate": 0.00013495440729483285,
      "loss": 0.1815,
      "step": 288
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.8434559452523525,
      "eval_loss": 0.44923701882362366,
      "eval_runtime": 12.4125,
      "eval_samples_per_second": 94.179,
      "eval_steps_per_second": 2.981,
      "step": 307
    },
    {
      "epoch": 5.2,
      "grad_norm": 1.0498323440551758,
      "learning_rate": 0.00012522796352583589,
      "loss": 0.1514,
      "step": 320
    },
    {
      "epoch": 5.72,
      "grad_norm": 1.087367057800293,
      "learning_rate": 0.00011550151975683892,
      "loss": 0.1332,
      "step": 352
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8579982891360137,
      "eval_loss": 0.44741156697273254,
      "eval_runtime": 12.1735,
      "eval_samples_per_second": 96.029,
      "eval_steps_per_second": 3.039,
      "step": 369
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.9595869183540344,
      "learning_rate": 0.00010577507598784195,
      "loss": 0.1201,
      "step": 384
    },
    {
      "epoch": 6.76,
      "grad_norm": 0.39300984144210815,
      "learning_rate": 9.6048632218845e-05,
      "loss": 0.0869,
      "step": 416
    },
    {
      "epoch": 6.99,
      "eval_accuracy": 0.863130881094953,
      "eval_loss": 0.45202794671058655,
      "eval_runtime": 12.4693,
      "eval_samples_per_second": 93.75,
      "eval_steps_per_second": 2.967,
      "step": 430
    },
    {
      "epoch": 7.28,
      "grad_norm": 0.9669052362442017,
      "learning_rate": 8.632218844984803e-05,
      "loss": 0.0991,
      "step": 448
    },
    {
      "epoch": 7.8,
      "grad_norm": 0.8003025650978088,
      "learning_rate": 7.659574468085106e-05,
      "loss": 0.0844,
      "step": 480
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8639863130881095,
      "eval_loss": 0.44686540961265564,
      "eval_runtime": 12.1322,
      "eval_samples_per_second": 96.355,
      "eval_steps_per_second": 3.05,
      "step": 492
    },
    {
      "epoch": 8.33,
      "grad_norm": 0.3683207333087921,
      "learning_rate": 6.686930091185411e-05,
      "loss": 0.0811,
      "step": 512
    },
    {
      "epoch": 8.85,
      "grad_norm": 0.6750203371047974,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.0681,
      "step": 544
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.8716852010265184,
      "eval_loss": 0.45333394408226013,
      "eval_runtime": 12.2392,
      "eval_samples_per_second": 95.513,
      "eval_steps_per_second": 3.023,
      "step": 553
    },
    {
      "epoch": 9.37,
      "grad_norm": 0.48275861144065857,
      "learning_rate": 4.741641337386019e-05,
      "loss": 0.0635,
      "step": 576
    },
    {
      "epoch": 9.89,
      "grad_norm": 0.8461657762527466,
      "learning_rate": 3.768996960486322e-05,
      "loss": 0.0574,
      "step": 608
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8597091531223268,
      "eval_loss": 0.4952048361301422,
      "eval_runtime": 12.278,
      "eval_samples_per_second": 95.211,
      "eval_steps_per_second": 3.014,
      "step": 615
    },
    {
      "epoch": 10.41,
      "grad_norm": 0.2595687806606293,
      "learning_rate": 2.796352583586626e-05,
      "loss": 0.0518,
      "step": 640
    },
    {
      "epoch": 10.93,
      "grad_norm": 0.39481160044670105,
      "learning_rate": 1.82370820668693e-05,
      "loss": 0.0477,
      "step": 672
    },
    {
      "epoch": 10.99,
      "eval_accuracy": 0.8674080410607357,
      "eval_loss": 0.4772116541862488,
      "eval_runtime": 12.2102,
      "eval_samples_per_second": 95.74,
      "eval_steps_per_second": 3.03,
      "step": 676
    },
    {
      "epoch": 11.45,
      "grad_norm": 0.43194687366485596,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.0454,
      "step": 704
    },
    {
      "epoch": 11.9,
      "eval_accuracy": 0.864841745081266,
      "eval_loss": 0.48397254943847656,
      "eval_runtime": 12.6433,
      "eval_samples_per_second": 92.46,
      "eval_steps_per_second": 2.926,
      "step": 732
    },
    {
      "epoch": 11.9,
      "step": 732,
      "total_flos": 7.238851133027512e+18,
      "train_loss": 0.2366401759978852,
      "train_runtime": 2240.6293,
      "train_samples_per_second": 42.02,
      "train_steps_per_second": 0.327
    }
  ],
  "logging_steps": 32,
  "max_steps": 732,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 500,
  "total_flos": 7.238851133027512e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}