| { | |
| "best_metric": 0.13810816407203674, | |
| "best_model_checkpoint": "beard detection/checkpoint-792", | |
| "epoch": 44.0, | |
| "eval_steps": 500, | |
| "global_step": 792, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.821522309711286, | |
| "eval_loss": 0.4623703360557556, | |
| "eval_runtime": 15.5872, | |
| "eval_samples_per_second": 48.886, | |
| "eval_steps_per_second": 3.079, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8595800524934383, | |
| "eval_loss": 0.3484641909599304, | |
| "eval_runtime": 16.0868, | |
| "eval_samples_per_second": 47.368, | |
| "eval_steps_per_second": 2.984, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8989501312335958, | |
| "eval_loss": 0.23234562575817108, | |
| "eval_runtime": 15.6679, | |
| "eval_samples_per_second": 48.635, | |
| "eval_steps_per_second": 3.064, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9291338582677166, | |
| "eval_loss": 0.19474641978740692, | |
| "eval_runtime": 15.5907, | |
| "eval_samples_per_second": 48.875, | |
| "eval_steps_per_second": 3.079, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9330708661417323, | |
| "eval_loss": 0.17998209595680237, | |
| "eval_runtime": 15.6016, | |
| "eval_samples_per_second": 48.841, | |
| "eval_steps_per_second": 3.077, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.937007874015748, | |
| "eval_loss": 0.17380793392658234, | |
| "eval_runtime": 15.5655, | |
| "eval_samples_per_second": 48.954, | |
| "eval_steps_per_second": 3.084, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9343832020997376, | |
| "eval_loss": 0.1779448390007019, | |
| "eval_runtime": 15.5371, | |
| "eval_samples_per_second": 49.044, | |
| "eval_steps_per_second": 3.089, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9409448818897638, | |
| "eval_loss": 0.17088069021701813, | |
| "eval_runtime": 15.4793, | |
| "eval_samples_per_second": 49.227, | |
| "eval_steps_per_second": 3.101, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9422572178477691, | |
| "eval_loss": 0.16723193228244781, | |
| "eval_runtime": 15.4321, | |
| "eval_samples_per_second": 49.378, | |
| "eval_steps_per_second": 3.11, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9422572178477691, | |
| "eval_loss": 0.16475225985050201, | |
| "eval_runtime": 15.6535, | |
| "eval_samples_per_second": 48.679, | |
| "eval_steps_per_second": 3.066, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9435695538057742, | |
| "eval_loss": 0.1620875746011734, | |
| "eval_runtime": 15.5668, | |
| "eval_samples_per_second": 48.95, | |
| "eval_steps_per_second": 3.083, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9435695538057742, | |
| "eval_loss": 0.16018643975257874, | |
| "eval_runtime": 15.6106, | |
| "eval_samples_per_second": 48.813, | |
| "eval_steps_per_second": 3.075, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9435695538057742, | |
| "eval_loss": 0.160698801279068, | |
| "eval_runtime": 15.3643, | |
| "eval_samples_per_second": 49.596, | |
| "eval_steps_per_second": 3.124, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9435695538057742, | |
| "eval_loss": 0.1587902307510376, | |
| "eval_runtime": 15.4701, | |
| "eval_samples_per_second": 49.256, | |
| "eval_steps_per_second": 3.103, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9435695538057742, | |
| "eval_loss": 0.1586388796567917, | |
| "eval_runtime": 15.979, | |
| "eval_samples_per_second": 47.688, | |
| "eval_steps_per_second": 3.004, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.15598240494728088, | |
| "eval_runtime": 15.4614, | |
| "eval_samples_per_second": 49.284, | |
| "eval_steps_per_second": 3.105, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.15293540060520172, | |
| "eval_runtime": 15.5549, | |
| "eval_samples_per_second": 48.988, | |
| "eval_steps_per_second": 3.086, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9461942257217848, | |
| "eval_loss": 0.1483953446149826, | |
| "eval_runtime": 15.5258, | |
| "eval_samples_per_second": 49.08, | |
| "eval_steps_per_second": 3.092, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.14892004430294037, | |
| "eval_runtime": 15.383, | |
| "eval_samples_per_second": 49.535, | |
| "eval_steps_per_second": 3.12, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.14962303638458252, | |
| "eval_runtime": 15.5979, | |
| "eval_samples_per_second": 48.853, | |
| "eval_steps_per_second": 3.077, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.14834122359752655, | |
| "eval_runtime": 15.2733, | |
| "eval_samples_per_second": 49.891, | |
| "eval_steps_per_second": 3.143, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.9488188976377953, | |
| "eval_loss": 0.1453154981136322, | |
| "eval_runtime": 15.5367, | |
| "eval_samples_per_second": 49.045, | |
| "eval_steps_per_second": 3.089, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.94750656167979, | |
| "eval_loss": 0.14801156520843506, | |
| "eval_runtime": 15.2319, | |
| "eval_samples_per_second": 50.027, | |
| "eval_steps_per_second": 3.151, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.9448818897637795, | |
| "eval_loss": 0.14965900778770447, | |
| "eval_runtime": 15.5253, | |
| "eval_samples_per_second": 49.081, | |
| "eval_steps_per_second": 3.092, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.9461942257217848, | |
| "eval_loss": 0.14808818697929382, | |
| "eval_runtime": 15.4954, | |
| "eval_samples_per_second": 49.176, | |
| "eval_steps_per_second": 3.098, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.9461942257217848, | |
| "eval_loss": 0.14772780239582062, | |
| "eval_runtime": 15.6303, | |
| "eval_samples_per_second": 48.751, | |
| "eval_steps_per_second": 3.071, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.9488188976377953, | |
| "eval_loss": 0.14494165778160095, | |
| "eval_runtime": 15.3557, | |
| "eval_samples_per_second": 49.623, | |
| "eval_steps_per_second": 3.126, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 27.77777777777778, | |
| "grad_norm": 1.454528570175171, | |
| "learning_rate": 4.705882352941176e-07, | |
| "loss": 0.2091, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.9461942257217848, | |
| "eval_loss": 0.14676755666732788, | |
| "eval_runtime": 15.271, | |
| "eval_samples_per_second": 49.899, | |
| "eval_steps_per_second": 3.143, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.9488188976377953, | |
| "eval_loss": 0.14510495960712433, | |
| "eval_runtime": 15.5258, | |
| "eval_samples_per_second": 49.079, | |
| "eval_steps_per_second": 3.092, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.9514435695538058, | |
| "eval_loss": 0.14374305307865143, | |
| "eval_runtime": 15.5529, | |
| "eval_samples_per_second": 48.994, | |
| "eval_steps_per_second": 3.086, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14374108612537384, | |
| "eval_runtime": 15.2492, | |
| "eval_samples_per_second": 49.97, | |
| "eval_steps_per_second": 3.148, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14367464184761047, | |
| "eval_runtime": 15.2728, | |
| "eval_samples_per_second": 49.893, | |
| "eval_steps_per_second": 3.143, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14097823202610016, | |
| "eval_runtime": 15.4432, | |
| "eval_samples_per_second": 49.342, | |
| "eval_steps_per_second": 3.108, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14191950857639313, | |
| "eval_runtime": 15.3388, | |
| "eval_samples_per_second": 49.678, | |
| "eval_steps_per_second": 3.129, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.1414215862751007, | |
| "eval_runtime": 15.9739, | |
| "eval_samples_per_second": 47.703, | |
| "eval_steps_per_second": 3.005, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14236187934875488, | |
| "eval_runtime": 15.5397, | |
| "eval_samples_per_second": 49.036, | |
| "eval_steps_per_second": 3.089, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.14204008877277374, | |
| "eval_runtime": 15.3972, | |
| "eval_samples_per_second": 49.49, | |
| "eval_steps_per_second": 3.117, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.1410112977027893, | |
| "eval_runtime": 15.479, | |
| "eval_samples_per_second": 49.228, | |
| "eval_steps_per_second": 3.101, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.9501312335958005, | |
| "eval_loss": 0.1409473717212677, | |
| "eval_runtime": 15.3208, | |
| "eval_samples_per_second": 49.736, | |
| "eval_steps_per_second": 3.133, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.9514435695538058, | |
| "eval_loss": 0.13949158787727356, | |
| "eval_runtime": 15.5092, | |
| "eval_samples_per_second": 49.132, | |
| "eval_steps_per_second": 3.095, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.952755905511811, | |
| "eval_loss": 0.13874106109142303, | |
| "eval_runtime": 15.6493, | |
| "eval_samples_per_second": 48.692, | |
| "eval_steps_per_second": 3.067, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.952755905511811, | |
| "eval_loss": 0.13839222490787506, | |
| "eval_runtime": 15.5039, | |
| "eval_samples_per_second": 49.149, | |
| "eval_steps_per_second": 3.096, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.9514435695538058, | |
| "eval_loss": 0.1389077603816986, | |
| "eval_runtime": 15.5822, | |
| "eval_samples_per_second": 48.902, | |
| "eval_steps_per_second": 3.08, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.952755905511811, | |
| "eval_loss": 0.13810816407203674, | |
| "eval_runtime": 15.4669, | |
| "eval_samples_per_second": 49.267, | |
| "eval_steps_per_second": 3.103, | |
| "step": 792 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 3.893817494154019e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |