| { | |
| "best_metric": 0.9347826086956522, | |
| "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-R\\checkpoint-360", | |
| "epoch": 39.61165048543689, | |
| "eval_steps": 500, | |
| "global_step": 2040, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 5.392156862745098e-06, | |
| "loss": 1.3862, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.0784313725490196e-05, | |
| "loss": 1.3853, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 1.3787, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.156862745098039e-05, | |
| "loss": 1.3618, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.696078431372549e-05, | |
| "loss": 1.3157, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.34782608695652173, | |
| "eval_loss": 1.296690583229065, | |
| "eval_runtime": 0.977, | |
| "eval_samples_per_second": 47.083, | |
| "eval_steps_per_second": 6.141, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": 1.2679, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.774509803921569e-05, | |
| "loss": 1.1855, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.313725490196078e-05, | |
| "loss": 1.1615, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "loss": 1.0505, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5.392156862745098e-05, | |
| "loss": 0.9801, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 0.9965764880180359, | |
| "eval_runtime": 0.7837, | |
| "eval_samples_per_second": 58.697, | |
| "eval_steps_per_second": 7.656, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 5.477296181630547e-05, | |
| "loss": 0.95, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 5.448916408668731e-05, | |
| "loss": 0.9522, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 5.4205366357069146e-05, | |
| "loss": 0.8341, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.392156862745098e-05, | |
| "loss": 0.7555, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 5.363777089783282e-05, | |
| "loss": 0.7385, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.7600191235542297, | |
| "eval_runtime": 0.7802, | |
| "eval_samples_per_second": 58.961, | |
| "eval_steps_per_second": 7.691, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 5.3353973168214655e-05, | |
| "loss": 0.7079, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 5.3070175438596496e-05, | |
| "loss": 0.5291, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 5.278637770897833e-05, | |
| "loss": 0.5315, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 5.2502579979360165e-05, | |
| "loss": 0.5752, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 5.2218782249742006e-05, | |
| "loss": 0.572, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.6425113677978516, | |
| "eval_runtime": 0.7782, | |
| "eval_samples_per_second": 59.112, | |
| "eval_steps_per_second": 7.71, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 5.193498452012384e-05, | |
| "loss": 0.4306, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 5.165118679050568e-05, | |
| "loss": 0.4744, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5.1367389060887515e-05, | |
| "loss": 0.4993, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 5.108359133126935e-05, | |
| "loss": 0.3998, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 5.079979360165119e-05, | |
| "loss": 0.3646, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.768746554851532, | |
| "eval_runtime": 0.8062, | |
| "eval_samples_per_second": 57.057, | |
| "eval_steps_per_second": 7.442, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 5.0515995872033025e-05, | |
| "loss": 0.327, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 5.023219814241486e-05, | |
| "loss": 0.3172, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 4.99484004127967e-05, | |
| "loss": 0.3136, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 4.9664602683178534e-05, | |
| "loss": 0.4175, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 4.9380804953560375e-05, | |
| "loss": 0.3033, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.6335754990577698, | |
| "eval_runtime": 0.7852, | |
| "eval_samples_per_second": 58.586, | |
| "eval_steps_per_second": 7.642, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 4.909700722394221e-05, | |
| "loss": 0.2792, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 4.8813209494324044e-05, | |
| "loss": 0.2537, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "loss": 0.3938, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 4.824561403508772e-05, | |
| "loss": 0.3292, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 4.796181630546956e-05, | |
| "loss": 0.2956, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 4.7678018575851394e-05, | |
| "loss": 0.3073, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.35338208079338074, | |
| "eval_runtime": 0.7677, | |
| "eval_samples_per_second": 59.921, | |
| "eval_steps_per_second": 7.816, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 4.739422084623323e-05, | |
| "loss": 0.2049, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 4.711042311661507e-05, | |
| "loss": 0.2429, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 4.6826625386996904e-05, | |
| "loss": 0.2084, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 4.6542827657378745e-05, | |
| "loss": 0.3038, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 4.625902992776058e-05, | |
| "loss": 0.1623, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.8559263348579407, | |
| "eval_runtime": 0.8107, | |
| "eval_samples_per_second": 56.742, | |
| "eval_steps_per_second": 7.401, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 4.597523219814241e-05, | |
| "loss": 0.1462, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 4.5691434468524254e-05, | |
| "loss": 0.124, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 4.540763673890609e-05, | |
| "loss": 0.1477, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 4.512383900928793e-05, | |
| "loss": 0.1319, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 4.4840041279669764e-05, | |
| "loss": 0.1079, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.973010778427124, | |
| "eval_runtime": 0.8122, | |
| "eval_samples_per_second": 56.638, | |
| "eval_steps_per_second": 7.388, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 4.45562435500516e-05, | |
| "loss": 0.1712, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 4.427244582043344e-05, | |
| "loss": 0.1125, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 4.398864809081527e-05, | |
| "loss": 0.1858, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 4.3704850361197114e-05, | |
| "loss": 0.1768, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 4.342105263157895e-05, | |
| "loss": 0.2703, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.7767899632453918, | |
| "eval_runtime": 0.7982, | |
| "eval_samples_per_second": 57.63, | |
| "eval_steps_per_second": 7.517, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 4.313725490196078e-05, | |
| "loss": 0.0645, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "learning_rate": 4.2853457172342624e-05, | |
| "loss": 0.1355, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 4.2569659442724465e-05, | |
| "loss": 0.2119, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "learning_rate": 4.22858617131063e-05, | |
| "loss": 0.2935, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 4.200206398348813e-05, | |
| "loss": 0.178, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.8520329594612122, | |
| "eval_runtime": 0.7822, | |
| "eval_samples_per_second": 58.809, | |
| "eval_steps_per_second": 7.671, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 4.171826625386997e-05, | |
| "loss": 0.1782, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 11.26, | |
| "learning_rate": 4.143446852425181e-05, | |
| "loss": 0.1341, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "learning_rate": 4.115067079463365e-05, | |
| "loss": 0.0238, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "learning_rate": 4.0866873065015484e-05, | |
| "loss": 0.1414, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "learning_rate": 4.058307533539732e-05, | |
| "loss": 0.2191, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.0049434900283813, | |
| "eval_runtime": 0.7802, | |
| "eval_samples_per_second": 58.961, | |
| "eval_steps_per_second": 7.691, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 4.029927760577915e-05, | |
| "loss": 0.1217, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 4.001547987616099e-05, | |
| "loss": 0.1584, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "learning_rate": 3.9731682146542834e-05, | |
| "loss": 0.1076, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "learning_rate": 3.944788441692467e-05, | |
| "loss": 0.0764, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "learning_rate": 3.91640866873065e-05, | |
| "loss": 0.0597, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.833352267742157, | |
| "eval_runtime": 0.7732, | |
| "eval_samples_per_second": 59.495, | |
| "eval_steps_per_second": 7.76, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "learning_rate": 3.888028895768834e-05, | |
| "loss": 0.071, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "learning_rate": 3.859649122807018e-05, | |
| "loss": 0.0797, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "learning_rate": 3.831269349845202e-05, | |
| "loss": 0.1046, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 3.802889576883385e-05, | |
| "loss": 0.1188, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "learning_rate": 3.774509803921569e-05, | |
| "loss": 0.0627, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "learning_rate": 3.746130030959752e-05, | |
| "loss": 0.0881, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.9984623193740845, | |
| "eval_runtime": 0.7912, | |
| "eval_samples_per_second": 58.141, | |
| "eval_steps_per_second": 7.584, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "learning_rate": 3.7177502579979356e-05, | |
| "loss": 0.1591, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 3.6893704850361204e-05, | |
| "loss": 0.1445, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "learning_rate": 3.660990712074304e-05, | |
| "loss": 0.0678, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 14.76, | |
| "learning_rate": 3.632610939112487e-05, | |
| "loss": 0.0799, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "learning_rate": 3.6042311661506706e-05, | |
| "loss": 0.1265, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.9443035125732422, | |
| "eval_runtime": 0.8091, | |
| "eval_samples_per_second": 56.851, | |
| "eval_steps_per_second": 7.415, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 15.15, | |
| "learning_rate": 3.575851393188854e-05, | |
| "loss": 0.1045, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "learning_rate": 3.547471620227039e-05, | |
| "loss": 0.1274, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 15.53, | |
| "learning_rate": 3.519091847265222e-05, | |
| "loss": 0.105, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 15.73, | |
| "learning_rate": 3.490712074303406e-05, | |
| "loss": 0.1277, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 15.92, | |
| "learning_rate": 3.462332301341589e-05, | |
| "loss": 0.0696, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8260869565217391, | |
| "eval_loss": 0.9877853393554688, | |
| "eval_runtime": 0.808, | |
| "eval_samples_per_second": 56.928, | |
| "eval_steps_per_second": 7.425, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "learning_rate": 3.4339525283797725e-05, | |
| "loss": 0.1507, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "learning_rate": 3.405572755417957e-05, | |
| "loss": 0.1025, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "learning_rate": 3.377192982456141e-05, | |
| "loss": 0.0113, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "learning_rate": 3.348813209494324e-05, | |
| "loss": 0.0393, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "learning_rate": 3.3204334365325076e-05, | |
| "loss": 0.1198, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.8784380555152893, | |
| "eval_runtime": 0.8375, | |
| "eval_samples_per_second": 54.923, | |
| "eval_steps_per_second": 7.164, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 17.09, | |
| "learning_rate": 3.292053663570691e-05, | |
| "loss": 0.1388, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "learning_rate": 3.263673890608876e-05, | |
| "loss": 0.1075, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": 0.0449, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 17.67, | |
| "learning_rate": 3.2069143446852426e-05, | |
| "loss": 0.0916, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 17.86, | |
| "learning_rate": 3.178534571723426e-05, | |
| "loss": 0.1484, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.9595004320144653, | |
| "eval_runtime": 0.7957, | |
| "eval_samples_per_second": 57.814, | |
| "eval_steps_per_second": 7.541, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "learning_rate": 3.1501547987616095e-05, | |
| "loss": 0.0764, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "learning_rate": 3.121775025799794e-05, | |
| "loss": 0.0681, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "learning_rate": 3.093395252837978e-05, | |
| "loss": 0.064, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 18.64, | |
| "learning_rate": 3.065015479876161e-05, | |
| "loss": 0.067, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 18.83, | |
| "learning_rate": 3.0366357069143445e-05, | |
| "loss": 0.2887, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 1.0562622547149658, | |
| "eval_runtime": 0.8164, | |
| "eval_samples_per_second": 56.345, | |
| "eval_steps_per_second": 7.349, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "learning_rate": 3.0082559339525283e-05, | |
| "loss": 0.1123, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 19.22, | |
| "learning_rate": 2.9798761609907124e-05, | |
| "loss": 0.0329, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 19.42, | |
| "learning_rate": 2.9514963880288958e-05, | |
| "loss": 0.0561, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 19.61, | |
| "learning_rate": 2.9231166150670796e-05, | |
| "loss": 0.1581, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 2.894736842105263e-05, | |
| "loss": 0.1128, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2.8663570691434468e-05, | |
| "loss": 0.1423, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 0.8549535870552063, | |
| "eval_runtime": 0.8176, | |
| "eval_samples_per_second": 56.263, | |
| "eval_steps_per_second": 7.339, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 2.837977296181631e-05, | |
| "loss": 0.0869, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 20.39, | |
| "learning_rate": 2.8095975232198143e-05, | |
| "loss": 0.0625, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 20.58, | |
| "learning_rate": 2.781217750257998e-05, | |
| "loss": 0.136, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 20.78, | |
| "learning_rate": 2.7528379772961815e-05, | |
| "loss": 0.2042, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "learning_rate": 2.7244582043343656e-05, | |
| "loss": 0.083, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 0.9092756509780884, | |
| "eval_runtime": 0.8048, | |
| "eval_samples_per_second": 57.154, | |
| "eval_steps_per_second": 7.455, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "learning_rate": 2.696078431372549e-05, | |
| "loss": 0.0607, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 21.36, | |
| "learning_rate": 2.6676986584107328e-05, | |
| "loss": 0.0796, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 21.55, | |
| "learning_rate": 2.6393188854489165e-05, | |
| "loss": 0.0582, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 21.75, | |
| "learning_rate": 2.6109391124871003e-05, | |
| "loss": 0.0274, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "learning_rate": 2.582559339525284e-05, | |
| "loss": 0.0695, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.2757601737976074, | |
| "eval_runtime": 0.8066, | |
| "eval_samples_per_second": 57.031, | |
| "eval_steps_per_second": 7.439, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 22.14, | |
| "learning_rate": 2.5541795665634675e-05, | |
| "loss": 0.0193, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 22.33, | |
| "learning_rate": 2.5257997936016512e-05, | |
| "loss": 0.0856, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "learning_rate": 2.497420020639835e-05, | |
| "loss": 0.0969, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 22.72, | |
| "learning_rate": 2.4690402476780188e-05, | |
| "loss": 0.1214, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 22.91, | |
| "learning_rate": 2.4406604747162022e-05, | |
| "loss": 0.0285, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.0851949453353882, | |
| "eval_runtime": 0.8033, | |
| "eval_samples_per_second": 57.263, | |
| "eval_steps_per_second": 7.469, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 23.11, | |
| "learning_rate": 2.412280701754386e-05, | |
| "loss": 0.1273, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 23.3, | |
| "learning_rate": 2.3839009287925697e-05, | |
| "loss": 0.0777, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "learning_rate": 2.3555211558307535e-05, | |
| "loss": 0.0507, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "learning_rate": 2.3271413828689372e-05, | |
| "loss": 0.1113, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "learning_rate": 2.2987616099071207e-05, | |
| "loss": 0.0132, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.3340740203857422, | |
| "eval_runtime": 0.8554, | |
| "eval_samples_per_second": 53.774, | |
| "eval_steps_per_second": 7.014, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 24.08, | |
| "learning_rate": 2.2703818369453044e-05, | |
| "loss": 0.054, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 24.27, | |
| "learning_rate": 2.2420020639834882e-05, | |
| "loss": 0.0702, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 24.47, | |
| "learning_rate": 2.213622291021672e-05, | |
| "loss": 0.0461, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 24.66, | |
| "learning_rate": 2.1852425180598557e-05, | |
| "loss": 0.0837, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 24.85, | |
| "learning_rate": 2.156862745098039e-05, | |
| "loss": 0.0957, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.1964709758758545, | |
| "eval_runtime": 0.8164, | |
| "eval_samples_per_second": 56.344, | |
| "eval_steps_per_second": 7.349, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "learning_rate": 2.1284829721362232e-05, | |
| "loss": 0.0968, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "learning_rate": 2.1001031991744067e-05, | |
| "loss": 0.1526, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 25.44, | |
| "learning_rate": 2.0717234262125904e-05, | |
| "loss": 0.0653, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "learning_rate": 2.0433436532507742e-05, | |
| "loss": 0.1046, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 25.83, | |
| "learning_rate": 2.0149638802889576e-05, | |
| "loss": 0.0633, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.1199151277542114, | |
| "eval_runtime": 0.8717, | |
| "eval_samples_per_second": 52.769, | |
| "eval_steps_per_second": 6.883, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "learning_rate": 1.9865841073271417e-05, | |
| "loss": 0.008, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "learning_rate": 1.958204334365325e-05, | |
| "loss": 0.123, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "learning_rate": 1.929824561403509e-05, | |
| "loss": 0.0715, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 26.6, | |
| "learning_rate": 1.9014447884416927e-05, | |
| "loss": 0.0765, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "learning_rate": 1.873065015479876e-05, | |
| "loss": 0.0991, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "learning_rate": 1.8446852425180602e-05, | |
| "loss": 0.0705, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 1.0550963878631592, | |
| "eval_runtime": 0.8012, | |
| "eval_samples_per_second": 57.414, | |
| "eval_steps_per_second": 7.489, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "learning_rate": 1.8163054695562436e-05, | |
| "loss": 0.0501, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "learning_rate": 1.787925696594427e-05, | |
| "loss": 0.0841, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "learning_rate": 1.759545923632611e-05, | |
| "loss": 0.0257, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 27.77, | |
| "learning_rate": 1.7311661506707946e-05, | |
| "loss": 0.0027, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 27.96, | |
| "learning_rate": 1.7027863777089787e-05, | |
| "loss": 0.0564, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.4332396984100342, | |
| "eval_runtime": 0.8092, | |
| "eval_samples_per_second": 56.846, | |
| "eval_steps_per_second": 7.415, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 28.16, | |
| "learning_rate": 1.674406604747162e-05, | |
| "loss": 0.1458, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 28.35, | |
| "learning_rate": 1.6460268317853455e-05, | |
| "loss": 0.0965, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 0.0443, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 28.74, | |
| "learning_rate": 1.589267285861713e-05, | |
| "loss": 0.0277, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 28.93, | |
| "learning_rate": 1.560887512899897e-05, | |
| "loss": 0.0798, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.3854602575302124, | |
| "eval_runtime": 0.7817, | |
| "eval_samples_per_second": 58.847, | |
| "eval_steps_per_second": 7.676, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 29.13, | |
| "learning_rate": 1.5325077399380806e-05, | |
| "loss": 0.0701, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 29.32, | |
| "learning_rate": 1.5041279669762642e-05, | |
| "loss": 0.0716, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 29.51, | |
| "learning_rate": 1.4757481940144479e-05, | |
| "loss": 0.0463, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 29.71, | |
| "learning_rate": 1.4473684210526315e-05, | |
| "loss": 0.0273, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 29.9, | |
| "learning_rate": 1.4189886480908154e-05, | |
| "loss": 0.0326, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 1.0533674955368042, | |
| "eval_runtime": 0.8902, | |
| "eval_samples_per_second": 51.674, | |
| "eval_steps_per_second": 6.74, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 30.1, | |
| "learning_rate": 1.390608875128999e-05, | |
| "loss": 0.0074, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 30.29, | |
| "learning_rate": 1.3622291021671828e-05, | |
| "loss": 0.0241, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 30.49, | |
| "learning_rate": 1.3338493292053664e-05, | |
| "loss": 0.089, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 30.68, | |
| "learning_rate": 1.3054695562435501e-05, | |
| "loss": 0.0401, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 30.87, | |
| "learning_rate": 1.2770897832817337e-05, | |
| "loss": 0.092, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.17453932762146, | |
| "eval_runtime": 0.8884, | |
| "eval_samples_per_second": 51.778, | |
| "eval_steps_per_second": 6.754, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 31.07, | |
| "learning_rate": 1.2487100103199175e-05, | |
| "loss": 0.0336, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 31.26, | |
| "learning_rate": 1.2203302373581011e-05, | |
| "loss": 0.0161, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 31.46, | |
| "learning_rate": 1.1919504643962849e-05, | |
| "loss": 0.0428, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 31.65, | |
| "learning_rate": 1.1635706914344686e-05, | |
| "loss": 0.0379, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 31.84, | |
| "learning_rate": 1.1351909184726522e-05, | |
| "loss": 0.1243, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 1.1341125965118408, | |
| "eval_runtime": 0.8712, | |
| "eval_samples_per_second": 52.801, | |
| "eval_steps_per_second": 6.887, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 32.04, | |
| "learning_rate": 1.106811145510836e-05, | |
| "loss": 0.0214, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 32.23, | |
| "learning_rate": 1.0784313725490196e-05, | |
| "loss": 0.0496, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "learning_rate": 1.0500515995872033e-05, | |
| "loss": 0.0483, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "learning_rate": 1.0216718266253871e-05, | |
| "loss": 0.0087, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 32.82, | |
| "learning_rate": 9.932920536635709e-06, | |
| "loss": 0.062, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.2647993564605713, | |
| "eval_runtime": 0.8321, | |
| "eval_samples_per_second": 55.281, | |
| "eval_steps_per_second": 7.211, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 33.01, | |
| "learning_rate": 9.649122807017545e-06, | |
| "loss": 0.1248, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "learning_rate": 9.36532507739938e-06, | |
| "loss": 0.0765, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 33.4, | |
| "learning_rate": 9.081527347781218e-06, | |
| "loss": 0.0796, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 33.59, | |
| "learning_rate": 8.797729618163056e-06, | |
| "loss": 0.0274, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 33.79, | |
| "learning_rate": 8.513931888544893e-06, | |
| "loss": 0.0525, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 33.98, | |
| "learning_rate": 8.230134158926728e-06, | |
| "loss": 0.0941, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.1236172914505005, | |
| "eval_runtime": 0.8314, | |
| "eval_samples_per_second": 55.331, | |
| "eval_steps_per_second": 7.217, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 34.17, | |
| "learning_rate": 7.946336429308565e-06, | |
| "loss": 0.0313, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "learning_rate": 7.662538699690403e-06, | |
| "loss": 0.046, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 34.56, | |
| "learning_rate": 7.3787409700722396e-06, | |
| "loss": 0.0394, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 34.76, | |
| "learning_rate": 7.094943240454077e-06, | |
| "loss": 0.0209, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 34.95, | |
| "learning_rate": 6.811145510835914e-06, | |
| "loss": 0.0119, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "eval_accuracy": 0.8043478260869565, | |
| "eval_loss": 1.1302545070648193, | |
| "eval_runtime": 0.8257, | |
| "eval_samples_per_second": 55.711, | |
| "eval_steps_per_second": 7.267, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 35.15, | |
| "learning_rate": 6.527347781217751e-06, | |
| "loss": 0.0719, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 35.34, | |
| "learning_rate": 6.2435500515995875e-06, | |
| "loss": 0.0428, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 35.53, | |
| "learning_rate": 5.959752321981424e-06, | |
| "loss": 0.0183, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 35.73, | |
| "learning_rate": 5.675954592363261e-06, | |
| "loss": 0.027, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 35.92, | |
| "learning_rate": 5.392156862745098e-06, | |
| "loss": 0.044, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.1848399639129639, | |
| "eval_runtime": 0.8214, | |
| "eval_samples_per_second": 56.0, | |
| "eval_steps_per_second": 7.304, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 36.12, | |
| "learning_rate": 5.1083591331269355e-06, | |
| "loss": 0.0312, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 36.31, | |
| "learning_rate": 4.824561403508772e-06, | |
| "loss": 0.0166, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "learning_rate": 4.540763673890609e-06, | |
| "loss": 0.0951, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 36.7, | |
| "learning_rate": 4.256965944272447e-06, | |
| "loss": 0.1538, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 36.89, | |
| "learning_rate": 3.973168214654283e-06, | |
| "loss": 0.0073, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.1795865297317505, | |
| "eval_runtime": 0.8136, | |
| "eval_samples_per_second": 56.539, | |
| "eval_steps_per_second": 7.375, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 37.09, | |
| "learning_rate": 3.6893704850361198e-06, | |
| "loss": 0.0617, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 37.28, | |
| "learning_rate": 3.405572755417957e-06, | |
| "loss": 0.0482, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 37.48, | |
| "learning_rate": 3.1217750257997938e-06, | |
| "loss": 0.0452, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 37.67, | |
| "learning_rate": 2.8379772961816305e-06, | |
| "loss": 0.0511, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 37.86, | |
| "learning_rate": 2.5541795665634677e-06, | |
| "loss": 0.0149, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.2491459846496582, | |
| "eval_runtime": 0.8526, | |
| "eval_samples_per_second": 53.952, | |
| "eval_steps_per_second": 7.037, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 38.06, | |
| "learning_rate": 2.2703818369453045e-06, | |
| "loss": 0.0527, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 38.25, | |
| "learning_rate": 1.9865841073271413e-06, | |
| "loss": 0.0693, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 38.45, | |
| "learning_rate": 1.7027863777089785e-06, | |
| "loss": 0.0776, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 38.64, | |
| "learning_rate": 1.4189886480908153e-06, | |
| "loss": 0.0413, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 38.83, | |
| "learning_rate": 1.1351909184726523e-06, | |
| "loss": 0.0194, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.1812182664871216, | |
| "eval_runtime": 0.8162, | |
| "eval_samples_per_second": 56.361, | |
| "eval_steps_per_second": 7.351, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 39.03, | |
| "learning_rate": 8.513931888544892e-07, | |
| "loss": 0.0202, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 39.22, | |
| "learning_rate": 5.675954592363261e-07, | |
| "loss": 0.0311, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 39.42, | |
| "learning_rate": 2.8379772961816306e-07, | |
| "loss": 0.037, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "learning_rate": 0.0, | |
| "loss": 0.0577, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.177699327468872, | |
| "eval_runtime": 0.8677, | |
| "eval_samples_per_second": 53.015, | |
| "eval_steps_per_second": 6.915, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "step": 2040, | |
| "total_flos": 2.5142726714989363e+18, | |
| "train_loss": 0.19644491698172892, | |
| "train_runtime": 666.0117, | |
| "train_samples_per_second": 49.188, | |
| "train_steps_per_second": 3.063 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2040, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 2.5142726714989363e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |