| { | |
| "best_metric": 0.9811772758384668, | |
| "best_model_checkpoint": "swin-tiny-patch4-window7-224-spa_saloon_classification/checkpoint-1849", | |
| "epoch": 9.975669099756692, | |
| "eval_steps": 500, | |
| "global_step": 2050, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2195121951219514e-06, | |
| "loss": 1.8606, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4390243902439027e-06, | |
| "loss": 1.8294, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.6585365853658537e-06, | |
| "loss": 1.8047, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8780487804878055e-06, | |
| "loss": 1.7415, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 6.0975609756097564e-06, | |
| "loss": 1.6899, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 7.317073170731707e-06, | |
| "loss": 1.6166, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.53658536585366e-06, | |
| "loss": 1.518, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.756097560975611e-06, | |
| "loss": 1.4472, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.0975609756097562e-05, | |
| "loss": 1.2949, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.2195121951219513e-05, | |
| "loss": 1.1811, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.3414634146341466e-05, | |
| "loss": 1.0548, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.4634146341463415e-05, | |
| "loss": 0.9112, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5853658536585366e-05, | |
| "loss": 0.8244, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.707317073170732e-05, | |
| "loss": 0.7256, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.8292682926829268e-05, | |
| "loss": 0.6848, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.9512195121951222e-05, | |
| "loss": 0.6532, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.073170731707317e-05, | |
| "loss": 0.5777, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.1951219512195124e-05, | |
| "loss": 0.6014, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.3170731707317075e-05, | |
| "loss": 0.5424, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.4390243902439026e-05, | |
| "loss": 0.4876, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.5609756097560977e-05, | |
| "loss": 0.4372, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.682926829268293e-05, | |
| "loss": 0.4551, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.8048780487804882e-05, | |
| "loss": 0.4751, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.926829268292683e-05, | |
| "loss": 0.4652, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.048780487804878e-05, | |
| "loss": 0.4264, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.170731707317073e-05, | |
| "loss": 0.399, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.292682926829269e-05, | |
| "loss": 0.4399, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.414634146341464e-05, | |
| "loss": 0.4074, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.5365853658536584e-05, | |
| "loss": 0.397, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.6585365853658535e-05, | |
| "loss": 0.3578, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.780487804878049e-05, | |
| "loss": 0.3771, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.9024390243902444e-05, | |
| "loss": 0.366, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.0243902439024395e-05, | |
| "loss": 0.3792, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.146341463414634e-05, | |
| "loss": 0.3706, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.26829268292683e-05, | |
| "loss": 0.352, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.390243902439025e-05, | |
| "loss": 0.3481, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.51219512195122e-05, | |
| "loss": 0.2698, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.634146341463415e-05, | |
| "loss": 0.3573, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.75609756097561e-05, | |
| "loss": 0.3479, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.878048780487805e-05, | |
| "loss": 0.3917, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.337, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9175222450376455, | |
| "eval_loss": 0.21079373359680176, | |
| "eval_runtime": 21.0784, | |
| "eval_samples_per_second": 138.625, | |
| "eval_steps_per_second": 4.365, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.986449864498645e-05, | |
| "loss": 0.3492, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.97289972899729e-05, | |
| "loss": 0.3251, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.959349593495935e-05, | |
| "loss": 0.2809, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.9457994579945803e-05, | |
| "loss": 0.3412, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.932249322493225e-05, | |
| "loss": 0.3049, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.9186991869918704e-05, | |
| "loss": 0.3123, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.905149051490515e-05, | |
| "loss": 0.2915, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.89159891598916e-05, | |
| "loss": 0.2875, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.878048780487805e-05, | |
| "loss": 0.2721, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.86449864498645e-05, | |
| "loss": 0.2551, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.8509485094850945e-05, | |
| "loss": 0.2956, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.8373983739837406e-05, | |
| "loss": 0.2643, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.823848238482385e-05, | |
| "loss": 0.3148, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.81029810298103e-05, | |
| "loss": 0.2628, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.796747967479675e-05, | |
| "loss": 0.2258, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.78319783197832e-05, | |
| "loss": 0.2483, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.769647696476965e-05, | |
| "loss": 0.2805, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.75609756097561e-05, | |
| "loss": 0.2861, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4.7425474254742554e-05, | |
| "loss": 0.2972, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 4.7289972899729e-05, | |
| "loss": 0.2432, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.715447154471545e-05, | |
| "loss": 0.2517, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.70189701897019e-05, | |
| "loss": 0.2707, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.688346883468835e-05, | |
| "loss": 0.2512, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.6747967479674795e-05, | |
| "loss": 0.2036, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.661246612466125e-05, | |
| "loss": 0.2346, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 4.6476964769647696e-05, | |
| "loss": 0.2921, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 4.634146341463415e-05, | |
| "loss": 0.2789, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 4.62059620596206e-05, | |
| "loss": 0.2456, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 4.607046070460705e-05, | |
| "loss": 0.2536, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.59349593495935e-05, | |
| "loss": 0.282, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 4.579945799457995e-05, | |
| "loss": 0.2525, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.56639566395664e-05, | |
| "loss": 0.2284, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 4.5528455284552844e-05, | |
| "loss": 0.2389, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.53929539295393e-05, | |
| "loss": 0.2425, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.525745257452575e-05, | |
| "loss": 0.2051, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.51219512195122e-05, | |
| "loss": 0.278, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.4986449864498645e-05, | |
| "loss": 0.2004, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.48509485094851e-05, | |
| "loss": 0.2467, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 4.4715447154471546e-05, | |
| "loss": 0.2411, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.457994579945799e-05, | |
| "loss": 0.218, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.196, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9620123203285421, | |
| "eval_loss": 0.11370620876550674, | |
| "eval_runtime": 21.3607, | |
| "eval_samples_per_second": 136.793, | |
| "eval_steps_per_second": 4.307, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 4.43089430894309e-05, | |
| "loss": 0.1786, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.417344173441735e-05, | |
| "loss": 0.2308, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 4.4037940379403794e-05, | |
| "loss": 0.2158, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.390243902439025e-05, | |
| "loss": 0.1875, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 4.3766937669376695e-05, | |
| "loss": 0.1998, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 4.363143631436314e-05, | |
| "loss": 0.2001, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 4.3495934959349595e-05, | |
| "loss": 0.2349, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 4.336043360433605e-05, | |
| "loss": 0.2191, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 4.3224932249322496e-05, | |
| "loss": 0.2052, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 4.308943089430895e-05, | |
| "loss": 0.1963, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 4.2953929539295396e-05, | |
| "loss": 0.2318, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 4.281842818428184e-05, | |
| "loss": 0.2105, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 4.26829268292683e-05, | |
| "loss": 0.2185, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 4.2547425474254744e-05, | |
| "loss": 0.2426, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 4.241192411924119e-05, | |
| "loss": 0.2248, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 4.2276422764227644e-05, | |
| "loss": 0.232, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 4.21409214092141e-05, | |
| "loss": 0.2263, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 4.2005420054200545e-05, | |
| "loss": 0.1929, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.186991869918699e-05, | |
| "loss": 0.2151, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.1734417344173445e-05, | |
| "loss": 0.2043, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 4.159891598915989e-05, | |
| "loss": 0.1585, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 4.146341463414634e-05, | |
| "loss": 0.2013, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 4.132791327913279e-05, | |
| "loss": 0.2072, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 4.1192411924119246e-05, | |
| "loss": 0.1806, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 4.105691056910569e-05, | |
| "loss": 0.1539, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 4.092140921409214e-05, | |
| "loss": 0.1582, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 4.0785907859078594e-05, | |
| "loss": 0.1856, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 4.065040650406504e-05, | |
| "loss": 0.1966, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.051490514905149e-05, | |
| "loss": 0.205, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.037940379403794e-05, | |
| "loss": 0.2359, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.0243902439024395e-05, | |
| "loss": 0.1834, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.010840108401084e-05, | |
| "loss": 0.2631, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.9972899728997295e-05, | |
| "loss": 0.2, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.983739837398374e-05, | |
| "loss": 0.1802, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.970189701897019e-05, | |
| "loss": 0.2305, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 3.956639566395664e-05, | |
| "loss": 0.1662, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 3.943089430894309e-05, | |
| "loss": 0.166, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 3.9295392953929537e-05, | |
| "loss": 0.167, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 3.915989159891599e-05, | |
| "loss": 0.2017, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 3.9024390243902444e-05, | |
| "loss": 0.1826, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.1502, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9668035592060232, | |
| "eval_loss": 0.1030467301607132, | |
| "eval_runtime": 22.6678, | |
| "eval_samples_per_second": 128.905, | |
| "eval_steps_per_second": 4.059, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 3.875338753387534e-05, | |
| "loss": 0.1764, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 3.861788617886179e-05, | |
| "loss": 0.1602, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 3.848238482384824e-05, | |
| "loss": 0.139, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.8346883468834685e-05, | |
| "loss": 0.1811, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 3.8211382113821145e-05, | |
| "loss": 0.1824, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 3.807588075880759e-05, | |
| "loss": 0.1987, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.794037940379404e-05, | |
| "loss": 0.1861, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 3.780487804878049e-05, | |
| "loss": 0.2177, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 3.766937669376694e-05, | |
| "loss": 0.1591, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 3.753387533875339e-05, | |
| "loss": 0.2049, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 3.739837398373984e-05, | |
| "loss": 0.1844, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 3.726287262872629e-05, | |
| "loss": 0.1668, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.712737127371274e-05, | |
| "loss": 0.1768, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 3.699186991869919e-05, | |
| "loss": 0.1619, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.685636856368564e-05, | |
| "loss": 0.2358, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.672086720867209e-05, | |
| "loss": 0.1658, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 3.6585365853658535e-05, | |
| "loss": 0.163, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 3.644986449864499e-05, | |
| "loss": 0.1658, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.6314363143631436e-05, | |
| "loss": 0.2312, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 3.617886178861789e-05, | |
| "loss": 0.2043, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 3.6043360433604336e-05, | |
| "loss": 0.1625, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 3.590785907859079e-05, | |
| "loss": 0.1954, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 3.577235772357724e-05, | |
| "loss": 0.1613, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 3.5636856368563684e-05, | |
| "loss": 0.1924, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 3.550135501355014e-05, | |
| "loss": 0.177, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 3.5365853658536584e-05, | |
| "loss": 0.1707, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.523035230352303e-05, | |
| "loss": 0.1454, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 3.509485094850949e-05, | |
| "loss": 0.1731, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.495934959349594e-05, | |
| "loss": 0.1361, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.4823848238482385e-05, | |
| "loss": 0.1592, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 3.468834688346884e-05, | |
| "loss": 0.1691, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 3.4552845528455286e-05, | |
| "loss": 0.1321, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.441734417344173e-05, | |
| "loss": 0.133, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 3.4281842818428186e-05, | |
| "loss": 0.1367, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 3.414634146341464e-05, | |
| "loss": 0.1517, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 3.401084010840109e-05, | |
| "loss": 0.1478, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 3.3875338753387534e-05, | |
| "loss": 0.1408, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 3.373983739837399e-05, | |
| "loss": 0.2103, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.3604336043360434e-05, | |
| "loss": 0.2157, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 3.346883468834688e-05, | |
| "loss": 0.1526, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1476, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9736481861738535, | |
| "eval_loss": 0.08152312785387039, | |
| "eval_runtime": 21.5748, | |
| "eval_samples_per_second": 135.436, | |
| "eval_steps_per_second": 4.264, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.319783197831978e-05, | |
| "loss": 0.1679, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.3062330623306235e-05, | |
| "loss": 0.1668, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.292682926829269e-05, | |
| "loss": 0.1528, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.2791327913279136e-05, | |
| "loss": 0.1984, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.265582655826558e-05, | |
| "loss": 0.1575, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.2520325203252037e-05, | |
| "loss": 0.156, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.2384823848238483e-05, | |
| "loss": 0.1227, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.224932249322493e-05, | |
| "loss": 0.1759, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.2113821138211384e-05, | |
| "loss": 0.1261, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.197831978319784e-05, | |
| "loss": 0.1566, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.1842818428184285e-05, | |
| "loss": 0.1424, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.170731707317073e-05, | |
| "loss": 0.1353, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 3.1571815718157185e-05, | |
| "loss": 0.1595, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.143631436314363e-05, | |
| "loss": 0.1823, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.130081300813008e-05, | |
| "loss": 0.1428, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 3.116531165311653e-05, | |
| "loss": 0.1566, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 3.1029810298102986e-05, | |
| "loss": 0.099, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.089430894308943e-05, | |
| "loss": 0.1311, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 3.075880758807588e-05, | |
| "loss": 0.1317, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 3.0623306233062334e-05, | |
| "loss": 0.1647, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 3.048780487804878e-05, | |
| "loss": 0.1646, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.035230352303523e-05, | |
| "loss": 0.1429, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 3.021680216802168e-05, | |
| "loss": 0.1511, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.0081300813008135e-05, | |
| "loss": 0.1691, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 2.9945799457994585e-05, | |
| "loss": 0.1174, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 2.9810298102981032e-05, | |
| "loss": 0.1743, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 2.9674796747967482e-05, | |
| "loss": 0.1757, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 2.9539295392953932e-05, | |
| "loss": 0.1164, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 2.940379403794038e-05, | |
| "loss": 0.1469, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 2.926829268292683e-05, | |
| "loss": 0.1539, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.9132791327913276e-05, | |
| "loss": 0.1577, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.8997289972899733e-05, | |
| "loss": 0.141, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 2.886178861788618e-05, | |
| "loss": 0.1382, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.872628726287263e-05, | |
| "loss": 0.1445, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.859078590785908e-05, | |
| "loss": 0.179, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 2.8455284552845528e-05, | |
| "loss": 0.1244, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.8319783197831978e-05, | |
| "loss": 0.172, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.8184281842818428e-05, | |
| "loss": 0.1341, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.8048780487804882e-05, | |
| "loss": 0.1456, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.7913279132791332e-05, | |
| "loss": 0.1839, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.1532, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9760438056125941, | |
| "eval_loss": 0.08150195330381393, | |
| "eval_runtime": 21.7472, | |
| "eval_samples_per_second": 134.362, | |
| "eval_steps_per_second": 4.23, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.764227642276423e-05, | |
| "loss": 0.1426, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 2.750677506775068e-05, | |
| "loss": 0.1323, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.7371273712737127e-05, | |
| "loss": 0.101, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.7235772357723577e-05, | |
| "loss": 0.1545, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.7100271002710027e-05, | |
| "loss": 0.1368, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.696476964769648e-05, | |
| "loss": 0.1524, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.682926829268293e-05, | |
| "loss": 0.1483, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 2.6693766937669378e-05, | |
| "loss": 0.1225, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 2.6558265582655828e-05, | |
| "loss": 0.1314, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.642276422764228e-05, | |
| "loss": 0.1565, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.6287262872628725e-05, | |
| "loss": 0.1479, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.6151761517615176e-05, | |
| "loss": 0.1391, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.601626016260163e-05, | |
| "loss": 0.1049, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.588075880758808e-05, | |
| "loss": 0.156, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.574525745257453e-05, | |
| "loss": 0.1518, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.5609756097560977e-05, | |
| "loss": 0.1462, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.5474254742547427e-05, | |
| "loss": 0.1438, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.5338753387533877e-05, | |
| "loss": 0.1274, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 2.5203252032520324e-05, | |
| "loss": 0.1378, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.5067750677506774e-05, | |
| "loss": 0.1273, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 2.4932249322493225e-05, | |
| "loss": 0.1386, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.4796747967479675e-05, | |
| "loss": 0.1615, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.4661246612466125e-05, | |
| "loss": 0.1052, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.4525745257452575e-05, | |
| "loss": 0.1767, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.4390243902439026e-05, | |
| "loss": 0.1354, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.4254742547425473e-05, | |
| "loss": 0.1458, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.4119241192411926e-05, | |
| "loss": 0.1144, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.3983739837398377e-05, | |
| "loss": 0.127, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.3848238482384823e-05, | |
| "loss": 0.1103, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 2.3712737127371277e-05, | |
| "loss": 0.1934, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.3577235772357724e-05, | |
| "loss": 0.1208, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.3441734417344174e-05, | |
| "loss": 0.1286, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 2.3306233062330625e-05, | |
| "loss": 0.1495, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.3170731707317075e-05, | |
| "loss": 0.1243, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.3035230352303525e-05, | |
| "loss": 0.1316, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.2899728997289975e-05, | |
| "loss": 0.1777, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.2764227642276422e-05, | |
| "loss": 0.1363, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.2628726287262876e-05, | |
| "loss": 0.1467, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.2493224932249323e-05, | |
| "loss": 0.1484, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.2357723577235773e-05, | |
| "loss": 0.1041, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.1311, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9804928131416838, | |
| "eval_loss": 0.06673090904951096, | |
| "eval_runtime": 21.2329, | |
| "eval_samples_per_second": 137.616, | |
| "eval_steps_per_second": 4.333, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.2086720867208674e-05, | |
| "loss": 0.1585, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.1951219512195124e-05, | |
| "loss": 0.0896, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.181571815718157e-05, | |
| "loss": 0.1285, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 2.1680216802168024e-05, | |
| "loss": 0.1674, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 2.1544715447154475e-05, | |
| "loss": 0.129, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 2.140921409214092e-05, | |
| "loss": 0.1062, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 2.1273712737127372e-05, | |
| "loss": 0.1407, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 2.1138211382113822e-05, | |
| "loss": 0.1303, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.1002710027100272e-05, | |
| "loss": 0.125, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 2.0867208672086723e-05, | |
| "loss": 0.1012, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 2.073170731707317e-05, | |
| "loss": 0.1349, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 2.0596205962059623e-05, | |
| "loss": 0.1091, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 2.046070460704607e-05, | |
| "loss": 0.1129, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 2.032520325203252e-05, | |
| "loss": 0.1401, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 2.018970189701897e-05, | |
| "loss": 0.1221, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.005420054200542e-05, | |
| "loss": 0.0942, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.991869918699187e-05, | |
| "loss": 0.1388, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 1.978319783197832e-05, | |
| "loss": 0.1218, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 1.9647696476964768e-05, | |
| "loss": 0.1153, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 1.9512195121951222e-05, | |
| "loss": 0.152, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.937669376693767e-05, | |
| "loss": 0.1268, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 1.924119241192412e-05, | |
| "loss": 0.1328, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 1.9105691056910573e-05, | |
| "loss": 0.1374, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 1.897018970189702e-05, | |
| "loss": 0.1316, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 1.883468834688347e-05, | |
| "loss": 0.1332, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.869918699186992e-05, | |
| "loss": 0.1308, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 1.856368563685637e-05, | |
| "loss": 0.1097, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.842818428184282e-05, | |
| "loss": 0.1391, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.8292682926829268e-05, | |
| "loss": 0.1464, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.8157181571815718e-05, | |
| "loss": 0.1092, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 1.8021680216802168e-05, | |
| "loss": 0.1048, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.788617886178862e-05, | |
| "loss": 0.1089, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 1.775067750677507e-05, | |
| "loss": 0.1597, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 1.7615176151761516e-05, | |
| "loss": 0.1062, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 1.747967479674797e-05, | |
| "loss": 0.1317, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 1.734417344173442e-05, | |
| "loss": 0.083, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 1.7208672086720866e-05, | |
| "loss": 0.1756, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 1.707317073170732e-05, | |
| "loss": 0.1093, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 1.6937669376693767e-05, | |
| "loss": 0.1497, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.6802168021680217e-05, | |
| "loss": 0.1338, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.1212, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9804928131416838, | |
| "eval_loss": 0.0675366148352623, | |
| "eval_runtime": 21.3826, | |
| "eval_samples_per_second": 136.653, | |
| "eval_steps_per_second": 4.303, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 1.6531165311653118e-05, | |
| "loss": 0.1285, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 1.6395663956639568e-05, | |
| "loss": 0.1137, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.6260162601626018e-05, | |
| "loss": 0.1065, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 1.6124661246612465e-05, | |
| "loss": 0.1344, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.598915989159892e-05, | |
| "loss": 0.1343, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 1.5853658536585366e-05, | |
| "loss": 0.1364, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.5718157181571816e-05, | |
| "loss": 0.0916, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.5582655826558266e-05, | |
| "loss": 0.1183, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.5447154471544717e-05, | |
| "loss": 0.0924, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.5311653116531167e-05, | |
| "loss": 0.0979, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.5176151761517615e-05, | |
| "loss": 0.1393, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.5040650406504067e-05, | |
| "loss": 0.1332, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.4905149051490516e-05, | |
| "loss": 0.1017, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 1.4769647696476966e-05, | |
| "loss": 0.1124, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.4634146341463415e-05, | |
| "loss": 0.133, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.4498644986449867e-05, | |
| "loss": 0.1389, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.4363143631436315e-05, | |
| "loss": 0.1296, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.4227642276422764e-05, | |
| "loss": 0.144, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 1.4092140921409214e-05, | |
| "loss": 0.1104, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.3956639566395666e-05, | |
| "loss": 0.1089, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 1.3821138211382115e-05, | |
| "loss": 0.1098, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 1.3685636856368563e-05, | |
| "loss": 0.1404, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 1.3550135501355014e-05, | |
| "loss": 0.121, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 1.3414634146341466e-05, | |
| "loss": 0.1223, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.3279132791327914e-05, | |
| "loss": 0.1421, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 1.3143631436314363e-05, | |
| "loss": 0.1299, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 1.3008130081300815e-05, | |
| "loss": 0.1048, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 1.2872628726287265e-05, | |
| "loss": 0.0924, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 1.2737127371273713e-05, | |
| "loss": 0.1248, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.2601626016260162e-05, | |
| "loss": 0.1488, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 1.2466124661246612e-05, | |
| "loss": 0.1429, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 1.2330623306233063e-05, | |
| "loss": 0.1318, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.2195121951219513e-05, | |
| "loss": 0.1389, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 1.2059620596205963e-05, | |
| "loss": 0.0929, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 1.1924119241192412e-05, | |
| "loss": 0.0922, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.1788617886178862e-05, | |
| "loss": 0.0889, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 1.1653116531165312e-05, | |
| "loss": 0.1047, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 1.1517615176151763e-05, | |
| "loss": 0.1099, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.1382113821138211e-05, | |
| "loss": 0.1038, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 1.1246612466124661e-05, | |
| "loss": 0.0829, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.1637, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9798083504449008, | |
| "eval_loss": 0.06967572867870331, | |
| "eval_runtime": 21.6361, | |
| "eval_samples_per_second": 135.052, | |
| "eval_steps_per_second": 4.252, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.0975609756097562e-05, | |
| "loss": 0.0999, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 1.0840108401084012e-05, | |
| "loss": 0.1016, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 1.070460704607046e-05, | |
| "loss": 0.1218, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 1.0569105691056911e-05, | |
| "loss": 0.1293, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 1.0433604336043361e-05, | |
| "loss": 0.1151, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 1.0298102981029812e-05, | |
| "loss": 0.1309, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 1.016260162601626e-05, | |
| "loss": 0.12, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 1.002710027100271e-05, | |
| "loss": 0.1278, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 9.89159891598916e-06, | |
| "loss": 0.0949, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 9.756097560975611e-06, | |
| "loss": 0.108, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 9.62059620596206e-06, | |
| "loss": 0.1144, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 9.48509485094851e-06, | |
| "loss": 0.1016, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 9.34959349593496e-06, | |
| "loss": 0.0884, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 9.21409214092141e-06, | |
| "loss": 0.093, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 9.078590785907859e-06, | |
| "loss": 0.0766, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 8.94308943089431e-06, | |
| "loss": 0.1495, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 8.807588075880758e-06, | |
| "loss": 0.1037, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 8.67208672086721e-06, | |
| "loss": 0.1294, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 8.53658536585366e-06, | |
| "loss": 0.0902, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 8.401084010840109e-06, | |
| "loss": 0.0931, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 8.265582655826559e-06, | |
| "loss": 0.108, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 8.130081300813009e-06, | |
| "loss": 0.0974, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 7.99457994579946e-06, | |
| "loss": 0.092, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.859078590785908e-06, | |
| "loss": 0.095, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 7.723577235772358e-06, | |
| "loss": 0.1177, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 7.588075880758808e-06, | |
| "loss": 0.1242, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 7.452574525745258e-06, | |
| "loss": 0.1228, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 7.317073170731707e-06, | |
| "loss": 0.1003, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 7.181571815718158e-06, | |
| "loss": 0.1152, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 7.046070460704607e-06, | |
| "loss": 0.0998, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 6.910569105691057e-06, | |
| "loss": 0.0974, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 6.775067750677507e-06, | |
| "loss": 0.0862, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 6.639566395663957e-06, | |
| "loss": 0.134, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 6.504065040650407e-06, | |
| "loss": 0.1038, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 6.368563685636857e-06, | |
| "loss": 0.1416, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 6.233062330623306e-06, | |
| "loss": 0.1246, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 6.0975609756097564e-06, | |
| "loss": 0.0954, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 5.962059620596206e-06, | |
| "loss": 0.1269, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5.826558265582656e-06, | |
| "loss": 0.0913, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 5.6910569105691056e-06, | |
| "loss": 0.1073, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.116, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9811772758384668, | |
| "eval_loss": 0.06383541971445084, | |
| "eval_runtime": 21.8412, | |
| "eval_samples_per_second": 133.784, | |
| "eval_steps_per_second": 4.212, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5.420054200542006e-06, | |
| "loss": 0.0959, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 5.2845528455284555e-06, | |
| "loss": 0.1091, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "learning_rate": 5.149051490514906e-06, | |
| "loss": 0.0918, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 5.013550135501355e-06, | |
| "loss": 0.0973, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.8780487804878055e-06, | |
| "loss": 0.0981, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.742547425474255e-06, | |
| "loss": 0.0797, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 4.607046070460705e-06, | |
| "loss": 0.1161, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 4.471544715447155e-06, | |
| "loss": 0.1062, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 4.336043360433605e-06, | |
| "loss": 0.0973, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 4.200542005420054e-06, | |
| "loss": 0.0964, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 4.0650406504065046e-06, | |
| "loss": 0.0953, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 3.929539295392954e-06, | |
| "loss": 0.0841, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 3.794037940379404e-06, | |
| "loss": 0.1088, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 3.6585365853658537e-06, | |
| "loss": 0.1275, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 3.5230352303523035e-06, | |
| "loss": 0.1055, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 3.3875338753387534e-06, | |
| "loss": 0.1103, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.2520325203252037e-06, | |
| "loss": 0.0943, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 3.116531165311653e-06, | |
| "loss": 0.1117, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 2.981029810298103e-06, | |
| "loss": 0.0888, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 2.8455284552845528e-06, | |
| "loss": 0.1169, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 2.710027100271003e-06, | |
| "loss": 0.0857, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 2.574525745257453e-06, | |
| "loss": 0.0776, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 2.4390243902439027e-06, | |
| "loss": 0.0843, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 2.3035230352303526e-06, | |
| "loss": 0.0758, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 2.1680216802168024e-06, | |
| "loss": 0.1109, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 2.0325203252032523e-06, | |
| "loss": 0.1277, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 1.897018970189702e-06, | |
| "loss": 0.1218, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 1.7615176151761518e-06, | |
| "loss": 0.1389, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 1.6260162601626018e-06, | |
| "loss": 0.1061, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 1.4905149051490515e-06, | |
| "loss": 0.1006, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 1.3550135501355015e-06, | |
| "loss": 0.116, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 1.2195121951219514e-06, | |
| "loss": 0.1143, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 1.0840108401084012e-06, | |
| "loss": 0.1214, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 9.48509485094851e-07, | |
| "loss": 0.1353, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 8.130081300813009e-07, | |
| "loss": 0.1078, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 6.775067750677508e-07, | |
| "loss": 0.0809, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 5.420054200542006e-07, | |
| "loss": 0.1068, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 4.0650406504065046e-07, | |
| "loss": 0.0909, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 2.710027100271003e-07, | |
| "loss": 0.0828, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 1.3550135501355015e-07, | |
| "loss": 0.1054, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 0.0, | |
| "loss": 0.085, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "eval_accuracy": 0.9798083504449008, | |
| "eval_loss": 0.06385670602321625, | |
| "eval_runtime": 21.9428, | |
| "eval_samples_per_second": 133.164, | |
| "eval_steps_per_second": 4.193, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "step": 2050, | |
| "total_flos": 6.521150663842333e+18, | |
| "train_loss": 0.21471095208714647, | |
| "train_runtime": 4563.3157, | |
| "train_samples_per_second": 57.625, | |
| "train_steps_per_second": 0.449 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2050, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 6.521150663842333e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
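
The JSON above appears to be the `trainer_state.json` that the Hugging Face `transformers` `Trainer` writes alongside its checkpoints: top-level fields describe the run (`best_metric`, `best_model_checkpoint`, `max_steps`, `total_flos`), and `log_history` interleaves per-logging-step training losses with per-epoch evaluation records. As a minimal sketch (not part of the original log, and assuming the file is saved locally under the hypothetical name `trainer_state.json`), the evaluation curve can be pulled out of `log_history` with nothing but the standard library:

```python
# Sketch only: summarize the eval metrics recorded in a Trainer state file.
# The filename "trainer_state.json" is an assumption about where this JSON lives.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("best metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# log_history mixes training-loss entries and evaluation entries;
# evaluation entries are the ones that carry an "eval_accuracy" key.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f'epoch {entry["epoch"]:>5}: '
              f'eval_loss={entry["eval_loss"]:.4f}  '
              f'eval_accuracy={entry["eval_accuracy"]:.4f}')
```

Run against this log, the loop would print one line per epoch (accuracy climbing from roughly 0.918 at epoch 1 to about 0.981 at epoch 9, matching `best_metric` and the `checkpoint-1849` path above); the final summary entry with `train_loss` and `train_runtime` carries no `eval_accuracy` and is skipped.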