{
  "best_global_step": 546,
  "best_metric": 0.9727272727272728,
  "best_model_checkpoint": "font-identifier/checkpoint-546",
  "epoch": 46.0,
  "eval_steps": 500,
  "global_step": 644,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 9.430075645446777,
      "learning_rate": 6.428571428571429e-06,
      "loss": 3.3213,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.06818181818181818,
      "eval_loss": 3.2453081607818604,
      "eval_runtime": 2.4333,
      "eval_samples_per_second": 90.411,
      "eval_steps_per_second": 5.753,
      "step": 14
    },
    {
      "epoch": 1.4363636363636363,
      "grad_norm": 9.360711097717285,
      "learning_rate": 1.357142857142857e-05,
      "loss": 3.1711,
      "step": 20
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.12727272727272726,
      "eval_loss": 3.0051403045654297,
      "eval_runtime": 2.0612,
      "eval_samples_per_second": 106.735,
      "eval_steps_per_second": 6.792,
      "step": 28
    },
    {
      "epoch": 2.1454545454545455,
      "grad_norm": 8.500679016113281,
      "learning_rate": 2.0714285714285718e-05,
      "loss": 2.983,
      "step": 30
    },
    {
      "epoch": 2.8727272727272726,
      "grad_norm": 9.82868766784668,
      "learning_rate": 2.785714285714286e-05,
      "loss": 2.8729,
      "step": 40
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.20909090909090908,
      "eval_loss": 2.6284613609313965,
      "eval_runtime": 2.0503,
      "eval_samples_per_second": 107.302,
      "eval_steps_per_second": 6.828,
      "step": 42
    },
    {
      "epoch": 3.581818181818182,
      "grad_norm": 7.700014114379883,
      "learning_rate": 3.5e-05,
      "loss": 2.562,
      "step": 50
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.37727272727272726,
      "eval_loss": 2.160020112991333,
      "eval_runtime": 2.3625,
      "eval_samples_per_second": 93.121,
      "eval_steps_per_second": 5.926,
      "step": 56
    },
    {
      "epoch": 4.290909090909091,
      "grad_norm": 7.527115821838379,
      "learning_rate": 4.214285714285714e-05,
      "loss": 2.2406,
      "step": 60
    },
    {
      "epoch": 5.0,
      "grad_norm": 5.579476833343506,
      "learning_rate": 4.928571428571429e-05,
      "loss": 1.8675,
      "step": 70
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5363636363636364,
      "eval_loss": 1.639161229133606,
      "eval_runtime": 2.0879,
      "eval_samples_per_second": 105.372,
      "eval_steps_per_second": 6.705,
      "step": 70
    },
    {
      "epoch": 5.7272727272727275,
      "grad_norm": 6.313197135925293,
      "learning_rate": 4.928571428571429e-05,
      "loss": 1.6359,
      "step": 80
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6681818181818182,
      "eval_loss": 1.2266921997070312,
      "eval_runtime": 2.1098,
      "eval_samples_per_second": 104.274,
      "eval_steps_per_second": 6.636,
      "step": 84
    },
    {
      "epoch": 6.4363636363636365,
      "grad_norm": 5.8209004402160645,
      "learning_rate": 4.84920634920635e-05,
      "loss": 1.3499,
      "step": 90
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6818181818181818,
      "eval_loss": 1.0587564706802368,
      "eval_runtime": 2.9116,
      "eval_samples_per_second": 75.56,
      "eval_steps_per_second": 4.808,
      "step": 98
    },
    {
      "epoch": 7.1454545454545455,
      "grad_norm": 6.231616973876953,
      "learning_rate": 4.7698412698412706e-05,
      "loss": 1.2501,
      "step": 100
    },
    {
      "epoch": 7.872727272727273,
      "grad_norm": 5.9844584465026855,
      "learning_rate": 4.690476190476191e-05,
      "loss": 1.076,
      "step": 110
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6909090909090909,
      "eval_loss": 0.8790816068649292,
      "eval_runtime": 4.5841,
      "eval_samples_per_second": 47.992,
      "eval_steps_per_second": 3.054,
      "step": 112
    },
    {
      "epoch": 8.581818181818182,
      "grad_norm": 5.29907751083374,
      "learning_rate": 4.6111111111111115e-05,
      "loss": 0.9811,
      "step": 120
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.7573429346084595,
      "eval_runtime": 4.4185,
      "eval_samples_per_second": 49.79,
      "eval_steps_per_second": 3.168,
      "step": 126
    },
    {
      "epoch": 9.290909090909091,
      "grad_norm": 5.92799711227417,
      "learning_rate": 4.531746031746032e-05,
      "loss": 0.8655,
      "step": 130
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.577127695083618,
      "learning_rate": 4.4523809523809525e-05,
      "loss": 0.7309,
      "step": 140
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.6194924712181091,
      "eval_runtime": 4.0981,
      "eval_samples_per_second": 53.683,
      "eval_steps_per_second": 3.416,
      "step": 140
    },
    {
      "epoch": 10.727272727272727,
      "grad_norm": 5.252236843109131,
      "learning_rate": 4.373015873015873e-05,
      "loss": 0.7776,
      "step": 150
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8,
      "eval_loss": 0.5426079034805298,
      "eval_runtime": 4.0014,
      "eval_samples_per_second": 54.981,
      "eval_steps_per_second": 3.499,
      "step": 154
    },
    {
      "epoch": 11.436363636363636,
      "grad_norm": 4.398848533630371,
      "learning_rate": 4.2936507936507935e-05,
      "loss": 0.7365,
      "step": 160
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8772727272727273,
      "eval_loss": 0.40285420417785645,
      "eval_runtime": 4.3238,
      "eval_samples_per_second": 50.882,
      "eval_steps_per_second": 3.238,
      "step": 168
    },
    {
      "epoch": 12.145454545454545,
      "grad_norm": 5.8070549964904785,
      "learning_rate": 4.214285714285714e-05,
      "loss": 0.6116,
      "step": 170
    },
    {
      "epoch": 12.872727272727273,
      "grad_norm": 6.383784770965576,
      "learning_rate": 4.134920634920635e-05,
      "loss": 0.5767,
      "step": 180
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8363636363636363,
      "eval_loss": 0.4417766332626343,
      "eval_runtime": 4.8355,
      "eval_samples_per_second": 45.497,
      "eval_steps_per_second": 2.895,
      "step": 182
    },
    {
      "epoch": 13.581818181818182,
      "grad_norm": 4.959994316101074,
      "learning_rate": 4.055555555555556e-05,
      "loss": 0.5838,
      "step": 190
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8818181818181818,
      "eval_loss": 0.35380780696868896,
      "eval_runtime": 3.8077,
      "eval_samples_per_second": 57.777,
      "eval_steps_per_second": 3.677,
      "step": 196
    },
    {
      "epoch": 14.290909090909091,
      "grad_norm": 5.2857770919799805,
      "learning_rate": 3.976190476190476e-05,
      "loss": 0.4826,
      "step": 200
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.971461296081543,
      "learning_rate": 3.896825396825397e-05,
      "loss": 0.4491,
      "step": 210
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8636363636363636,
      "eval_loss": 0.3833666145801544,
      "eval_runtime": 4.0015,
      "eval_samples_per_second": 54.979,
      "eval_steps_per_second": 3.499,
      "step": 210
    },
    {
      "epoch": 15.727272727272727,
      "grad_norm": 4.3175764083862305,
      "learning_rate": 3.817460317460317e-05,
      "loss": 0.5056,
      "step": 220
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9227272727272727,
      "eval_loss": 0.27014854550361633,
      "eval_runtime": 3.7487,
      "eval_samples_per_second": 58.687,
      "eval_steps_per_second": 3.735,
      "step": 224
    },
    {
      "epoch": 16.436363636363637,
      "grad_norm": 4.672767162322998,
      "learning_rate": 3.7380952380952386e-05,
      "loss": 0.4364,
      "step": 230
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8818181818181818,
      "eval_loss": 0.3142429292201996,
      "eval_runtime": 3.9413,
      "eval_samples_per_second": 55.819,
      "eval_steps_per_second": 3.552,
      "step": 238
    },
    {
      "epoch": 17.145454545454545,
      "grad_norm": 5.194856643676758,
      "learning_rate": 3.658730158730159e-05,
      "loss": 0.4631,
      "step": 240
    },
    {
      "epoch": 17.87272727272727,
      "grad_norm": 5.319342136383057,
      "learning_rate": 3.5793650793650795e-05,
      "loss": 0.364,
      "step": 250
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9136363636363637,
      "eval_loss": 0.2616939842700958,
      "eval_runtime": 3.7398,
      "eval_samples_per_second": 58.827,
      "eval_steps_per_second": 3.744,
      "step": 252
    },
    {
      "epoch": 18.581818181818182,
      "grad_norm": 5.951942443847656,
      "learning_rate": 3.5e-05,
      "loss": 0.3845,
      "step": 260
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8818181818181818,
      "eval_loss": 0.3091982901096344,
      "eval_runtime": 4.1464,
      "eval_samples_per_second": 53.059,
      "eval_steps_per_second": 3.376,
      "step": 266
    },
    {
      "epoch": 19.29090909090909,
      "grad_norm": 4.990575790405273,
      "learning_rate": 3.420634920634921e-05,
      "loss": 0.4096,
      "step": 270
    },
    {
      "epoch": 20.0,
      "grad_norm": 4.905520439147949,
      "learning_rate": 3.3412698412698413e-05,
      "loss": 0.3873,
      "step": 280
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9136363636363637,
      "eval_loss": 0.23085929453372955,
      "eval_runtime": 4.0568,
      "eval_samples_per_second": 54.23,
      "eval_steps_per_second": 3.451,
      "step": 280
    },
    {
      "epoch": 20.727272727272727,
      "grad_norm": 3.991994857788086,
      "learning_rate": 3.261904761904762e-05,
      "loss": 0.3397,
      "step": 290
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.9181818181818182,
      "eval_loss": 0.22669094800949097,
      "eval_runtime": 3.7565,
      "eval_samples_per_second": 58.566,
      "eval_steps_per_second": 3.727,
      "step": 294
    },
    {
      "epoch": 21.436363636363637,
      "grad_norm": 3.291044235229492,
      "learning_rate": 3.182539682539682e-05,
      "loss": 0.3731,
      "step": 300
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.9136363636363637,
      "eval_loss": 0.22054576873779297,
      "eval_runtime": 3.7364,
      "eval_samples_per_second": 58.88,
      "eval_steps_per_second": 3.747,
      "step": 308
    },
    {
      "epoch": 22.145454545454545,
      "grad_norm": 5.276898384094238,
      "learning_rate": 3.103174603174603e-05,
      "loss": 0.3431,
      "step": 310
    },
    {
      "epoch": 22.87272727272727,
      "grad_norm": 4.37393856048584,
      "learning_rate": 3.0238095238095236e-05,
      "loss": 0.329,
      "step": 320
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.95,
      "eval_loss": 0.1515827178955078,
      "eval_runtime": 3.8186,
      "eval_samples_per_second": 57.612,
      "eval_steps_per_second": 3.666,
      "step": 322
    },
    {
      "epoch": 23.581818181818182,
      "grad_norm": 6.394493103027344,
      "learning_rate": 2.9444444444444448e-05,
      "loss": 0.3041,
      "step": 330
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9318181818181818,
      "eval_loss": 0.20813795924186707,
      "eval_runtime": 3.6425,
      "eval_samples_per_second": 60.399,
      "eval_steps_per_second": 3.844,
      "step": 336
    },
    {
      "epoch": 24.29090909090909,
      "grad_norm": 4.260524272918701,
      "learning_rate": 2.865079365079365e-05,
      "loss": 0.2926,
      "step": 340
    },
    {
      "epoch": 25.0,
      "grad_norm": 4.748322486877441,
      "learning_rate": 2.785714285714286e-05,
      "loss": 0.2996,
      "step": 350
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.9272727272727272,
      "eval_loss": 0.18759672343730927,
      "eval_runtime": 3.5621,
      "eval_samples_per_second": 61.761,
      "eval_steps_per_second": 3.93,
      "step": 350
    },
    {
      "epoch": 25.727272727272727,
      "grad_norm": 3.307917356491089,
      "learning_rate": 2.7063492063492062e-05,
      "loss": 0.2825,
      "step": 360
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.9272727272727272,
      "eval_loss": 0.224106565117836,
      "eval_runtime": 3.6057,
      "eval_samples_per_second": 61.015,
      "eval_steps_per_second": 3.883,
      "step": 364
    },
    {
      "epoch": 26.436363636363637,
      "grad_norm": 4.714457988739014,
      "learning_rate": 2.626984126984127e-05,
      "loss": 0.2929,
      "step": 370
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.9318181818181818,
      "eval_loss": 0.2054794728755951,
      "eval_runtime": 3.627,
      "eval_samples_per_second": 60.656,
      "eval_steps_per_second": 3.86,
      "step": 378
    },
    {
      "epoch": 27.145454545454545,
      "grad_norm": 3.81807541847229,
      "learning_rate": 2.5476190476190476e-05,
      "loss": 0.296,
      "step": 380
    },
    {
      "epoch": 27.87272727272727,
      "grad_norm": 3.4400370121002197,
      "learning_rate": 2.4682539682539684e-05,
      "loss": 0.2574,
      "step": 390
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9318181818181818,
      "eval_loss": 0.16666613519191742,
      "eval_runtime": 3.5829,
      "eval_samples_per_second": 61.402,
      "eval_steps_per_second": 3.907,
      "step": 392
    },
    {
      "epoch": 28.581818181818182,
      "grad_norm": 4.114112854003906,
      "learning_rate": 2.3888888888888892e-05,
      "loss": 0.2662,
      "step": 400
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.9545454545454546,
      "eval_loss": 0.1585608422756195,
      "eval_runtime": 3.738,
      "eval_samples_per_second": 58.855,
      "eval_steps_per_second": 3.745,
      "step": 406
    },
    {
      "epoch": 29.29090909090909,
      "grad_norm": 4.3739728927612305,
      "learning_rate": 2.3095238095238097e-05,
      "loss": 0.2585,
      "step": 410
    },
    {
      "epoch": 30.0,
      "grad_norm": 2.7083423137664795,
      "learning_rate": 2.2301587301587305e-05,
      "loss": 0.2391,
      "step": 420
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.9272727272727272,
      "eval_loss": 0.17817389965057373,
      "eval_runtime": 3.6137,
      "eval_samples_per_second": 60.879,
      "eval_steps_per_second": 3.874,
      "step": 420
    },
    {
      "epoch": 30.727272727272727,
      "grad_norm": 5.332400321960449,
      "learning_rate": 2.150793650793651e-05,
      "loss": 0.2642,
      "step": 430
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.9409090909090909,
      "eval_loss": 0.15896284580230713,
      "eval_runtime": 3.6786,
      "eval_samples_per_second": 59.805,
      "eval_steps_per_second": 3.806,
      "step": 434
    },
    {
      "epoch": 31.436363636363637,
      "grad_norm": 2.9920341968536377,
      "learning_rate": 2.0714285714285718e-05,
      "loss": 0.2323,
      "step": 440
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.9363636363636364,
      "eval_loss": 0.1662004142999649,
      "eval_runtime": 3.5572,
      "eval_samples_per_second": 61.846,
      "eval_steps_per_second": 3.936,
      "step": 448
    },
    {
      "epoch": 32.14545454545455,
      "grad_norm": 4.736130714416504,
      "learning_rate": 1.992063492063492e-05,
      "loss": 0.2073,
      "step": 450
    },
    {
      "epoch": 32.872727272727275,
      "grad_norm": 2.845208168029785,
      "learning_rate": 1.9126984126984128e-05,
      "loss": 0.2261,
      "step": 460
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.9454545454545454,
      "eval_loss": 0.15488994121551514,
      "eval_runtime": 3.6011,
      "eval_samples_per_second": 61.092,
      "eval_steps_per_second": 3.888,
      "step": 462
    },
    {
      "epoch": 33.58181818181818,
      "grad_norm": 3.733150005340576,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.2116,
      "step": 470
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.95,
      "eval_loss": 0.15380583703517914,
      "eval_runtime": 3.5186,
      "eval_samples_per_second": 62.525,
      "eval_steps_per_second": 3.979,
      "step": 476
    },
    {
      "epoch": 34.29090909090909,
      "grad_norm": 3.829446315765381,
      "learning_rate": 1.7539682539682538e-05,
      "loss": 0.2694,
      "step": 480
    },
    {
      "epoch": 35.0,
      "grad_norm": 3.4856090545654297,
      "learning_rate": 1.6746031746031746e-05,
      "loss": 0.211,
      "step": 490
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.9636363636363636,
      "eval_loss": 0.14965535700321198,
      "eval_runtime": 3.5432,
      "eval_samples_per_second": 62.091,
      "eval_steps_per_second": 3.951,
      "step": 490
    },
    {
      "epoch": 35.72727272727273,
      "grad_norm": 3.3690829277038574,
      "learning_rate": 1.595238095238095e-05,
      "loss": 0.2472,
      "step": 500
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.9590909090909091,
      "eval_loss": 0.15793798863887787,
      "eval_runtime": 3.5484,
      "eval_samples_per_second": 62.0,
      "eval_steps_per_second": 3.945,
      "step": 504
    },
    {
      "epoch": 36.43636363636364,
      "grad_norm": 4.752399444580078,
      "learning_rate": 1.5158730158730159e-05,
      "loss": 0.2185,
      "step": 510
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.9636363636363636,
      "eval_loss": 0.12271636724472046,
      "eval_runtime": 3.9513,
      "eval_samples_per_second": 55.678,
      "eval_steps_per_second": 3.543,
      "step": 518
    },
    {
      "epoch": 37.14545454545455,
      "grad_norm": 3.8303024768829346,
      "learning_rate": 1.4365079365079364e-05,
      "loss": 0.1708,
      "step": 520
    },
    {
      "epoch": 37.872727272727275,
      "grad_norm": 3.642642021179199,
      "learning_rate": 1.357142857142857e-05,
      "loss": 0.2123,
      "step": 530
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.95,
      "eval_loss": 0.13889062404632568,
      "eval_runtime": 3.9208,
      "eval_samples_per_second": 56.111,
      "eval_steps_per_second": 3.571,
      "step": 532
    },
    {
      "epoch": 38.58181818181818,
      "grad_norm": 2.8652360439300537,
      "learning_rate": 1.2777777777777777e-05,
      "loss": 0.1691,
      "step": 540
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.9727272727272728,
      "eval_loss": 0.10398547351360321,
      "eval_runtime": 4.7148,
      "eval_samples_per_second": 46.662,
      "eval_steps_per_second": 2.969,
      "step": 546
    },
    {
      "epoch": 39.29090909090909,
      "grad_norm": 4.876323223114014,
      "learning_rate": 1.1984126984126985e-05,
      "loss": 0.1969,
      "step": 550
    },
    {
      "epoch": 40.0,
      "grad_norm": 3.435255289077759,
      "learning_rate": 1.1190476190476192e-05,
      "loss": 0.1805,
      "step": 560
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.9545454545454546,
      "eval_loss": 0.14451798796653748,
      "eval_runtime": 5.5215,
      "eval_samples_per_second": 39.844,
      "eval_steps_per_second": 2.536,
      "step": 560
    },
    {
      "epoch": 40.72727272727273,
      "grad_norm": 4.050150394439697,
      "learning_rate": 1.0396825396825398e-05,
      "loss": 0.1828,
      "step": 570
    },
    {
      "epoch": 41.0,
      "eval_accuracy": 0.9454545454545454,
      "eval_loss": 0.13493339717388153,
      "eval_runtime": 3.8663,
      "eval_samples_per_second": 56.902,
      "eval_steps_per_second": 3.621,
      "step": 574
    },
    {
      "epoch": 41.43636363636364,
      "grad_norm": 2.726930856704712,
      "learning_rate": 9.603174603174605e-06,
      "loss": 0.2005,
      "step": 580
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.9454545454545454,
      "eval_loss": 0.1417720913887024,
      "eval_runtime": 4.2,
      "eval_samples_per_second": 52.381,
      "eval_steps_per_second": 3.333,
      "step": 588
    },
    {
      "epoch": 42.14545454545455,
      "grad_norm": 5.61188268661499,
      "learning_rate": 8.80952380952381e-06,
      "loss": 0.1956,
      "step": 590
    },
    {
      "epoch": 42.872727272727275,
      "grad_norm": 2.0175414085388184,
      "learning_rate": 8.015873015873016e-06,
      "loss": 0.1986,
      "step": 600
    },
    {
      "epoch": 43.0,
      "eval_accuracy": 0.9454545454545454,
      "eval_loss": 0.16132618486881256,
      "eval_runtime": 3.9372,
      "eval_samples_per_second": 55.877,
      "eval_steps_per_second": 3.556,
      "step": 602
    },
    {
      "epoch": 43.58181818181818,
      "grad_norm": 3.885935068130493,
      "learning_rate": 7.222222222222222e-06,
      "loss": 0.2012,
      "step": 610
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.9590909090909091,
      "eval_loss": 0.12059630453586578,
      "eval_runtime": 4.4119,
      "eval_samples_per_second": 49.865,
      "eval_steps_per_second": 3.173,
      "step": 616
    },
    {
      "epoch": 44.29090909090909,
      "grad_norm": 4.978816986083984,
      "learning_rate": 6.428571428571429e-06,
      "loss": 0.2198,
      "step": 620
    },
    {
      "epoch": 45.0,
      "grad_norm": 4.146751403808594,
      "learning_rate": 5.634920634920635e-06,
      "loss": 0.1494,
      "step": 630
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.9590909090909091,
      "eval_loss": 0.14049798250198364,
      "eval_runtime": 3.9252,
      "eval_samples_per_second": 56.048,
      "eval_steps_per_second": 3.567,
      "step": 630
    },
    {
      "epoch": 45.72727272727273,
      "grad_norm": 4.996974945068359,
      "learning_rate": 4.841269841269842e-06,
      "loss": 0.1891,
      "step": 640
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 0.9727272727272728,
      "eval_loss": 0.11218445748090744,
      "eval_runtime": 3.493,
      "eval_samples_per_second": 62.984,
      "eval_steps_per_second": 4.008,
      "step": 644
    }
  ],
  "logging_steps": 10,
  "max_steps": 700,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.090285871117107e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|