| { |
| "best_global_step": 5000, |
| "best_metric": 0.790944661822247, |
| "best_model_checkpoint": "./results/checkpoint-5000", |
| "epoch": 3.0, |
| "eval_steps": 250, |
| "global_step": 5340, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0056179775280898875, |
| "grad_norm": 37.83753967285156, |
| "learning_rate": 3.6e-07, |
| "loss": 0.5628, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011235955056179775, |
| "grad_norm": 18.735244750976562, |
| "learning_rate": 7.6e-07, |
| "loss": 0.5392, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.016853932584269662, |
| "grad_norm": 27.133329391479492, |
| "learning_rate": 1.1600000000000001e-06, |
| "loss": 0.4695, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02247191011235955, |
| "grad_norm": 18.776302337646484, |
| "learning_rate": 1.56e-06, |
| "loss": 0.5545, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.028089887640449437, |
| "grad_norm": 23.711435317993164, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 0.5058, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.033707865168539325, |
| "grad_norm": 21.259469985961914, |
| "learning_rate": 2.3600000000000003e-06, |
| "loss": 0.4333, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03932584269662921, |
| "grad_norm": 26.772079467773438, |
| "learning_rate": 2.7600000000000003e-06, |
| "loss": 0.4378, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0449438202247191, |
| "grad_norm": 18.497085571289062, |
| "learning_rate": 3.1600000000000002e-06, |
| "loss": 0.4262, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05056179775280899, |
| "grad_norm": 15.191393852233887, |
| "learning_rate": 3.5600000000000002e-06, |
| "loss": 0.3317, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.056179775280898875, |
| "grad_norm": 14.540416717529297, |
| "learning_rate": 3.96e-06, |
| "loss": 0.3036, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06179775280898876, |
| "grad_norm": 7.341688632965088, |
| "learning_rate": 4.360000000000001e-06, |
| "loss": 0.315, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06741573033707865, |
| "grad_norm": 8.017271041870117, |
| "learning_rate": 4.76e-06, |
| "loss": 0.3194, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07303370786516854, |
| "grad_norm": 8.424580574035645, |
| "learning_rate": 5.1600000000000006e-06, |
| "loss": 0.2942, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07865168539325842, |
| "grad_norm": 3.8868699073791504, |
| "learning_rate": 5.560000000000001e-06, |
| "loss": 0.2451, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08426966292134831, |
| "grad_norm": 4.187905311584473, |
| "learning_rate": 5.9600000000000005e-06, |
| "loss": 0.2187, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0898876404494382, |
| "grad_norm": 3.5908498764038086, |
| "learning_rate": 6.360000000000001e-06, |
| "loss": 0.1697, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09550561797752809, |
| "grad_norm": 2.1136908531188965, |
| "learning_rate": 6.760000000000001e-06, |
| "loss": 0.1783, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10112359550561797, |
| "grad_norm": 2.29725980758667, |
| "learning_rate": 7.16e-06, |
| "loss": 0.161, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10674157303370786, |
| "grad_norm": 1.8517777919769287, |
| "learning_rate": 7.5600000000000005e-06, |
| "loss": 0.0987, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11235955056179775, |
| "grad_norm": 3.0938291549682617, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.1304, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11797752808988764, |
| "grad_norm": 1.1395810842514038, |
| "learning_rate": 8.36e-06, |
| "loss": 0.0922, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12359550561797752, |
| "grad_norm": 1.0684705972671509, |
| "learning_rate": 8.76e-06, |
| "loss": 0.0657, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12921348314606743, |
| "grad_norm": 1.4070613384246826, |
| "learning_rate": 9.16e-06, |
| "loss": 0.1045, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1348314606741573, |
| "grad_norm": 1.3161265850067139, |
| "learning_rate": 9.56e-06, |
| "loss": 0.0942, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1404494382022472, |
| "grad_norm": 1.0167522430419922, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 0.0972, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1404494382022472, |
| "eval_f1": 0.5152618314197704, |
| "eval_loss": 0.0704234316945076, |
| "eval_precision": 0.4922418405564473, |
| "eval_recall": 0.5405405405405406, |
| "eval_runtime": 28.0049, |
| "eval_samples_per_second": 140.333, |
| "eval_steps_per_second": 4.392, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.14606741573033707, |
| "grad_norm": 0.8816568851470947, |
| "learning_rate": 1.036e-05, |
| "loss": 0.0733, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15168539325842698, |
| "grad_norm": 1.3037965297698975, |
| "learning_rate": 1.0760000000000002e-05, |
| "loss": 0.0576, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15730337078651685, |
| "grad_norm": 0.5775825381278992, |
| "learning_rate": 1.1160000000000002e-05, |
| "loss": 0.0648, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.16292134831460675, |
| "grad_norm": 0.48878130316734314, |
| "learning_rate": 1.156e-05, |
| "loss": 0.0566, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.16853932584269662, |
| "grad_norm": 1.132553219795227, |
| "learning_rate": 1.196e-05, |
| "loss": 0.0665, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.17415730337078653, |
| "grad_norm": 3.24303936958313, |
| "learning_rate": 1.236e-05, |
| "loss": 0.0756, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1797752808988764, |
| "grad_norm": 1.5602407455444336, |
| "learning_rate": 1.2760000000000001e-05, |
| "loss": 0.0588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1853932584269663, |
| "grad_norm": 0.718769371509552, |
| "learning_rate": 1.3160000000000001e-05, |
| "loss": 0.065, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.19101123595505617, |
| "grad_norm": 0.5588079690933228, |
| "learning_rate": 1.3560000000000002e-05, |
| "loss": 0.0655, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.19662921348314608, |
| "grad_norm": 0.7195122838020325, |
| "learning_rate": 1.396e-05, |
| "loss": 0.0547, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.20224719101123595, |
| "grad_norm": 0.3126872777938843, |
| "learning_rate": 1.4360000000000001e-05, |
| "loss": 0.0399, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.20786516853932585, |
| "grad_norm": 0.4424603581428528, |
| "learning_rate": 1.4760000000000001e-05, |
| "loss": 0.0822, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.21348314606741572, |
| "grad_norm": 0.514411449432373, |
| "learning_rate": 1.516e-05, |
| "loss": 0.058, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21910112359550563, |
| "grad_norm": 0.8999485969543457, |
| "learning_rate": 1.556e-05, |
| "loss": 0.0696, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2247191011235955, |
| "grad_norm": 0.376237154006958, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.0431, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2303370786516854, |
| "grad_norm": 0.3700561821460724, |
| "learning_rate": 1.636e-05, |
| "loss": 0.0355, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.23595505617977527, |
| "grad_norm": 0.6543495059013367, |
| "learning_rate": 1.6760000000000002e-05, |
| "loss": 0.0539, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.24157303370786518, |
| "grad_norm": 0.6010973453521729, |
| "learning_rate": 1.7160000000000002e-05, |
| "loss": 0.038, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.24719101123595505, |
| "grad_norm": 0.723072350025177, |
| "learning_rate": 1.756e-05, |
| "loss": 0.0452, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.25280898876404495, |
| "grad_norm": 0.7555616497993469, |
| "learning_rate": 1.796e-05, |
| "loss": 0.037, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.25842696629213485, |
| "grad_norm": 0.9911591410636902, |
| "learning_rate": 1.8360000000000004e-05, |
| "loss": 0.054, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2640449438202247, |
| "grad_norm": 0.38651639223098755, |
| "learning_rate": 1.876e-05, |
| "loss": 0.0761, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2696629213483146, |
| "grad_norm": 0.461109459400177, |
| "learning_rate": 1.916e-05, |
| "loss": 0.0487, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2752808988764045, |
| "grad_norm": 0.5480913519859314, |
| "learning_rate": 1.9560000000000002e-05, |
| "loss": 0.0688, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2808988764044944, |
| "grad_norm": 0.5451459288597107, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.0385, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2808988764044944, |
| "eval_f1": 0.6540447504302925, |
| "eval_loss": 0.040211886167526245, |
| "eval_precision": 0.6390134529147982, |
| "eval_recall": 0.6698002350176263, |
| "eval_runtime": 27.5658, |
| "eval_samples_per_second": 142.568, |
| "eval_steps_per_second": 4.462, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.28651685393258425, |
| "grad_norm": 0.4356236159801483, |
| "learning_rate": 1.9962809917355374e-05, |
| "loss": 0.0509, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.29213483146067415, |
| "grad_norm": 0.39902523159980774, |
| "learning_rate": 1.9921487603305786e-05, |
| "loss": 0.0419, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.29775280898876405, |
| "grad_norm": 0.6652863025665283, |
| "learning_rate": 1.98801652892562e-05, |
| "loss": 0.0321, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.30337078651685395, |
| "grad_norm": 0.6239707469940186, |
| "learning_rate": 1.9838842975206615e-05, |
| "loss": 0.0499, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3089887640449438, |
| "grad_norm": 0.6594592332839966, |
| "learning_rate": 1.9797520661157025e-05, |
| "loss": 0.0526, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3146067415730337, |
| "grad_norm": 1.5072342157363892, |
| "learning_rate": 1.975619834710744e-05, |
| "loss": 0.0493, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3202247191011236, |
| "grad_norm": 0.5700026154518127, |
| "learning_rate": 1.9714876033057854e-05, |
| "loss": 0.0492, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3258426966292135, |
| "grad_norm": 5.261657238006592, |
| "learning_rate": 1.9673553719008267e-05, |
| "loss": 0.0709, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.33146067415730335, |
| "grad_norm": 0.6704587936401367, |
| "learning_rate": 1.963223140495868e-05, |
| "loss": 0.0417, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.33707865168539325, |
| "grad_norm": 0.7397480607032776, |
| "learning_rate": 1.9590909090909092e-05, |
| "loss": 0.0501, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.34269662921348315, |
| "grad_norm": 1.0854114294052124, |
| "learning_rate": 1.9549586776859505e-05, |
| "loss": 0.0397, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.34831460674157305, |
| "grad_norm": 0.7938683032989502, |
| "learning_rate": 1.9508264462809918e-05, |
| "loss": 0.0344, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3539325842696629, |
| "grad_norm": 0.5936018228530884, |
| "learning_rate": 1.9466942148760334e-05, |
| "loss": 0.0428, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3595505617977528, |
| "grad_norm": 0.37717097997665405, |
| "learning_rate": 1.9425619834710743e-05, |
| "loss": 0.0457, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3651685393258427, |
| "grad_norm": 0.5075058937072754, |
| "learning_rate": 1.938429752066116e-05, |
| "loss": 0.0345, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3707865168539326, |
| "grad_norm": 1.0874806642532349, |
| "learning_rate": 1.9342975206611572e-05, |
| "loss": 0.0406, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.37640449438202245, |
| "grad_norm": 0.7143360376358032, |
| "learning_rate": 1.9301652892561985e-05, |
| "loss": 0.0392, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.38202247191011235, |
| "grad_norm": 1.010405421257019, |
| "learning_rate": 1.9260330578512398e-05, |
| "loss": 0.0489, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.38764044943820225, |
| "grad_norm": 0.44814518094062805, |
| "learning_rate": 1.921900826446281e-05, |
| "loss": 0.0344, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.39325842696629215, |
| "grad_norm": 0.5441784262657166, |
| "learning_rate": 1.9177685950413224e-05, |
| "loss": 0.0423, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.398876404494382, |
| "grad_norm": 0.4021058678627014, |
| "learning_rate": 1.9136363636363636e-05, |
| "loss": 0.0432, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4044943820224719, |
| "grad_norm": 0.34465810656547546, |
| "learning_rate": 1.9095041322314053e-05, |
| "loss": 0.0428, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4101123595505618, |
| "grad_norm": 0.14510081708431244, |
| "learning_rate": 1.9053719008264465e-05, |
| "loss": 0.0295, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4157303370786517, |
| "grad_norm": 0.330493688583374, |
| "learning_rate": 1.9012396694214878e-05, |
| "loss": 0.0305, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.42134831460674155, |
| "grad_norm": 0.21993879973888397, |
| "learning_rate": 1.897107438016529e-05, |
| "loss": 0.0334, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.42134831460674155, |
| "eval_f1": 0.7062535857716581, |
| "eval_loss": 0.03633005544543266, |
| "eval_precision": 0.6900224215246636, |
| "eval_recall": 0.7232667450058754, |
| "eval_runtime": 28.2949, |
| "eval_samples_per_second": 138.895, |
| "eval_steps_per_second": 4.347, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.42696629213483145, |
| "grad_norm": 0.623182475566864, |
| "learning_rate": 1.8929752066115704e-05, |
| "loss": 0.0434, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.43258426966292135, |
| "grad_norm": 1.1757659912109375, |
| "learning_rate": 1.8888429752066117e-05, |
| "loss": 0.0588, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.43820224719101125, |
| "grad_norm": 0.37911972403526306, |
| "learning_rate": 1.884710743801653e-05, |
| "loss": 0.0293, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4438202247191011, |
| "grad_norm": 0.671259343624115, |
| "learning_rate": 1.8805785123966946e-05, |
| "loss": 0.034, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.449438202247191, |
| "grad_norm": 0.46087324619293213, |
| "learning_rate": 1.8764462809917355e-05, |
| "loss": 0.0339, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4550561797752809, |
| "grad_norm": 0.33612871170043945, |
| "learning_rate": 1.872314049586777e-05, |
| "loss": 0.0358, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4606741573033708, |
| "grad_norm": 0.30411240458488464, |
| "learning_rate": 1.8681818181818184e-05, |
| "loss": 0.0467, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.46629213483146065, |
| "grad_norm": 0.26056644320487976, |
| "learning_rate": 1.8640495867768597e-05, |
| "loss": 0.0404, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.47191011235955055, |
| "grad_norm": 0.7425059676170349, |
| "learning_rate": 1.859917355371901e-05, |
| "loss": 0.0519, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.47752808988764045, |
| "grad_norm": 0.20595073699951172, |
| "learning_rate": 1.8557851239669422e-05, |
| "loss": 0.0373, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.48314606741573035, |
| "grad_norm": 0.706141471862793, |
| "learning_rate": 1.8516528925619835e-05, |
| "loss": 0.0427, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4887640449438202, |
| "grad_norm": 0.27146199345588684, |
| "learning_rate": 1.8475206611570248e-05, |
| "loss": 0.0406, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4943820224719101, |
| "grad_norm": 0.40938302874565125, |
| "learning_rate": 1.8433884297520664e-05, |
| "loss": 0.0373, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.43672415614128113, |
| "learning_rate": 1.8392561983471077e-05, |
| "loss": 0.0613, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5056179775280899, |
| "grad_norm": 0.3265509009361267, |
| "learning_rate": 1.835123966942149e-05, |
| "loss": 0.0433, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5112359550561798, |
| "grad_norm": 0.33285167813301086, |
| "learning_rate": 1.8309917355371903e-05, |
| "loss": 0.0435, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5168539325842697, |
| "grad_norm": 0.46705156564712524, |
| "learning_rate": 1.8268595041322316e-05, |
| "loss": 0.0316, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5224719101123596, |
| "grad_norm": 0.34717079997062683, |
| "learning_rate": 1.822727272727273e-05, |
| "loss": 0.0432, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5280898876404494, |
| "grad_norm": 0.6267735362052917, |
| "learning_rate": 1.818595041322314e-05, |
| "loss": 0.0391, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5337078651685393, |
| "grad_norm": 0.29985982179641724, |
| "learning_rate": 1.8144628099173557e-05, |
| "loss": 0.0314, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5393258426966292, |
| "grad_norm": 0.5540522933006287, |
| "learning_rate": 1.8103305785123967e-05, |
| "loss": 0.0434, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5449438202247191, |
| "grad_norm": 1.0504409074783325, |
| "learning_rate": 1.8061983471074383e-05, |
| "loss": 0.0369, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.550561797752809, |
| "grad_norm": 0.1691039800643921, |
| "learning_rate": 1.8020661157024796e-05, |
| "loss": 0.0288, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5561797752808989, |
| "grad_norm": 0.20762376487255096, |
| "learning_rate": 1.797933884297521e-05, |
| "loss": 0.021, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5617977528089888, |
| "grad_norm": 0.5355738997459412, |
| "learning_rate": 1.793801652892562e-05, |
| "loss": 0.0335, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5617977528089888, |
| "eval_f1": 0.7190332326283987, |
| "eval_loss": 0.03450481593608856, |
| "eval_precision": 0.6750902527075813, |
| "eval_recall": 0.7690951821386603, |
| "eval_runtime": 32.9589, |
| "eval_samples_per_second": 119.239, |
| "eval_steps_per_second": 3.732, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5674157303370787, |
| "grad_norm": 0.2527494430541992, |
| "learning_rate": 1.7896694214876034e-05, |
| "loss": 0.0336, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5730337078651685, |
| "grad_norm": 0.21574370563030243, |
| "learning_rate": 1.7855371900826447e-05, |
| "loss": 0.0401, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5786516853932584, |
| "grad_norm": 0.3417276442050934, |
| "learning_rate": 1.781404958677686e-05, |
| "loss": 0.0268, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5842696629213483, |
| "grad_norm": 0.3619694411754608, |
| "learning_rate": 1.7772727272727276e-05, |
| "loss": 0.052, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5898876404494382, |
| "grad_norm": 0.4339875876903534, |
| "learning_rate": 1.7731404958677685e-05, |
| "loss": 0.0409, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5955056179775281, |
| "grad_norm": 0.31473612785339355, |
| "learning_rate": 1.76900826446281e-05, |
| "loss": 0.0433, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.601123595505618, |
| "grad_norm": 0.3506283760070801, |
| "learning_rate": 1.7648760330578514e-05, |
| "loss": 0.0345, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6067415730337079, |
| "grad_norm": 0.31608250737190247, |
| "learning_rate": 1.7607438016528927e-05, |
| "loss": 0.0373, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6123595505617978, |
| "grad_norm": 0.9107354879379272, |
| "learning_rate": 1.756611570247934e-05, |
| "loss": 0.0325, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6179775280898876, |
| "grad_norm": 1.0891706943511963, |
| "learning_rate": 1.7524793388429753e-05, |
| "loss": 0.0424, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6235955056179775, |
| "grad_norm": 0.716555118560791, |
| "learning_rate": 1.7483471074380166e-05, |
| "loss": 0.0332, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6292134831460674, |
| "grad_norm": 0.33739280700683594, |
| "learning_rate": 1.744214876033058e-05, |
| "loss": 0.029, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6348314606741573, |
| "grad_norm": 0.25892215967178345, |
| "learning_rate": 1.7400826446280995e-05, |
| "loss": 0.0205, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6404494382022472, |
| "grad_norm": 0.6945488452911377, |
| "learning_rate": 1.7359504132231407e-05, |
| "loss": 0.0262, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6460674157303371, |
| "grad_norm": 0.511842668056488, |
| "learning_rate": 1.731818181818182e-05, |
| "loss": 0.0379, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.651685393258427, |
| "grad_norm": 4.446812152862549, |
| "learning_rate": 1.7276859504132233e-05, |
| "loss": 0.0473, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6573033707865169, |
| "grad_norm": 0.35227376222610474, |
| "learning_rate": 1.7235537190082646e-05, |
| "loss": 0.0397, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6629213483146067, |
| "grad_norm": 0.489005446434021, |
| "learning_rate": 1.719421487603306e-05, |
| "loss": 0.033, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6685393258426966, |
| "grad_norm": 0.20524592697620392, |
| "learning_rate": 1.715289256198347e-05, |
| "loss": 0.0384, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6741573033707865, |
| "grad_norm": 0.2928679287433624, |
| "learning_rate": 1.7111570247933888e-05, |
| "loss": 0.0308, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6797752808988764, |
| "grad_norm": 0.3542841970920563, |
| "learning_rate": 1.7070247933884297e-05, |
| "loss": 0.0411, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6853932584269663, |
| "grad_norm": 0.39853426814079285, |
| "learning_rate": 1.7028925619834713e-05, |
| "loss": 0.0332, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6910112359550562, |
| "grad_norm": 0.7216328382492065, |
| "learning_rate": 1.6987603305785126e-05, |
| "loss": 0.0283, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6966292134831461, |
| "grad_norm": 0.5684111714363098, |
| "learning_rate": 1.694628099173554e-05, |
| "loss": 0.0368, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.702247191011236, |
| "grad_norm": 0.2529934048652649, |
| "learning_rate": 1.690495867768595e-05, |
| "loss": 0.0329, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.702247191011236, |
| "eval_f1": 0.7409268565047459, |
| "eval_loss": 0.03298617899417877, |
| "eval_precision": 0.7058510638297872, |
| "eval_recall": 0.7796709753231492, |
| "eval_runtime": 32.8203, |
| "eval_samples_per_second": 119.743, |
| "eval_steps_per_second": 3.748, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7078651685393258, |
| "grad_norm": 0.9135796427726746, |
| "learning_rate": 1.6863636363636364e-05, |
| "loss": 0.0296, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7134831460674157, |
| "grad_norm": 0.4056197702884674, |
| "learning_rate": 1.6822314049586777e-05, |
| "loss": 0.0324, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7191011235955056, |
| "grad_norm": 0.36684852838516235, |
| "learning_rate": 1.678099173553719e-05, |
| "loss": 0.0322, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7247191011235955, |
| "grad_norm": 0.2179303914308548, |
| "learning_rate": 1.6739669421487606e-05, |
| "loss": 0.0208, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7303370786516854, |
| "grad_norm": 1.9967374801635742, |
| "learning_rate": 1.669834710743802e-05, |
| "loss": 0.0344, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7359550561797753, |
| "grad_norm": 0.5518152713775635, |
| "learning_rate": 1.6657024793388432e-05, |
| "loss": 0.033, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7415730337078652, |
| "grad_norm": 0.8944317698478699, |
| "learning_rate": 1.6615702479338845e-05, |
| "loss": 0.0336, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7471910112359551, |
| "grad_norm": 1.0821423530578613, |
| "learning_rate": 1.6574380165289258e-05, |
| "loss": 0.0359, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7528089887640449, |
| "grad_norm": 0.30826064944267273, |
| "learning_rate": 1.653305785123967e-05, |
| "loss": 0.0229, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7584269662921348, |
| "grad_norm": 0.12171895056962967, |
| "learning_rate": 1.6491735537190083e-05, |
| "loss": 0.0325, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7640449438202247, |
| "grad_norm": 0.304765909910202, |
| "learning_rate": 1.64504132231405e-05, |
| "loss": 0.0422, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7696629213483146, |
| "grad_norm": 0.2779518961906433, |
| "learning_rate": 1.640909090909091e-05, |
| "loss": 0.0241, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7752808988764045, |
| "grad_norm": 0.5539456605911255, |
| "learning_rate": 1.6367768595041325e-05, |
| "loss": 0.0338, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7808988764044944, |
| "grad_norm": 0.3549717664718628, |
| "learning_rate": 1.6326446280991738e-05, |
| "loss": 0.0379, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7865168539325843, |
| "grad_norm": 2.0511608123779297, |
| "learning_rate": 1.628512396694215e-05, |
| "loss": 0.0432, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7921348314606742, |
| "grad_norm": 0.6535409688949585, |
| "learning_rate": 1.6243801652892563e-05, |
| "loss": 0.0263, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.797752808988764, |
| "grad_norm": 0.5641036629676819, |
| "learning_rate": 1.6202479338842976e-05, |
| "loss": 0.0444, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8033707865168539, |
| "grad_norm": 0.2755410373210907, |
| "learning_rate": 1.616115702479339e-05, |
| "loss": 0.0382, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8089887640449438, |
| "grad_norm": 0.48516207933425903, |
| "learning_rate": 1.6119834710743802e-05, |
| "loss": 0.0436, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8146067415730337, |
| "grad_norm": 0.2544703781604767, |
| "learning_rate": 1.6078512396694218e-05, |
| "loss": 0.0259, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8202247191011236, |
| "grad_norm": 0.38116955757141113, |
| "learning_rate": 1.603719008264463e-05, |
| "loss": 0.0363, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8258426966292135, |
| "grad_norm": 0.1579870879650116, |
| "learning_rate": 1.5995867768595044e-05, |
| "loss": 0.0312, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8314606741573034, |
| "grad_norm": 0.31899958848953247, |
| "learning_rate": 1.5954545454545456e-05, |
| "loss": 0.0328, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8370786516853933, |
| "grad_norm": 0.2832179367542267, |
| "learning_rate": 1.591322314049587e-05, |
| "loss": 0.0321, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8426966292134831, |
| "grad_norm": 0.43299055099487305, |
| "learning_rate": 1.5871900826446282e-05, |
| "loss": 0.0366, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8426966292134831, |
| "eval_f1": 0.7306652244456463, |
| "eval_loss": 0.03298529237508774, |
| "eval_precision": 0.6768537074148296, |
| "eval_recall": 0.7937720329024677, |
| "eval_runtime": 33.137, |
| "eval_samples_per_second": 118.599, |
| "eval_steps_per_second": 3.712, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.848314606741573, |
| "grad_norm": 0.5625007748603821, |
| "learning_rate": 1.5830578512396695e-05, |
| "loss": 0.0256, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8539325842696629, |
| "grad_norm": 0.5187763571739197, |
| "learning_rate": 1.5789256198347108e-05, |
| "loss": 0.0328, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8595505617977528, |
| "grad_norm": 0.4886994957923889, |
| "learning_rate": 1.574793388429752e-05, |
| "loss": 0.0547, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8651685393258427, |
| "grad_norm": 0.5035399198532104, |
| "learning_rate": 1.5706611570247937e-05, |
| "loss": 0.0387, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8707865168539326, |
| "grad_norm": 0.8656401634216309, |
| "learning_rate": 1.566528925619835e-05, |
| "loss": 0.0335, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8764044943820225, |
| "grad_norm": 0.5307649970054626, |
| "learning_rate": 1.562396694214876e-05, |
| "loss": 0.0287, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8820224719101124, |
| "grad_norm": 0.47168630361557007, |
| "learning_rate": 1.5582644628099175e-05, |
| "loss": 0.0368, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8876404494382022, |
| "grad_norm": 0.38605886697769165, |
| "learning_rate": 1.5541322314049588e-05, |
| "loss": 0.0277, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8932584269662921, |
| "grad_norm": 0.18422210216522217, |
| "learning_rate": 1.55e-05, |
| "loss": 0.03, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.898876404494382, |
| "grad_norm": 0.5309029221534729, |
| "learning_rate": 1.5458677685950413e-05, |
| "loss": 0.0361, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9044943820224719, |
| "grad_norm": 0.13399222493171692, |
| "learning_rate": 1.541735537190083e-05, |
| "loss": 0.0324, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9101123595505618, |
| "grad_norm": 0.441811740398407, |
| "learning_rate": 1.537603305785124e-05, |
| "loss": 0.0314, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9157303370786517, |
| "grad_norm": 0.4198859930038452, |
| "learning_rate": 1.5334710743801655e-05, |
| "loss": 0.0405, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9213483146067416, |
| "grad_norm": 1.2370645999908447, |
| "learning_rate": 1.5293388429752068e-05, |
| "loss": 0.0436, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9269662921348315, |
| "grad_norm": 0.31183677911758423, |
| "learning_rate": 1.525206611570248e-05, |
| "loss": 0.0311, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9325842696629213, |
| "grad_norm": 0.6841112375259399, |
| "learning_rate": 1.5210743801652894e-05, |
| "loss": 0.0263, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9382022471910112, |
| "grad_norm": 0.744534969329834, |
| "learning_rate": 1.5169421487603307e-05, |
| "loss": 0.0434, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9438202247191011, |
| "grad_norm": 0.5860826969146729, |
| "learning_rate": 1.5128099173553721e-05, |
| "loss": 0.0309, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.949438202247191, |
| "grad_norm": 0.30711379647254944, |
| "learning_rate": 1.5086776859504134e-05, |
| "loss": 0.0349, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9550561797752809, |
| "grad_norm": 0.46106937527656555, |
| "learning_rate": 1.5045454545454548e-05, |
| "loss": 0.0238, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9606741573033708, |
| "grad_norm": 0.7039032578468323, |
| "learning_rate": 1.500413223140496e-05, |
| "loss": 0.0495, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9662921348314607, |
| "grad_norm": 0.5371297597885132, |
| "learning_rate": 1.4962809917355372e-05, |
| "loss": 0.024, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9719101123595506, |
| "grad_norm": 0.18199366331100464, |
| "learning_rate": 1.4921487603305787e-05, |
| "loss": 0.0294, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9775280898876404, |
| "grad_norm": 0.4313061535358429, |
| "learning_rate": 1.48801652892562e-05, |
| "loss": 0.0314, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9831460674157303, |
| "grad_norm": 0.633383572101593, |
| "learning_rate": 1.4838842975206614e-05, |
| "loss": 0.0307, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9831460674157303, |
| "eval_f1": 0.7417628836947339, |
| "eval_loss": 0.02960079535841942, |
| "eval_precision": 0.7122769064359114, |
| "eval_recall": 0.7737955346650999, |
| "eval_runtime": 33.4177, |
| "eval_samples_per_second": 117.602, |
| "eval_steps_per_second": 3.681, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9887640449438202, |
| "grad_norm": 0.36454710364341736, |
| "learning_rate": 1.4797520661157025e-05, |
| "loss": 0.0314, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9943820224719101, |
| "grad_norm": 0.4246029555797577, |
| "learning_rate": 1.475619834710744e-05, |
| "loss": 0.0262, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.33757057785987854, |
| "learning_rate": 1.4714876033057852e-05, |
| "loss": 0.028, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.0056179775280898, |
| "grad_norm": 0.30218544602394104, |
| "learning_rate": 1.4673553719008267e-05, |
| "loss": 0.0185, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.0112359550561798, |
| "grad_norm": 0.4351919889450073, |
| "learning_rate": 1.4632231404958678e-05, |
| "loss": 0.0235, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0168539325842696, |
| "grad_norm": 1.160334587097168, |
| "learning_rate": 1.4590909090909091e-05, |
| "loss": 0.0348, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.0224719101123596, |
| "grad_norm": 0.324881911277771, |
| "learning_rate": 1.4549586776859505e-05, |
| "loss": 0.0287, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.0280898876404494, |
| "grad_norm": 0.21142716705799103, |
| "learning_rate": 1.4508264462809918e-05, |
| "loss": 0.0421, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.0337078651685394, |
| "grad_norm": 0.14528660476207733, |
| "learning_rate": 1.4466942148760333e-05, |
| "loss": 0.0281, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.0393258426966292, |
| "grad_norm": 0.33770596981048584, |
| "learning_rate": 1.4425619834710744e-05, |
| "loss": 0.0365, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.0449438202247192, |
| "grad_norm": 0.3050376772880554, |
| "learning_rate": 1.4384297520661158e-05, |
| "loss": 0.0253, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.050561797752809, |
| "grad_norm": 0.706710696220398, |
| "learning_rate": 1.4342975206611571e-05, |
| "loss": 0.0245, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.0561797752808988, |
| "grad_norm": 0.23844891786575317, |
| "learning_rate": 1.4301652892561986e-05, |
| "loss": 0.02, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.0617977528089888, |
| "grad_norm": 0.4411591589450836, |
| "learning_rate": 1.4260330578512398e-05, |
| "loss": 0.0251, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.0674157303370786, |
| "grad_norm": 0.23174303770065308, |
| "learning_rate": 1.421900826446281e-05, |
| "loss": 0.0203, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0730337078651686, |
| "grad_norm": 1.1798287630081177, |
| "learning_rate": 1.4177685950413224e-05, |
| "loss": 0.0356, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.0786516853932584, |
| "grad_norm": 0.33650219440460205, |
| "learning_rate": 1.4136363636363637e-05, |
| "loss": 0.0265, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.0842696629213484, |
| "grad_norm": 0.21325266361236572, |
| "learning_rate": 1.4095041322314051e-05, |
| "loss": 0.0183, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.0898876404494382, |
| "grad_norm": 0.4521240293979645, |
| "learning_rate": 1.4053719008264464e-05, |
| "loss": 0.0268, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.095505617977528, |
| "grad_norm": 0.4072856307029724, |
| "learning_rate": 1.4012396694214879e-05, |
| "loss": 0.0402, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.101123595505618, |
| "grad_norm": 0.47774896025657654, |
| "learning_rate": 1.397107438016529e-05, |
| "loss": 0.0419, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.1067415730337078, |
| "grad_norm": 0.4504724442958832, |
| "learning_rate": 1.3929752066115703e-05, |
| "loss": 0.0273, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.1123595505617978, |
| "grad_norm": 0.4287577271461487, |
| "learning_rate": 1.3888429752066117e-05, |
| "loss": 0.0367, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.1179775280898876, |
| "grad_norm": 0.37722307443618774, |
| "learning_rate": 1.384710743801653e-05, |
| "loss": 0.0239, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.1235955056179776, |
| "grad_norm": 0.22570855915546417, |
| "learning_rate": 1.3805785123966944e-05, |
| "loss": 0.0224, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1235955056179776, |
| "eval_f1": 0.7568022440392708, |
| "eval_loss": 0.02905386872589588, |
| "eval_precision": 0.7241009125067096, |
| "eval_recall": 0.7925969447708578, |
| "eval_runtime": 28.6809, |
| "eval_samples_per_second": 137.025, |
| "eval_steps_per_second": 4.289, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1292134831460674, |
| "grad_norm": 0.728447437286377, |
| "learning_rate": 1.3764462809917355e-05, |
| "loss": 0.022, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.1348314606741572, |
| "grad_norm": 0.10380419343709946, |
| "learning_rate": 1.372314049586777e-05, |
| "loss": 0.0181, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.1404494382022472, |
| "grad_norm": 0.38085779547691345, |
| "learning_rate": 1.3681818181818183e-05, |
| "loss": 0.0308, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.146067415730337, |
| "grad_norm": 0.5472461581230164, |
| "learning_rate": 1.3640495867768597e-05, |
| "loss": 0.0391, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.151685393258427, |
| "grad_norm": 0.4686163663864136, |
| "learning_rate": 1.359917355371901e-05, |
| "loss": 0.0259, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.1573033707865168, |
| "grad_norm": 0.5144456028938293, |
| "learning_rate": 1.3557851239669421e-05, |
| "loss": 0.0252, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.1629213483146068, |
| "grad_norm": 0.5633528232574463, |
| "learning_rate": 1.3516528925619836e-05, |
| "loss": 0.0309, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.1685393258426966, |
| "grad_norm": 0.23379996418952942, |
| "learning_rate": 1.3475206611570249e-05, |
| "loss": 0.0206, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.1741573033707866, |
| "grad_norm": 1.475098729133606, |
| "learning_rate": 1.3433884297520663e-05, |
| "loss": 0.0399, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.1797752808988764, |
| "grad_norm": 0.6528710126876831, |
| "learning_rate": 1.3392561983471076e-05, |
| "loss": 0.0442, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1853932584269664, |
| "grad_norm": 0.5824321508407593, |
| "learning_rate": 1.335123966942149e-05, |
| "loss": 0.0314, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.1910112359550562, |
| "grad_norm": 0.8506532311439514, |
| "learning_rate": 1.3309917355371901e-05, |
| "loss": 0.0245, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.196629213483146, |
| "grad_norm": 0.7218942046165466, |
| "learning_rate": 1.3268595041322314e-05, |
| "loss": 0.0299, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.202247191011236, |
| "grad_norm": 0.2650141716003418, |
| "learning_rate": 1.3227272727272729e-05, |
| "loss": 0.0235, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.2078651685393258, |
| "grad_norm": 0.1138467863202095, |
| "learning_rate": 1.3185950413223142e-05, |
| "loss": 0.0311, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.2134831460674158, |
| "grad_norm": 0.3181060254573822, |
| "learning_rate": 1.3144628099173556e-05, |
| "loss": 0.0321, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.2191011235955056, |
| "grad_norm": 0.2905648648738861, |
| "learning_rate": 1.3103305785123967e-05, |
| "loss": 0.0241, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.2247191011235956, |
| "grad_norm": 0.18806235492229462, |
| "learning_rate": 1.3061983471074382e-05, |
| "loss": 0.0233, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.2303370786516854, |
| "grad_norm": 0.3551190495491028, |
| "learning_rate": 1.3020661157024794e-05, |
| "loss": 0.0348, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.2359550561797752, |
| "grad_norm": 0.2311626672744751, |
| "learning_rate": 1.2979338842975209e-05, |
| "loss": 0.0352, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.2415730337078652, |
| "grad_norm": 0.41256803274154663, |
| "learning_rate": 1.2938016528925622e-05, |
| "loss": 0.0334, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.247191011235955, |
| "grad_norm": 0.23223748803138733, |
| "learning_rate": 1.2896694214876033e-05, |
| "loss": 0.0285, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.252808988764045, |
| "grad_norm": 0.2413235753774643, |
| "learning_rate": 1.2855371900826447e-05, |
| "loss": 0.0193, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.2584269662921348, |
| "grad_norm": 0.5607805252075195, |
| "learning_rate": 1.281404958677686e-05, |
| "loss": 0.022, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.2640449438202248, |
| "grad_norm": 0.1772303432226181, |
| "learning_rate": 1.2772727272727275e-05, |
| "loss": 0.0331, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.2640449438202248, |
| "eval_f1": 0.760931289040318, |
| "eval_loss": 0.028664391487836838, |
| "eval_precision": 0.7362637362637363, |
| "eval_recall": 0.7873090481786134, |
| "eval_runtime": 28.3945, |
| "eval_samples_per_second": 138.407, |
| "eval_steps_per_second": 4.332, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.2696629213483146, |
| "grad_norm": 0.2562411427497864, |
| "learning_rate": 1.2731404958677686e-05, |
| "loss": 0.0295, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.2752808988764044, |
| "grad_norm": 0.13123421370983124, |
| "learning_rate": 1.26900826446281e-05, |
| "loss": 0.0306, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.2808988764044944, |
| "grad_norm": 0.1774195283651352, |
| "learning_rate": 1.2648760330578513e-05, |
| "loss": 0.0221, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.2865168539325842, |
| "grad_norm": 0.21137966215610504, |
| "learning_rate": 1.2607438016528926e-05, |
| "loss": 0.024, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.2921348314606742, |
| "grad_norm": 0.3009016513824463, |
| "learning_rate": 1.256611570247934e-05, |
| "loss": 0.024, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.297752808988764, |
| "grad_norm": 1.0978662967681885, |
| "learning_rate": 1.2524793388429752e-05, |
| "loss": 0.0285, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.303370786516854, |
| "grad_norm": 0.17469504475593567, |
| "learning_rate": 1.2483471074380166e-05, |
| "loss": 0.0242, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.3089887640449438, |
| "grad_norm": 0.227843776345253, |
| "learning_rate": 1.2442148760330579e-05, |
| "loss": 0.0248, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.3146067415730336, |
| "grad_norm": 0.2597135901451111, |
| "learning_rate": 1.2400826446280993e-05, |
| "loss": 0.0184, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.3202247191011236, |
| "grad_norm": 0.3049301207065582, |
| "learning_rate": 1.2359504132231406e-05, |
| "loss": 0.0352, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.3258426966292136, |
| "grad_norm": 0.6089704036712646, |
| "learning_rate": 1.231818181818182e-05, |
| "loss": 0.0267, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.3314606741573034, |
| "grad_norm": 0.29360252618789673, |
| "learning_rate": 1.2276859504132232e-05, |
| "loss": 0.042, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.3370786516853932, |
| "grad_norm": 0.21009930968284607, |
| "learning_rate": 1.2235537190082645e-05, |
| "loss": 0.0242, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.3426966292134832, |
| "grad_norm": 0.3115074932575226, |
| "learning_rate": 1.2194214876033059e-05, |
| "loss": 0.022, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.348314606741573, |
| "grad_norm": 0.5562008023262024, |
| "learning_rate": 1.2152892561983472e-05, |
| "loss": 0.0355, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.3539325842696628, |
| "grad_norm": 0.5737345218658447, |
| "learning_rate": 1.2111570247933886e-05, |
| "loss": 0.0268, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.3595505617977528, |
| "grad_norm": 0.10395320504903793, |
| "learning_rate": 1.2070247933884298e-05, |
| "loss": 0.0139, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.3651685393258428, |
| "grad_norm": 0.3552614450454712, |
| "learning_rate": 1.2028925619834712e-05, |
| "loss": 0.0332, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.3707865168539326, |
| "grad_norm": 0.46564894914627075, |
| "learning_rate": 1.1987603305785125e-05, |
| "loss": 0.0295, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.3764044943820224, |
| "grad_norm": 0.16816848516464233, |
| "learning_rate": 1.1946280991735538e-05, |
| "loss": 0.0368, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3820224719101124, |
| "grad_norm": 0.6996704936027527, |
| "learning_rate": 1.1904958677685952e-05, |
| "loss": 0.0274, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.3876404494382022, |
| "grad_norm": 0.14424335956573486, |
| "learning_rate": 1.1863636363636363e-05, |
| "loss": 0.0256, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.3932584269662922, |
| "grad_norm": 0.287166565656662, |
| "learning_rate": 1.1822314049586778e-05, |
| "loss": 0.0251, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.398876404494382, |
| "grad_norm": 0.31948205828666687, |
| "learning_rate": 1.178099173553719e-05, |
| "loss": 0.0325, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.404494382022472, |
| "grad_norm": 0.5256792902946472, |
| "learning_rate": 1.1739669421487605e-05, |
| "loss": 0.028, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.404494382022472, |
| "eval_f1": 0.7634016278417064, |
| "eval_loss": 0.027553008869290352, |
| "eval_precision": 0.7307898979043524, |
| "eval_recall": 0.799059929494712, |
| "eval_runtime": 28.5911, |
| "eval_samples_per_second": 137.455, |
| "eval_steps_per_second": 4.302, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4101123595505618, |
| "grad_norm": 0.47131842374801636, |
| "learning_rate": 1.1698347107438018e-05, |
| "loss": 0.0224, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.4157303370786516, |
| "grad_norm": 0.45454141497612, |
| "learning_rate": 1.1657024793388432e-05, |
| "loss": 0.0212, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.4213483146067416, |
| "grad_norm": 0.542829155921936, |
| "learning_rate": 1.1615702479338843e-05, |
| "loss": 0.0362, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.4269662921348314, |
| "grad_norm": 0.2345789521932602, |
| "learning_rate": 1.1574380165289256e-05, |
| "loss": 0.0325, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.4325842696629214, |
| "grad_norm": 0.3538082540035248, |
| "learning_rate": 1.153305785123967e-05, |
| "loss": 0.0319, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.4382022471910112, |
| "grad_norm": 0.30572107434272766, |
| "learning_rate": 1.1491735537190084e-05, |
| "loss": 0.0258, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.4438202247191012, |
| "grad_norm": 0.6635351777076721, |
| "learning_rate": 1.1450413223140498e-05, |
| "loss": 0.0184, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.449438202247191, |
| "grad_norm": 0.15726110339164734, |
| "learning_rate": 1.140909090909091e-05, |
| "loss": 0.0285, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.4550561797752808, |
| "grad_norm": 0.46479687094688416, |
| "learning_rate": 1.1367768595041324e-05, |
| "loss": 0.0282, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.4606741573033708, |
| "grad_norm": 0.3639979064464569, |
| "learning_rate": 1.1326446280991737e-05, |
| "loss": 0.022, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.4662921348314606, |
| "grad_norm": 0.2872810959815979, |
| "learning_rate": 1.128512396694215e-05, |
| "loss": 0.0234, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.4719101123595506, |
| "grad_norm": 0.30308109521865845, |
| "learning_rate": 1.1243801652892564e-05, |
| "loss": 0.0258, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.4775280898876404, |
| "grad_norm": 0.1484086513519287, |
| "learning_rate": 1.1202479338842975e-05, |
| "loss": 0.0244, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.4831460674157304, |
| "grad_norm": 0.2520122230052948, |
| "learning_rate": 1.116115702479339e-05, |
| "loss": 0.0269, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.4887640449438202, |
| "grad_norm": 0.4813540577888489, |
| "learning_rate": 1.1119834710743802e-05, |
| "loss": 0.0295, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.49438202247191, |
| "grad_norm": 0.2896086871623993, |
| "learning_rate": 1.1078512396694217e-05, |
| "loss": 0.0302, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.093878984451294, |
| "learning_rate": 1.103719008264463e-05, |
| "loss": 0.023, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.50561797752809, |
| "grad_norm": 0.21631434559822083, |
| "learning_rate": 1.0995867768595044e-05, |
| "loss": 0.0276, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.5112359550561798, |
| "grad_norm": 0.19131970405578613, |
| "learning_rate": 1.0954545454545455e-05, |
| "loss": 0.0255, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.5168539325842696, |
| "grad_norm": 0.38947793841362, |
| "learning_rate": 1.0913223140495868e-05, |
| "loss": 0.044, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.5224719101123596, |
| "grad_norm": 0.2285880446434021, |
| "learning_rate": 1.0871900826446282e-05, |
| "loss": 0.0203, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.5280898876404494, |
| "grad_norm": 0.32444268465042114, |
| "learning_rate": 1.0830578512396695e-05, |
| "loss": 0.0349, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.5337078651685392, |
| "grad_norm": 0.19382227957248688, |
| "learning_rate": 1.0789256198347108e-05, |
| "loss": 0.0217, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.5393258426966292, |
| "grad_norm": 0.5336052775382996, |
| "learning_rate": 1.0747933884297521e-05, |
| "loss": 0.0278, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.5449438202247192, |
| "grad_norm": 0.33937588334083557, |
| "learning_rate": 1.0706611570247935e-05, |
| "loss": 0.0308, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.5449438202247192, |
| "eval_f1": 0.775, |
| "eval_loss": 0.027629304677248, |
| "eval_precision": 0.7349841938883035, |
| "eval_recall": 0.8196239717978848, |
| "eval_runtime": 28.5578, |
| "eval_samples_per_second": 137.616, |
| "eval_steps_per_second": 4.307, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.550561797752809, |
| "grad_norm": 0.5089320540428162, |
| "learning_rate": 1.0665289256198348e-05, |
| "loss": 0.0417, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.5561797752808988, |
| "grad_norm": 0.3134651780128479, |
| "learning_rate": 1.062396694214876e-05, |
| "loss": 0.0194, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.5617977528089888, |
| "grad_norm": 0.27301183342933655, |
| "learning_rate": 1.0582644628099174e-05, |
| "loss": 0.0232, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.5674157303370788, |
| "grad_norm": 0.4512588679790497, |
| "learning_rate": 1.0541322314049587e-05, |
| "loss": 0.0279, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.5730337078651684, |
| "grad_norm": 0.0741652399301529, |
| "learning_rate": 1.0500000000000001e-05, |
| "loss": 0.0304, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.5786516853932584, |
| "grad_norm": 0.5196810960769653, |
| "learning_rate": 1.0458677685950414e-05, |
| "loss": 0.0191, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.5842696629213484, |
| "grad_norm": 0.6707150340080261, |
| "learning_rate": 1.0417355371900828e-05, |
| "loss": 0.0223, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.5898876404494382, |
| "grad_norm": 0.10301195830106735, |
| "learning_rate": 1.037603305785124e-05, |
| "loss": 0.0256, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.595505617977528, |
| "grad_norm": 0.41582268476486206, |
| "learning_rate": 1.0334710743801654e-05, |
| "loss": 0.0211, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.601123595505618, |
| "grad_norm": 0.07321290671825409, |
| "learning_rate": 1.0293388429752067e-05, |
| "loss": 0.0206, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.606741573033708, |
| "grad_norm": 0.1504063755273819, |
| "learning_rate": 1.025206611570248e-05, |
| "loss": 0.0209, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.6123595505617978, |
| "grad_norm": 0.18410630524158478, |
| "learning_rate": 1.0210743801652894e-05, |
| "loss": 0.0224, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.6179775280898876, |
| "grad_norm": 0.30197209119796753, |
| "learning_rate": 1.0169421487603305e-05, |
| "loss": 0.037, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.6235955056179776, |
| "grad_norm": 0.35574042797088623, |
| "learning_rate": 1.012809917355372e-05, |
| "loss": 0.0369, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.6292134831460674, |
| "grad_norm": 0.6198880076408386, |
| "learning_rate": 1.0086776859504133e-05, |
| "loss": 0.0289, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.6348314606741572, |
| "grad_norm": 0.282822847366333, |
| "learning_rate": 1.0045454545454547e-05, |
| "loss": 0.0237, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.6404494382022472, |
| "grad_norm": 0.4256187677383423, |
| "learning_rate": 1.000413223140496e-05, |
| "loss": 0.0262, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.6460674157303372, |
| "grad_norm": 0.5048171877861023, |
| "learning_rate": 9.962809917355373e-06, |
| "loss": 0.019, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.651685393258427, |
| "grad_norm": 0.3408055603504181, |
| "learning_rate": 9.921487603305785e-06, |
| "loss": 0.0154, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.6573033707865168, |
| "grad_norm": 0.8848744630813599, |
| "learning_rate": 9.8801652892562e-06, |
| "loss": 0.0242, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.6629213483146068, |
| "grad_norm": 0.5149729251861572, |
| "learning_rate": 9.838842975206613e-06, |
| "loss": 0.0348, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.6685393258426966, |
| "grad_norm": 0.9948667287826538, |
| "learning_rate": 9.797520661157026e-06, |
| "loss": 0.0329, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.6741573033707864, |
| "grad_norm": 0.5616855621337891, |
| "learning_rate": 9.756198347107438e-06, |
| "loss": 0.0218, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.6797752808988764, |
| "grad_norm": 0.23508839309215546, |
| "learning_rate": 9.714876033057851e-06, |
| "loss": 0.0218, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.6853932584269664, |
| "grad_norm": 1.1748582124710083, |
| "learning_rate": 9.673553719008266e-06, |
| "loss": 0.0207, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6853932584269664, |
| "eval_f1": 0.774390243902439, |
| "eval_loss": 0.028053877875208855, |
| "eval_precision": 0.7329485834207765, |
| "eval_recall": 0.8207990599294948, |
| "eval_runtime": 28.6894, |
| "eval_samples_per_second": 136.985, |
| "eval_steps_per_second": 4.287, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6910112359550562, |
| "grad_norm": 0.5501458048820496, |
| "learning_rate": 9.632231404958679e-06, |
| "loss": 0.0447, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.696629213483146, |
| "grad_norm": 0.2999919056892395, |
| "learning_rate": 9.590909090909091e-06, |
| "loss": 0.0251, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.702247191011236, |
| "grad_norm": 0.3625814914703369, |
| "learning_rate": 9.549586776859506e-06, |
| "loss": 0.022, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.7078651685393258, |
| "grad_norm": 0.3906262516975403, |
| "learning_rate": 9.508264462809919e-06, |
| "loss": 0.0263, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.7134831460674156, |
| "grad_norm": 0.37318113446235657, |
| "learning_rate": 9.466942148760331e-06, |
| "loss": 0.0283, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.7191011235955056, |
| "grad_norm": 0.2729286849498749, |
| "learning_rate": 9.425619834710744e-06, |
| "loss": 0.0293, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.7247191011235956, |
| "grad_norm": 0.2665950059890747, |
| "learning_rate": 9.384297520661157e-06, |
| "loss": 0.0264, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.7303370786516854, |
| "grad_norm": 0.3190416991710663, |
| "learning_rate": 9.342975206611572e-06, |
| "loss": 0.028, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.7359550561797752, |
| "grad_norm": 0.1753334254026413, |
| "learning_rate": 9.301652892561984e-06, |
| "loss": 0.0182, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.7415730337078652, |
| "grad_norm": 0.20632904767990112, |
| "learning_rate": 9.260330578512397e-06, |
| "loss": 0.0276, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.7471910112359552, |
| "grad_norm": 0.9477939605712891, |
| "learning_rate": 9.219008264462812e-06, |
| "loss": 0.0144, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.7528089887640448, |
| "grad_norm": 0.09771013259887695, |
| "learning_rate": 9.177685950413224e-06, |
| "loss": 0.0391, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.7584269662921348, |
| "grad_norm": 0.46412840485572815, |
| "learning_rate": 9.136363636363637e-06, |
| "loss": 0.0301, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.7640449438202248, |
| "grad_norm": 0.29480355978012085, |
| "learning_rate": 9.09504132231405e-06, |
| "loss": 0.0248, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.7696629213483146, |
| "grad_norm": 0.2871951758861542, |
| "learning_rate": 9.053719008264463e-06, |
| "loss": 0.0256, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.7752808988764044, |
| "grad_norm": 0.6297323107719421, |
| "learning_rate": 9.012396694214877e-06, |
| "loss": 0.0366, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.7808988764044944, |
| "grad_norm": 0.16934314370155334, |
| "learning_rate": 8.97107438016529e-06, |
| "loss": 0.0301, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.7865168539325844, |
| "grad_norm": 0.33454304933547974, |
| "learning_rate": 8.929752066115703e-06, |
| "loss": 0.0198, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.7921348314606742, |
| "grad_norm": 2.09133243560791, |
| "learning_rate": 8.888429752066118e-06, |
| "loss": 0.0395, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.797752808988764, |
| "grad_norm": 0.31659209728240967, |
| "learning_rate": 8.84710743801653e-06, |
| "loss": 0.0289, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.803370786516854, |
| "grad_norm": 0.44267770648002625, |
| "learning_rate": 8.805785123966943e-06, |
| "loss": 0.0218, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.8089887640449438, |
| "grad_norm": 0.615263044834137, |
| "learning_rate": 8.764462809917356e-06, |
| "loss": 0.0186, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.8146067415730336, |
| "grad_norm": 0.5823076367378235, |
| "learning_rate": 8.723140495867769e-06, |
| "loss": 0.0274, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.8202247191011236, |
| "grad_norm": 0.7892521619796753, |
| "learning_rate": 8.681818181818182e-06, |
| "loss": 0.0268, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.8258426966292136, |
| "grad_norm": 0.285401314496994, |
| "learning_rate": 8.640495867768596e-06, |
| "loss": 0.0227, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.8258426966292136, |
| "eval_f1": 0.7772403982930299, |
| "eval_loss": 0.027760878205299377, |
| "eval_precision": 0.7534473248758963, |
| "eval_recall": 0.8025851938895417, |
| "eval_runtime": 29.8401, |
| "eval_samples_per_second": 131.702, |
| "eval_steps_per_second": 4.122, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.8314606741573034, |
| "grad_norm": 0.387917160987854, |
| "learning_rate": 8.599173553719009e-06, |
| "loss": 0.029, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.8370786516853932, |
| "grad_norm": 0.9873700141906738, |
| "learning_rate": 8.557851239669422e-06, |
| "loss": 0.031, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.8426966292134832, |
| "grad_norm": 1.048519253730774, |
| "learning_rate": 8.516528925619836e-06, |
| "loss": 0.0401, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.848314606741573, |
| "grad_norm": 0.311575323343277, |
| "learning_rate": 8.475206611570249e-06, |
| "loss": 0.0296, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.8539325842696628, |
| "grad_norm": 0.5239911675453186, |
| "learning_rate": 8.433884297520662e-06, |
| "loss": 0.0264, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.8595505617977528, |
| "grad_norm": 0.28296995162963867, |
| "learning_rate": 8.392561983471075e-06, |
| "loss": 0.0179, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.8651685393258428, |
| "grad_norm": 0.19286566972732544, |
| "learning_rate": 8.351239669421487e-06, |
| "loss": 0.0251, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.8707865168539326, |
| "grad_norm": 0.19410869479179382, |
| "learning_rate": 8.309917355371902e-06, |
| "loss": 0.0225, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.8764044943820224, |
| "grad_norm": 0.253444641828537, |
| "learning_rate": 8.268595041322315e-06, |
| "loss": 0.0238, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.8820224719101124, |
| "grad_norm": 0.9381386041641235, |
| "learning_rate": 8.227272727272728e-06, |
| "loss": 0.0169, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.8876404494382022, |
| "grad_norm": 0.33516111969947815, |
| "learning_rate": 8.185950413223142e-06, |
| "loss": 0.016, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.893258426966292, |
| "grad_norm": 1.103043556213379, |
| "learning_rate": 8.144628099173555e-06, |
| "loss": 0.0236, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.898876404494382, |
| "grad_norm": 0.5476918816566467, |
| "learning_rate": 8.103305785123968e-06, |
| "loss": 0.0358, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.904494382022472, |
| "grad_norm": 0.23972313106060028, |
| "learning_rate": 8.06198347107438e-06, |
| "loss": 0.0141, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.9101123595505618, |
| "grad_norm": 0.3573669493198395, |
| "learning_rate": 8.020661157024793e-06, |
| "loss": 0.0221, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.9157303370786516, |
| "grad_norm": 0.9739108085632324, |
| "learning_rate": 7.979338842975208e-06, |
| "loss": 0.0284, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.9213483146067416, |
| "grad_norm": 0.7444821000099182, |
| "learning_rate": 7.93801652892562e-06, |
| "loss": 0.0391, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.9269662921348316, |
| "grad_norm": 0.3812079131603241, |
| "learning_rate": 7.896694214876033e-06, |
| "loss": 0.0285, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.9325842696629212, |
| "grad_norm": 0.5086238980293274, |
| "learning_rate": 7.855371900826448e-06, |
| "loss": 0.0227, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.9382022471910112, |
| "grad_norm": 0.32544034719467163, |
| "learning_rate": 7.81404958677686e-06, |
| "loss": 0.0181, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.9438202247191012, |
| "grad_norm": 1.2971950769424438, |
| "learning_rate": 7.772727272727273e-06, |
| "loss": 0.0422, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.949438202247191, |
| "grad_norm": 0.2870737910270691, |
| "learning_rate": 7.731404958677686e-06, |
| "loss": 0.024, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.9550561797752808, |
| "grad_norm": 0.3665461540222168, |
| "learning_rate": 7.690082644628099e-06, |
| "loss": 0.0222, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.9606741573033708, |
| "grad_norm": 0.3672967255115509, |
| "learning_rate": 7.648760330578514e-06, |
| "loss": 0.0332, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.9662921348314608, |
| "grad_norm": 0.8471900224685669, |
| "learning_rate": 7.607438016528926e-06, |
| "loss": 0.0427, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.9662921348314608, |
| "eval_f1": 0.782293986636971, |
| "eval_loss": 0.026905611157417297, |
| "eval_precision": 0.7433862433862434, |
| "eval_recall": 0.8254994124559342, |
| "eval_runtime": 27.5479, |
| "eval_samples_per_second": 142.66, |
| "eval_steps_per_second": 4.465, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.9719101123595506, |
| "grad_norm": 0.32538992166519165, |
| "learning_rate": 7.56611570247934e-06, |
| "loss": 0.0269, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.9775280898876404, |
| "grad_norm": 0.10877460241317749, |
| "learning_rate": 7.524793388429753e-06, |
| "loss": 0.0212, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.9831460674157304, |
| "grad_norm": 0.0975649505853653, |
| "learning_rate": 7.4834710743801665e-06, |
| "loss": 0.0232, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.9887640449438202, |
| "grad_norm": 0.32634270191192627, |
| "learning_rate": 7.4421487603305785e-06, |
| "loss": 0.0217, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.99438202247191, |
| "grad_norm": 0.10080187767744064, |
| "learning_rate": 7.400826446280992e-06, |
| "loss": 0.0299, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.27767449617385864, |
| "learning_rate": 7.359504132231406e-06, |
| "loss": 0.026, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.00561797752809, |
| "grad_norm": 0.2725074291229248, |
| "learning_rate": 7.3181818181818186e-06, |
| "loss": 0.0243, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.0112359550561796, |
| "grad_norm": 0.41963133215904236, |
| "learning_rate": 7.276859504132232e-06, |
| "loss": 0.027, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.0168539325842696, |
| "grad_norm": 0.3473270535469055, |
| "learning_rate": 7.235537190082645e-06, |
| "loss": 0.0238, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.0224719101123596, |
| "grad_norm": 0.38403695821762085, |
| "learning_rate": 7.194214876033059e-06, |
| "loss": 0.0261, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.0280898876404496, |
| "grad_norm": 0.42964357137680054, |
| "learning_rate": 7.152892561983472e-06, |
| "loss": 0.0343, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.033707865168539, |
| "grad_norm": 0.5099667310714722, |
| "learning_rate": 7.111570247933884e-06, |
| "loss": 0.0209, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.039325842696629, |
| "grad_norm": 0.34141868352890015, |
| "learning_rate": 7.070247933884298e-06, |
| "loss": 0.0295, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.044943820224719, |
| "grad_norm": 0.08186814934015274, |
| "learning_rate": 7.028925619834711e-06, |
| "loss": 0.021, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.050561797752809, |
| "grad_norm": 0.15661613643169403, |
| "learning_rate": 6.987603305785124e-06, |
| "loss": 0.0183, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.056179775280899, |
| "grad_norm": 0.24707892537117004, |
| "learning_rate": 6.946280991735538e-06, |
| "loss": 0.0204, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.061797752808989, |
| "grad_norm": 0.1336873322725296, |
| "learning_rate": 6.904958677685951e-06, |
| "loss": 0.0186, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.067415730337079, |
| "grad_norm": 0.07359451055526733, |
| "learning_rate": 6.8636363636363645e-06, |
| "loss": 0.0295, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.0730337078651684, |
| "grad_norm": 0.8857179880142212, |
| "learning_rate": 6.822314049586778e-06, |
| "loss": 0.0185, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.0786516853932584, |
| "grad_norm": 0.5854625105857849, |
| "learning_rate": 6.78099173553719e-06, |
| "loss": 0.0202, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.0842696629213484, |
| "grad_norm": 0.3949134647846222, |
| "learning_rate": 6.739669421487604e-06, |
| "loss": 0.0277, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.0898876404494384, |
| "grad_norm": 1.1485154628753662, |
| "learning_rate": 6.698347107438017e-06, |
| "loss": 0.0191, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.095505617977528, |
| "grad_norm": 0.5519204139709473, |
| "learning_rate": 6.65702479338843e-06, |
| "loss": 0.0245, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.101123595505618, |
| "grad_norm": 0.6175654530525208, |
| "learning_rate": 6.615702479338844e-06, |
| "loss": 0.0353, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.106741573033708, |
| "grad_norm": 0.23454013466835022, |
| "learning_rate": 6.574380165289257e-06, |
| "loss": 0.0366, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.106741573033708, |
| "eval_f1": 0.7762276785714285, |
| "eval_loss": 0.026860907673835754, |
| "eval_precision": 0.7391073326248672, |
| "eval_recall": 0.8172737955346651, |
| "eval_runtime": 27.5328, |
| "eval_samples_per_second": 142.739, |
| "eval_steps_per_second": 4.467, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.1123595505617976, |
| "grad_norm": 0.7361034750938416, |
| "learning_rate": 6.53305785123967e-06, |
| "loss": 0.0334, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.1179775280898876, |
| "grad_norm": 0.6150330901145935, |
| "learning_rate": 6.491735537190084e-06, |
| "loss": 0.0153, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.1235955056179776, |
| "grad_norm": 0.45887675881385803, |
| "learning_rate": 6.450413223140496e-06, |
| "loss": 0.0193, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.1292134831460676, |
| "grad_norm": 0.658532440662384, |
| "learning_rate": 6.40909090909091e-06, |
| "loss": 0.017, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.134831460674157, |
| "grad_norm": 0.3698727786540985, |
| "learning_rate": 6.3677685950413224e-06, |
| "loss": 0.0154, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.140449438202247, |
| "grad_norm": 0.3840799033641815, |
| "learning_rate": 6.326446280991736e-06, |
| "loss": 0.0185, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.146067415730337, |
| "grad_norm": 0.5709568858146667, |
| "learning_rate": 6.285123966942149e-06, |
| "loss": 0.0166, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.151685393258427, |
| "grad_norm": 0.5471051335334778, |
| "learning_rate": 6.2438016528925626e-06, |
| "loss": 0.0284, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.157303370786517, |
| "grad_norm": 0.2647755444049835, |
| "learning_rate": 6.202479338842976e-06, |
| "loss": 0.0266, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.162921348314607, |
| "grad_norm": 0.8098225593566895, |
| "learning_rate": 6.161157024793389e-06, |
| "loss": 0.0324, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.168539325842697, |
| "grad_norm": 0.060193296521902084, |
| "learning_rate": 6.119834710743802e-06, |
| "loss": 0.0198, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.1741573033707864, |
| "grad_norm": 0.29814398288726807, |
| "learning_rate": 6.078512396694215e-06, |
| "loss": 0.022, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.1797752808988764, |
| "grad_norm": 0.4286690652370453, |
| "learning_rate": 6.037190082644628e-06, |
| "loss": 0.0268, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.1853932584269664, |
| "grad_norm": 0.29490065574645996, |
| "learning_rate": 5.995867768595042e-06, |
| "loss": 0.0276, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.191011235955056, |
| "grad_norm": 0.19046035408973694, |
| "learning_rate": 5.954545454545455e-06, |
| "loss": 0.0166, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.196629213483146, |
| "grad_norm": 0.17454463243484497, |
| "learning_rate": 5.913223140495868e-06, |
| "loss": 0.0248, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.202247191011236, |
| "grad_norm": 0.4883466362953186, |
| "learning_rate": 5.871900826446282e-06, |
| "loss": 0.0227, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.207865168539326, |
| "grad_norm": 0.4298355281352997, |
| "learning_rate": 5.830578512396695e-06, |
| "loss": 0.0193, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.2134831460674156, |
| "grad_norm": 0.348326176404953, |
| "learning_rate": 5.789256198347108e-06, |
| "loss": 0.0197, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.2191011235955056, |
| "grad_norm": 0.25717148184776306, |
| "learning_rate": 5.7479338842975205e-06, |
| "loss": 0.0191, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.2247191011235956, |
| "grad_norm": 0.217723548412323, |
| "learning_rate": 5.706611570247934e-06, |
| "loss": 0.0132, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.2303370786516856, |
| "grad_norm": 0.1498790681362152, |
| "learning_rate": 5.665289256198348e-06, |
| "loss": 0.0193, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.235955056179775, |
| "grad_norm": 0.30186113715171814, |
| "learning_rate": 5.623966942148761e-06, |
| "loss": 0.0157, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.241573033707865, |
| "grad_norm": 0.5343513488769531, |
| "learning_rate": 5.582644628099174e-06, |
| "loss": 0.0238, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.247191011235955, |
| "grad_norm": 0.05514984950423241, |
| "learning_rate": 5.541322314049588e-06, |
| "loss": 0.0232, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.247191011235955, |
| "eval_f1": 0.7822648969217735, |
| "eval_loss": 0.027265124022960663, |
| "eval_precision": 0.7531266992930941, |
| "eval_recall": 0.8137485311398355, |
| "eval_runtime": 27.5955, |
| "eval_samples_per_second": 142.414, |
| "eval_steps_per_second": 4.457, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.252808988764045, |
| "grad_norm": 0.28251585364341736, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.0171, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.258426966292135, |
| "grad_norm": 0.5261769890785217, |
| "learning_rate": 5.4586776859504135e-06, |
| "loss": 0.0333, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.264044943820225, |
| "grad_norm": 0.5585474967956543, |
| "learning_rate": 5.417355371900826e-06, |
| "loss": 0.0226, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.2696629213483144, |
| "grad_norm": 0.3328685760498047, |
| "learning_rate": 5.37603305785124e-06, |
| "loss": 0.026, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.2752808988764044, |
| "grad_norm": 0.16690856218338013, |
| "learning_rate": 5.334710743801654e-06, |
| "loss": 0.0231, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.2808988764044944, |
| "grad_norm": 0.2629925608634949, |
| "learning_rate": 5.2933884297520664e-06, |
| "loss": 0.0209, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.2865168539325844, |
| "grad_norm": 0.21847395598888397, |
| "learning_rate": 5.25206611570248e-06, |
| "loss": 0.0214, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.292134831460674, |
| "grad_norm": 0.4954984188079834, |
| "learning_rate": 5.210743801652893e-06, |
| "loss": 0.0224, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.297752808988764, |
| "grad_norm": 0.19115880131721497, |
| "learning_rate": 5.1694214876033065e-06, |
| "loss": 0.0294, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.303370786516854, |
| "grad_norm": 0.9964483976364136, |
| "learning_rate": 5.1280991735537185e-06, |
| "loss": 0.0264, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.308988764044944, |
| "grad_norm": 0.4354044795036316, |
| "learning_rate": 5.086776859504132e-06, |
| "loss": 0.0159, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.3146067415730336, |
| "grad_norm": 0.6680939793586731, |
| "learning_rate": 5.045454545454546e-06, |
| "loss": 0.0297, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.3202247191011236, |
| "grad_norm": 0.39689382910728455, |
| "learning_rate": 5.004132231404959e-06, |
| "loss": 0.0234, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.3258426966292136, |
| "grad_norm": 0.23396579921245575, |
| "learning_rate": 4.962809917355372e-06, |
| "loss": 0.0267, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.331460674157303, |
| "grad_norm": 0.19987128674983978, |
| "learning_rate": 4.921487603305786e-06, |
| "loss": 0.0158, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.337078651685393, |
| "grad_norm": 0.3705327808856964, |
| "learning_rate": 4.880165289256199e-06, |
| "loss": 0.0213, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.342696629213483, |
| "grad_norm": 0.1622433066368103, |
| "learning_rate": 4.8388429752066115e-06, |
| "loss": 0.0216, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.348314606741573, |
| "grad_norm": 0.1764877438545227, |
| "learning_rate": 4.797520661157025e-06, |
| "loss": 0.0202, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.353932584269663, |
| "grad_norm": 0.612802267074585, |
| "learning_rate": 4.756198347107439e-06, |
| "loss": 0.0296, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.359550561797753, |
| "grad_norm": 0.29788005352020264, |
| "learning_rate": 4.714876033057852e-06, |
| "loss": 0.0163, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.365168539325843, |
| "grad_norm": 0.9954004883766174, |
| "learning_rate": 4.6735537190082645e-06, |
| "loss": 0.0236, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.370786516853933, |
| "grad_norm": 0.6486590504646301, |
| "learning_rate": 4.632231404958678e-06, |
| "loss": 0.0218, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.3764044943820224, |
| "grad_norm": 0.3950415551662445, |
| "learning_rate": 4.590909090909092e-06, |
| "loss": 0.0264, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.3820224719101124, |
| "grad_norm": 0.8847681879997253, |
| "learning_rate": 4.549586776859505e-06, |
| "loss": 0.0192, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.3876404494382024, |
| "grad_norm": 1.5672686100006104, |
| "learning_rate": 4.508264462809917e-06, |
| "loss": 0.0395, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.3876404494382024, |
| "eval_f1": 0.7883418222976797, |
| "eval_loss": 0.026355577632784843, |
| "eval_precision": 0.7603711790393013, |
| "eval_recall": 0.818448883666275, |
| "eval_runtime": 27.5406, |
| "eval_samples_per_second": 142.699, |
| "eval_steps_per_second": 4.466, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.393258426966292, |
| "grad_norm": 0.16888077557086945, |
| "learning_rate": 4.466942148760331e-06, |
| "loss": 0.0189, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.398876404494382, |
| "grad_norm": 0.2687821686267853, |
| "learning_rate": 4.425619834710745e-06, |
| "loss": 0.0234, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.404494382022472, |
| "grad_norm": 0.22109387814998627, |
| "learning_rate": 4.3842975206611575e-06, |
| "loss": 0.0196, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.4101123595505616, |
| "grad_norm": 0.36069610714912415, |
| "learning_rate": 4.34297520661157e-06, |
| "loss": 0.0212, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.4157303370786516, |
| "grad_norm": 0.2530086636543274, |
| "learning_rate": 4.301652892561984e-06, |
| "loss": 0.0244, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.4213483146067416, |
| "grad_norm": 0.5015830397605896, |
| "learning_rate": 4.260330578512397e-06, |
| "loss": 0.0226, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.4269662921348316, |
| "grad_norm": 0.18124614655971527, |
| "learning_rate": 4.21900826446281e-06, |
| "loss": 0.0202, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.432584269662921, |
| "grad_norm": 0.2587762176990509, |
| "learning_rate": 4.177685950413223e-06, |
| "loss": 0.0157, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.438202247191011, |
| "grad_norm": 0.42097729444503784, |
| "learning_rate": 4.136363636363637e-06, |
| "loss": 0.0265, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.443820224719101, |
| "grad_norm": 0.2626970410346985, |
| "learning_rate": 4.09504132231405e-06, |
| "loss": 0.0163, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.449438202247191, |
| "grad_norm": 0.42754292488098145, |
| "learning_rate": 4.053719008264463e-06, |
| "loss": 0.0281, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.455056179775281, |
| "grad_norm": 0.32999956607818604, |
| "learning_rate": 4.012396694214876e-06, |
| "loss": 0.0206, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.460674157303371, |
| "grad_norm": 0.26131534576416016, |
| "learning_rate": 3.97107438016529e-06, |
| "loss": 0.0204, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.466292134831461, |
| "grad_norm": 0.6028513312339783, |
| "learning_rate": 3.929752066115703e-06, |
| "loss": 0.0237, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.4719101123595504, |
| "grad_norm": 0.42414769530296326, |
| "learning_rate": 3.888429752066116e-06, |
| "loss": 0.0233, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.4775280898876404, |
| "grad_norm": 0.21420839428901672, |
| "learning_rate": 3.847107438016529e-06, |
| "loss": 0.0179, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.4831460674157304, |
| "grad_norm": 0.13455547392368317, |
| "learning_rate": 3.8057851239669423e-06, |
| "loss": 0.0305, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.48876404494382, |
| "grad_norm": 0.2912025451660156, |
| "learning_rate": 3.764462809917356e-06, |
| "loss": 0.0239, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.49438202247191, |
| "grad_norm": 0.2664025127887726, |
| "learning_rate": 3.723140495867769e-06, |
| "loss": 0.0253, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.22163937985897064, |
| "learning_rate": 3.681818181818182e-06, |
| "loss": 0.0235, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.50561797752809, |
| "grad_norm": 0.8055572509765625, |
| "learning_rate": 3.6404958677685952e-06, |
| "loss": 0.0208, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.51123595505618, |
| "grad_norm": 0.42651161551475525, |
| "learning_rate": 3.599173553719009e-06, |
| "loss": 0.0262, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.5168539325842696, |
| "grad_norm": 0.29703494906425476, |
| "learning_rate": 3.557851239669422e-06, |
| "loss": 0.0145, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.5224719101123596, |
| "grad_norm": 0.2598767578601837, |
| "learning_rate": 3.516528925619835e-06, |
| "loss": 0.0188, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.5280898876404496, |
| "grad_norm": 0.39948177337646484, |
| "learning_rate": 3.475206611570248e-06, |
| "loss": 0.0231, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.5280898876404496, |
| "eval_f1": 0.7884074282498592, |
| "eval_loss": 0.02689271606504917, |
| "eval_precision": 0.7564794816414687, |
| "eval_recall": 0.8231492361927144, |
| "eval_runtime": 27.545, |
| "eval_samples_per_second": 142.676, |
| "eval_steps_per_second": 4.465, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.533707865168539, |
| "grad_norm": 0.36148425936698914, |
| "learning_rate": 3.4338842975206614e-06, |
| "loss": 0.0221, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.539325842696629, |
| "grad_norm": 0.05635352060198784, |
| "learning_rate": 3.392561983471075e-06, |
| "loss": 0.0115, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.544943820224719, |
| "grad_norm": 0.15450328588485718, |
| "learning_rate": 3.351239669421488e-06, |
| "loss": 0.0142, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.550561797752809, |
| "grad_norm": 0.7420428395271301, |
| "learning_rate": 3.309917355371901e-06, |
| "loss": 0.0242, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.556179775280899, |
| "grad_norm": 0.21072138845920563, |
| "learning_rate": 3.2685950413223143e-06, |
| "loss": 0.0154, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.561797752808989, |
| "grad_norm": 0.41004472970962524, |
| "learning_rate": 3.227272727272728e-06, |
| "loss": 0.027, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.567415730337079, |
| "grad_norm": 0.2680492401123047, |
| "learning_rate": 3.1859504132231408e-06, |
| "loss": 0.0328, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.5730337078651684, |
| "grad_norm": 0.2726670503616333, |
| "learning_rate": 3.144628099173554e-06, |
| "loss": 0.0208, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.5786516853932584, |
| "grad_norm": 0.07600165903568268, |
| "learning_rate": 3.1033057851239672e-06, |
| "loss": 0.0275, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.5842696629213484, |
| "grad_norm": 0.4380427598953247, |
| "learning_rate": 3.0619834710743804e-06, |
| "loss": 0.0441, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.5898876404494384, |
| "grad_norm": 1.735329270362854, |
| "learning_rate": 3.0206611570247932e-06, |
| "loss": 0.0298, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.595505617977528, |
| "grad_norm": 0.2738408148288727, |
| "learning_rate": 2.979338842975207e-06, |
| "loss": 0.0175, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.601123595505618, |
| "grad_norm": 0.46852007508277893, |
| "learning_rate": 2.93801652892562e-06, |
| "loss": 0.0166, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.606741573033708, |
| "grad_norm": 0.13703812658786774, |
| "learning_rate": 2.8966942148760334e-06, |
| "loss": 0.0298, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.6123595505617976, |
| "grad_norm": 0.3436073958873749, |
| "learning_rate": 2.855371900826446e-06, |
| "loss": 0.0203, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.6179775280898876, |
| "grad_norm": 0.5731604695320129, |
| "learning_rate": 2.81404958677686e-06, |
| "loss": 0.0166, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.6235955056179776, |
| "grad_norm": 0.3746987283229828, |
| "learning_rate": 2.772727272727273e-06, |
| "loss": 0.0223, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.629213483146067, |
| "grad_norm": 0.30169859528541565, |
| "learning_rate": 2.7314049586776863e-06, |
| "loss": 0.0277, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.634831460674157, |
| "grad_norm": 0.2838500738143921, |
| "learning_rate": 2.690082644628099e-06, |
| "loss": 0.0176, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.640449438202247, |
| "grad_norm": 0.14983642101287842, |
| "learning_rate": 2.6487603305785127e-06, |
| "loss": 0.0218, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.646067415730337, |
| "grad_norm": 0.28590282797813416, |
| "learning_rate": 2.607438016528926e-06, |
| "loss": 0.0248, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.6516853932584272, |
| "grad_norm": 0.16218622028827667, |
| "learning_rate": 2.566115702479339e-06, |
| "loss": 0.0192, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.657303370786517, |
| "grad_norm": 0.2653314471244812, |
| "learning_rate": 2.524793388429752e-06, |
| "loss": 0.0224, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.662921348314607, |
| "grad_norm": 0.19694961607456207, |
| "learning_rate": 2.4834710743801652e-06, |
| "loss": 0.0163, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.668539325842697, |
| "grad_norm": 0.396456241607666, |
| "learning_rate": 2.442148760330579e-06, |
| "loss": 0.0184, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.668539325842697, |
| "eval_f1": 0.7902769926512153, |
| "eval_loss": 0.0268043614923954, |
| "eval_precision": 0.761437908496732, |
| "eval_recall": 0.8213866039952996, |
| "eval_runtime": 27.5363, |
| "eval_samples_per_second": 142.721, |
| "eval_steps_per_second": 4.467, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.6741573033707864, |
| "grad_norm": 0.17081989347934723, |
| "learning_rate": 2.4008264462809917e-06, |
| "loss": 0.0165, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.6797752808988764, |
| "grad_norm": 0.248233824968338, |
| "learning_rate": 2.3595041322314054e-06, |
| "loss": 0.0165, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.6853932584269664, |
| "grad_norm": 0.5152795910835266, |
| "learning_rate": 2.318181818181818e-06, |
| "loss": 0.0232, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.691011235955056, |
| "grad_norm": 0.3962818384170532, |
| "learning_rate": 2.276859504132232e-06, |
| "loss": 0.0165, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.696629213483146, |
| "grad_norm": 0.8177412152290344, |
| "learning_rate": 2.2355371900826446e-06, |
| "loss": 0.0237, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.702247191011236, |
| "grad_norm": 0.3415655791759491, |
| "learning_rate": 2.1942148760330583e-06, |
| "loss": 0.0293, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.7078651685393256, |
| "grad_norm": 0.39049091935157776, |
| "learning_rate": 2.152892561983471e-06, |
| "loss": 0.0151, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.7134831460674156, |
| "grad_norm": 0.25125929713249207, |
| "learning_rate": 2.1115702479338847e-06, |
| "loss": 0.0246, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.7191011235955056, |
| "grad_norm": 0.3042113482952118, |
| "learning_rate": 2.0702479338842975e-06, |
| "loss": 0.0131, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.7247191011235956, |
| "grad_norm": 0.3315228223800659, |
| "learning_rate": 2.0289256198347108e-06, |
| "loss": 0.0355, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.7303370786516856, |
| "grad_norm": 0.7296513319015503, |
| "learning_rate": 1.987603305785124e-06, |
| "loss": 0.0167, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.735955056179775, |
| "grad_norm": 0.1494741439819336, |
| "learning_rate": 1.9462809917355372e-06, |
| "loss": 0.0158, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.741573033707865, |
| "grad_norm": 1.2954223155975342, |
| "learning_rate": 1.9049586776859505e-06, |
| "loss": 0.0243, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.747191011235955, |
| "grad_norm": 0.5072038769721985, |
| "learning_rate": 1.863636363636364e-06, |
| "loss": 0.0257, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.752808988764045, |
| "grad_norm": 0.3296859562397003, |
| "learning_rate": 1.822314049586777e-06, |
| "loss": 0.0198, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.758426966292135, |
| "grad_norm": 0.11316215991973877, |
| "learning_rate": 1.7809917355371904e-06, |
| "loss": 0.0185, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.764044943820225, |
| "grad_norm": 0.15724743902683258, |
| "learning_rate": 1.7396694214876034e-06, |
| "loss": 0.0156, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.7696629213483144, |
| "grad_norm": 0.20462727546691895, |
| "learning_rate": 1.6983471074380168e-06, |
| "loss": 0.0225, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.7752808988764044, |
| "grad_norm": 0.3503582179546356, |
| "learning_rate": 1.6570247933884298e-06, |
| "loss": 0.0191, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.7808988764044944, |
| "grad_norm": 0.3289487361907959, |
| "learning_rate": 1.615702479338843e-06, |
| "loss": 0.0241, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.7865168539325844, |
| "grad_norm": 0.3454688787460327, |
| "learning_rate": 1.5743801652892563e-06, |
| "loss": 0.0154, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.7921348314606744, |
| "grad_norm": 0.07760006934404373, |
| "learning_rate": 1.5330578512396695e-06, |
| "loss": 0.0124, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.797752808988764, |
| "grad_norm": 0.6230753064155579, |
| "learning_rate": 1.4917355371900828e-06, |
| "loss": 0.0198, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.803370786516854, |
| "grad_norm": 0.399117648601532, |
| "learning_rate": 1.450413223140496e-06, |
| "loss": 0.022, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.808988764044944, |
| "grad_norm": 0.24584145843982697, |
| "learning_rate": 1.409090909090909e-06, |
| "loss": 0.0199, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.808988764044944, |
| "eval_f1": 0.790944661822247, |
| "eval_loss": 0.02688935212790966, |
| "eval_precision": 0.7542643923240938, |
| "eval_recall": 0.8313748531139835, |
| "eval_runtime": 27.5368, |
| "eval_samples_per_second": 142.718, |
| "eval_steps_per_second": 4.467, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.8146067415730336, |
| "grad_norm": 0.27072691917419434, |
| "learning_rate": 1.3677685950413225e-06, |
| "loss": 0.0147, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.8202247191011236, |
| "grad_norm": 0.2784029543399811, |
| "learning_rate": 1.3264462809917355e-06, |
| "loss": 0.0365, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.8258426966292136, |
| "grad_norm": 0.3331715166568756, |
| "learning_rate": 1.285123966942149e-06, |
| "loss": 0.0161, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.831460674157303, |
| "grad_norm": 0.432522177696228, |
| "learning_rate": 1.2438016528925622e-06, |
| "loss": 0.0199, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.837078651685393, |
| "grad_norm": 0.22144052386283875, |
| "learning_rate": 1.2024793388429754e-06, |
| "loss": 0.0218, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.842696629213483, |
| "grad_norm": 0.30622825026512146, |
| "learning_rate": 1.1611570247933886e-06, |
| "loss": 0.0235, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.8483146067415728, |
| "grad_norm": 0.29084160923957825, |
| "learning_rate": 1.1198347107438018e-06, |
| "loss": 0.0136, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.853932584269663, |
| "grad_norm": 0.4032902717590332, |
| "learning_rate": 1.078512396694215e-06, |
| "loss": 0.0169, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.859550561797753, |
| "grad_norm": 0.2889857292175293, |
| "learning_rate": 1.0371900826446283e-06, |
| "loss": 0.0248, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.865168539325843, |
| "grad_norm": 0.40357986092567444, |
| "learning_rate": 9.958677685950415e-07, |
| "loss": 0.0242, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.870786516853933, |
| "grad_norm": 0.2112303227186203, |
| "learning_rate": 9.545454545454548e-07, |
| "loss": 0.0295, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.8764044943820224, |
| "grad_norm": 0.12360019236803055, |
| "learning_rate": 9.132231404958679e-07, |
| "loss": 0.0315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.8820224719101124, |
| "grad_norm": 0.37716934084892273, |
| "learning_rate": 8.719008264462811e-07, |
| "loss": 0.0181, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.8876404494382024, |
| "grad_norm": 0.3970218002796173, |
| "learning_rate": 8.305785123966943e-07, |
| "loss": 0.0219, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.893258426966292, |
| "grad_norm": 0.32977351546287537, |
| "learning_rate": 7.892561983471076e-07, |
| "loss": 0.0232, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.898876404494382, |
| "grad_norm": 1.0419588088989258, |
| "learning_rate": 7.479338842975208e-07, |
| "loss": 0.0296, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.904494382022472, |
| "grad_norm": 0.6743459105491638, |
| "learning_rate": 7.066115702479339e-07, |
| "loss": 0.0232, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.9101123595505616, |
| "grad_norm": 0.37269556522369385, |
| "learning_rate": 6.652892561983472e-07, |
| "loss": 0.0246, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.9157303370786516, |
| "grad_norm": 0.29990169405937195, |
| "learning_rate": 6.239669421487604e-07, |
| "loss": 0.0195, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.9213483146067416, |
| "grad_norm": 0.31683966517448425, |
| "learning_rate": 5.826446280991736e-07, |
| "loss": 0.0184, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.9269662921348316, |
| "grad_norm": 0.3079575002193451, |
| "learning_rate": 5.413223140495869e-07, |
| "loss": 0.0207, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.932584269662921, |
| "grad_norm": 0.4728926718235016, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.0227, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.938202247191011, |
| "grad_norm": 0.3681657910346985, |
| "learning_rate": 4.586776859504133e-07, |
| "loss": 0.0165, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.943820224719101, |
| "grad_norm": 0.2549396753311157, |
| "learning_rate": 4.173553719008265e-07, |
| "loss": 0.019, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.949438202247191, |
| "grad_norm": 0.36846932768821716, |
| "learning_rate": 3.760330578512397e-07, |
| "loss": 0.0245, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.949438202247191, |
| "eval_f1": 0.7906976744186046, |
| "eval_loss": 0.02675323560833931, |
| "eval_precision": 0.7557579003749331, |
| "eval_recall": 0.8290246768507638, |
| "eval_runtime": 28.1644, |
| "eval_samples_per_second": 139.538, |
| "eval_steps_per_second": 4.367, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.955056179775281, |
| "grad_norm": 0.3710382282733917, |
| "learning_rate": 3.3471074380165295e-07, |
| "loss": 0.0188, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.960674157303371, |
| "grad_norm": 0.36082541942596436, |
| "learning_rate": 2.9338842975206613e-07, |
| "loss": 0.027, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.966292134831461, |
| "grad_norm": 0.573581874370575, |
| "learning_rate": 2.5206611570247936e-07, |
| "loss": 0.0183, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.9719101123595504, |
| "grad_norm": 0.30395030975341797, |
| "learning_rate": 2.1074380165289256e-07, |
| "loss": 0.0232, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.9775280898876404, |
| "grad_norm": 0.38595783710479736, |
| "learning_rate": 1.694214876033058e-07, |
| "loss": 0.0173, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.9831460674157304, |
| "grad_norm": 0.25339680910110474, |
| "learning_rate": 1.2809917355371902e-07, |
| "loss": 0.0167, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.98876404494382, |
| "grad_norm": 0.41702669858932495, |
| "learning_rate": 8.677685950413224e-08, |
| "loss": 0.0205, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.99438202247191, |
| "grad_norm": 0.18185748159885406, |
| "learning_rate": 4.545454545454546e-08, |
| "loss": 0.0151, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7380737662315369, |
| "learning_rate": 4.132231404958678e-09, |
| "loss": 0.0193, |
| "step": 5340 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5340, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5580565286704128.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|