| { |
| "best_metric": 0.157407745718956, |
| "best_model_checkpoint": "hlbooks-topic-classifier-bert-multilingual-uncased/checkpoint-7578", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 7578, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009897070467141725, |
| "grad_norm": 3.174437999725342, |
| "learning_rate": 1.5171503957783642e-06, |
| "loss": 3.0686, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01979414093428345, |
| "grad_norm": 5.321998119354248, |
| "learning_rate": 3.1002638522427443e-06, |
| "loss": 2.9657, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.029691211401425176, |
| "grad_norm": 7.038234710693359, |
| "learning_rate": 4.617414248021108e-06, |
| "loss": 2.8352, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0395882818685669, |
| "grad_norm": 6.537544250488281, |
| "learning_rate": 6.200527704485489e-06, |
| "loss": 2.738, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04948535233570863, |
| "grad_norm": 6.735942840576172, |
| "learning_rate": 7.849604221635884e-06, |
| "loss": 2.6217, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05938242280285035, |
| "grad_norm": 5.756272792816162, |
| "learning_rate": 9.49868073878628e-06, |
| "loss": 2.4216, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06927949326999208, |
| "grad_norm": 5.069972991943359, |
| "learning_rate": 1.1147757255936676e-05, |
| "loss": 2.2817, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0791765637371338, |
| "grad_norm": 8.664924621582031, |
| "learning_rate": 1.2796833773087072e-05, |
| "loss": 2.1123, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08907363420427554, |
| "grad_norm": 9.564645767211914, |
| "learning_rate": 1.4445910290237468e-05, |
| "loss": 1.8298, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.09897070467141726, |
| "grad_norm": 18.612655639648438, |
| "learning_rate": 1.6094986807387864e-05, |
| "loss": 1.4781, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10886777513855898, |
| "grad_norm": 7.96259069442749, |
| "learning_rate": 1.774406332453826e-05, |
| "loss": 1.176, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1187648456057007, |
| "grad_norm": 9.508382797241211, |
| "learning_rate": 1.9393139841688653e-05, |
| "loss": 1.038, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12866191607284244, |
| "grad_norm": 9.386768341064453, |
| "learning_rate": 2.104221635883905e-05, |
| "loss": 0.9101, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.13855898653998416, |
| "grad_norm": 8.491036415100098, |
| "learning_rate": 2.269129287598945e-05, |
| "loss": 0.8534, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14845605700712589, |
| "grad_norm": 13.790663719177246, |
| "learning_rate": 2.4340369393139843e-05, |
| "loss": 0.6119, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1583531274742676, |
| "grad_norm": 14.771766662597656, |
| "learning_rate": 2.5989445910290237e-05, |
| "loss": 0.6075, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16825019794140933, |
| "grad_norm": 5.174952030181885, |
| "learning_rate": 2.763852242744063e-05, |
| "loss": 0.6037, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.17814726840855108, |
| "grad_norm": 12.023221015930176, |
| "learning_rate": 2.9287598944591033e-05, |
| "loss": 0.5757, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1880443388756928, |
| "grad_norm": 6.669355869293213, |
| "learning_rate": 3.093667546174143e-05, |
| "loss": 0.5343, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.19794140934283452, |
| "grad_norm": 11.855779647827148, |
| "learning_rate": 3.258575197889182e-05, |
| "loss": 0.5008, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20783847980997625, |
| "grad_norm": 3.5531342029571533, |
| "learning_rate": 3.423482849604222e-05, |
| "loss": 0.4706, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.21773555027711797, |
| "grad_norm": 26.04022216796875, |
| "learning_rate": 3.588390501319262e-05, |
| "loss": 0.483, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2276326207442597, |
| "grad_norm": 7.481447696685791, |
| "learning_rate": 3.753298153034301e-05, |
| "loss": 0.4332, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2375296912114014, |
| "grad_norm": 18.177900314331055, |
| "learning_rate": 3.9182058047493406e-05, |
| "loss": 0.444, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24742676167854316, |
| "grad_norm": 10.481646537780762, |
| "learning_rate": 4.08311345646438e-05, |
| "loss": 0.3346, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2573238321456849, |
| "grad_norm": 15.196333885192871, |
| "learning_rate": 4.2480211081794194e-05, |
| "loss": 0.3647, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2672209026128266, |
| "grad_norm": 8.303031921386719, |
| "learning_rate": 4.412928759894459e-05, |
| "loss": 0.4139, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.2771179730799683, |
| "grad_norm": 9.322664260864258, |
| "learning_rate": 4.577836411609499e-05, |
| "loss": 0.4656, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.28701504354711005, |
| "grad_norm": 7.468392848968506, |
| "learning_rate": 4.7427440633245384e-05, |
| "loss": 0.3996, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.29691211401425177, |
| "grad_norm": 7.190126895904541, |
| "learning_rate": 4.907651715039578e-05, |
| "loss": 0.3771, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3068091844813935, |
| "grad_norm": 8.512736320495605, |
| "learning_rate": 4.991935483870968e-05, |
| "loss": 0.4247, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3167062549485352, |
| "grad_norm": 15.00452995300293, |
| "learning_rate": 4.973607038123168e-05, |
| "loss": 0.3576, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.32660332541567694, |
| "grad_norm": 19.776445388793945, |
| "learning_rate": 4.955278592375367e-05, |
| "loss": 0.347, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.33650039588281866, |
| "grad_norm": 9.140093803405762, |
| "learning_rate": 4.9369501466275664e-05, |
| "loss": 0.3799, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.34639746634996044, |
| "grad_norm": 12.198457717895508, |
| "learning_rate": 4.918621700879766e-05, |
| "loss": 0.4086, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.35629453681710216, |
| "grad_norm": 3.6618921756744385, |
| "learning_rate": 4.900293255131965e-05, |
| "loss": 0.3457, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3661916072842439, |
| "grad_norm": 17.983041763305664, |
| "learning_rate": 4.881964809384165e-05, |
| "loss": 0.364, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3760886777513856, |
| "grad_norm": 0.4606013000011444, |
| "learning_rate": 4.863636363636364e-05, |
| "loss": 0.2662, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3859857482185273, |
| "grad_norm": 5.343178749084473, |
| "learning_rate": 4.8453079178885635e-05, |
| "loss": 0.3289, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.39588281868566905, |
| "grad_norm": 11.04477596282959, |
| "learning_rate": 4.826979472140763e-05, |
| "loss": 0.3723, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.40577988915281077, |
| "grad_norm": 8.415637016296387, |
| "learning_rate": 4.808651026392962e-05, |
| "loss": 0.29, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4156769596199525, |
| "grad_norm": 37.04660415649414, |
| "learning_rate": 4.790322580645161e-05, |
| "loss": 0.4181, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4255740300870942, |
| "grad_norm": 11.579482078552246, |
| "learning_rate": 4.7719941348973606e-05, |
| "loss": 0.2785, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.43547110055423593, |
| "grad_norm": 8.857477188110352, |
| "learning_rate": 4.75366568914956e-05, |
| "loss": 0.3266, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.44536817102137766, |
| "grad_norm": 5.317532539367676, |
| "learning_rate": 4.73533724340176e-05, |
| "loss": 0.344, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.4552652414885194, |
| "grad_norm": 4.178307056427002, |
| "learning_rate": 4.717008797653959e-05, |
| "loss": 0.3613, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4651623119556611, |
| "grad_norm": 8.49862003326416, |
| "learning_rate": 4.6986803519061584e-05, |
| "loss": 0.2527, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.4750593824228028, |
| "grad_norm": 0.5914684534072876, |
| "learning_rate": 4.6803519061583577e-05, |
| "loss": 0.258, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4849564528899446, |
| "grad_norm": 3.90535306930542, |
| "learning_rate": 4.662023460410557e-05, |
| "loss": 0.3091, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4948535233570863, |
| "grad_norm": 7.211574077606201, |
| "learning_rate": 4.643695014662757e-05, |
| "loss": 0.3302, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.504750593824228, |
| "grad_norm": 9.782793045043945, |
| "learning_rate": 4.625366568914956e-05, |
| "loss": 0.3487, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5146476642913698, |
| "grad_norm": 9.339737892150879, |
| "learning_rate": 4.6070381231671554e-05, |
| "loss": 0.2164, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5245447347585115, |
| "grad_norm": 3.444295644760132, |
| "learning_rate": 4.588709677419355e-05, |
| "loss": 0.2874, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5344418052256532, |
| "grad_norm": 15.61107063293457, |
| "learning_rate": 4.570381231671555e-05, |
| "loss": 0.3161, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5443388756927949, |
| "grad_norm": 12.160496711730957, |
| "learning_rate": 4.552052785923754e-05, |
| "loss": 0.2242, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5542359461599367, |
| "grad_norm": 7.081170558929443, |
| "learning_rate": 4.533724340175953e-05, |
| "loss": 0.2625, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5641330166270784, |
| "grad_norm": 2.664806365966797, |
| "learning_rate": 4.5153958944281525e-05, |
| "loss": 0.2885, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5740300870942201, |
| "grad_norm": 5.956775188446045, |
| "learning_rate": 4.497067448680352e-05, |
| "loss": 0.3064, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5839271575613618, |
| "grad_norm": 10.270496368408203, |
| "learning_rate": 4.478739002932552e-05, |
| "loss": 0.2519, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5938242280285035, |
| "grad_norm": 11.354063987731934, |
| "learning_rate": 4.460410557184751e-05, |
| "loss": 0.2874, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6037212984956453, |
| "grad_norm": 4.400442600250244, |
| "learning_rate": 4.44208211143695e-05, |
| "loss": 0.248, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.613618368962787, |
| "grad_norm": 5.145227909088135, |
| "learning_rate": 4.4237536656891496e-05, |
| "loss": 0.2757, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6235154394299287, |
| "grad_norm": 6.018128395080566, |
| "learning_rate": 4.4054252199413495e-05, |
| "loss": 0.2929, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6334125098970704, |
| "grad_norm": 11.10319709777832, |
| "learning_rate": 4.387096774193549e-05, |
| "loss": 0.2645, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6433095803642122, |
| "grad_norm": 4.846808433532715, |
| "learning_rate": 4.368768328445748e-05, |
| "loss": 0.1884, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6532066508313539, |
| "grad_norm": 20.15575408935547, |
| "learning_rate": 4.3504398826979474e-05, |
| "loss": 0.2996, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6631037212984956, |
| "grad_norm": 15.41659164428711, |
| "learning_rate": 4.3321114369501466e-05, |
| "loss": 0.2613, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6730007917656373, |
| "grad_norm": 9.153544425964355, |
| "learning_rate": 4.3137829912023466e-05, |
| "loss": 0.2867, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6828978622327792, |
| "grad_norm": 6.918684482574463, |
| "learning_rate": 4.295454545454546e-05, |
| "loss": 0.1998, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6927949326999209, |
| "grad_norm": 3.967953681945801, |
| "learning_rate": 4.277126099706745e-05, |
| "loss": 0.2632, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7026920031670626, |
| "grad_norm": 6.128458023071289, |
| "learning_rate": 4.2587976539589444e-05, |
| "loss": 0.2471, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.7125890736342043, |
| "grad_norm": 3.5776052474975586, |
| "learning_rate": 4.2404692082111444e-05, |
| "loss": 0.2609, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.722486144101346, |
| "grad_norm": 8.483747482299805, |
| "learning_rate": 4.222140762463344e-05, |
| "loss": 0.2468, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7323832145684878, |
| "grad_norm": 3.365809679031372, |
| "learning_rate": 4.203812316715543e-05, |
| "loss": 0.2166, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7422802850356295, |
| "grad_norm": 6.7934489250183105, |
| "learning_rate": 4.1854838709677415e-05, |
| "loss": 0.207, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7521773555027712, |
| "grad_norm": 10.689802169799805, |
| "learning_rate": 4.1671554252199415e-05, |
| "loss": 0.2137, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7620744259699129, |
| "grad_norm": 8.500500679016113, |
| "learning_rate": 4.148826979472141e-05, |
| "loss": 0.2068, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7719714964370546, |
| "grad_norm": 3.0795400142669678, |
| "learning_rate": 4.13049853372434e-05, |
| "loss": 0.2335, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7818685669041964, |
| "grad_norm": 6.6960768699646, |
| "learning_rate": 4.112170087976539e-05, |
| "loss": 0.3198, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7917656373713381, |
| "grad_norm": 9.185827255249023, |
| "learning_rate": 4.093841642228739e-05, |
| "loss": 0.2917, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8016627078384798, |
| "grad_norm": 10.589933395385742, |
| "learning_rate": 4.0755131964809386e-05, |
| "loss": 0.2764, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.8115597783056215, |
| "grad_norm": 4.64451789855957, |
| "learning_rate": 4.057184750733138e-05, |
| "loss": 0.2737, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8214568487727633, |
| "grad_norm": 17.72431182861328, |
| "learning_rate": 4.038856304985337e-05, |
| "loss": 0.2484, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.831353919239905, |
| "grad_norm": 10.842966079711914, |
| "learning_rate": 4.0205278592375364e-05, |
| "loss": 0.2172, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8412509897070467, |
| "grad_norm": 4.673035621643066, |
| "learning_rate": 4.0021994134897364e-05, |
| "loss": 0.2094, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8511480601741884, |
| "grad_norm": 7.97542142868042, |
| "learning_rate": 3.9838709677419356e-05, |
| "loss": 0.2674, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8610451306413301, |
| "grad_norm": 3.362852096557617, |
| "learning_rate": 3.965542521994135e-05, |
| "loss": 0.2136, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8709422011084719, |
| "grad_norm": 8.911001205444336, |
| "learning_rate": 3.947214076246334e-05, |
| "loss": 0.1915, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8808392715756136, |
| "grad_norm": 8.764837265014648, |
| "learning_rate": 3.928885630498534e-05, |
| "loss": 0.1691, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8907363420427553, |
| "grad_norm": 9.035571098327637, |
| "learning_rate": 3.9105571847507334e-05, |
| "loss": 0.2329, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.900633412509897, |
| "grad_norm": 3.9743757247924805, |
| "learning_rate": 3.892228739002933e-05, |
| "loss": 0.2151, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.9105304829770388, |
| "grad_norm": 4.488095283508301, |
| "learning_rate": 3.873900293255132e-05, |
| "loss": 0.2525, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9204275534441805, |
| "grad_norm": 7.458625316619873, |
| "learning_rate": 3.855571847507331e-05, |
| "loss": 0.225, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.9303246239113222, |
| "grad_norm": 12.148482322692871, |
| "learning_rate": 3.837243401759531e-05, |
| "loss": 0.2642, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9402216943784639, |
| "grad_norm": 5.104764461517334, |
| "learning_rate": 3.8189149560117305e-05, |
| "loss": 0.2268, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9501187648456056, |
| "grad_norm": 4.338552951812744, |
| "learning_rate": 3.80058651026393e-05, |
| "loss": 0.217, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9600158353127475, |
| "grad_norm": 4.058621406555176, |
| "learning_rate": 3.782258064516129e-05, |
| "loss": 0.2058, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9699129057798892, |
| "grad_norm": 4.036930084228516, |
| "learning_rate": 3.763929618768329e-05, |
| "loss": 0.2721, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9798099762470309, |
| "grad_norm": 5.858448028564453, |
| "learning_rate": 3.745601173020528e-05, |
| "loss": 0.2379, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9897070467141726, |
| "grad_norm": 10.197368621826172, |
| "learning_rate": 3.7272727272727276e-05, |
| "loss": 0.2552, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9996041171813144, |
| "grad_norm": 3.942063093185425, |
| "learning_rate": 3.708944281524927e-05, |
| "loss": 0.1938, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9472, |
| "eval_f1_macro": 0.9359430887114246, |
| "eval_f1_micro": 0.9472, |
| "eval_f1_weighted": 0.947114042501874, |
| "eval_loss": 0.20199425518512726, |
| "eval_precision_macro": 0.9529085930911292, |
| "eval_precision_micro": 0.9472, |
| "eval_precision_weighted": 0.948726542737522, |
| "eval_recall_macro": 0.9217770604016604, |
| "eval_recall_micro": 0.9472, |
| "eval_recall_weighted": 0.9472, |
| "eval_runtime": 5.1028, |
| "eval_samples_per_second": 979.852, |
| "eval_steps_per_second": 15.482, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.009501187648456, |
| "grad_norm": 2.8100507259368896, |
| "learning_rate": 3.690615835777126e-05, |
| "loss": 0.1958, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.0193982581155978, |
| "grad_norm": 4.251704692840576, |
| "learning_rate": 3.672287390029326e-05, |
| "loss": 0.1684, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.0292953285827395, |
| "grad_norm": 13.66088581085205, |
| "learning_rate": 3.6539589442815254e-05, |
| "loss": 0.1564, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.0391923990498813, |
| "grad_norm": 6.338856220245361, |
| "learning_rate": 3.6356304985337246e-05, |
| "loss": 0.1673, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.049089469517023, |
| "grad_norm": 0.7401424646377563, |
| "learning_rate": 3.617302052785924e-05, |
| "loss": 0.1579, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.0589865399841647, |
| "grad_norm": 7.7578325271606445, |
| "learning_rate": 3.598973607038124e-05, |
| "loss": 0.1639, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.0688836104513064, |
| "grad_norm": 10.281119346618652, |
| "learning_rate": 3.580645161290323e-05, |
| "loss": 0.1367, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.0787806809184481, |
| "grad_norm": 9.214536666870117, |
| "learning_rate": 3.562316715542522e-05, |
| "loss": 0.1913, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.0886777513855899, |
| "grad_norm": 7.096231937408447, |
| "learning_rate": 3.543988269794721e-05, |
| "loss": 0.1718, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.0985748218527316, |
| "grad_norm": 1.1231356859207153, |
| "learning_rate": 3.525659824046921e-05, |
| "loss": 0.1623, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.1084718923198733, |
| "grad_norm": 9.105703353881836, |
| "learning_rate": 3.50733137829912e-05, |
| "loss": 0.1516, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.118368962787015, |
| "grad_norm": 2.2311670780181885, |
| "learning_rate": 3.4890029325513195e-05, |
| "loss": 0.1094, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.1282660332541568, |
| "grad_norm": 5.654956817626953, |
| "learning_rate": 3.470674486803519e-05, |
| "loss": 0.2151, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.1381631037212985, |
| "grad_norm": 0.6430861353874207, |
| "learning_rate": 3.452346041055719e-05, |
| "loss": 0.1211, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.1480601741884402, |
| "grad_norm": 4.405457496643066, |
| "learning_rate": 3.434017595307918e-05, |
| "loss": 0.1062, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.157957244655582, |
| "grad_norm": 3.2275402545928955, |
| "learning_rate": 3.415689149560117e-05, |
| "loss": 0.1378, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.1678543151227236, |
| "grad_norm": 3.635753870010376, |
| "learning_rate": 3.3973607038123166e-05, |
| "loss": 0.1489, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.1777513855898654, |
| "grad_norm": 2.695546865463257, |
| "learning_rate": 3.379032258064516e-05, |
| "loss": 0.1703, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.187648456057007, |
| "grad_norm": 1.3425699472427368, |
| "learning_rate": 3.360703812316716e-05, |
| "loss": 0.1421, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1975455265241488, |
| "grad_norm": 11.01319408416748, |
| "learning_rate": 3.342375366568915e-05, |
| "loss": 0.1221, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.2074425969912905, |
| "grad_norm": 12.686071395874023, |
| "learning_rate": 3.3240469208211144e-05, |
| "loss": 0.1699, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.2173396674584323, |
| "grad_norm": 8.48775577545166, |
| "learning_rate": 3.305718475073314e-05, |
| "loss": 0.1505, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.227236737925574, |
| "grad_norm": 5.593795299530029, |
| "learning_rate": 3.2873900293255136e-05, |
| "loss": 0.0906, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.2371338083927157, |
| "grad_norm": 13.118975639343262, |
| "learning_rate": 3.269061583577713e-05, |
| "loss": 0.1389, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.2470308788598574, |
| "grad_norm": 10.423203468322754, |
| "learning_rate": 3.250733137829912e-05, |
| "loss": 0.1789, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.2569279493269991, |
| "grad_norm": 8.423437118530273, |
| "learning_rate": 3.2324046920821115e-05, |
| "loss": 0.1683, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.2668250197941409, |
| "grad_norm": 1.197938323020935, |
| "learning_rate": 3.214076246334311e-05, |
| "loss": 0.1356, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.2767220902612826, |
| "grad_norm": 4.034856796264648, |
| "learning_rate": 3.195747800586511e-05, |
| "loss": 0.2099, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.2866191607284243, |
| "grad_norm": 6.324892997741699, |
| "learning_rate": 3.17741935483871e-05, |
| "loss": 0.1658, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.2965162311955662, |
| "grad_norm": 7.48460054397583, |
| "learning_rate": 3.159090909090909e-05, |
| "loss": 0.131, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.3064133016627077, |
| "grad_norm": 21.121984481811523, |
| "learning_rate": 3.1407624633431085e-05, |
| "loss": 0.1781, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.3163103721298497, |
| "grad_norm": 3.5356669425964355, |
| "learning_rate": 3.1224340175953085e-05, |
| "loss": 0.1873, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.3262074425969912, |
| "grad_norm": 9.41774845123291, |
| "learning_rate": 3.104105571847508e-05, |
| "loss": 0.1325, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.3361045130641331, |
| "grad_norm": 14.932723045349121, |
| "learning_rate": 3.085777126099707e-05, |
| "loss": 0.1727, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.3460015835312746, |
| "grad_norm": 6.5432233810424805, |
| "learning_rate": 3.067448680351906e-05, |
| "loss": 0.1487, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.3558986539984166, |
| "grad_norm": 2.8642232418060303, |
| "learning_rate": 3.049120234604106e-05, |
| "loss": 0.1365, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.365795724465558, |
| "grad_norm": 5.638850688934326, |
| "learning_rate": 3.0307917888563052e-05, |
| "loss": 0.1628, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.3756927949327, |
| "grad_norm": 0.21325694024562836, |
| "learning_rate": 3.0124633431085048e-05, |
| "loss": 0.1555, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.3855898653998415, |
| "grad_norm": 3.200031042098999, |
| "learning_rate": 2.994134897360704e-05, |
| "loss": 0.1263, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.3954869358669835, |
| "grad_norm": 10.525226593017578, |
| "learning_rate": 2.9758064516129037e-05, |
| "loss": 0.1657, |
| "step": 3525 |
| }, |
| { |
| "epoch": 1.405384006334125, |
| "grad_norm": 4.1915283203125, |
| "learning_rate": 2.957478005865103e-05, |
| "loss": 0.1551, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.415281076801267, |
| "grad_norm": 9.446343421936035, |
| "learning_rate": 2.9391495601173026e-05, |
| "loss": 0.1293, |
| "step": 3575 |
| }, |
| { |
| "epoch": 1.4251781472684084, |
| "grad_norm": 15.011846542358398, |
| "learning_rate": 2.9208211143695012e-05, |
| "loss": 0.1402, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.4350752177355504, |
| "grad_norm": 0.7313398718833923, |
| "learning_rate": 2.902492668621701e-05, |
| "loss": 0.1057, |
| "step": 3625 |
| }, |
| { |
| "epoch": 1.444972288202692, |
| "grad_norm": 19.252836227416992, |
| "learning_rate": 2.8841642228739e-05, |
| "loss": 0.1155, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.4548693586698338, |
| "grad_norm": 1.615921974182129, |
| "learning_rate": 2.8658357771260997e-05, |
| "loss": 0.1466, |
| "step": 3675 |
| }, |
| { |
| "epoch": 1.4647664291369755, |
| "grad_norm": 2.299511432647705, |
| "learning_rate": 2.847507331378299e-05, |
| "loss": 0.1147, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.4746634996041172, |
| "grad_norm": 3.294553756713867, |
| "learning_rate": 2.8291788856304986e-05, |
| "loss": 0.1232, |
| "step": 3725 |
| }, |
| { |
| "epoch": 1.484560570071259, |
| "grad_norm": 0.17168129980564117, |
| "learning_rate": 2.810850439882698e-05, |
| "loss": 0.1586, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.4944576405384007, |
| "grad_norm": 12.83199691772461, |
| "learning_rate": 2.7925219941348972e-05, |
| "loss": 0.1096, |
| "step": 3775 |
| }, |
| { |
| "epoch": 1.5043547110055424, |
| "grad_norm": 12.708085060119629, |
| "learning_rate": 2.7741935483870968e-05, |
| "loss": 0.1043, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.5142517814726841, |
| "grad_norm": 4.487904071807861, |
| "learning_rate": 2.755865102639296e-05, |
| "loss": 0.1116, |
| "step": 3825 |
| }, |
| { |
| "epoch": 1.5241488519398259, |
| "grad_norm": 5.373720645904541, |
| "learning_rate": 2.7375366568914957e-05, |
| "loss": 0.1456, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.5340459224069676, |
| "grad_norm": 0.20354461669921875, |
| "learning_rate": 2.719208211143695e-05, |
| "loss": 0.1209, |
| "step": 3875 |
| }, |
| { |
| "epoch": 1.5439429928741093, |
| "grad_norm": 5.304108142852783, |
| "learning_rate": 2.7008797653958946e-05, |
| "loss": 0.1816, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.553840063341251, |
| "grad_norm": 0.092073954641819, |
| "learning_rate": 2.682551319648094e-05, |
| "loss": 0.1568, |
| "step": 3925 |
| }, |
| { |
| "epoch": 1.5637371338083927, |
| "grad_norm": 9.854479789733887, |
| "learning_rate": 2.6642228739002935e-05, |
| "loss": 0.1741, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.5736342042755345, |
| "grad_norm": 0.4590989947319031, |
| "learning_rate": 2.6458944281524928e-05, |
| "loss": 0.1702, |
| "step": 3975 |
| }, |
| { |
| "epoch": 1.5835312747426762, |
| "grad_norm": 0.07429279386997223, |
| "learning_rate": 2.6275659824046924e-05, |
| "loss": 0.0975, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.593428345209818, |
| "grad_norm": 5.391401290893555, |
| "learning_rate": 2.6092375366568917e-05, |
| "loss": 0.1497, |
| "step": 4025 |
| }, |
| { |
| "epoch": 1.6033254156769596, |
| "grad_norm": 0.18375837802886963, |
| "learning_rate": 2.590909090909091e-05, |
| "loss": 0.116, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.6132224861441014, |
| "grad_norm": 13.557960510253906, |
| "learning_rate": 2.5725806451612905e-05, |
| "loss": 0.1927, |
| "step": 4075 |
| }, |
| { |
| "epoch": 1.623119556611243, |
| "grad_norm": 1.467595100402832, |
| "learning_rate": 2.5542521994134898e-05, |
| "loss": 0.1396, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.6330166270783848, |
| "grad_norm": 5.85172700881958, |
| "learning_rate": 2.5359237536656894e-05, |
| "loss": 0.1363, |
| "step": 4125 |
| }, |
| { |
| "epoch": 1.6429136975455265, |
| "grad_norm": 1.1925976276397705, |
| "learning_rate": 2.5175953079178887e-05, |
| "loss": 0.1604, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.6528107680126682, |
| "grad_norm": 8.975228309631348, |
| "learning_rate": 2.4992668621700883e-05, |
| "loss": 0.1534, |
| "step": 4175 |
| }, |
| { |
| "epoch": 1.66270783847981, |
| "grad_norm": 3.5665903091430664, |
| "learning_rate": 2.4809384164222876e-05, |
| "loss": 0.1466, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.6726049089469517, |
| "grad_norm": 5.208387851715088, |
| "learning_rate": 2.462609970674487e-05, |
| "loss": 0.101, |
| "step": 4225 |
| }, |
| { |
| "epoch": 1.6825019794140934, |
| "grad_norm": 8.346717834472656, |
| "learning_rate": 2.444281524926686e-05, |
| "loss": 0.0966, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.6923990498812351, |
| "grad_norm": 1.0066956281661987, |
| "learning_rate": 2.4259530791788858e-05, |
| "loss": 0.1251, |
| "step": 4275 |
| }, |
| { |
| "epoch": 1.7022961203483769, |
| "grad_norm": 8.269057273864746, |
| "learning_rate": 2.407624633431085e-05, |
| "loss": 0.1595, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.7121931908155186, |
| "grad_norm": 6.28223180770874, |
| "learning_rate": 2.3892961876832843e-05, |
| "loss": 0.1732, |
| "step": 4325 |
| }, |
| { |
| "epoch": 1.7220902612826603, |
| "grad_norm": 5.962674140930176, |
| "learning_rate": 2.370967741935484e-05, |
| "loss": 0.1601, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.731987331749802, |
| "grad_norm": 4.525330066680908, |
| "learning_rate": 2.3526392961876832e-05, |
| "loss": 0.1427, |
| "step": 4375 |
| }, |
| { |
| "epoch": 1.7418844022169437, |
| "grad_norm": 9.384072303771973, |
| "learning_rate": 2.334310850439883e-05, |
| "loss": 0.1489, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.7517814726840855, |
| "grad_norm": 16.098506927490234, |
| "learning_rate": 2.315982404692082e-05, |
| "loss": 0.1101, |
| "step": 4425 |
| }, |
| { |
| "epoch": 1.7616785431512272, |
| "grad_norm": 4.9497480392456055, |
| "learning_rate": 2.2976539589442817e-05, |
| "loss": 0.1364, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.771575613618369, |
| "grad_norm": 4.449967384338379, |
| "learning_rate": 2.279325513196481e-05, |
| "loss": 0.1799, |
| "step": 4475 |
| }, |
| { |
| "epoch": 1.7814726840855108, |
| "grad_norm": 3.7315053939819336, |
| "learning_rate": 2.2609970674486806e-05, |
| "loss": 0.1342, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.7913697545526523, |
| "grad_norm": 3.46779727935791, |
| "learning_rate": 2.24266862170088e-05, |
| "loss": 0.1348, |
| "step": 4525 |
| }, |
| { |
| "epoch": 1.8012668250197943, |
| "grad_norm": 6.9282402992248535, |
| "learning_rate": 2.2243401759530792e-05, |
| "loss": 0.1259, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.8111638954869358, |
| "grad_norm": 6.039886951446533, |
| "learning_rate": 2.2060117302052788e-05, |
| "loss": 0.1177, |
| "step": 4575 |
| }, |
| { |
| "epoch": 1.8210609659540777, |
| "grad_norm": 8.866342544555664, |
| "learning_rate": 2.187683284457478e-05, |
| "loss": 0.0904, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.8309580364212192, |
| "grad_norm": 11.28046989440918, |
| "learning_rate": 2.1693548387096777e-05, |
| "loss": 0.1127, |
| "step": 4625 |
| }, |
| { |
| "epoch": 1.8408551068883612, |
| "grad_norm": 0.10267776250839233, |
| "learning_rate": 2.1510263929618766e-05, |
| "loss": 0.1338, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.8507521773555027, |
| "grad_norm": 4.542361259460449, |
| "learning_rate": 2.1326979472140763e-05, |
| "loss": 0.1465, |
| "step": 4675 |
| }, |
| { |
| "epoch": 1.8606492478226446, |
| "grad_norm": 6.66448974609375, |
| "learning_rate": 2.1143695014662755e-05, |
| "loss": 0.1373, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.8705463182897861, |
| "grad_norm": 5.7664690017700195, |
| "learning_rate": 2.096041055718475e-05, |
| "loss": 0.1329, |
| "step": 4725 |
| }, |
| { |
| "epoch": 1.880443388756928, |
| "grad_norm": 6.261977195739746, |
| "learning_rate": 2.0777126099706744e-05, |
| "loss": 0.0891, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.8903404592240696, |
| "grad_norm": 0.11381009221076965, |
| "learning_rate": 2.059384164222874e-05, |
| "loss": 0.1099, |
| "step": 4775 |
| }, |
| { |
| "epoch": 1.9002375296912115, |
| "grad_norm": 2.1505606174468994, |
| "learning_rate": 2.0410557184750733e-05, |
| "loss": 0.157, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.910134600158353, |
| "grad_norm": 8.369518280029297, |
| "learning_rate": 2.022727272727273e-05, |
| "loss": 0.1188, |
| "step": 4825 |
| }, |
| { |
| "epoch": 1.920031670625495, |
| "grad_norm": 14.388636589050293, |
| "learning_rate": 2.0043988269794722e-05, |
| "loss": 0.1404, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.9299287410926365, |
| "grad_norm": 0.20757636427879333, |
| "learning_rate": 1.9860703812316715e-05, |
| "loss": 0.1269, |
| "step": 4875 |
| }, |
| { |
| "epoch": 1.9398258115597784, |
| "grad_norm": 4.5515875816345215, |
| "learning_rate": 1.967741935483871e-05, |
| "loss": 0.0991, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.94972288202692, |
| "grad_norm": 0.5187767744064331, |
| "learning_rate": 1.9494134897360704e-05, |
| "loss": 0.0744, |
| "step": 4925 |
| }, |
| { |
| "epoch": 1.9596199524940618, |
| "grad_norm": 7.752375602722168, |
| "learning_rate": 1.93108504398827e-05, |
| "loss": 0.1262, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.9695170229612033, |
| "grad_norm": 4.192614555358887, |
| "learning_rate": 1.9127565982404693e-05, |
| "loss": 0.1082, |
| "step": 4975 |
| }, |
| { |
| "epoch": 1.9794140934283453, |
| "grad_norm": 12.195773124694824, |
| "learning_rate": 1.894428152492669e-05, |
| "loss": 0.118, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.9893111638954868, |
| "grad_norm": 4.470797061920166, |
| "learning_rate": 1.8760997067448682e-05, |
| "loss": 0.1131, |
| "step": 5025 |
| }, |
| { |
| "epoch": 1.9992082343626287, |
| "grad_norm": 2.6608407497406006, |
| "learning_rate": 1.8577712609970678e-05, |
| "loss": 0.1176, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.962, |
| "eval_f1_macro": 0.9527593111357376, |
| "eval_f1_micro": 0.962, |
| "eval_f1_weighted": 0.961887560772812, |
| "eval_loss": 0.16408780217170715, |
| "eval_precision_macro": 0.9611804451780929, |
| "eval_precision_micro": 0.962, |
| "eval_precision_weighted": 0.9624429314047246, |
| "eval_recall_macro": 0.9460711525998422, |
| "eval_recall_micro": 0.962, |
| "eval_recall_weighted": 0.962, |
| "eval_runtime": 5.1023, |
| "eval_samples_per_second": 979.953, |
| "eval_steps_per_second": 15.483, |
| "step": 5052 |
| }, |
| { |
| "epoch": 2.0091053048297702, |
| "grad_norm": 6.416041374206543, |
| "learning_rate": 1.8394428152492667e-05, |
| "loss": 0.0793, |
| "step": 5075 |
| }, |
| { |
| "epoch": 2.019002375296912, |
| "grad_norm": 9.853547096252441, |
| "learning_rate": 1.8211143695014664e-05, |
| "loss": 0.0642, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.0288994457640537, |
| "grad_norm": 9.938668251037598, |
| "learning_rate": 1.8027859237536656e-05, |
| "loss": 0.0778, |
| "step": 5125 |
| }, |
| { |
| "epoch": 2.0387965162311956, |
| "grad_norm": 0.6218538880348206, |
| "learning_rate": 1.7844574780058653e-05, |
| "loss": 0.07, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.048693586698337, |
| "grad_norm": 7.014169216156006, |
| "learning_rate": 1.7661290322580645e-05, |
| "loss": 0.0591, |
| "step": 5175 |
| }, |
| { |
| "epoch": 2.058590657165479, |
| "grad_norm": 0.011782053858041763, |
| "learning_rate": 1.7478005865102638e-05, |
| "loss": 0.0638, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.0684877276326206, |
| "grad_norm": 2.117039918899536, |
| "learning_rate": 1.7294721407624634e-05, |
| "loss": 0.0571, |
| "step": 5225 |
| }, |
| { |
| "epoch": 2.0783847980997625, |
| "grad_norm": 6.522469997406006, |
| "learning_rate": 1.7111436950146627e-05, |
| "loss": 0.0623, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.088281868566904, |
| "grad_norm": 0.01880364678800106, |
| "learning_rate": 1.6928152492668623e-05, |
| "loss": 0.0487, |
| "step": 5275 |
| }, |
| { |
| "epoch": 2.098178939034046, |
| "grad_norm": 0.3510414958000183, |
| "learning_rate": 1.6744868035190616e-05, |
| "loss": 0.0678, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.1080760095011875, |
| "grad_norm": 0.7997303009033203, |
| "learning_rate": 1.6561583577712612e-05, |
| "loss": 0.0633, |
| "step": 5325 |
| }, |
| { |
| "epoch": 2.1179730799683294, |
| "grad_norm": 0.026792127639055252, |
| "learning_rate": 1.6378299120234605e-05, |
| "loss": 0.0861, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.127870150435471, |
| "grad_norm": 0.4307959973812103, |
| "learning_rate": 1.61950146627566e-05, |
| "loss": 0.0593, |
| "step": 5375 |
| }, |
| { |
| "epoch": 2.137767220902613, |
| "grad_norm": 0.2015238106250763, |
| "learning_rate": 1.6011730205278594e-05, |
| "loss": 0.0576, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.147664291369755, |
| "grad_norm": 0.7783300876617432, |
| "learning_rate": 1.5828445747800587e-05, |
| "loss": 0.0734, |
| "step": 5425 |
| }, |
| { |
| "epoch": 2.1575613618368963, |
| "grad_norm": 0.03129582852125168, |
| "learning_rate": 1.5645161290322583e-05, |
| "loss": 0.0778, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.167458432304038, |
| "grad_norm": 3.1593480110168457, |
| "learning_rate": 1.5461876832844576e-05, |
| "loss": 0.0872, |
| "step": 5475 |
| }, |
| { |
| "epoch": 2.1773555027711797, |
| "grad_norm": 0.04930011183023453, |
| "learning_rate": 1.527859237536657e-05, |
| "loss": 0.0458, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.1872525732383217, |
| "grad_norm": 8.676435470581055, |
| "learning_rate": 1.5095307917888563e-05, |
| "loss": 0.0268, |
| "step": 5525 |
| }, |
| { |
| "epoch": 2.197149643705463, |
| "grad_norm": 13.317243576049805, |
| "learning_rate": 1.4912023460410557e-05, |
| "loss": 0.0375, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.2070467141726047, |
| "grad_norm": 6.073598861694336, |
| "learning_rate": 1.4728739002932552e-05, |
| "loss": 0.045, |
| "step": 5575 |
| }, |
| { |
| "epoch": 2.2169437846397466, |
| "grad_norm": 1.5578258037567139, |
| "learning_rate": 1.4545454545454545e-05, |
| "loss": 0.0858, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.2268408551068886, |
| "grad_norm": 1.701669692993164, |
| "learning_rate": 1.4362170087976539e-05, |
| "loss": 0.0505, |
| "step": 5625 |
| }, |
| { |
| "epoch": 2.23673792557403, |
| "grad_norm": 9.207208633422852, |
| "learning_rate": 1.4178885630498534e-05, |
| "loss": 0.0729, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.246634996041172, |
| "grad_norm": 0.026624349877238274, |
| "learning_rate": 1.3995601173020528e-05, |
| "loss": 0.0368, |
| "step": 5675 |
| }, |
| { |
| "epoch": 2.2565320665083135, |
| "grad_norm": 0.5754280686378479, |
| "learning_rate": 1.3812316715542523e-05, |
| "loss": 0.0518, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.2664291369754554, |
| "grad_norm": 0.3010414242744446, |
| "learning_rate": 1.3629032258064517e-05, |
| "loss": 0.0675, |
| "step": 5725 |
| }, |
| { |
| "epoch": 2.276326207442597, |
| "grad_norm": 0.0495084747672081, |
| "learning_rate": 1.3445747800586511e-05, |
| "loss": 0.0861, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.286223277909739, |
| "grad_norm": 2.4626643657684326, |
| "learning_rate": 1.3262463343108506e-05, |
| "loss": 0.0792, |
| "step": 5775 |
| }, |
| { |
| "epoch": 2.2961203483768804, |
| "grad_norm": 0.9338565468788147, |
| "learning_rate": 1.30791788856305e-05, |
| "loss": 0.0385, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.3060174188440223, |
| "grad_norm": 3.80712628364563, |
| "learning_rate": 1.2895894428152493e-05, |
| "loss": 0.0907, |
| "step": 5825 |
| }, |
| { |
| "epoch": 2.315914489311164, |
| "grad_norm": 6.940539836883545, |
| "learning_rate": 1.2712609970674488e-05, |
| "loss": 0.0802, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.325811559778306, |
| "grad_norm": 4.521027088165283, |
| "learning_rate": 1.2529325513196482e-05, |
| "loss": 0.0752, |
| "step": 5875 |
| }, |
| { |
| "epoch": 2.3357086302454473, |
| "grad_norm": 1.9812321662902832, |
| "learning_rate": 1.2346041055718475e-05, |
| "loss": 0.082, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.3456057007125892, |
| "grad_norm": 11.957037925720215, |
| "learning_rate": 1.216275659824047e-05, |
| "loss": 0.0492, |
| "step": 5925 |
| }, |
| { |
| "epoch": 2.3555027711797307, |
| "grad_norm": 0.16896741092205048, |
| "learning_rate": 1.1979472140762464e-05, |
| "loss": 0.0519, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.3653998416468727, |
| "grad_norm": 0.6698777675628662, |
| "learning_rate": 1.1796187683284458e-05, |
| "loss": 0.1032, |
| "step": 5975 |
| }, |
| { |
| "epoch": 2.375296912114014, |
| "grad_norm": 13.273818969726562, |
| "learning_rate": 1.1612903225806453e-05, |
| "loss": 0.0844, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.385193982581156, |
| "grad_norm": 2.729861259460449, |
| "learning_rate": 1.1429618768328447e-05, |
| "loss": 0.0663, |
| "step": 6025 |
| }, |
| { |
| "epoch": 2.3950910530482976, |
| "grad_norm": 15.863778114318848, |
| "learning_rate": 1.124633431085044e-05, |
| "loss": 0.039, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.4049881235154396, |
| "grad_norm": 8.72951602935791, |
| "learning_rate": 1.1063049853372435e-05, |
| "loss": 0.0667, |
| "step": 6075 |
| }, |
| { |
| "epoch": 2.414885193982581, |
| "grad_norm": 0.06009228155016899, |
| "learning_rate": 1.0879765395894429e-05, |
| "loss": 0.0962, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.424782264449723, |
| "grad_norm": 1.6163275241851807, |
| "learning_rate": 1.0696480938416424e-05, |
| "loss": 0.0765, |
| "step": 6125 |
| }, |
| { |
| "epoch": 2.4346793349168645, |
| "grad_norm": 0.023228373378515244, |
| "learning_rate": 1.0513196480938416e-05, |
| "loss": 0.0786, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.4445764053840064, |
| "grad_norm": 0.010257094167172909, |
| "learning_rate": 1.032991202346041e-05, |
| "loss": 0.0398, |
| "step": 6175 |
| }, |
| { |
| "epoch": 2.454473475851148, |
| "grad_norm": 5.985715389251709, |
| "learning_rate": 1.0146627565982405e-05, |
| "loss": 0.0757, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.46437054631829, |
| "grad_norm": 0.06866980344057083, |
| "learning_rate": 9.9633431085044e-06, |
| "loss": 0.0554, |
| "step": 6225 |
| }, |
| { |
| "epoch": 2.4742676167854314, |
| "grad_norm": 4.456401348114014, |
| "learning_rate": 9.780058651026392e-06, |
| "loss": 0.0496, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.4841646872525733, |
| "grad_norm": 17.26448631286621, |
| "learning_rate": 9.596774193548387e-06, |
| "loss": 0.0679, |
| "step": 6275 |
| }, |
| { |
| "epoch": 2.494061757719715, |
| "grad_norm": 8.371593475341797, |
| "learning_rate": 9.413489736070381e-06, |
| "loss": 0.0745, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.5039588281868568, |
| "grad_norm": 0.02239610068500042, |
| "learning_rate": 9.230205278592376e-06, |
| "loss": 0.0557, |
| "step": 6325 |
| }, |
| { |
| "epoch": 2.5138558986539983, |
| "grad_norm": 0.046766772866249084, |
| "learning_rate": 9.04692082111437e-06, |
| "loss": 0.0438, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.52375296912114, |
| "grad_norm": 8.119983673095703, |
| "learning_rate": 8.863636363636365e-06, |
| "loss": 0.0713, |
| "step": 6375 |
| }, |
| { |
| "epoch": 2.5336500395882817, |
| "grad_norm": 0.0916699767112732, |
| "learning_rate": 8.68035190615836e-06, |
| "loss": 0.1, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.5435471100554237, |
| "grad_norm": 6.661031246185303, |
| "learning_rate": 8.497067448680352e-06, |
| "loss": 0.0949, |
| "step": 6425 |
| }, |
| { |
| "epoch": 2.553444180522565, |
| "grad_norm": 5.773529529571533, |
| "learning_rate": 8.313782991202347e-06, |
| "loss": 0.0834, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.563341250989707, |
| "grad_norm": 19.465478897094727, |
| "learning_rate": 8.13049853372434e-06, |
| "loss": 0.0927, |
| "step": 6475 |
| }, |
| { |
| "epoch": 2.5732383214568486, |
| "grad_norm": 9.270087242126465, |
| "learning_rate": 7.947214076246334e-06, |
| "loss": 0.0591, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.5831353919239906, |
| "grad_norm": 1.235212802886963, |
| "learning_rate": 7.763929618768328e-06, |
| "loss": 0.1145, |
| "step": 6525 |
| }, |
| { |
| "epoch": 2.5930324623911325, |
| "grad_norm": 0.10507909208536148, |
| "learning_rate": 7.580645161290323e-06, |
| "loss": 0.0928, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.602929532858274, |
| "grad_norm": 1.12295663356781, |
| "learning_rate": 7.397360703812317e-06, |
| "loss": 0.0602, |
| "step": 6575 |
| }, |
| { |
| "epoch": 2.6128266033254155, |
| "grad_norm": 8.503447532653809, |
| "learning_rate": 7.214076246334312e-06, |
| "loss": 0.077, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.6227236737925574, |
| "grad_norm": 0.9171582460403442, |
| "learning_rate": 7.030791788856305e-06, |
| "loss": 0.048, |
| "step": 6625 |
| }, |
| { |
| "epoch": 2.6326207442596994, |
| "grad_norm": 0.28691366314888, |
| "learning_rate": 6.8475073313783e-06, |
| "loss": 0.0548, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.642517814726841, |
| "grad_norm": 2.4092676639556885, |
| "learning_rate": 6.664222873900293e-06, |
| "loss": 0.0654, |
| "step": 6675 |
| }, |
| { |
| "epoch": 2.6524148851939824, |
| "grad_norm": 0.248480886220932, |
| "learning_rate": 6.480938416422287e-06, |
| "loss": 0.0776, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.6623119556611243, |
| "grad_norm": 1.0501718521118164, |
| "learning_rate": 6.2976539589442816e-06, |
| "loss": 0.0749, |
| "step": 6725 |
| }, |
| { |
| "epoch": 2.6722090261282663, |
| "grad_norm": 0.16996045410633087, |
| "learning_rate": 6.114369501466276e-06, |
| "loss": 0.0448, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.6821060965954078, |
| "grad_norm": 0.015705592930316925, |
| "learning_rate": 5.93108504398827e-06, |
| "loss": 0.043, |
| "step": 6775 |
| }, |
| { |
| "epoch": 2.6920031670625493, |
| "grad_norm": 13.008760452270508, |
| "learning_rate": 5.747800586510264e-06, |
| "loss": 0.045, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.701900237529691, |
| "grad_norm": 0.49796026945114136, |
| "learning_rate": 5.564516129032259e-06, |
| "loss": 0.0387, |
| "step": 6825 |
| }, |
| { |
| "epoch": 2.711797307996833, |
| "grad_norm": 6.566326141357422, |
| "learning_rate": 5.381231671554252e-06, |
| "loss": 0.0412, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.7216943784639747, |
| "grad_norm": 0.014399710111320019, |
| "learning_rate": 5.197947214076247e-06, |
| "loss": 0.0596, |
| "step": 6875 |
| }, |
| { |
| "epoch": 2.731591448931116, |
| "grad_norm": 0.3872062563896179, |
| "learning_rate": 5.014662756598241e-06, |
| "loss": 0.0523, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.741488519398258, |
| "grad_norm": 0.1425359696149826, |
| "learning_rate": 4.831378299120235e-06, |
| "loss": 0.0461, |
| "step": 6925 |
| }, |
| { |
| "epoch": 2.7513855898654, |
| "grad_norm": 0.4896790683269501, |
| "learning_rate": 4.6480938416422284e-06, |
| "loss": 0.0616, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.7612826603325415, |
| "grad_norm": 6.286714553833008, |
| "learning_rate": 4.464809384164223e-06, |
| "loss": 0.0896, |
| "step": 6975 |
| }, |
| { |
| "epoch": 2.771179730799683, |
| "grad_norm": 0.058547962456941605, |
| "learning_rate": 4.281524926686217e-06, |
| "loss": 0.0648, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.781076801266825, |
| "grad_norm": 0.3147684335708618, |
| "learning_rate": 4.098240469208212e-06, |
| "loss": 0.0701, |
| "step": 7025 |
| }, |
| { |
| "epoch": 2.790973871733967, |
| "grad_norm": 2.779256582260132, |
| "learning_rate": 3.9149560117302055e-06, |
| "loss": 0.0352, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.8008709422011084, |
| "grad_norm": 0.26303109526634216, |
| "learning_rate": 3.7316715542521995e-06, |
| "loss": 0.0663, |
| "step": 7075 |
| }, |
| { |
| "epoch": 2.81076801266825, |
| "grad_norm": 11.363053321838379, |
| "learning_rate": 3.5483870967741936e-06, |
| "loss": 0.0883, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.820665083135392, |
| "grad_norm": 0.36071789264678955, |
| "learning_rate": 3.365102639296188e-06, |
| "loss": 0.0762, |
| "step": 7125 |
| }, |
| { |
| "epoch": 2.830562153602534, |
| "grad_norm": 0.014336947351694107, |
| "learning_rate": 3.1818181818181817e-06, |
| "loss": 0.0433, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.8404592240696753, |
| "grad_norm": 8.244430541992188, |
| "learning_rate": 2.9985337243401757e-06, |
| "loss": 0.0436, |
| "step": 7175 |
| }, |
| { |
| "epoch": 2.850356294536817, |
| "grad_norm": 0.018452562391757965, |
| "learning_rate": 2.8152492668621702e-06, |
| "loss": 0.0478, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.8602533650039588, |
| "grad_norm": 0.9135558605194092, |
| "learning_rate": 2.6319648093841647e-06, |
| "loss": 0.0582, |
| "step": 7225 |
| }, |
| { |
| "epoch": 2.8701504354711007, |
| "grad_norm": 0.03040502220392227, |
| "learning_rate": 2.4486803519061583e-06, |
| "loss": 0.0353, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.880047505938242, |
| "grad_norm": 0.026754941791296005, |
| "learning_rate": 2.265395894428153e-06, |
| "loss": 0.0769, |
| "step": 7275 |
| }, |
| { |
| "epoch": 2.889944576405384, |
| "grad_norm": 0.09019900858402252, |
| "learning_rate": 2.082111436950147e-06, |
| "loss": 0.0506, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.8998416468725257, |
| "grad_norm": 0.04084889218211174, |
| "learning_rate": 1.8988269794721409e-06, |
| "loss": 0.0411, |
| "step": 7325 |
| }, |
| { |
| "epoch": 2.9097387173396676, |
| "grad_norm": 2.3881072998046875, |
| "learning_rate": 1.715542521994135e-06, |
| "loss": 0.0549, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.919635787806809, |
| "grad_norm": 7.836863040924072, |
| "learning_rate": 1.532258064516129e-06, |
| "loss": 0.0514, |
| "step": 7375 |
| }, |
| { |
| "epoch": 2.929532858273951, |
| "grad_norm": 1.4827078580856323, |
| "learning_rate": 1.3489736070381233e-06, |
| "loss": 0.0608, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.9394299287410925, |
| "grad_norm": 0.012712684459984303, |
| "learning_rate": 1.1656891495601175e-06, |
| "loss": 0.0542, |
| "step": 7425 |
| }, |
| { |
| "epoch": 2.9493269992082345, |
| "grad_norm": 6.55382776260376, |
| "learning_rate": 9.824046920821116e-07, |
| "loss": 0.0518, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.959224069675376, |
| "grad_norm": 0.21079152822494507, |
| "learning_rate": 7.991202346041056e-07, |
| "loss": 0.0618, |
| "step": 7475 |
| }, |
| { |
| "epoch": 2.969121140142518, |
| "grad_norm": 4.107754707336426, |
| "learning_rate": 6.158357771260998e-07, |
| "loss": 0.0368, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.9790182106096594, |
| "grad_norm": 0.4540683925151825, |
| "learning_rate": 4.325513196480939e-07, |
| "loss": 0.0595, |
| "step": 7525 |
| }, |
| { |
| "epoch": 2.9889152810768014, |
| "grad_norm": 0.416092187166214, |
| "learning_rate": 2.4926686217008803e-07, |
| "loss": 0.0626, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.998812351543943, |
| "grad_norm": 3.699631452560425, |
| "learning_rate": 6.598240469208211e-08, |
| "loss": 0.0729, |
| "step": 7575 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9694, |
| "eval_f1_macro": 0.9613886456444749, |
| "eval_f1_micro": 0.9694, |
| "eval_f1_weighted": 0.9693030681223207, |
| "eval_loss": 0.157407745718956, |
| "eval_precision_macro": 0.9679892485977634, |
| "eval_precision_micro": 0.9694, |
| "eval_precision_weighted": 0.9695713537396466, |
| "eval_recall_macro": 0.9560667596679707, |
| "eval_recall_micro": 0.9694, |
| "eval_recall_weighted": 0.9694, |
| "eval_runtime": 5.0753, |
| "eval_samples_per_second": 985.172, |
| "eval_steps_per_second": 15.566, |
| "step": 7578 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 7578, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.381368787756646e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|