{ "best_metric": 1.1161140203475952, "best_model_checkpoint": "square_run_age_gender/checkpoint-261", "epoch": 35.0, "eval_steps": 500, "global_step": 1015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06896551724137931, "grad_norm": 13.471597671508789, "learning_rate": 1.96078431372549e-06, "loss": 1.979, "step": 2 }, { "epoch": 0.13793103448275862, "grad_norm": 6.632089138031006, "learning_rate": 3.92156862745098e-06, "loss": 1.8207, "step": 4 }, { "epoch": 0.20689655172413793, "grad_norm": 10.463000297546387, "learning_rate": 5.882352941176471e-06, "loss": 1.9296, "step": 6 }, { "epoch": 0.27586206896551724, "grad_norm": 7.69188117980957, "learning_rate": 7.84313725490196e-06, "loss": 1.9443, "step": 8 }, { "epoch": 0.3448275862068966, "grad_norm": 10.529653549194336, "learning_rate": 9.803921568627451e-06, "loss": 1.9129, "step": 10 }, { "epoch": 0.41379310344827586, "grad_norm": 9.45837116241455, "learning_rate": 1.1764705882352942e-05, "loss": 2.0835, "step": 12 }, { "epoch": 0.4827586206896552, "grad_norm": 6.899144649505615, "learning_rate": 1.3725490196078432e-05, "loss": 1.905, "step": 14 }, { "epoch": 0.5517241379310345, "grad_norm": 6.275972843170166, "learning_rate": 1.568627450980392e-05, "loss": 2.0193, "step": 16 }, { "epoch": 0.6206896551724138, "grad_norm": 7.337122440338135, "learning_rate": 1.7647058823529414e-05, "loss": 1.7395, "step": 18 }, { "epoch": 0.6896551724137931, "grad_norm": 8.741500854492188, "learning_rate": 1.9607843137254903e-05, "loss": 1.8184, "step": 20 }, { "epoch": 0.7586206896551724, "grad_norm": 5.90638542175293, "learning_rate": 2.1568627450980395e-05, "loss": 1.8613, "step": 22 }, { "epoch": 0.8275862068965517, "grad_norm": 5.549325942993164, "learning_rate": 2.3529411764705884e-05, "loss": 1.8278, "step": 24 }, { "epoch": 0.896551724137931, "grad_norm": 9.570854187011719, "learning_rate": 2.5490196078431373e-05, "loss": 1.8713, "step": 26 }, { "epoch": 0.9655172413793104, "grad_norm": 5.850081920623779, "learning_rate": 2.7450980392156865e-05, "loss": 1.8891, "step": 28 }, { "epoch": 1.0, "eval_accuracy": 0.25757575757575757, "eval_f1_macro": 0.17420690764344018, "eval_f1_micro": 0.25757575757575757, "eval_f1_weighted": 0.2100702351405982, "eval_loss": 1.867130160331726, "eval_precision_macro": 0.1681240063593005, "eval_precision_micro": 0.25757575757575757, "eval_precision_weighted": 0.20448824492942141, "eval_recall_macro": 0.21421012849584278, "eval_recall_micro": 0.25757575757575757, "eval_recall_weighted": 0.25757575757575757, "eval_runtime": 2.1698, "eval_samples_per_second": 60.835, "eval_steps_per_second": 7.835, "step": 29 }, { "epoch": 1.0344827586206897, "grad_norm": 8.4002103805542, "learning_rate": 2.9411764705882354e-05, "loss": 1.8553, "step": 30 }, { "epoch": 1.103448275862069, "grad_norm": 6.214775085449219, "learning_rate": 3.137254901960784e-05, "loss": 1.8232, "step": 32 }, { "epoch": 1.1724137931034484, "grad_norm": 5.48581600189209, "learning_rate": 3.3333333333333335e-05, "loss": 1.9136, "step": 34 }, { "epoch": 1.2413793103448276, "grad_norm": 3.806295871734619, "learning_rate": 3.529411764705883e-05, "loss": 1.9254, "step": 36 }, { "epoch": 1.3103448275862069, "grad_norm": 5.554256439208984, "learning_rate": 3.725490196078432e-05, "loss": 1.8243, "step": 38 }, { "epoch": 1.3793103448275863, "grad_norm": 7.334174156188965, "learning_rate": 3.9215686274509805e-05, "loss": 1.9214, "step": 40 }, { "epoch": 1.4482758620689655, "grad_norm": 4.776826858520508, "learning_rate": 4.11764705882353e-05, "loss": 1.8654, "step": 42 }, { "epoch": 1.5172413793103448, "grad_norm": 10.249964714050293, "learning_rate": 4.313725490196079e-05, "loss": 1.9152, "step": 44 }, { "epoch": 1.5862068965517242, "grad_norm": 5.083812713623047, "learning_rate": 4.5098039215686275e-05, "loss": 1.9904, "step": 46 }, { "epoch": 1.6551724137931034, "grad_norm": 4.7975029945373535, "learning_rate": 4.705882352941177e-05, "loss": 1.839, "step": 48 }, { "epoch": 1.7241379310344827, "grad_norm": 4.822813987731934, "learning_rate": 4.901960784313725e-05, "loss": 1.9303, "step": 50 }, { "epoch": 1.793103448275862, "grad_norm": 6.845904350280762, "learning_rate": 5.0980392156862745e-05, "loss": 1.8255, "step": 52 }, { "epoch": 1.8620689655172413, "grad_norm": 7.084125518798828, "learning_rate": 5.294117647058824e-05, "loss": 1.871, "step": 54 }, { "epoch": 1.9310344827586206, "grad_norm": 8.454116821289062, "learning_rate": 5.490196078431373e-05, "loss": 1.817, "step": 56 }, { "epoch": 2.0, "grad_norm": 3.273526430130005, "learning_rate": 5.6862745098039215e-05, "loss": 1.8327, "step": 58 }, { "epoch": 2.0, "eval_accuracy": 0.3181818181818182, "eval_f1_macro": 0.15698350488823806, "eval_f1_micro": 0.3181818181818182, "eval_f1_weighted": 0.19373566841158035, "eval_loss": 1.8123832941055298, "eval_precision_macro": 0.13350340136054423, "eval_precision_micro": 0.3181818181818182, "eval_precision_weighted": 0.16110209235209236, "eval_recall_macro": 0.25083144368858656, "eval_recall_micro": 0.3181818181818182, "eval_recall_weighted": 0.3181818181818182, "eval_runtime": 2.1978, "eval_samples_per_second": 60.059, "eval_steps_per_second": 7.735, "step": 58 }, { "epoch": 2.0689655172413794, "grad_norm": 7.48805570602417, "learning_rate": 5.882352941176471e-05, "loss": 1.875, "step": 60 }, { "epoch": 2.1379310344827585, "grad_norm": 3.892385721206665, "learning_rate": 6.078431372549019e-05, "loss": 1.7235, "step": 62 }, { "epoch": 2.206896551724138, "grad_norm": 8.482718467712402, "learning_rate": 6.274509803921569e-05, "loss": 1.8003, "step": 64 }, { "epoch": 2.2758620689655173, "grad_norm": 13.920659065246582, "learning_rate": 6.470588235294118e-05, "loss": 1.984, "step": 66 }, { "epoch": 2.344827586206897, "grad_norm": 7.427146911621094, "learning_rate": 6.666666666666667e-05, "loss": 1.7298, "step": 68 }, { "epoch": 2.413793103448276, "grad_norm": 9.012772560119629, "learning_rate": 6.862745098039216e-05, "loss": 1.8282, "step": 70 }, { "epoch": 2.4827586206896552, "grad_norm": 8.024530410766602, "learning_rate": 7.058823529411765e-05, "loss": 1.8988, "step": 72 }, { "epoch": 2.5517241379310347, "grad_norm": 6.818090438842773, "learning_rate": 7.254901960784314e-05, "loss": 1.6532, "step": 74 }, { "epoch": 2.6206896551724137, "grad_norm": 10.93878173828125, "learning_rate": 7.450980392156864e-05, "loss": 2.0298, "step": 76 }, { "epoch": 2.689655172413793, "grad_norm": 9.51241397857666, "learning_rate": 7.647058823529411e-05, "loss": 2.0224, "step": 78 }, { "epoch": 2.7586206896551726, "grad_norm": 9.287165641784668, "learning_rate": 7.843137254901961e-05, "loss": 1.8984, "step": 80 }, { "epoch": 2.8275862068965516, "grad_norm": 8.898433685302734, "learning_rate": 8.039215686274511e-05, "loss": 1.746, "step": 82 }, { "epoch": 2.896551724137931, "grad_norm": 6.119718074798584, "learning_rate": 8.23529411764706e-05, "loss": 1.9194, "step": 84 }, { "epoch": 2.9655172413793105, "grad_norm": 6.862753391265869, "learning_rate": 8.431372549019608e-05, "loss": 1.9127, "step": 86 }, { "epoch": 3.0, "eval_accuracy": 0.3181818181818182, "eval_f1_macro": 0.20846518382633952, "eval_f1_micro": 0.3181818181818182, "eval_f1_weighted": 0.25755123465797625, "eval_loss": 1.7830312252044678, "eval_precision_macro": 0.21283015309910777, "eval_precision_micro": 0.3181818181818182, "eval_precision_weighted": 0.2617880187903288, "eval_recall_macro": 0.26250188964474674, "eval_recall_micro": 0.3181818181818182, "eval_recall_weighted": 0.3181818181818182, "eval_runtime": 2.2371, "eval_samples_per_second": 59.004, "eval_steps_per_second": 7.599, "step": 87 }, { "epoch": 3.0344827586206895, "grad_norm": 10.097410202026367, "learning_rate": 8.627450980392158e-05, "loss": 1.896, "step": 88 }, { "epoch": 3.103448275862069, "grad_norm": 6.835166931152344, "learning_rate": 8.823529411764706e-05, "loss": 1.5849, "step": 90 }, { "epoch": 3.1724137931034484, "grad_norm": 7.843909740447998, "learning_rate": 9.019607843137255e-05, "loss": 1.7373, "step": 92 }, { "epoch": 3.2413793103448274, "grad_norm": 7.719568252563477, "learning_rate": 9.215686274509804e-05, "loss": 1.5069, "step": 94 }, { "epoch": 3.310344827586207, "grad_norm": 8.885810852050781, "learning_rate": 9.411764705882353e-05, "loss": 1.523, "step": 96 }, { "epoch": 3.3793103448275863, "grad_norm": 6.505782127380371, "learning_rate": 9.607843137254903e-05, "loss": 1.5585, "step": 98 }, { "epoch": 3.4482758620689653, "grad_norm": 7.679609298706055, "learning_rate": 9.80392156862745e-05, "loss": 1.5167, "step": 100 }, { "epoch": 3.5172413793103448, "grad_norm": 12.342342376708984, "learning_rate": 0.0001, "loss": 1.9155, "step": 102 }, { "epoch": 3.586206896551724, "grad_norm": 5.502551078796387, "learning_rate": 9.978094194961665e-05, "loss": 1.3553, "step": 104 }, { "epoch": 3.655172413793103, "grad_norm": 8.53254222869873, "learning_rate": 9.95618838992333e-05, "loss": 1.6346, "step": 106 }, { "epoch": 3.7241379310344827, "grad_norm": 11.95875072479248, "learning_rate": 9.934282584884996e-05, "loss": 1.5988, "step": 108 }, { "epoch": 3.793103448275862, "grad_norm": 7.445318222045898, "learning_rate": 9.912376779846659e-05, "loss": 1.613, "step": 110 }, { "epoch": 3.862068965517241, "grad_norm": 12.528891563415527, "learning_rate": 9.890470974808325e-05, "loss": 1.7571, "step": 112 }, { "epoch": 3.9310344827586206, "grad_norm": 9.875731468200684, "learning_rate": 9.86856516976999e-05, "loss": 1.53, "step": 114 }, { "epoch": 4.0, "grad_norm": 6.541798114776611, "learning_rate": 9.846659364731654e-05, "loss": 1.4498, "step": 116 }, { "epoch": 4.0, "eval_accuracy": 0.38636363636363635, "eval_f1_macro": 0.29356560009870114, "eval_f1_micro": 0.38636363636363635, "eval_f1_weighted": 0.3437889854048391, "eval_loss": 1.579649806022644, "eval_precision_macro": 0.43422035480859006, "eval_precision_micro": 0.38636363636363635, "eval_precision_weighted": 0.4527406417112299, "eval_recall_macro": 0.3179440665154951, "eval_recall_micro": 0.38636363636363635, "eval_recall_weighted": 0.38636363636363635, "eval_runtime": 2.2188, "eval_samples_per_second": 59.491, "eval_steps_per_second": 7.662, "step": 116 }, { "epoch": 4.068965517241379, "grad_norm": 11.127625465393066, "learning_rate": 9.824753559693319e-05, "loss": 1.5055, "step": 118 }, { "epoch": 4.137931034482759, "grad_norm": 7.331289768218994, "learning_rate": 9.802847754654983e-05, "loss": 1.3358, "step": 120 }, { "epoch": 4.206896551724138, "grad_norm": 8.787720680236816, "learning_rate": 9.78094194961665e-05, "loss": 1.6528, "step": 122 }, { "epoch": 4.275862068965517, "grad_norm": 8.822704315185547, "learning_rate": 9.759036144578314e-05, "loss": 1.2185, "step": 124 }, { "epoch": 4.344827586206897, "grad_norm": 7.718049049377441, "learning_rate": 9.737130339539979e-05, "loss": 1.376, "step": 126 }, { "epoch": 4.413793103448276, "grad_norm": 6.089763641357422, "learning_rate": 9.715224534501643e-05, "loss": 1.2114, "step": 128 }, { "epoch": 4.482758620689655, "grad_norm": 11.10245418548584, "learning_rate": 9.693318729463309e-05, "loss": 1.5952, "step": 130 }, { "epoch": 4.551724137931035, "grad_norm": 9.193964958190918, "learning_rate": 9.671412924424972e-05, "loss": 1.691, "step": 132 }, { "epoch": 4.620689655172414, "grad_norm": 7.160553455352783, "learning_rate": 9.649507119386638e-05, "loss": 1.7942, "step": 134 }, { "epoch": 4.689655172413794, "grad_norm": 10.51407527923584, "learning_rate": 9.627601314348302e-05, "loss": 1.541, "step": 136 }, { "epoch": 4.758620689655173, "grad_norm": 7.315539836883545, "learning_rate": 9.605695509309968e-05, "loss": 1.4463, "step": 138 }, { "epoch": 4.827586206896552, "grad_norm": 5.069845676422119, "learning_rate": 9.583789704271632e-05, "loss": 1.2094, "step": 140 }, { "epoch": 4.896551724137931, "grad_norm": 7.61802864074707, "learning_rate": 9.561883899233297e-05, "loss": 1.3717, "step": 142 }, { "epoch": 4.9655172413793105, "grad_norm": 5.559959888458252, "learning_rate": 9.539978094194963e-05, "loss": 1.2166, "step": 144 }, { "epoch": 5.0, "eval_accuracy": 0.4772727272727273, "eval_f1_macro": 0.3867993464710291, "eval_f1_micro": 0.4772727272727273, "eval_f1_weighted": 0.4441923804921774, "eval_loss": 1.3484843969345093, "eval_precision_macro": 0.5067572493188256, "eval_precision_micro": 0.4772727272727273, "eval_precision_weighted": 0.5372770333115161, "eval_recall_macro": 0.4076719576719577, "eval_recall_micro": 0.4772727272727273, "eval_recall_weighted": 0.4772727272727273, "eval_runtime": 2.192, "eval_samples_per_second": 60.219, "eval_steps_per_second": 7.756, "step": 145 }, { "epoch": 5.0344827586206895, "grad_norm": 6.8597941398620605, "learning_rate": 9.518072289156626e-05, "loss": 1.5412, "step": 146 }, { "epoch": 5.103448275862069, "grad_norm": 5.627894401550293, "learning_rate": 9.496166484118292e-05, "loss": 1.0841, "step": 148 }, { "epoch": 5.172413793103448, "grad_norm": 10.26051139831543, "learning_rate": 9.474260679079957e-05, "loss": 1.1118, "step": 150 }, { "epoch": 5.241379310344827, "grad_norm": 7.57983922958374, "learning_rate": 9.452354874041621e-05, "loss": 1.1301, "step": 152 }, { "epoch": 5.310344827586207, "grad_norm": 7.729267120361328, "learning_rate": 9.430449069003286e-05, "loss": 1.3092, "step": 154 }, { "epoch": 5.379310344827586, "grad_norm": 5.462944507598877, "learning_rate": 9.40854326396495e-05, "loss": 1.0817, "step": 156 }, { "epoch": 5.448275862068965, "grad_norm": 11.766180992126465, "learning_rate": 9.386637458926615e-05, "loss": 1.747, "step": 158 }, { "epoch": 5.517241379310345, "grad_norm": 8.114238739013672, "learning_rate": 9.364731653888281e-05, "loss": 1.1059, "step": 160 }, { "epoch": 5.586206896551724, "grad_norm": 7.437196254730225, "learning_rate": 9.342825848849946e-05, "loss": 1.2654, "step": 162 }, { "epoch": 5.655172413793103, "grad_norm": 9.772777557373047, "learning_rate": 9.32092004381161e-05, "loss": 1.1972, "step": 164 }, { "epoch": 5.724137931034483, "grad_norm": 5.461746692657471, "learning_rate": 9.299014238773275e-05, "loss": 1.1775, "step": 166 }, { "epoch": 5.793103448275862, "grad_norm": 6.507452964782715, "learning_rate": 9.27710843373494e-05, "loss": 1.0831, "step": 168 }, { "epoch": 5.862068965517241, "grad_norm": 10.11184024810791, "learning_rate": 9.255202628696606e-05, "loss": 1.002, "step": 170 }, { "epoch": 5.931034482758621, "grad_norm": 9.433456420898438, "learning_rate": 9.233296823658269e-05, "loss": 1.5848, "step": 172 }, { "epoch": 6.0, "grad_norm": 9.081482887268066, "learning_rate": 9.211391018619935e-05, "loss": 1.5704, "step": 174 }, { "epoch": 6.0, "eval_accuracy": 0.5606060606060606, "eval_f1_macro": 0.48525454733832823, "eval_f1_micro": 0.5606060606060606, "eval_f1_weighted": 0.5509911571429003, "eval_loss": 1.2560298442840576, "eval_precision_macro": 0.4905962384953981, "eval_precision_micro": 0.5606060606060606, "eval_precision_weighted": 0.5678953399541635, "eval_recall_macro": 0.5025774754346183, "eval_recall_micro": 0.5606060606060606, "eval_recall_weighted": 0.5606060606060606, "eval_runtime": 2.2113, "eval_samples_per_second": 59.694, "eval_steps_per_second": 7.688, "step": 174 }, { "epoch": 6.068965517241379, "grad_norm": 7.783880710601807, "learning_rate": 9.1894852135816e-05, "loss": 1.1088, "step": 176 }, { "epoch": 6.137931034482759, "grad_norm": 8.41838550567627, "learning_rate": 9.167579408543264e-05, "loss": 1.2991, "step": 178 }, { "epoch": 6.206896551724138, "grad_norm": 7.9511799812316895, "learning_rate": 9.14567360350493e-05, "loss": 1.14, "step": 180 }, { "epoch": 6.275862068965517, "grad_norm": 8.29940128326416, "learning_rate": 9.123767798466593e-05, "loss": 1.5207, "step": 182 }, { "epoch": 6.344827586206897, "grad_norm": 7.130605220794678, "learning_rate": 9.10186199342826e-05, "loss": 1.2116, "step": 184 }, { "epoch": 6.413793103448276, "grad_norm": 6.235842227935791, "learning_rate": 9.079956188389924e-05, "loss": 0.9688, "step": 186 }, { "epoch": 6.482758620689655, "grad_norm": 4.8011980056762695, "learning_rate": 9.058050383351589e-05, "loss": 0.8892, "step": 188 }, { "epoch": 6.551724137931035, "grad_norm": 7.332155227661133, "learning_rate": 9.036144578313253e-05, "loss": 1.0801, "step": 190 }, { "epoch": 6.620689655172414, "grad_norm": 6.098062515258789, "learning_rate": 9.014238773274918e-05, "loss": 0.9725, "step": 192 }, { "epoch": 6.689655172413794, "grad_norm": 7.8977813720703125, "learning_rate": 8.992332968236583e-05, "loss": 1.1209, "step": 194 }, { "epoch": 6.758620689655173, "grad_norm": 6.769626617431641, "learning_rate": 8.970427163198248e-05, "loss": 1.305, "step": 196 }, { "epoch": 6.827586206896552, "grad_norm": 6.7080793380737305, "learning_rate": 8.948521358159913e-05, "loss": 1.097, "step": 198 }, { "epoch": 6.896551724137931, "grad_norm": 6.5601806640625, "learning_rate": 8.926615553121578e-05, "loss": 0.9768, "step": 200 }, { "epoch": 6.9655172413793105, "grad_norm": 6.294341564178467, "learning_rate": 8.904709748083242e-05, "loss": 1.2465, "step": 202 }, { "epoch": 7.0, "eval_accuracy": 0.49242424242424243, "eval_f1_macro": 0.38536093384329645, "eval_f1_micro": 0.49242424242424243, "eval_f1_weighted": 0.4392739197478552, "eval_loss": 1.4968072175979614, "eval_precision_macro": 0.5611372180451127, "eval_precision_micro": 0.49242424242424243, "eval_precision_weighted": 0.5975304027113237, "eval_recall_macro": 0.4107180650037793, "eval_recall_micro": 0.49242424242424243, "eval_recall_weighted": 0.49242424242424243, "eval_runtime": 2.2083, "eval_samples_per_second": 59.774, "eval_steps_per_second": 7.698, "step": 203 }, { "epoch": 7.0344827586206895, "grad_norm": 6.664538383483887, "learning_rate": 8.882803943044907e-05, "loss": 1.0662, "step": 204 }, { "epoch": 7.103448275862069, "grad_norm": 8.425392150878906, "learning_rate": 8.860898138006573e-05, "loss": 1.034, "step": 206 }, { "epoch": 7.172413793103448, "grad_norm": 8.941866874694824, "learning_rate": 8.838992332968236e-05, "loss": 1.3566, "step": 208 }, { "epoch": 7.241379310344827, "grad_norm": 7.900031089782715, "learning_rate": 8.817086527929902e-05, "loss": 1.3013, "step": 210 }, { "epoch": 7.310344827586207, "grad_norm": 7.721550464630127, "learning_rate": 8.795180722891567e-05, "loss": 1.1345, "step": 212 }, { "epoch": 7.379310344827586, "grad_norm": 6.119128704071045, "learning_rate": 8.773274917853231e-05, "loss": 0.7996, "step": 214 }, { "epoch": 7.448275862068965, "grad_norm": 6.922367095947266, "learning_rate": 8.751369112814896e-05, "loss": 1.1724, "step": 216 }, { "epoch": 7.517241379310345, "grad_norm": 7.787768840789795, "learning_rate": 8.72946330777656e-05, "loss": 1.0874, "step": 218 }, { "epoch": 7.586206896551724, "grad_norm": 5.789196014404297, "learning_rate": 8.707557502738227e-05, "loss": 0.7744, "step": 220 }, { "epoch": 7.655172413793103, "grad_norm": 8.260876655578613, "learning_rate": 8.685651697699891e-05, "loss": 0.9799, "step": 222 }, { "epoch": 7.724137931034483, "grad_norm": 5.3789520263671875, "learning_rate": 8.663745892661556e-05, "loss": 0.8668, "step": 224 }, { "epoch": 7.793103448275862, "grad_norm": 10.147786140441895, "learning_rate": 8.64184008762322e-05, "loss": 1.1608, "step": 226 }, { "epoch": 7.862068965517241, "grad_norm": 5.489473342895508, "learning_rate": 8.619934282584885e-05, "loss": 0.8116, "step": 228 }, { "epoch": 7.931034482758621, "grad_norm": 7.813507080078125, "learning_rate": 8.59802847754655e-05, "loss": 1.1165, "step": 230 }, { "epoch": 8.0, "grad_norm": 9.428513526916504, "learning_rate": 8.576122672508216e-05, "loss": 1.2531, "step": 232 }, { "epoch": 8.0, "eval_accuracy": 0.5, "eval_f1_macro": 0.4380420530832049, "eval_f1_micro": 0.5, "eval_f1_weighted": 0.48411055093350336, "eval_loss": 1.4662878513336182, "eval_precision_macro": 0.46228529523343914, "eval_precision_micro": 0.5, "eval_precision_weighted": 0.5301592857204586, "eval_recall_macro": 0.46928949357520783, "eval_recall_micro": 0.5, "eval_recall_weighted": 0.5, "eval_runtime": 2.2486, "eval_samples_per_second": 58.704, "eval_steps_per_second": 7.56, "step": 232 }, { "epoch": 8.068965517241379, "grad_norm": 8.723676681518555, "learning_rate": 8.55421686746988e-05, "loss": 0.7241, "step": 234 }, { "epoch": 8.137931034482758, "grad_norm": 5.1509904861450195, "learning_rate": 8.532311062431545e-05, "loss": 1.1178, "step": 236 }, { "epoch": 8.206896551724139, "grad_norm": 9.173816680908203, "learning_rate": 8.51040525739321e-05, "loss": 1.1048, "step": 238 }, { "epoch": 8.275862068965518, "grad_norm": 5.16646146774292, "learning_rate": 8.488499452354874e-05, "loss": 0.8407, "step": 240 }, { "epoch": 8.344827586206897, "grad_norm": 7.476856708526611, "learning_rate": 8.46659364731654e-05, "loss": 0.7104, "step": 242 }, { "epoch": 8.413793103448276, "grad_norm": 4.857934474945068, "learning_rate": 8.444687842278203e-05, "loss": 0.8153, "step": 244 }, { "epoch": 8.482758620689655, "grad_norm": 4.849685192108154, "learning_rate": 8.42278203723987e-05, "loss": 0.7941, "step": 246 }, { "epoch": 8.551724137931034, "grad_norm": 6.878391265869141, "learning_rate": 8.400876232201533e-05, "loss": 0.8408, "step": 248 }, { "epoch": 8.620689655172415, "grad_norm": 9.568788528442383, "learning_rate": 8.378970427163199e-05, "loss": 1.1011, "step": 250 }, { "epoch": 8.689655172413794, "grad_norm": 6.0624284744262695, "learning_rate": 8.357064622124863e-05, "loss": 0.6055, "step": 252 }, { "epoch": 8.758620689655173, "grad_norm": 8.931193351745605, "learning_rate": 8.335158817086528e-05, "loss": 1.1554, "step": 254 }, { "epoch": 8.827586206896552, "grad_norm": 9.992157936096191, "learning_rate": 8.313253012048194e-05, "loss": 0.9706, "step": 256 }, { "epoch": 8.89655172413793, "grad_norm": 7.536012649536133, "learning_rate": 8.291347207009858e-05, "loss": 0.9864, "step": 258 }, { "epoch": 8.96551724137931, "grad_norm": 5.046841144561768, "learning_rate": 8.269441401971523e-05, "loss": 0.5318, "step": 260 }, { "epoch": 9.0, "eval_accuracy": 0.5909090909090909, "eval_f1_macro": 0.4938130613497124, "eval_f1_micro": 0.5909090909090909, "eval_f1_weighted": 0.564608679657, "eval_loss": 1.1161140203475952, "eval_precision_macro": 0.48919183057838517, "eval_precision_micro": 0.5909090909090909, "eval_precision_weighted": 0.5594657793187205, "eval_recall_macro": 0.5175661375661376, "eval_recall_micro": 0.5909090909090909, "eval_recall_weighted": 0.5909090909090909, "eval_runtime": 2.2385, "eval_samples_per_second": 58.969, "eval_steps_per_second": 7.594, "step": 261 }, { "epoch": 9.03448275862069, "grad_norm": 8.074467658996582, "learning_rate": 8.247535596933188e-05, "loss": 0.9298, "step": 262 }, { "epoch": 9.10344827586207, "grad_norm": 5.22785758972168, "learning_rate": 8.225629791894852e-05, "loss": 0.7831, "step": 264 }, { "epoch": 9.172413793103448, "grad_norm": 9.326375007629395, "learning_rate": 8.203723986856517e-05, "loss": 0.8289, "step": 266 }, { "epoch": 9.241379310344827, "grad_norm": 5.424740791320801, "learning_rate": 8.181818181818183e-05, "loss": 0.5332, "step": 268 }, { "epoch": 9.310344827586206, "grad_norm": 8.164321899414062, "learning_rate": 8.159912376779846e-05, "loss": 0.7783, "step": 270 }, { "epoch": 9.379310344827585, "grad_norm": 7.742315769195557, "learning_rate": 8.138006571741512e-05, "loss": 0.7159, "step": 272 }, { "epoch": 9.448275862068966, "grad_norm": 6.30488920211792, "learning_rate": 8.116100766703177e-05, "loss": 0.9866, "step": 274 }, { "epoch": 9.517241379310345, "grad_norm": 7.696253776550293, "learning_rate": 8.094194961664841e-05, "loss": 0.521, "step": 276 }, { "epoch": 9.586206896551724, "grad_norm": 4.019304275512695, "learning_rate": 8.072289156626507e-05, "loss": 0.4281, "step": 278 }, { "epoch": 9.655172413793103, "grad_norm": 4.379205703735352, "learning_rate": 8.05038335158817e-05, "loss": 0.3542, "step": 280 }, { "epoch": 9.724137931034482, "grad_norm": 7.670277118682861, "learning_rate": 8.028477546549837e-05, "loss": 0.9465, "step": 282 }, { "epoch": 9.793103448275861, "grad_norm": 8.019712448120117, "learning_rate": 8.0065717415115e-05, "loss": 1.087, "step": 284 }, { "epoch": 9.862068965517242, "grad_norm": 8.645779609680176, "learning_rate": 7.984665936473166e-05, "loss": 0.88, "step": 286 }, { "epoch": 9.931034482758621, "grad_norm": 5.542499542236328, "learning_rate": 7.96276013143483e-05, "loss": 0.7297, "step": 288 }, { "epoch": 10.0, "grad_norm": 5.367166042327881, "learning_rate": 7.940854326396495e-05, "loss": 0.6824, "step": 290 }, { "epoch": 10.0, "eval_accuracy": 0.5909090909090909, "eval_f1_macro": 0.48022039225046736, "eval_f1_micro": 0.5909090909090909, "eval_f1_weighted": 0.5515227462595883, "eval_loss": 1.1811466217041016, "eval_precision_macro": 0.4813612313612314, "eval_precision_micro": 0.5909090909090909, "eval_precision_weighted": 0.549845041322314, "eval_recall_macro": 0.5147770219198791, "eval_recall_micro": 0.5909090909090909, "eval_recall_weighted": 0.5909090909090909, "eval_runtime": 2.1897, "eval_samples_per_second": 60.283, "eval_steps_per_second": 7.764, "step": 290 }, { "epoch": 10.068965517241379, "grad_norm": 8.408239364624023, "learning_rate": 7.918948521358161e-05, "loss": 0.5506, "step": 292 }, { "epoch": 10.137931034482758, "grad_norm": 4.473087787628174, "learning_rate": 7.897042716319824e-05, "loss": 0.4722, "step": 294 }, { "epoch": 10.206896551724139, "grad_norm": 5.755477428436279, "learning_rate": 7.87513691128149e-05, "loss": 0.6369, "step": 296 }, { "epoch": 10.275862068965518, "grad_norm": 9.0516939163208, "learning_rate": 7.853231106243155e-05, "loss": 0.6167, "step": 298 }, { "epoch": 10.344827586206897, "grad_norm": 5.995102405548096, "learning_rate": 7.83132530120482e-05, "loss": 0.6051, "step": 300 }, { "epoch": 10.413793103448276, "grad_norm": 7.3448805809021, "learning_rate": 7.809419496166484e-05, "loss": 0.5321, "step": 302 }, { "epoch": 10.482758620689655, "grad_norm": 8.903775215148926, "learning_rate": 7.78751369112815e-05, "loss": 0.6208, "step": 304 }, { "epoch": 10.551724137931034, "grad_norm": 9.240314483642578, "learning_rate": 7.765607886089813e-05, "loss": 0.9838, "step": 306 }, { "epoch": 10.620689655172415, "grad_norm": 10.112192153930664, "learning_rate": 7.74370208105148e-05, "loss": 0.905, "step": 308 }, { "epoch": 10.689655172413794, "grad_norm": 9.252533912658691, "learning_rate": 7.721796276013144e-05, "loss": 1.002, "step": 310 }, { "epoch": 10.758620689655173, "grad_norm": 7.741162300109863, "learning_rate": 7.699890470974809e-05, "loss": 1.0869, "step": 312 }, { "epoch": 10.827586206896552, "grad_norm": 9.742755889892578, "learning_rate": 7.677984665936475e-05, "loss": 0.5421, "step": 314 }, { "epoch": 10.89655172413793, "grad_norm": 8.84914493560791, "learning_rate": 7.656078860898138e-05, "loss": 0.523, "step": 316 }, { "epoch": 10.96551724137931, "grad_norm": 7.173616409301758, "learning_rate": 7.634173055859804e-05, "loss": 0.6324, "step": 318 }, { "epoch": 11.0, "eval_accuracy": 0.5757575757575758, "eval_f1_macro": 0.4926929392446634, "eval_f1_micro": 0.5757575757575758, "eval_f1_weighted": 0.5506095437129921, "eval_loss": 1.2358123064041138, "eval_precision_macro": 0.5015354104024055, "eval_precision_micro": 0.5757575757575758, "eval_precision_weighted": 0.5689625015643824, "eval_recall_macro": 0.5226228269085412, "eval_recall_micro": 0.5757575757575758, "eval_recall_weighted": 0.5757575757575758, "eval_runtime": 2.2118, "eval_samples_per_second": 59.681, "eval_steps_per_second": 7.686, "step": 319 }, { "epoch": 11.03448275862069, "grad_norm": 8.598217964172363, "learning_rate": 7.612267250821467e-05, "loss": 0.703, "step": 320 }, { "epoch": 11.10344827586207, "grad_norm": 4.415513038635254, "learning_rate": 7.590361445783133e-05, "loss": 0.5108, "step": 322 }, { "epoch": 11.172413793103448, "grad_norm": 6.3496479988098145, "learning_rate": 7.568455640744798e-05, "loss": 0.673, "step": 324 }, { "epoch": 11.241379310344827, "grad_norm": 5.767419338226318, "learning_rate": 7.546549835706462e-05, "loss": 0.3173, "step": 326 }, { "epoch": 11.310344827586206, "grad_norm": 5.924855709075928, "learning_rate": 7.524644030668127e-05, "loss": 0.4236, "step": 328 }, { "epoch": 11.379310344827585, "grad_norm": 6.807033538818359, "learning_rate": 7.502738225629792e-05, "loss": 0.7664, "step": 330 }, { "epoch": 11.448275862068966, "grad_norm": 11.941972732543945, "learning_rate": 7.480832420591458e-05, "loss": 0.8078, "step": 332 }, { "epoch": 11.517241379310345, "grad_norm": 5.140421390533447, "learning_rate": 7.458926615553122e-05, "loss": 0.4366, "step": 334 }, { "epoch": 11.586206896551724, "grad_norm": 4.931862831115723, "learning_rate": 7.437020810514787e-05, "loss": 0.552, "step": 336 }, { "epoch": 11.655172413793103, "grad_norm": 6.9343647956848145, "learning_rate": 7.415115005476451e-05, "loss": 0.5526, "step": 338 }, { "epoch": 11.724137931034482, "grad_norm": 4.292028903961182, "learning_rate": 7.393209200438116e-05, "loss": 0.5248, "step": 340 }, { "epoch": 11.793103448275861, "grad_norm": 6.613484859466553, "learning_rate": 7.371303395399781e-05, "loss": 0.7224, "step": 342 }, { "epoch": 11.862068965517242, "grad_norm": 5.9594502449035645, "learning_rate": 7.349397590361447e-05, "loss": 0.4696, "step": 344 }, { "epoch": 11.931034482758621, "grad_norm": 5.859204292297363, "learning_rate": 7.327491785323111e-05, "loss": 0.3449, "step": 346 }, { "epoch": 12.0, "grad_norm": 5.6179046630859375, "learning_rate": 7.305585980284776e-05, "loss": 0.4145, "step": 348 }, { "epoch": 12.0, "eval_accuracy": 0.6742424242424242, "eval_f1_macro": 0.5845780796908616, "eval_f1_micro": 0.6742424242424242, "eval_f1_weighted": 0.6643483452693979, "eval_loss": 1.160757064819336, "eval_precision_macro": 0.5822360668405294, "eval_precision_micro": 0.6742424242424242, "eval_precision_weighted": 0.6680857766304014, "eval_recall_macro": 0.6004686318972033, "eval_recall_micro": 0.6742424242424242, "eval_recall_weighted": 0.6742424242424242, "eval_runtime": 2.2477, "eval_samples_per_second": 58.726, "eval_steps_per_second": 7.563, "step": 348 }, { "epoch": 12.068965517241379, "grad_norm": 8.443059921264648, "learning_rate": 7.28368017524644e-05, "loss": 0.5175, "step": 350 }, { "epoch": 12.137931034482758, "grad_norm": 9.789414405822754, "learning_rate": 7.261774370208105e-05, "loss": 0.8298, "step": 352 }, { "epoch": 12.206896551724139, "grad_norm": 4.579267978668213, "learning_rate": 7.239868565169771e-05, "loss": 0.372, "step": 354 }, { "epoch": 12.275862068965518, "grad_norm": 7.095308303833008, "learning_rate": 7.217962760131434e-05, "loss": 0.4208, "step": 356 }, { "epoch": 12.344827586206897, "grad_norm": 2.6553964614868164, "learning_rate": 7.1960569550931e-05, "loss": 0.2939, "step": 358 }, { "epoch": 12.413793103448276, "grad_norm": 6.320093631744385, "learning_rate": 7.174151150054765e-05, "loss": 0.556, "step": 360 }, { "epoch": 12.482758620689655, "grad_norm": 8.005858421325684, "learning_rate": 7.15224534501643e-05, "loss": 0.4923, "step": 362 }, { "epoch": 12.551724137931034, "grad_norm": 4.577536106109619, "learning_rate": 7.130339539978094e-05, "loss": 0.2823, "step": 364 }, { "epoch": 12.620689655172415, "grad_norm": 7.0326008796691895, "learning_rate": 7.108433734939759e-05, "loss": 0.3633, "step": 366 }, { "epoch": 12.689655172413794, "grad_norm": 8.876154899597168, "learning_rate": 7.086527929901425e-05, "loss": 0.5113, "step": 368 }, { "epoch": 12.758620689655173, "grad_norm": 9.319496154785156, "learning_rate": 7.06462212486309e-05, "loss": 0.9363, "step": 370 }, { "epoch": 12.827586206896552, "grad_norm": 4.129659175872803, "learning_rate": 7.042716319824754e-05, "loss": 0.43, "step": 372 }, { "epoch": 12.89655172413793, "grad_norm": 8.008423805236816, "learning_rate": 7.020810514786419e-05, "loss": 0.4344, "step": 374 }, { "epoch": 12.96551724137931, "grad_norm": 6.894300937652588, "learning_rate": 6.998904709748083e-05, "loss": 0.4805, "step": 376 }, { "epoch": 13.0, "eval_accuracy": 0.5757575757575758, "eval_f1_macro": 0.527595781401402, "eval_f1_micro": 0.5757575757575758, "eval_f1_weighted": 0.5689098612906363, "eval_loss": 1.319955587387085, "eval_precision_macro": 0.5767229968910641, "eval_precision_micro": 0.5757575757575758, "eval_precision_weighted": 0.6137522608110844, "eval_recall_macro": 0.5268707482993198, "eval_recall_micro": 0.5757575757575758, "eval_recall_weighted": 0.5757575757575758, "eval_runtime": 2.206, "eval_samples_per_second": 59.836, "eval_steps_per_second": 7.706, "step": 377 }, { "epoch": 13.03448275862069, "grad_norm": 7.305329322814941, "learning_rate": 6.976998904709748e-05, "loss": 0.4888, "step": 378 }, { "epoch": 13.10344827586207, "grad_norm": 7.0207624435424805, "learning_rate": 6.955093099671414e-05, "loss": 0.4489, "step": 380 }, { "epoch": 13.172413793103448, "grad_norm": 3.134613513946533, "learning_rate": 6.933187294633077e-05, "loss": 0.2369, "step": 382 }, { "epoch": 13.241379310344827, "grad_norm": 4.9292097091674805, "learning_rate": 6.911281489594743e-05, "loss": 0.4839, "step": 384 }, { "epoch": 13.310344827586206, "grad_norm": 2.2589919567108154, "learning_rate": 6.889375684556408e-05, "loss": 0.222, "step": 386 }, { "epoch": 13.379310344827585, "grad_norm": 4.867913246154785, "learning_rate": 6.867469879518072e-05, "loss": 0.2502, "step": 388 }, { "epoch": 13.448275862068966, "grad_norm": 3.433598756790161, "learning_rate": 6.845564074479738e-05, "loss": 0.2846, "step": 390 }, { "epoch": 13.517241379310345, "grad_norm": 4.033895492553711, "learning_rate": 6.823658269441402e-05, "loss": 0.2156, "step": 392 }, { "epoch": 13.586206896551724, "grad_norm": 6.298670768737793, "learning_rate": 6.801752464403068e-05, "loss": 0.2056, "step": 394 }, { "epoch": 13.655172413793103, "grad_norm": 5.606608867645264, "learning_rate": 6.779846659364732e-05, "loss": 0.5755, "step": 396 }, { "epoch": 13.724137931034482, "grad_norm": 4.751099109649658, "learning_rate": 6.757940854326397e-05, "loss": 0.3081, "step": 398 }, { "epoch": 13.793103448275861, "grad_norm": 6.851717472076416, "learning_rate": 6.736035049288061e-05, "loss": 0.512, "step": 400 }, { "epoch": 13.862068965517242, "grad_norm": 6.983868598937988, "learning_rate": 6.714129244249726e-05, "loss": 0.5849, "step": 402 }, { "epoch": 13.931034482758621, "grad_norm": 9.133752822875977, "learning_rate": 6.692223439211392e-05, "loss": 0.7721, "step": 404 }, { "epoch": 14.0, "grad_norm": 9.328068733215332, "learning_rate": 6.670317634173057e-05, "loss": 0.6232, "step": 406 }, { "epoch": 14.0, "eval_accuracy": 0.5757575757575758, "eval_f1_macro": 0.4789696951253122, "eval_f1_micro": 0.5757575757575758, "eval_f1_weighted": 0.5516716249691459, "eval_loss": 1.319008231163025, "eval_precision_macro": 0.502492644655116, "eval_precision_micro": 0.5757575757575758, "eval_precision_weighted": 0.5734135715543037, "eval_recall_macro": 0.5006122448979592, "eval_recall_micro": 0.5757575757575758, "eval_recall_weighted": 0.5757575757575758, "eval_runtime": 2.1818, "eval_samples_per_second": 60.499, "eval_steps_per_second": 7.792, "step": 406 }, { "epoch": 14.068965517241379, "grad_norm": 3.8055107593536377, "learning_rate": 6.648411829134721e-05, "loss": 0.2276, "step": 408 }, { "epoch": 14.137931034482758, "grad_norm": 3.0821352005004883, "learning_rate": 6.626506024096386e-05, "loss": 0.2451, "step": 410 }, { "epoch": 14.206896551724139, "grad_norm": 7.605597972869873, "learning_rate": 6.60460021905805e-05, "loss": 0.3332, "step": 412 }, { "epoch": 14.275862068965518, "grad_norm": 7.357143402099609, "learning_rate": 6.582694414019715e-05, "loss": 0.5178, "step": 414 }, { "epoch": 14.344827586206897, "grad_norm": 2.5872600078582764, "learning_rate": 6.560788608981381e-05, "loss": 0.1075, "step": 416 }, { "epoch": 14.413793103448276, "grad_norm": 5.9071879386901855, "learning_rate": 6.538882803943044e-05, "loss": 0.2012, "step": 418 }, { "epoch": 14.482758620689655, "grad_norm": 6.262528419494629, "learning_rate": 6.51697699890471e-05, "loss": 0.219, "step": 420 }, { "epoch": 14.551724137931034, "grad_norm": 4.72699499130249, "learning_rate": 6.495071193866375e-05, "loss": 0.4705, "step": 422 }, { "epoch": 14.620689655172415, "grad_norm": 2.8275880813598633, "learning_rate": 6.47316538882804e-05, "loss": 0.259, "step": 424 }, { "epoch": 14.689655172413794, "grad_norm": 4.1800312995910645, "learning_rate": 6.451259583789706e-05, "loss": 0.2478, "step": 426 }, { "epoch": 14.758620689655173, "grad_norm": 6.540757179260254, "learning_rate": 6.429353778751369e-05, "loss": 0.3005, "step": 428 }, { "epoch": 14.827586206896552, "grad_norm": 3.0680577754974365, "learning_rate": 6.407447973713035e-05, "loss": 0.3351, "step": 430 }, { "epoch": 14.89655172413793, "grad_norm": 12.751289367675781, "learning_rate": 6.385542168674698e-05, "loss": 0.4988, "step": 432 }, { "epoch": 14.96551724137931, "grad_norm": 7.108068466186523, "learning_rate": 6.363636363636364e-05, "loss": 0.3475, "step": 434 }, { "epoch": 15.0, "eval_accuracy": 0.696969696969697, "eval_f1_macro": 0.630347810119719, "eval_f1_micro": 0.696969696969697, "eval_f1_weighted": 0.6894476798984056, "eval_loss": 1.185251235961914, "eval_precision_macro": 0.6716845878136201, "eval_precision_micro": 0.696969696969697, "eval_precision_weighted": 0.7087732160312806, "eval_recall_macro": 0.6311791383219955, "eval_recall_micro": 0.696969696969697, "eval_recall_weighted": 0.696969696969697, "eval_runtime": 2.1666, "eval_samples_per_second": 60.925, "eval_steps_per_second": 7.846, "step": 435 }, { "epoch": 15.03448275862069, "grad_norm": 6.197813510894775, "learning_rate": 6.341730558598029e-05, "loss": 0.162, "step": 436 }, { "epoch": 15.10344827586207, "grad_norm": 3.9454376697540283, "learning_rate": 6.319824753559693e-05, "loss": 0.3396, "step": 438 }, { "epoch": 15.172413793103448, "grad_norm": 8.980201721191406, "learning_rate": 6.297918948521358e-05, "loss": 0.2316, "step": 440 }, { "epoch": 15.241379310344827, "grad_norm": 4.091892719268799, "learning_rate": 6.276013143483024e-05, "loss": 0.277, "step": 442 }, { "epoch": 15.310344827586206, "grad_norm": 7.498462200164795, "learning_rate": 6.254107338444689e-05, "loss": 0.3305, "step": 444 }, { "epoch": 15.379310344827585, "grad_norm": 6.023470401763916, "learning_rate": 6.232201533406353e-05, "loss": 0.1605, "step": 446 }, { "epoch": 15.448275862068966, "grad_norm": 4.88850212097168, "learning_rate": 6.210295728368018e-05, "loss": 0.1803, "step": 448 }, { "epoch": 15.517241379310345, "grad_norm": 2.798743724822998, "learning_rate": 6.188389923329682e-05, "loss": 0.1585, "step": 450 }, { "epoch": 15.586206896551724, "grad_norm": 6.272281646728516, "learning_rate": 6.166484118291348e-05, "loss": 0.1494, "step": 452 }, { "epoch": 15.655172413793103, "grad_norm": 7.970227241516113, "learning_rate": 6.144578313253012e-05, "loss": 0.3169, "step": 454 }, { "epoch": 15.724137931034482, "grad_norm": 6.0759406089782715, "learning_rate": 6.122672508214678e-05, "loss": 0.3508, "step": 456 }, { "epoch": 15.793103448275861, "grad_norm": 4.981871128082275, "learning_rate": 6.1007667031763415e-05, "loss": 0.1469, "step": 458 }, { "epoch": 15.862068965517242, "grad_norm": 8.259228706359863, "learning_rate": 6.078860898138007e-05, "loss": 0.3808, "step": 460 }, { "epoch": 15.931034482758621, "grad_norm": 5.493587017059326, "learning_rate": 6.056955093099672e-05, "loss": 0.218, "step": 462 }, { "epoch": 16.0, "grad_norm": 5.321525573730469, "learning_rate": 6.035049288061336e-05, "loss": 0.1956, "step": 464 }, { "epoch": 16.0, "eval_accuracy": 0.5151515151515151, "eval_f1_macro": 0.4323315041705846, "eval_f1_micro": 0.5151515151515151, "eval_f1_weighted": 0.4974051721657085, "eval_loss": 1.569486141204834, "eval_precision_macro": 0.47551801581876774, "eval_precision_micro": 0.5151515151515151, "eval_precision_weighted": 0.5333931937281219, "eval_recall_macro": 0.4357898715041572, "eval_recall_micro": 0.5151515151515151, "eval_recall_weighted": 0.5151515151515151, "eval_runtime": 2.1474, "eval_samples_per_second": 61.468, "eval_steps_per_second": 7.916, "step": 464 }, { "epoch": 16.06896551724138, "grad_norm": 7.035810947418213, "learning_rate": 6.0131434830230014e-05, "loss": 0.4266, "step": 466 }, { "epoch": 16.137931034482758, "grad_norm": 3.2283682823181152, "learning_rate": 5.991237677984666e-05, "loss": 0.2042, "step": 468 }, { "epoch": 16.20689655172414, "grad_norm": 3.6779544353485107, "learning_rate": 5.969331872946331e-05, "loss": 0.1058, "step": 470 }, { "epoch": 16.275862068965516, "grad_norm": 1.8620399236679077, "learning_rate": 5.9474260679079966e-05, "loss": 0.1272, "step": 472 }, { "epoch": 16.344827586206897, "grad_norm": 2.111825942993164, "learning_rate": 5.9255202628696605e-05, "loss": 0.1298, "step": 474 }, { "epoch": 16.413793103448278, "grad_norm": 6.74976110458374, "learning_rate": 5.903614457831326e-05, "loss": 0.3262, "step": 476 }, { "epoch": 16.482758620689655, "grad_norm": 5.992347240447998, "learning_rate": 5.88170865279299e-05, "loss": 0.3271, "step": 478 }, { "epoch": 16.551724137931036, "grad_norm": 2.5913877487182617, "learning_rate": 5.859802847754655e-05, "loss": 0.1947, "step": 480 }, { "epoch": 16.620689655172413, "grad_norm": 7.1807403564453125, "learning_rate": 5.8378970427163204e-05, "loss": 0.1701, "step": 482 }, { "epoch": 16.689655172413794, "grad_norm": 5.666691303253174, "learning_rate": 5.815991237677984e-05, "loss": 0.2304, "step": 484 }, { "epoch": 16.75862068965517, "grad_norm": 6.325366973876953, "learning_rate": 5.7940854326396496e-05, "loss": 0.3751, "step": 486 }, { "epoch": 16.82758620689655, "grad_norm": 4.312324523925781, "learning_rate": 5.772179627601315e-05, "loss": 0.16, "step": 488 }, { "epoch": 16.896551724137932, "grad_norm": 4.738943576812744, "learning_rate": 5.7502738225629795e-05, "loss": 0.2339, "step": 490 }, { "epoch": 16.96551724137931, "grad_norm": 4.70164155960083, "learning_rate": 5.728368017524645e-05, "loss": 0.1519, "step": 492 }, { "epoch": 17.0, "eval_accuracy": 0.6439393939393939, "eval_f1_macro": 0.5818818031106167, "eval_f1_micro": 0.6439393939393939, "eval_f1_weighted": 0.6317482833372663, "eval_loss": 1.440421223640442, "eval_precision_macro": 0.6438369250139081, "eval_precision_micro": 0.6439393939393939, "eval_precision_weighted": 0.657660361816567, "eval_recall_macro": 0.5705744520030234, "eval_recall_micro": 0.6439393939393939, "eval_recall_weighted": 0.6439393939393939, "eval_runtime": 2.213, "eval_samples_per_second": 59.648, "eval_steps_per_second": 7.682, "step": 493 }, { "epoch": 17.03448275862069, "grad_norm": 5.552275657653809, "learning_rate": 5.706462212486309e-05, "loss": 0.1605, "step": 494 }, { "epoch": 17.103448275862068, "grad_norm": 1.6765620708465576, "learning_rate": 5.684556407447974e-05, "loss": 0.057, "step": 496 }, { "epoch": 17.17241379310345, "grad_norm": 2.917738437652588, "learning_rate": 5.6626506024096394e-05, "loss": 0.0525, "step": 498 }, { "epoch": 17.24137931034483, "grad_norm": 4.772071838378906, "learning_rate": 5.640744797371303e-05, "loss": 0.0594, "step": 500 }, { "epoch": 17.310344827586206, "grad_norm": 5.168885231018066, "learning_rate": 5.6188389923329686e-05, "loss": 0.1819, "step": 502 }, { "epoch": 17.379310344827587, "grad_norm": 6.547173976898193, "learning_rate": 5.596933187294633e-05, "loss": 0.1747, "step": 504 }, { "epoch": 17.448275862068964, "grad_norm": 2.393808364868164, "learning_rate": 5.575027382256298e-05, "loss": 0.1172, "step": 506 }, { "epoch": 17.517241379310345, "grad_norm": 6.183032512664795, "learning_rate": 5.553121577217963e-05, "loss": 0.1184, "step": 508 }, { "epoch": 17.586206896551722, "grad_norm": 12.154343605041504, "learning_rate": 5.531215772179628e-05, "loss": 0.2882, "step": 510 }, { "epoch": 17.655172413793103, "grad_norm": 7.199910640716553, "learning_rate": 5.509309967141293e-05, "loss": 0.3635, "step": 512 }, { "epoch": 17.724137931034484, "grad_norm": 1.0473498106002808, "learning_rate": 5.487404162102957e-05, "loss": 0.0773, "step": 514 }, { "epoch": 17.79310344827586, "grad_norm": 1.043884038925171, "learning_rate": 5.465498357064622e-05, "loss": 0.0349, "step": 516 }, { "epoch": 17.862068965517242, "grad_norm": 0.591170072555542, "learning_rate": 5.4435925520262876e-05, "loss": 0.1192, "step": 518 }, { "epoch": 17.93103448275862, "grad_norm": 4.3712477684021, "learning_rate": 5.4216867469879516e-05, "loss": 0.1702, "step": 520 }, { "epoch": 18.0, "grad_norm": 5.553340911865234, "learning_rate": 5.399780941949617e-05, "loss": 0.1031, "step": 522 }, { "epoch": 18.0, "eval_accuracy": 0.6136363636363636, "eval_f1_macro": 0.5370450788240546, "eval_f1_micro": 0.6136363636363636, "eval_f1_weighted": 0.6040564132330857, "eval_loss": 1.4877225160598755, "eval_precision_macro": 0.5351284054291573, "eval_precision_micro": 0.6136363636363636, "eval_precision_weighted": 0.5975074566581743, "eval_recall_macro": 0.5421919879062737, "eval_recall_micro": 0.6136363636363636, "eval_recall_weighted": 0.6136363636363636, "eval_runtime": 2.2122, "eval_samples_per_second": 59.668, "eval_steps_per_second": 7.685, "step": 522 }, { "epoch": 18.06896551724138, "grad_norm": 2.231707811355591, "learning_rate": 5.3778751369112815e-05, "loss": 0.0562, "step": 524 }, { "epoch": 18.137931034482758, "grad_norm": 1.9797624349594116, "learning_rate": 5.355969331872947e-05, "loss": 0.1513, "step": 526 }, { "epoch": 18.20689655172414, "grad_norm": 4.362570285797119, "learning_rate": 5.334063526834612e-05, "loss": 0.1637, "step": 528 }, { "epoch": 18.275862068965516, "grad_norm": 5.458191871643066, "learning_rate": 5.312157721796276e-05, "loss": 0.1761, "step": 530 }, { "epoch": 18.344827586206897, "grad_norm": 12.664368629455566, "learning_rate": 5.290251916757941e-05, "loss": 0.2171, "step": 532 }, { "epoch": 18.413793103448278, "grad_norm": 4.849126815795898, "learning_rate": 5.2683461117196066e-05, "loss": 0.1213, "step": 534 }, { "epoch": 18.482758620689655, "grad_norm": 1.9513343572616577, "learning_rate": 5.2464403066812705e-05, "loss": 0.0647, "step": 536 }, { "epoch": 18.551724137931036, "grad_norm": 5.6937642097473145, "learning_rate": 5.224534501642936e-05, "loss": 0.1924, "step": 538 }, { "epoch": 18.620689655172413, "grad_norm": 6.157546043395996, "learning_rate": 5.2026286966046e-05, "loss": 0.1621, "step": 540 }, { "epoch": 18.689655172413794, "grad_norm": 3.375688076019287, "learning_rate": 5.180722891566265e-05, "loss": 0.0725, "step": 542 }, { "epoch": 18.75862068965517, "grad_norm": 1.283026099205017, "learning_rate": 5.1588170865279304e-05, "loss": 0.1705, "step": 544 }, { "epoch": 18.82758620689655, "grad_norm": 6.894308090209961, "learning_rate": 5.136911281489595e-05, "loss": 0.2579, "step": 546 }, { "epoch": 18.896551724137932, "grad_norm": 7.978748321533203, "learning_rate": 5.11500547645126e-05, "loss": 0.1522, "step": 548 }, { "epoch": 18.96551724137931, "grad_norm": 3.8156979084014893, "learning_rate": 5.093099671412924e-05, "loss": 0.0615, "step": 550 }, { "epoch": 19.0, "eval_accuracy": 0.6060606060606061, "eval_f1_macro": 0.6012825511436246, "eval_f1_micro": 0.6060606060606061, "eval_f1_weighted": 0.6106316401527286, "eval_loss": 1.4801414012908936, "eval_precision_macro": 0.6475544200111578, "eval_precision_micro": 0.6060606060606061, "eval_precision_weighted": 0.6581095440160177, "eval_recall_macro": 0.5951398337112623, "eval_recall_micro": 0.6060606060606061, "eval_recall_weighted": 0.6060606060606061, "eval_runtime": 2.2056, "eval_samples_per_second": 59.847, "eval_steps_per_second": 7.708, "step": 551 }, { "epoch": 19.03448275862069, "grad_norm": 1.505656361579895, "learning_rate": 5.0711938663745895e-05, "loss": 0.0365, "step": 552 }, { "epoch": 19.103448275862068, "grad_norm": 1.1916121244430542, "learning_rate": 5.049288061336255e-05, "loss": 0.099, "step": 554 }, { "epoch": 19.17241379310345, "grad_norm": 3.222411632537842, "learning_rate": 5.027382256297919e-05, "loss": 0.0374, "step": 556 }, { "epoch": 19.24137931034483, "grad_norm": 4.7354536056518555, "learning_rate": 5.005476451259584e-05, "loss": 0.0901, "step": 558 }, { "epoch": 19.310344827586206, "grad_norm": 0.37637993693351746, "learning_rate": 4.983570646221249e-05, "loss": 0.0087, "step": 560 }, { "epoch": 19.379310344827587, "grad_norm": 1.4076848030090332, "learning_rate": 4.961664841182913e-05, "loss": 0.2459, "step": 562 }, { "epoch": 19.448275862068964, "grad_norm": 7.020608901977539, "learning_rate": 4.9397590361445786e-05, "loss": 0.1859, "step": 564 }, { "epoch": 19.517241379310345, "grad_norm": 1.6740795373916626, "learning_rate": 4.917853231106244e-05, "loss": 0.0443, "step": 566 }, { "epoch": 19.586206896551722, "grad_norm": 1.2484346628189087, "learning_rate": 4.8959474260679085e-05, "loss": 0.026, "step": 568 }, { "epoch": 19.655172413793103, "grad_norm": 0.44467589259147644, "learning_rate": 4.874041621029573e-05, "loss": 0.0704, "step": 570 }, { "epoch": 19.724137931034484, "grad_norm": 1.164262056350708, "learning_rate": 4.852135815991238e-05, "loss": 0.0843, "step": 572 }, { "epoch": 19.79310344827586, "grad_norm": 2.4461233615875244, "learning_rate": 4.8302300109529024e-05, "loss": 0.1391, "step": 574 }, { "epoch": 19.862068965517242, "grad_norm": 0.4338299334049225, "learning_rate": 4.808324205914568e-05, "loss": 0.0174, "step": 576 }, { "epoch": 19.93103448275862, "grad_norm": 14.353382110595703, "learning_rate": 4.786418400876232e-05, "loss": 0.0887, "step": 578 }, { "epoch": 20.0, "grad_norm": 0.43623942136764526, "learning_rate": 4.764512595837897e-05, "loss": 0.0249, "step": 580 }, { "epoch": 20.0, "eval_accuracy": 0.5909090909090909, "eval_f1_macro": 0.5197787455591448, "eval_f1_micro": 0.5909090909090909, "eval_f1_weighted": 0.5825078945882032, "eval_loss": 1.6081513166427612, "eval_precision_macro": 0.5148994878087059, "eval_precision_micro": 0.5909090909090909, "eval_precision_weighted": 0.5769802287329502, "eval_recall_macro": 0.5272184429327286, "eval_recall_micro": 0.5909090909090909, "eval_recall_weighted": 0.5909090909090909, "eval_runtime": 2.1745, "eval_samples_per_second": 60.703, "eval_steps_per_second": 7.818, "step": 580 }, { "epoch": 20.06896551724138, "grad_norm": 0.3680793046951294, "learning_rate": 4.742606790799562e-05, "loss": 0.0216, "step": 582 }, { "epoch": 20.137931034482758, "grad_norm": 2.0422375202178955, "learning_rate": 4.7207009857612275e-05, "loss": 0.0177, "step": 584 }, { "epoch": 20.20689655172414, "grad_norm": 6.626030445098877, "learning_rate": 4.698795180722892e-05, "loss": 0.2056, "step": 586 }, { "epoch": 20.275862068965516, "grad_norm": 0.7436681389808655, "learning_rate": 4.676889375684557e-05, "loss": 0.0327, "step": 588 }, { "epoch": 20.344827586206897, "grad_norm": 0.9783719182014465, "learning_rate": 4.6549835706462214e-05, "loss": 0.044, "step": 590 }, { "epoch": 20.413793103448278, "grad_norm": 0.8457818627357483, "learning_rate": 4.633077765607886e-05, "loss": 0.1102, "step": 592 }, { "epoch": 20.482758620689655, "grad_norm": 3.060871124267578, "learning_rate": 4.611171960569551e-05, "loss": 0.0726, "step": 594 }, { "epoch": 20.551724137931036, "grad_norm": 7.481118679046631, "learning_rate": 4.589266155531216e-05, "loss": 0.1447, "step": 596 }, { "epoch": 20.620689655172413, "grad_norm": 8.65415096282959, "learning_rate": 4.5673603504928806e-05, "loss": 0.2099, "step": 598 }, { "epoch": 20.689655172413794, "grad_norm": 3.2042698860168457, "learning_rate": 4.545454545454546e-05, "loss": 0.0497, "step": 600 }, { "epoch": 20.75862068965517, "grad_norm": 0.5125285983085632, "learning_rate": 4.5235487404162105e-05, "loss": 0.069, "step": 602 }, { "epoch": 20.82758620689655, "grad_norm": 0.8691998720169067, "learning_rate": 4.501642935377876e-05, "loss": 0.0354, "step": 604 }, { "epoch": 20.896551724137932, "grad_norm": 10.247215270996094, "learning_rate": 4.4797371303395404e-05, "loss": 0.1541, "step": 606 }, { "epoch": 20.96551724137931, "grad_norm": 11.587034225463867, "learning_rate": 4.457831325301205e-05, "loss": 0.374, "step": 608 }, { "epoch": 21.0, "eval_accuracy": 0.6287878787878788, "eval_f1_macro": 0.6084137522037308, "eval_f1_micro": 0.6287878787878788, "eval_f1_weighted": 0.6185060346144132, "eval_loss": 1.7593897581100464, "eval_precision_macro": 0.6711527035056447, "eval_precision_micro": 0.6287878787878788, "eval_precision_weighted": 0.6679164641063037, "eval_recall_macro": 0.6049433106575963, "eval_recall_micro": 0.6287878787878788, "eval_recall_weighted": 0.6287878787878788, "eval_runtime": 2.1955, "eval_samples_per_second": 60.123, "eval_steps_per_second": 7.743, "step": 609 }, { "epoch": 21.03448275862069, "grad_norm": 1.38335120677948, "learning_rate": 4.4359255202628696e-05, "loss": 0.079, "step": 610 }, { "epoch": 21.103448275862068, "grad_norm": 5.99662446975708, "learning_rate": 4.414019715224535e-05, "loss": 0.0648, "step": 612 }, { "epoch": 21.17241379310345, "grad_norm": 1.0241988897323608, "learning_rate": 4.3921139101861996e-05, "loss": 0.1807, "step": 614 }, { "epoch": 21.24137931034483, "grad_norm": 0.5548591017723083, "learning_rate": 4.370208105147864e-05, "loss": 0.0375, "step": 616 }, { "epoch": 21.310344827586206, "grad_norm": 0.7137009501457214, "learning_rate": 4.348302300109529e-05, "loss": 0.0694, "step": 618 }, { "epoch": 21.379310344827587, "grad_norm": 7.560571193695068, "learning_rate": 4.326396495071194e-05, "loss": 0.2715, "step": 620 }, { "epoch": 21.448275862068964, "grad_norm": 7.067291736602783, "learning_rate": 4.3044906900328594e-05, "loss": 0.1016, "step": 622 }, { "epoch": 21.517241379310345, "grad_norm": 4.622091770172119, "learning_rate": 4.282584884994524e-05, "loss": 0.0621, "step": 624 }, { "epoch": 21.586206896551722, "grad_norm": 5.788636207580566, "learning_rate": 4.2606790799561886e-05, "loss": 0.1775, "step": 626 }, { "epoch": 21.655172413793103, "grad_norm": 3.3069419860839844, "learning_rate": 4.238773274917853e-05, "loss": 0.0586, "step": 628 }, { "epoch": 21.724137931034484, "grad_norm": 0.65139240026474, "learning_rate": 4.2168674698795186e-05, "loss": 0.0672, "step": 630 }, { "epoch": 21.79310344827586, "grad_norm": 2.4793200492858887, "learning_rate": 4.194961664841183e-05, "loss": 0.1428, "step": 632 }, { "epoch": 21.862068965517242, "grad_norm": 2.988377809524536, "learning_rate": 4.173055859802848e-05, "loss": 0.1759, "step": 634 }, { "epoch": 21.93103448275862, "grad_norm": 5.487617015838623, "learning_rate": 4.1511500547645124e-05, "loss": 0.0849, "step": 636 }, { "epoch": 22.0, "grad_norm": 2.8238141536712646, "learning_rate": 4.129244249726178e-05, "loss": 0.025, "step": 638 }, { "epoch": 22.0, "eval_accuracy": 0.6515151515151515, "eval_f1_macro": 0.6445815393183814, "eval_f1_micro": 0.6515151515151515, "eval_f1_weighted": 0.6520235479565623, "eval_loss": 1.4723178148269653, "eval_precision_macro": 0.6542847694633409, "eval_precision_micro": 0.6515151515151515, "eval_precision_weighted": 0.6660361050986052, "eval_recall_macro": 0.6478760393046107, "eval_recall_micro": 0.6515151515151515, "eval_recall_weighted": 0.6515151515151515, "eval_runtime": 2.2232, "eval_samples_per_second": 59.375, "eval_steps_per_second": 7.647, "step": 638 }, { "epoch": 22.06896551724138, "grad_norm": 6.143444061279297, "learning_rate": 4.107338444687843e-05, "loss": 0.0913, "step": 640 }, { "epoch": 22.137931034482758, "grad_norm": 2.971240997314453, "learning_rate": 4.0854326396495076e-05, "loss": 0.0312, "step": 642 }, { "epoch": 22.20689655172414, "grad_norm": 0.27099546790122986, "learning_rate": 4.063526834611172e-05, "loss": 0.045, "step": 644 }, { "epoch": 22.275862068965516, "grad_norm": 0.35845986008644104, "learning_rate": 4.041621029572837e-05, "loss": 0.013, "step": 646 }, { "epoch": 22.344827586206897, "grad_norm": 2.0845632553100586, "learning_rate": 4.019715224534502e-05, "loss": 0.0523, "step": 648 }, { "epoch": 22.413793103448278, "grad_norm": 3.686854362487793, "learning_rate": 3.997809419496167e-05, "loss": 0.0292, "step": 650 }, { "epoch": 22.482758620689655, "grad_norm": 0.846224844455719, "learning_rate": 3.9759036144578314e-05, "loss": 0.1901, "step": 652 }, { "epoch": 22.551724137931036, "grad_norm": 0.3240630626678467, "learning_rate": 3.953997809419496e-05, "loss": 0.0187, "step": 654 }, { "epoch": 22.620689655172413, "grad_norm": 7.635501861572266, "learning_rate": 3.9320920043811607e-05, "loss": 0.1124, "step": 656 }, { "epoch": 22.689655172413794, "grad_norm": 2.2201285362243652, "learning_rate": 3.910186199342826e-05, "loss": 0.03, "step": 658 }, { "epoch": 22.75862068965517, "grad_norm": 8.953709602355957, "learning_rate": 3.888280394304491e-05, "loss": 0.0588, "step": 660 }, { "epoch": 22.82758620689655, "grad_norm": 7.417150497436523, "learning_rate": 3.866374589266156e-05, "loss": 0.0751, "step": 662 }, { "epoch": 22.896551724137932, "grad_norm": 1.5134751796722412, "learning_rate": 3.8444687842278205e-05, "loss": 0.0166, "step": 664 }, { "epoch": 22.96551724137931, "grad_norm": 0.537891685962677, "learning_rate": 3.822562979189485e-05, "loss": 0.0096, "step": 666 }, { "epoch": 23.0, "eval_accuracy": 0.6136363636363636, "eval_f1_macro": 0.5899262553677856, "eval_f1_micro": 0.6136363636363636, "eval_f1_weighted": 0.6088674363985942, "eval_loss": 1.5689215660095215, "eval_precision_macro": 0.616981329954019, "eval_precision_micro": 0.6136363636363636, "eval_precision_weighted": 0.6315426797963563, "eval_recall_macro": 0.5878231292517008, "eval_recall_micro": 0.6136363636363636, "eval_recall_weighted": 0.6136363636363636, "eval_runtime": 2.2085, "eval_samples_per_second": 59.769, "eval_steps_per_second": 7.697, "step": 667 }, { "epoch": 23.03448275862069, "grad_norm": 0.5641638040542603, "learning_rate": 3.8006571741511504e-05, "loss": 0.0089, "step": 668 }, { "epoch": 23.103448275862068, "grad_norm": 3.1353189945220947, "learning_rate": 3.778751369112815e-05, "loss": 0.0292, "step": 670 }, { "epoch": 23.17241379310345, "grad_norm": 0.7373493313789368, "learning_rate": 3.7568455640744796e-05, "loss": 0.0612, "step": 672 }, { "epoch": 23.24137931034483, "grad_norm": 2.668566942214966, "learning_rate": 3.734939759036144e-05, "loss": 0.0416, "step": 674 }, { "epoch": 23.310344827586206, "grad_norm": 4.210921287536621, "learning_rate": 3.7130339539978096e-05, "loss": 0.0405, "step": 676 }, { "epoch": 23.379310344827587, "grad_norm": 0.31117522716522217, "learning_rate": 3.691128148959475e-05, "loss": 0.0162, "step": 678 }, { "epoch": 23.448275862068964, "grad_norm": 8.15129280090332, "learning_rate": 3.6692223439211395e-05, "loss": 0.0723, "step": 680 }, { "epoch": 23.517241379310345, "grad_norm": 2.1367807388305664, "learning_rate": 3.647316538882804e-05, "loss": 0.0501, "step": 682 }, { "epoch": 23.586206896551722, "grad_norm": 1.1246554851531982, "learning_rate": 3.625410733844469e-05, "loss": 0.0115, "step": 684 }, { "epoch": 23.655172413793103, "grad_norm": 1.3772636651992798, "learning_rate": 3.603504928806134e-05, "loss": 0.033, "step": 686 }, { "epoch": 23.724137931034484, "grad_norm": 0.15142899751663208, "learning_rate": 3.5815991237677986e-05, "loss": 0.0613, "step": 688 }, { "epoch": 23.79310344827586, "grad_norm": 13.712115287780762, "learning_rate": 3.559693318729463e-05, "loss": 0.2608, "step": 690 }, { "epoch": 23.862068965517242, "grad_norm": 6.292361259460449, "learning_rate": 3.537787513691128e-05, "loss": 0.1134, "step": 692 }, { "epoch": 23.93103448275862, "grad_norm": 0.6719773411750793, "learning_rate": 3.515881708652793e-05, "loss": 0.0106, "step": 694 }, { "epoch": 24.0, "grad_norm": 1.0674413442611694, "learning_rate": 3.4939759036144585e-05, "loss": 0.0661, "step": 696 }, { "epoch": 24.0, "eval_accuracy": 0.6666666666666666, "eval_f1_macro": 0.6056024708734068, "eval_f1_micro": 0.6666666666666666, "eval_f1_weighted": 0.657565805841668, "eval_loss": 1.6276419162750244, "eval_precision_macro": 0.6690476190476191, "eval_precision_micro": 0.6666666666666666, "eval_precision_weighted": 0.6866965105601469, "eval_recall_macro": 0.5948677248677248, "eval_recall_micro": 0.6666666666666666, "eval_recall_weighted": 0.6666666666666666, "eval_runtime": 2.2477, "eval_samples_per_second": 58.726, "eval_steps_per_second": 7.563, "step": 696 }, { "epoch": 24.06896551724138, "grad_norm": 0.15591685473918915, "learning_rate": 3.472070098576123e-05, "loss": 0.0109, "step": 698 }, { "epoch": 24.137931034482758, "grad_norm": 1.411007046699524, "learning_rate": 3.450164293537788e-05, "loss": 0.0806, "step": 700 }, { "epoch": 24.20689655172414, "grad_norm": 6.958545684814453, "learning_rate": 3.4282584884994523e-05, "loss": 0.2285, "step": 702 }, { "epoch": 24.275862068965516, "grad_norm": 0.38558292388916016, "learning_rate": 3.4063526834611176e-05, "loss": 0.0268, "step": 704 }, { "epoch": 24.344827586206897, "grad_norm": 6.778842926025391, "learning_rate": 3.384446878422782e-05, "loss": 0.1081, "step": 706 }, { "epoch": 24.413793103448278, "grad_norm": 0.2550676465034485, "learning_rate": 3.362541073384447e-05, "loss": 0.0058, "step": 708 }, { "epoch": 24.482758620689655, "grad_norm": 0.24779938161373138, "learning_rate": 3.3406352683461115e-05, "loss": 0.0252, "step": 710 }, { "epoch": 24.551724137931036, "grad_norm": 0.1385107785463333, "learning_rate": 3.318729463307776e-05, "loss": 0.0057, "step": 712 }, { "epoch": 24.620689655172413, "grad_norm": 3.3009445667266846, "learning_rate": 3.2968236582694414e-05, "loss": 0.0338, "step": 714 }, { "epoch": 24.689655172413794, "grad_norm": 0.6270205974578857, "learning_rate": 3.274917853231107e-05, "loss": 0.0112, "step": 716 }, { "epoch": 24.75862068965517, "grad_norm": 0.24541209638118744, "learning_rate": 3.253012048192771e-05, "loss": 0.0098, "step": 718 }, { "epoch": 24.82758620689655, "grad_norm": 0.5051412582397461, "learning_rate": 3.231106243154436e-05, "loss": 0.0616, "step": 720 }, { "epoch": 24.896551724137932, "grad_norm": 0.21808616816997528, "learning_rate": 3.209200438116101e-05, "loss": 0.0238, "step": 722 }, { "epoch": 24.96551724137931, "grad_norm": 0.19809569418430328, "learning_rate": 3.187294633077766e-05, "loss": 0.0463, "step": 724 }, { "epoch": 25.0, "eval_accuracy": 0.6136363636363636, "eval_f1_macro": 0.5591332103178793, "eval_f1_micro": 0.6136363636363636, "eval_f1_weighted": 0.6084641064500635, "eval_loss": 1.6760780811309814, "eval_precision_macro": 0.6192834056699603, "eval_precision_micro": 0.6136363636363636, "eval_precision_weighted": 0.6400907915613798, "eval_recall_macro": 0.5521088435374149, "eval_recall_micro": 0.6136363636363636, "eval_recall_weighted": 0.6136363636363636, "eval_runtime": 2.1953, "eval_samples_per_second": 60.128, "eval_steps_per_second": 7.744, "step": 725 }, { "epoch": 25.03448275862069, "grad_norm": 0.9460155367851257, "learning_rate": 3.1653888280394305e-05, "loss": 0.0328, "step": 726 }, { "epoch": 25.103448275862068, "grad_norm": 0.34770432114601135, "learning_rate": 3.143483023001095e-05, "loss": 0.0169, "step": 728 }, { "epoch": 25.17241379310345, "grad_norm": 0.6745150089263916, "learning_rate": 3.12157721796276e-05, "loss": 0.0292, "step": 730 }, { "epoch": 25.24137931034483, "grad_norm": 0.14288195967674255, "learning_rate": 3.099671412924425e-05, "loss": 0.0672, "step": 732 }, { "epoch": 25.310344827586206, "grad_norm": 0.1784912347793579, "learning_rate": 3.07776560788609e-05, "loss": 0.0046, "step": 734 }, { "epoch": 25.379310344827587, "grad_norm": 0.7752932906150818, "learning_rate": 3.055859802847755e-05, "loss": 0.0089, "step": 736 }, { "epoch": 25.448275862068964, "grad_norm": 8.310676574707031, "learning_rate": 3.0339539978094196e-05, "loss": 0.0854, "step": 738 }, { "epoch": 25.517241379310345, "grad_norm": 1.2783715724945068, "learning_rate": 3.012048192771085e-05, "loss": 0.0091, "step": 740 }, { "epoch": 25.586206896551722, "grad_norm": 1.2155754566192627, "learning_rate": 2.9901423877327495e-05, "loss": 0.0182, "step": 742 }, { "epoch": 25.655172413793103, "grad_norm": 0.10511256754398346, "learning_rate": 2.968236582694414e-05, "loss": 0.0043, "step": 744 }, { "epoch": 25.724137931034484, "grad_norm": 0.1467219889163971, "learning_rate": 2.9463307776560787e-05, "loss": 0.0041, "step": 746 }, { "epoch": 25.79310344827586, "grad_norm": 0.06411899626255035, "learning_rate": 2.9244249726177437e-05, "loss": 0.0043, "step": 748 }, { "epoch": 25.862068965517242, "grad_norm": 9.776043891906738, "learning_rate": 2.902519167579409e-05, "loss": 0.0456, "step": 750 }, { "epoch": 25.93103448275862, "grad_norm": 0.1743546575307846, "learning_rate": 2.8806133625410736e-05, "loss": 0.0029, "step": 752 }, { "epoch": 26.0, "grad_norm": 0.9783799648284912, "learning_rate": 2.8587075575027382e-05, "loss": 0.0118, "step": 754 }, { "epoch": 26.0, "eval_accuracy": 0.6287878787878788, "eval_f1_macro": 0.5353365735453567, "eval_f1_micro": 0.6287878787878788, "eval_f1_weighted": 0.6074711236094882, "eval_loss": 1.6210349798202515, "eval_precision_macro": 0.5715752748253354, "eval_precision_micro": 0.6287878787878788, "eval_precision_weighted": 0.6263481846840905, "eval_recall_macro": 0.5410279667422525, "eval_recall_micro": 0.6287878787878788, "eval_recall_weighted": 0.6287878787878788, "eval_runtime": 2.2013, "eval_samples_per_second": 59.964, "eval_steps_per_second": 7.723, "step": 754 }, { "epoch": 26.06896551724138, "grad_norm": 0.2862379252910614, "learning_rate": 2.8368017524644032e-05, "loss": 0.0041, "step": 756 }, { "epoch": 26.137931034482758, "grad_norm": 1.7093660831451416, "learning_rate": 2.8148959474260678e-05, "loss": 0.0129, "step": 758 }, { "epoch": 26.20689655172414, "grad_norm": 2.0235061645507812, "learning_rate": 2.792990142387733e-05, "loss": 0.0107, "step": 760 }, { "epoch": 26.275862068965516, "grad_norm": 0.19022098183631897, "learning_rate": 2.7710843373493977e-05, "loss": 0.0044, "step": 762 }, { "epoch": 26.344827586206897, "grad_norm": 0.09240903705358505, "learning_rate": 2.7491785323110624e-05, "loss": 0.0023, "step": 764 }, { "epoch": 26.413793103448278, "grad_norm": 0.08767610788345337, "learning_rate": 2.7272727272727273e-05, "loss": 0.0026, "step": 766 }, { "epoch": 26.482758620689655, "grad_norm": 0.3399060368537903, "learning_rate": 2.7053669222343926e-05, "loss": 0.0129, "step": 768 }, { "epoch": 26.551724137931036, "grad_norm": 0.16420547664165497, "learning_rate": 2.6834611171960572e-05, "loss": 0.0315, "step": 770 }, { "epoch": 26.620689655172413, "grad_norm": 0.07277621328830719, "learning_rate": 2.661555312157722e-05, "loss": 0.1677, "step": 772 }, { "epoch": 26.689655172413794, "grad_norm": 0.0779278352856636, "learning_rate": 2.6396495071193865e-05, "loss": 0.0034, "step": 774 }, { "epoch": 26.75862068965517, "grad_norm": 0.030221056193113327, "learning_rate": 2.6177437020810514e-05, "loss": 0.009, "step": 776 }, { "epoch": 26.82758620689655, "grad_norm": 0.7204201221466064, "learning_rate": 2.5958378970427167e-05, "loss": 0.0242, "step": 778 }, { "epoch": 26.896551724137932, "grad_norm": 2.2107677459716797, "learning_rate": 2.5739320920043813e-05, "loss": 0.0818, "step": 780 }, { "epoch": 26.96551724137931, "grad_norm": 0.04545823484659195, "learning_rate": 2.552026286966046e-05, "loss": 0.0018, "step": 782 }, { "epoch": 27.0, "eval_accuracy": 0.6742424242424242, "eval_f1_macro": 0.5860085994600535, "eval_f1_micro": 0.6742424242424242, "eval_f1_weighted": 0.6574843706054311, "eval_loss": 1.607276201248169, "eval_precision_macro": 0.5955862562810968, "eval_precision_micro": 0.6742424242424242, "eval_precision_weighted": 0.6586732219548587, "eval_recall_macro": 0.5929327286470143, "eval_recall_micro": 0.6742424242424242, "eval_recall_weighted": 0.6742424242424242, "eval_runtime": 2.1629, "eval_samples_per_second": 61.028, "eval_steps_per_second": 7.86, "step": 783 }, { "epoch": 27.03448275862069, "grad_norm": 1.2520081996917725, "learning_rate": 2.530120481927711e-05, "loss": 0.0145, "step": 784 }, { "epoch": 27.103448275862068, "grad_norm": 0.4600828289985657, "learning_rate": 2.5082146768893762e-05, "loss": 0.0133, "step": 786 }, { "epoch": 27.17241379310345, "grad_norm": 4.692933082580566, "learning_rate": 2.486308871851041e-05, "loss": 0.0382, "step": 788 }, { "epoch": 27.24137931034483, "grad_norm": 0.3261309862136841, "learning_rate": 2.4644030668127055e-05, "loss": 0.0035, "step": 790 }, { "epoch": 27.310344827586206, "grad_norm": 0.028574170544743538, "learning_rate": 2.44249726177437e-05, "loss": 0.0084, "step": 792 }, { "epoch": 27.379310344827587, "grad_norm": 0.4913921356201172, "learning_rate": 2.420591456736035e-05, "loss": 0.0183, "step": 794 }, { "epoch": 27.448275862068964, "grad_norm": 1.5067977905273438, "learning_rate": 2.3986856516977e-05, "loss": 0.0387, "step": 796 }, { "epoch": 27.517241379310345, "grad_norm": 0.8277406096458435, "learning_rate": 2.376779846659365e-05, "loss": 0.0233, "step": 798 }, { "epoch": 27.586206896551722, "grad_norm": 5.168019771575928, "learning_rate": 2.3548740416210296e-05, "loss": 0.0222, "step": 800 }, { "epoch": 27.655172413793103, "grad_norm": 0.5925205945968628, "learning_rate": 2.3329682365826945e-05, "loss": 0.0066, "step": 802 }, { "epoch": 27.724137931034484, "grad_norm": 0.7455288767814636, "learning_rate": 2.3110624315443595e-05, "loss": 0.0044, "step": 804 }, { "epoch": 27.79310344827586, "grad_norm": 0.029589757323265076, "learning_rate": 2.289156626506024e-05, "loss": 0.0055, "step": 806 }, { "epoch": 27.862068965517242, "grad_norm": 10.534521102905273, "learning_rate": 2.267250821467689e-05, "loss": 0.0515, "step": 808 }, { "epoch": 27.93103448275862, "grad_norm": 0.19806796312332153, "learning_rate": 2.2453450164293537e-05, "loss": 0.0157, "step": 810 }, { "epoch": 28.0, "grad_norm": 5.914359092712402, "learning_rate": 2.2234392113910187e-05, "loss": 0.0336, "step": 812 }, { "epoch": 28.0, "eval_accuracy": 0.6439393939393939, "eval_f1_macro": 0.6085667254819397, "eval_f1_micro": 0.6439393939393939, "eval_f1_weighted": 0.6411146413787409, "eval_loss": 1.5964038372039795, "eval_precision_macro": 0.6379142750217496, "eval_precision_micro": 0.6439393939393939, "eval_precision_weighted": 0.6565570652257958, "eval_recall_macro": 0.5978533635676493, "eval_recall_micro": 0.6439393939393939, "eval_recall_weighted": 0.6439393939393939, "eval_runtime": 2.2198, "eval_samples_per_second": 59.466, "eval_steps_per_second": 7.659, "step": 812 }, { "epoch": 28.06896551724138, "grad_norm": 1.2526508569717407, "learning_rate": 2.2015334063526836e-05, "loss": 0.0304, "step": 814 }, { "epoch": 28.137931034482758, "grad_norm": 1.441461443901062, "learning_rate": 2.1796276013143486e-05, "loss": 0.0071, "step": 816 }, { "epoch": 28.20689655172414, "grad_norm": 0.2004363089799881, "learning_rate": 2.1577217962760132e-05, "loss": 0.0261, "step": 818 }, { "epoch": 28.275862068965516, "grad_norm": 0.16825991868972778, "learning_rate": 2.1358159912376778e-05, "loss": 0.0297, "step": 820 }, { "epoch": 28.344827586206897, "grad_norm": 0.8127052783966064, "learning_rate": 2.1139101861993428e-05, "loss": 0.005, "step": 822 }, { "epoch": 28.413793103448278, "grad_norm": 0.2261103391647339, "learning_rate": 2.0920043811610077e-05, "loss": 0.0056, "step": 824 }, { "epoch": 28.482758620689655, "grad_norm": 0.04766825586557388, "learning_rate": 2.0700985761226727e-05, "loss": 0.0013, "step": 826 }, { "epoch": 28.551724137931036, "grad_norm": 0.036670394241809845, "learning_rate": 2.0481927710843373e-05, "loss": 0.0028, "step": 828 }, { "epoch": 28.620689655172413, "grad_norm": 0.41730597615242004, "learning_rate": 2.0262869660460023e-05, "loss": 0.0028, "step": 830 }, { "epoch": 28.689655172413794, "grad_norm": 0.04215677082538605, "learning_rate": 2.0043811610076672e-05, "loss": 0.0018, "step": 832 }, { "epoch": 28.75862068965517, "grad_norm": 0.08167728036642075, "learning_rate": 1.9824753559693322e-05, "loss": 0.0016, "step": 834 }, { "epoch": 28.82758620689655, "grad_norm": 0.031280118972063065, "learning_rate": 1.9605695509309968e-05, "loss": 0.002, "step": 836 }, { "epoch": 28.896551724137932, "grad_norm": 1.7285773754119873, "learning_rate": 1.9386637458926614e-05, "loss": 0.0117, "step": 838 }, { "epoch": 28.96551724137931, "grad_norm": 0.06211957335472107, "learning_rate": 1.9167579408543264e-05, "loss": 0.0014, "step": 840 }, { "epoch": 29.0, "eval_accuracy": 0.7121212121212122, "eval_f1_macro": 0.6872675353596543, "eval_f1_micro": 0.7121212121212122, "eval_f1_weighted": 0.7082990442199542, "eval_loss": 1.5290158987045288, "eval_precision_macro": 0.7262781667691, "eval_precision_micro": 0.7121212121212122, "eval_precision_weighted": 0.7308177925367624, "eval_recall_macro": 0.6733560090702948, "eval_recall_micro": 0.7121212121212122, "eval_recall_weighted": 0.7121212121212122, "eval_runtime": 2.1608, "eval_samples_per_second": 61.088, "eval_steps_per_second": 7.867, "step": 841 }, { "epoch": 29.03448275862069, "grad_norm": 0.01278562843799591, "learning_rate": 1.8948521358159914e-05, "loss": 0.003, "step": 842 }, { "epoch": 29.103448275862068, "grad_norm": 0.06935442239046097, "learning_rate": 1.8729463307776563e-05, "loss": 0.0014, "step": 844 }, { "epoch": 29.17241379310345, "grad_norm": 0.6586639285087585, "learning_rate": 1.851040525739321e-05, "loss": 0.0023, "step": 846 }, { "epoch": 29.24137931034483, "grad_norm": 3.5995774269104004, "learning_rate": 1.829134720700986e-05, "loss": 0.018, "step": 848 }, { "epoch": 29.310344827586206, "grad_norm": 0.08816396445035934, "learning_rate": 1.8072289156626505e-05, "loss": 0.0031, "step": 850 }, { "epoch": 29.379310344827587, "grad_norm": 1.209425926208496, "learning_rate": 1.7853231106243155e-05, "loss": 0.0282, "step": 852 }, { "epoch": 29.448275862068964, "grad_norm": 3.262197732925415, "learning_rate": 1.7634173055859804e-05, "loss": 0.0246, "step": 854 }, { "epoch": 29.517241379310345, "grad_norm": 0.7129732966423035, "learning_rate": 1.741511500547645e-05, "loss": 0.0085, "step": 856 }, { "epoch": 29.586206896551722, "grad_norm": 0.15869493782520294, "learning_rate": 1.71960569550931e-05, "loss": 0.0015, "step": 858 }, { "epoch": 29.655172413793103, "grad_norm": 0.07819876074790955, "learning_rate": 1.697699890470975e-05, "loss": 0.0476, "step": 860 }, { "epoch": 29.724137931034484, "grad_norm": 0.12180998921394348, "learning_rate": 1.67579408543264e-05, "loss": 0.0032, "step": 862 }, { "epoch": 29.79310344827586, "grad_norm": 0.034297507256269455, "learning_rate": 1.6538882803943046e-05, "loss": 0.0008, "step": 864 }, { "epoch": 29.862068965517242, "grad_norm": 0.14563943445682526, "learning_rate": 1.6319824753559695e-05, "loss": 0.0087, "step": 866 }, { "epoch": 29.93103448275862, "grad_norm": 0.23122666776180267, "learning_rate": 1.610076670317634e-05, "loss": 0.0416, "step": 868 }, { "epoch": 30.0, "grad_norm": 0.06973911821842194, "learning_rate": 1.588170865279299e-05, "loss": 0.021, "step": 870 }, { "epoch": 30.0, "eval_accuracy": 0.696969696969697, "eval_f1_macro": 0.6982068677202545, "eval_f1_micro": 0.696969696969697, "eval_f1_weighted": 0.6973740805478152, "eval_loss": 1.5439778566360474, "eval_precision_macro": 0.7076152020847177, "eval_precision_micro": 0.696969696969697, "eval_precision_weighted": 0.7169868679432659, "eval_recall_macro": 0.7086167800453513, "eval_recall_micro": 0.696969696969697, "eval_recall_weighted": 0.696969696969697, "eval_runtime": 2.1633, "eval_samples_per_second": 61.018, "eval_steps_per_second": 7.858, "step": 870 }, { "epoch": 30.06896551724138, "grad_norm": 1.7453641891479492, "learning_rate": 1.566265060240964e-05, "loss": 0.0083, "step": 872 }, { "epoch": 30.137931034482758, "grad_norm": 0.3179946541786194, "learning_rate": 1.5443592552026287e-05, "loss": 0.0024, "step": 874 }, { "epoch": 30.20689655172414, "grad_norm": 1.7078912258148193, "learning_rate": 1.5224534501642936e-05, "loss": 0.0082, "step": 876 }, { "epoch": 30.275862068965516, "grad_norm": 0.32421720027923584, "learning_rate": 1.5005476451259584e-05, "loss": 0.0032, "step": 878 }, { "epoch": 30.344827586206897, "grad_norm": 0.04044501855969429, "learning_rate": 1.4786418400876234e-05, "loss": 0.0444, "step": 880 }, { "epoch": 30.413793103448278, "grad_norm": 0.06957350671291351, "learning_rate": 1.4567360350492882e-05, "loss": 0.0226, "step": 882 }, { "epoch": 30.482758620689655, "grad_norm": 0.5221211314201355, "learning_rate": 1.4348302300109528e-05, "loss": 0.0041, "step": 884 }, { "epoch": 30.551724137931036, "grad_norm": 0.017598647624254227, "learning_rate": 1.412924424972618e-05, "loss": 0.0013, "step": 886 }, { "epoch": 30.620689655172413, "grad_norm": 0.47180604934692383, "learning_rate": 1.3910186199342825e-05, "loss": 0.003, "step": 888 }, { "epoch": 30.689655172413794, "grad_norm": 0.03941981866955757, "learning_rate": 1.3691128148959475e-05, "loss": 0.0112, "step": 890 }, { "epoch": 30.75862068965517, "grad_norm": 0.023555099964141846, "learning_rate": 1.3472070098576123e-05, "loss": 0.03, "step": 892 }, { "epoch": 30.82758620689655, "grad_norm": 0.48139652609825134, "learning_rate": 1.3253012048192772e-05, "loss": 0.0052, "step": 894 }, { "epoch": 30.896551724137932, "grad_norm": 0.1581326723098755, "learning_rate": 1.303395399780942e-05, "loss": 0.0156, "step": 896 }, { "epoch": 30.96551724137931, "grad_norm": 1.283683180809021, "learning_rate": 1.2814895947426067e-05, "loss": 0.0065, "step": 898 }, { "epoch": 31.0, "eval_accuracy": 0.696969696969697, "eval_f1_macro": 0.6868812120235886, "eval_f1_micro": 0.696969696969697, "eval_f1_weighted": 0.6914990595967736, "eval_loss": 1.6575924158096313, "eval_precision_macro": 0.7429775738046415, "eval_precision_micro": 0.696969696969697, "eval_precision_weighted": 0.7269597892803633, "eval_recall_macro": 0.6698866213151928, "eval_recall_micro": 0.696969696969697, "eval_recall_weighted": 0.696969696969697, "eval_runtime": 2.1493, "eval_samples_per_second": 61.416, "eval_steps_per_second": 7.91, "step": 899 }, { "epoch": 31.03448275862069, "grad_norm": 0.018120741471648216, "learning_rate": 1.2595837897042718e-05, "loss": 0.0011, "step": 900 }, { "epoch": 31.103448275862068, "grad_norm": 0.1496172994375229, "learning_rate": 1.2376779846659366e-05, "loss": 0.0031, "step": 902 }, { "epoch": 31.17241379310345, "grad_norm": 1.2407957315444946, "learning_rate": 1.2157721796276014e-05, "loss": 0.0278, "step": 904 }, { "epoch": 31.24137931034483, "grad_norm": 0.14974364638328552, "learning_rate": 1.1938663745892662e-05, "loss": 0.0236, "step": 906 }, { "epoch": 31.310344827586206, "grad_norm": 4.202882766723633, "learning_rate": 1.171960569550931e-05, "loss": 0.0241, "step": 908 }, { "epoch": 31.379310344827587, "grad_norm": 0.05061774700880051, "learning_rate": 1.1500547645125959e-05, "loss": 0.001, "step": 910 }, { "epoch": 31.448275862068964, "grad_norm": 0.019295161589980125, "learning_rate": 1.1281489594742607e-05, "loss": 0.0012, "step": 912 }, { "epoch": 31.517241379310345, "grad_norm": 0.1430915743112564, "learning_rate": 1.1062431544359257e-05, "loss": 0.0041, "step": 914 }, { "epoch": 31.586206896551722, "grad_norm": 0.03306346759200096, "learning_rate": 1.0843373493975904e-05, "loss": 0.0013, "step": 916 }, { "epoch": 31.655172413793103, "grad_norm": 0.42486900091171265, "learning_rate": 1.0624315443592552e-05, "loss": 0.006, "step": 918 }, { "epoch": 31.724137931034484, "grad_norm": 0.058433897793293, "learning_rate": 1.0405257393209202e-05, "loss": 0.0018, "step": 920 }, { "epoch": 31.79310344827586, "grad_norm": 0.027252651751041412, "learning_rate": 1.0186199342825848e-05, "loss": 0.004, "step": 922 }, { "epoch": 31.862068965517242, "grad_norm": 0.13943073153495789, "learning_rate": 9.967141292442498e-06, "loss": 0.0016, "step": 924 }, { "epoch": 31.93103448275862, "grad_norm": 0.048901163041591644, "learning_rate": 9.748083242059146e-06, "loss": 0.0019, "step": 926 }, { "epoch": 32.0, "grad_norm": 0.019549880176782608, "learning_rate": 9.529025191675795e-06, "loss": 0.0013, "step": 928 }, { "epoch": 32.0, "eval_accuracy": 0.7196969696969697, "eval_f1_macro": 0.7124096518979722, "eval_f1_micro": 0.7196969696969697, "eval_f1_weighted": 0.7172944138815308, "eval_loss": 1.560258388519287, "eval_precision_macro": 0.750803957946815, "eval_precision_micro": 0.7196969696969697, "eval_precision_weighted": 0.7410763478945297, "eval_recall_macro": 0.6987226001511715, "eval_recall_micro": 0.7196969696969697, "eval_recall_weighted": 0.7196969696969697, "eval_runtime": 2.1728, "eval_samples_per_second": 60.752, "eval_steps_per_second": 7.824, "step": 928 }, { "epoch": 32.06896551724138, "grad_norm": 0.05375111103057861, "learning_rate": 9.309967141292443e-06, "loss": 0.0024, "step": 930 }, { "epoch": 32.13793103448276, "grad_norm": 0.3470950424671173, "learning_rate": 9.090909090909091e-06, "loss": 0.0035, "step": 932 }, { "epoch": 32.206896551724135, "grad_norm": 0.02533532679080963, "learning_rate": 8.87185104052574e-06, "loss": 0.001, "step": 934 }, { "epoch": 32.275862068965516, "grad_norm": 1.6475239992141724, "learning_rate": 8.652792990142389e-06, "loss": 0.0343, "step": 936 }, { "epoch": 32.3448275862069, "grad_norm": 0.13403227925300598, "learning_rate": 8.433734939759036e-06, "loss": 0.0019, "step": 938 }, { "epoch": 32.41379310344828, "grad_norm": 0.13201530277729034, "learning_rate": 8.214676889375684e-06, "loss": 0.0019, "step": 940 }, { "epoch": 32.48275862068966, "grad_norm": 2.441126823425293, "learning_rate": 7.995618838992334e-06, "loss": 0.0152, "step": 942 }, { "epoch": 32.55172413793103, "grad_norm": 0.3257850408554077, "learning_rate": 7.776560788608982e-06, "loss": 0.0019, "step": 944 }, { "epoch": 32.62068965517241, "grad_norm": 0.10473517328500748, "learning_rate": 7.5575027382256306e-06, "loss": 0.0027, "step": 946 }, { "epoch": 32.689655172413794, "grad_norm": 0.04300970956683159, "learning_rate": 7.3384446878422785e-06, "loss": 0.0008, "step": 948 }, { "epoch": 32.758620689655174, "grad_norm": 0.12258446961641312, "learning_rate": 7.119386637458927e-06, "loss": 0.0028, "step": 950 }, { "epoch": 32.827586206896555, "grad_norm": 0.14553672075271606, "learning_rate": 6.900328587075576e-06, "loss": 0.0013, "step": 952 }, { "epoch": 32.89655172413793, "grad_norm": 1.198081612586975, "learning_rate": 6.681270536692223e-06, "loss": 0.0276, "step": 954 }, { "epoch": 32.96551724137931, "grad_norm": 0.4895022511482239, "learning_rate": 6.462212486308872e-06, "loss": 0.0129, "step": 956 }, { "epoch": 33.0, "eval_accuracy": 0.6893939393939394, "eval_f1_macro": 0.6841929992352647, "eval_f1_micro": 0.6893939393939394, "eval_f1_weighted": 0.6870473377924375, "eval_loss": 1.6027860641479492, "eval_precision_macro": 0.7152604691775198, "eval_precision_micro": 0.6893939393939394, "eval_precision_weighted": 0.7059007626456307, "eval_recall_macro": 0.673061224489796, "eval_recall_micro": 0.6893939393939394, "eval_recall_weighted": 0.6893939393939394, "eval_runtime": 2.1875, "eval_samples_per_second": 60.343, "eval_steps_per_second": 7.771, "step": 957 }, { "epoch": 33.03448275862069, "grad_norm": 0.18283292651176453, "learning_rate": 6.2431544359255205e-06, "loss": 0.0023, "step": 958 }, { "epoch": 33.10344827586207, "grad_norm": 0.09138727933168411, "learning_rate": 6.024096385542169e-06, "loss": 0.0018, "step": 960 }, { "epoch": 33.172413793103445, "grad_norm": 0.27659812569618225, "learning_rate": 5.805038335158817e-06, "loss": 0.004, "step": 962 }, { "epoch": 33.241379310344826, "grad_norm": 1.0702749490737915, "learning_rate": 5.585980284775466e-06, "loss": 0.0228, "step": 964 }, { "epoch": 33.310344827586206, "grad_norm": 0.10491228103637695, "learning_rate": 5.366922234392114e-06, "loss": 0.0023, "step": 966 }, { "epoch": 33.37931034482759, "grad_norm": 0.33777573704719543, "learning_rate": 5.1478641840087625e-06, "loss": 0.0034, "step": 968 }, { "epoch": 33.44827586206897, "grad_norm": 0.06301871687173843, "learning_rate": 4.928806133625411e-06, "loss": 0.0023, "step": 970 }, { "epoch": 33.51724137931034, "grad_norm": 0.09340860694646835, "learning_rate": 4.70974808324206e-06, "loss": 0.0205, "step": 972 }, { "epoch": 33.58620689655172, "grad_norm": 0.020821426063776016, "learning_rate": 4.490690032858708e-06, "loss": 0.0009, "step": 974 }, { "epoch": 33.6551724137931, "grad_norm": 0.04694080352783203, "learning_rate": 4.271631982475356e-06, "loss": 0.001, "step": 976 }, { "epoch": 33.724137931034484, "grad_norm": 0.056120615452528, "learning_rate": 4.0525739320920046e-06, "loss": 0.0018, "step": 978 }, { "epoch": 33.793103448275865, "grad_norm": 1.5992101430892944, "learning_rate": 3.8335158817086525e-06, "loss": 0.0073, "step": 980 }, { "epoch": 33.86206896551724, "grad_norm": 0.044718772172927856, "learning_rate": 3.614457831325301e-06, "loss": 0.012, "step": 982 }, { "epoch": 33.93103448275862, "grad_norm": 0.019480116665363312, "learning_rate": 3.39539978094195e-06, "loss": 0.0012, "step": 984 }, { "epoch": 34.0, "grad_norm": 0.01623496413230896, "learning_rate": 3.1763417305585983e-06, "loss": 0.0006, "step": 986 }, { "epoch": 34.0, "eval_accuracy": 0.6818181818181818, "eval_f1_macro": 0.6786724695679507, "eval_f1_micro": 0.6818181818181818, "eval_f1_weighted": 0.6800052243915973, "eval_loss": 1.6074531078338623, "eval_precision_macro": 0.7093953665382237, "eval_precision_micro": 0.6818181818181818, "eval_precision_weighted": 0.6991099809281627, "eval_recall_macro": 0.6677702191987906, "eval_recall_micro": 0.6818181818181818, "eval_recall_weighted": 0.6818181818181818, "eval_runtime": 2.1756, "eval_samples_per_second": 60.674, "eval_steps_per_second": 7.814, "step": 986 }, { "epoch": 34.06896551724138, "grad_norm": 0.02141761966049671, "learning_rate": 2.9572836801752466e-06, "loss": 0.0015, "step": 988 }, { "epoch": 34.13793103448276, "grad_norm": 0.03435864299535751, "learning_rate": 2.738225629791895e-06, "loss": 0.0009, "step": 990 }, { "epoch": 34.206896551724135, "grad_norm": 0.03004680573940277, "learning_rate": 2.5191675794085432e-06, "loss": 0.0011, "step": 992 }, { "epoch": 34.275862068965516, "grad_norm": 0.027880065143108368, "learning_rate": 2.3001095290251916e-06, "loss": 0.0008, "step": 994 }, { "epoch": 34.3448275862069, "grad_norm": 1.3690009117126465, "learning_rate": 2.0810514786418403e-06, "loss": 0.0308, "step": 996 }, { "epoch": 34.41379310344828, "grad_norm": 0.04672781005501747, "learning_rate": 1.8619934282584884e-06, "loss": 0.0012, "step": 998 }, { "epoch": 34.48275862068966, "grad_norm": 0.06204487010836601, "learning_rate": 1.642935377875137e-06, "loss": 0.0017, "step": 1000 }, { "epoch": 34.55172413793103, "grad_norm": 0.03130810335278511, "learning_rate": 1.4238773274917855e-06, "loss": 0.0015, "step": 1002 }, { "epoch": 34.62068965517241, "grad_norm": 0.06150972098112106, "learning_rate": 1.2048192771084338e-06, "loss": 0.0026, "step": 1004 }, { "epoch": 34.689655172413794, "grad_norm": 0.1301676630973816, "learning_rate": 9.857612267250823e-07, "loss": 0.0023, "step": 1006 }, { "epoch": 34.758620689655174, "grad_norm": 0.11791616678237915, "learning_rate": 7.667031763417306e-07, "loss": 0.0019, "step": 1008 }, { "epoch": 34.827586206896555, "grad_norm": 2.598999500274658, "learning_rate": 5.47645125958379e-07, "loss": 0.0082, "step": 1010 }, { "epoch": 34.89655172413793, "grad_norm": 1.0025254487991333, "learning_rate": 3.285870755750274e-07, "loss": 0.0207, "step": 1012 }, { "epoch": 34.96551724137931, "grad_norm": 0.543045699596405, "learning_rate": 1.095290251916758e-07, "loss": 0.0022, "step": 1014 }, { "epoch": 35.0, "eval_accuracy": 0.6893939393939394, "eval_f1_macro": 0.6848257838882342, "eval_f1_micro": 0.6893939393939394, "eval_f1_weighted": 0.6869347739708745, "eval_loss": 1.6008917093276978, "eval_precision_macro": 0.7170539138281073, "eval_precision_micro": 0.6893939393939394, "eval_precision_weighted": 0.7061795242880288, "eval_recall_macro": 0.673061224489796, "eval_recall_micro": 0.6893939393939394, "eval_recall_weighted": 0.6893939393939394, "eval_runtime": 2.1921, "eval_samples_per_second": 60.216, "eval_steps_per_second": 7.755, "step": 1015 }, { "epoch": 35.0, "step": 1015, "total_flos": 1.2531016253190758e+18, "train_loss": 0.5009635013656627, "train_runtime": 1007.7292, "train_samples_per_second": 16.046, "train_steps_per_second": 1.007 } ], "logging_steps": 2, "max_steps": 1015, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2531016253190758e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }