{
  "best_metric": 0.9263564944267273,
  "best_model_checkpoint": "Geofin5/checkpoint-204",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029411764705882353,
      "grad_norm": 7.102517604827881,
      "learning_rate": 1.2195121951219514e-06,
      "loss": 1.3725,
      "step": 2
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 5.622996807098389,
      "learning_rate": 2.4390243902439027e-06,
      "loss": 1.3922,
      "step": 4
    },
    {
      "epoch": 0.08823529411764706,
      "grad_norm": 7.666197299957275,
      "learning_rate": 3.6585365853658537e-06,
      "loss": 1.3371,
      "step": 6
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 5.953295707702637,
      "learning_rate": 4.8780487804878055e-06,
      "loss": 1.322,
      "step": 8
    },
    {
      "epoch": 0.14705882352941177,
      "grad_norm": 4.027065277099609,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 1.2841,
      "step": 10
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 6.355094909667969,
      "learning_rate": 7.317073170731707e-06,
      "loss": 1.2458,
      "step": 12
    },
    {
      "epoch": 0.20588235294117646,
      "grad_norm": 4.935754776000977,
      "learning_rate": 8.53658536585366e-06,
      "loss": 1.2534,
      "step": 14
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 3.921065330505371,
      "learning_rate": 9.756097560975611e-06,
      "loss": 1.2224,
      "step": 16
    },
    {
      "epoch": 0.2647058823529412,
      "grad_norm": 4.3277482986450195,
      "learning_rate": 1.0975609756097562e-05,
      "loss": 1.125,
      "step": 18
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 4.808041572570801,
      "learning_rate": 1.2195121951219513e-05,
      "loss": 1.0728,
      "step": 20
    },
    {
      "epoch": 0.3235294117647059,
      "grad_norm": 2.6956660747528076,
      "learning_rate": 1.3414634146341466e-05,
      "loss": 1.2269,
      "step": 22
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 2.8759944438934326,
      "learning_rate": 1.4634146341463415e-05,
      "loss": 1.094,
      "step": 24
    },
    {
      "epoch": 0.38235294117647056,
      "grad_norm": 5.900161266326904,
      "learning_rate": 1.5853658536585366e-05,
      "loss": 1.1418,
      "step": 26
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 8.949163436889648,
      "learning_rate": 1.707317073170732e-05,
      "loss": 1.1931,
      "step": 28
    },
    {
      "epoch": 0.4411764705882353,
      "grad_norm": 4.940980911254883,
      "learning_rate": 1.8292682926829268e-05,
      "loss": 1.0575,
      "step": 30
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 4.716662406921387,
      "learning_rate": 1.9512195121951222e-05,
      "loss": 1.0049,
      "step": 32
    },
    {
      "epoch": 0.5,
      "grad_norm": 9.422857284545898,
      "learning_rate": 2.073170731707317e-05,
      "loss": 1.1533,
      "step": 34
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 7.63829231262207,
      "learning_rate": 2.1951219512195124e-05,
      "loss": 1.1386,
      "step": 36
    },
    {
      "epoch": 0.5588235294117647,
      "grad_norm": 4.745357036590576,
      "learning_rate": 2.3170731707317075e-05,
      "loss": 0.9919,
      "step": 38
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 4.796962261199951,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 0.9971,
      "step": 40
    },
    {
      "epoch": 0.6176470588235294,
      "grad_norm": 4.623069763183594,
      "learning_rate": 2.5609756097560977e-05,
      "loss": 1.016,
      "step": 42
    },
    {
      "epoch": 0.6470588235294118,
      "grad_norm": 7.526188373565674,
      "learning_rate": 2.682926829268293e-05,
      "loss": 1.1257,
      "step": 44
    },
    {
      "epoch": 0.6764705882352942,
      "grad_norm": 6.750401973724365,
      "learning_rate": 2.8048780487804882e-05,
      "loss": 1.0988,
      "step": 46
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 5.956785678863525,
      "learning_rate": 2.926829268292683e-05,
      "loss": 1.0436,
      "step": 48
    },
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 6.079898834228516,
      "learning_rate": 3.048780487804878e-05,
      "loss": 1.0277,
      "step": 50
    },
    {
      "epoch": 0.7647058823529411,
      "grad_norm": 4.464956760406494,
      "learning_rate": 3.170731707317073e-05,
      "loss": 0.8688,
      "step": 52
    },
    {
      "epoch": 0.7941176470588235,
      "grad_norm": 6.561495304107666,
      "learning_rate": 3.292682926829269e-05,
      "loss": 0.8962,
      "step": 54
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 6.373690128326416,
      "learning_rate": 3.353658536585366e-05,
      "loss": 1.0343,
      "step": 56
    },
    {
      "epoch": 0.8529411764705882,
      "grad_norm": 8.000486373901367,
      "learning_rate": 3.475609756097561e-05,
      "loss": 1.0543,
      "step": 58
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 5.830896854400635,
      "learning_rate": 3.597560975609756e-05,
      "loss": 1.0761,
      "step": 60
    },
    {
      "epoch": 0.9117647058823529,
      "grad_norm": 10.151026725769043,
      "learning_rate": 3.7195121951219514e-05,
      "loss": 1.0737,
      "step": 62
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 9.73624038696289,
      "learning_rate": 3.8414634146341465e-05,
      "loss": 0.9112,
      "step": 64
    },
    {
      "epoch": 0.9705882352941176,
      "grad_norm": 9.713314056396484,
      "learning_rate": 3.9634146341463416e-05,
      "loss": 0.8498,
      "step": 66
    },
    {
      "epoch": 1.0,
      "grad_norm": 10.631162643432617,
      "learning_rate": 4.085365853658537e-05,
      "loss": 0.9937,
      "step": 68
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5564853556485355,
      "eval_f1_macro": 0.39598376878805575,
      "eval_f1_micro": 0.5564853556485355,
      "eval_f1_weighted": 0.4741897095372058,
      "eval_loss": 1.0355333089828491,
      "eval_precision_macro": 0.5140887485587238,
      "eval_precision_micro": 0.5564853556485355,
      "eval_precision_weighted": 0.5576172702264479,
      "eval_recall_macro": 0.4827948905153814,
      "eval_recall_micro": 0.5564853556485355,
      "eval_recall_weighted": 0.5564853556485355,
      "eval_runtime": 0.6549,
      "eval_samples_per_second": 729.907,
      "eval_steps_per_second": 12.216,
      "step": 68
    },
    {
      "epoch": 1.0294117647058822,
      "grad_norm": 7.002508163452148,
      "learning_rate": 4.207317073170732e-05,
      "loss": 0.9379,
      "step": 70
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 9.145544052124023,
      "learning_rate": 4.329268292682927e-05,
      "loss": 0.8374,
      "step": 72
    },
    {
      "epoch": 1.088235294117647,
      "grad_norm": 10.859468460083008,
      "learning_rate": 4.451219512195122e-05,
      "loss": 1.0609,
      "step": 74
    },
    {
      "epoch": 1.1176470588235294,
      "grad_norm": 5.649864196777344,
      "learning_rate": 4.573170731707318e-05,
      "loss": 0.8415,
      "step": 76
    },
    {
      "epoch": 1.1470588235294117,
      "grad_norm": 7.1180877685546875,
      "learning_rate": 4.695121951219512e-05,
      "loss": 0.9504,
      "step": 78
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 11.018819808959961,
      "learning_rate": 4.817073170731707e-05,
      "loss": 0.9917,
      "step": 80
    },
    {
      "epoch": 1.2058823529411764,
      "grad_norm": 9.096946716308594,
      "learning_rate": 4.9390243902439024e-05,
      "loss": 0.7748,
      "step": 82
    },
    {
      "epoch": 1.2352941176470589,
      "grad_norm": 14.755735397338867,
      "learning_rate": 4.993188010899183e-05,
      "loss": 0.8427,
      "step": 84
    },
    {
      "epoch": 1.2647058823529411,
      "grad_norm": 9.15238094329834,
      "learning_rate": 4.979564032697548e-05,
      "loss": 0.8695,
      "step": 86
    },
    {
      "epoch": 1.2941176470588236,
      "grad_norm": 10.265277862548828,
      "learning_rate": 4.9659400544959125e-05,
      "loss": 0.6385,
      "step": 88
    },
    {
      "epoch": 1.3235294117647058,
      "grad_norm": 9.704151153564453,
      "learning_rate": 4.952316076294278e-05,
      "loss": 0.756,
      "step": 90
    },
    {
      "epoch": 1.3529411764705883,
      "grad_norm": 9.141586303710938,
      "learning_rate": 4.9386920980926435e-05,
      "loss": 0.7567,
      "step": 92
    },
    {
      "epoch": 1.3823529411764706,
      "grad_norm": 9.520133018493652,
      "learning_rate": 4.925068119891008e-05,
      "loss": 0.9081,
      "step": 94
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 17.20820426940918,
      "learning_rate": 4.911444141689374e-05,
      "loss": 0.9661,
      "step": 96
    },
    {
      "epoch": 1.4411764705882353,
      "grad_norm": 9.26347827911377,
      "learning_rate": 4.8978201634877385e-05,
      "loss": 0.8661,
      "step": 98
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 12.740453720092773,
      "learning_rate": 4.884196185286104e-05,
      "loss": 0.8978,
      "step": 100
    },
    {
      "epoch": 1.5,
      "grad_norm": 8.899398803710938,
      "learning_rate": 4.870572207084469e-05,
      "loss": 0.6809,
      "step": 102
    },
    {
      "epoch": 1.5294117647058822,
      "grad_norm": 13.515447616577148,
      "learning_rate": 4.8569482288828335e-05,
      "loss": 0.7378,
      "step": 104
    },
    {
      "epoch": 1.5588235294117647,
      "grad_norm": 6.7167439460754395,
      "learning_rate": 4.843324250681199e-05,
      "loss": 0.7376,
      "step": 106
    },
    {
      "epoch": 1.5882352941176472,
      "grad_norm": 19.798791885375977,
      "learning_rate": 4.8297002724795645e-05,
      "loss": 0.7125,
      "step": 108
    },
    {
      "epoch": 1.6176470588235294,
      "grad_norm": null,
      "learning_rate": 4.822888283378747e-05,
      "loss": 0.7425,
      "step": 110
    },
    {
      "epoch": 1.6470588235294117,
      "grad_norm": 8.12073040008545,
      "learning_rate": 4.809264305177112e-05,
      "loss": 0.672,
      "step": 112
    },
    {
      "epoch": 1.6764705882352942,
      "grad_norm": 7.961780071258545,
      "learning_rate": 4.795640326975477e-05,
      "loss": 0.7023,
      "step": 114
    },
    {
      "epoch": 1.7058823529411766,
      "grad_norm": 15.105574607849121,
      "learning_rate": 4.782016348773842e-05,
      "loss": 0.6107,
      "step": 116
    },
    {
      "epoch": 1.7352941176470589,
      "grad_norm": 15.375845909118652,
      "learning_rate": 4.768392370572207e-05,
      "loss": 0.648,
      "step": 118
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 11.371450424194336,
      "learning_rate": 4.7547683923705725e-05,
      "loss": 0.7948,
      "step": 120
    },
    {
      "epoch": 1.7941176470588234,
      "grad_norm": 13.418404579162598,
      "learning_rate": 4.741144414168938e-05,
      "loss": 0.5621,
      "step": 122
    },
    {
      "epoch": 1.8235294117647058,
      "grad_norm": 14.923059463500977,
      "learning_rate": 4.727520435967303e-05,
      "loss": 0.7301,
      "step": 124
    },
    {
      "epoch": 1.8529411764705883,
      "grad_norm": 9.319025993347168,
      "learning_rate": 4.713896457765668e-05,
      "loss": 0.8735,
      "step": 126
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 7.903226375579834,
      "learning_rate": 4.700272479564033e-05,
      "loss": 0.7913,
      "step": 128
    },
    {
      "epoch": 1.9117647058823528,
      "grad_norm": 13.043506622314453,
      "learning_rate": 4.686648501362398e-05,
      "loss": 0.8099,
      "step": 130
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 12.253973007202148,
      "learning_rate": 4.673024523160763e-05,
      "loss": 0.7651,
      "step": 132
    },
    {
      "epoch": 1.9705882352941178,
      "grad_norm": 9.918290138244629,
      "learning_rate": 4.659400544959128e-05,
      "loss": 0.7537,
      "step": 134
    },
    {
      "epoch": 2.0,
      "grad_norm": 17.887121200561523,
      "learning_rate": 4.6457765667574935e-05,
      "loss": 0.8837,
      "step": 136
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6401673640167364,
      "eval_f1_macro": 0.5112056699141869,
      "eval_f1_micro": 0.6401673640167364,
      "eval_f1_weighted": 0.623119203970463,
      "eval_loss": 0.9266994595527649,
      "eval_precision_macro": 0.6451808449222778,
      "eval_precision_micro": 0.6401673640167364,
      "eval_precision_weighted": 0.6880713224888448,
      "eval_recall_macro": 0.5646276187019543,
      "eval_recall_micro": 0.6401673640167364,
      "eval_recall_weighted": 0.6401673640167364,
      "eval_runtime": 0.6577,
      "eval_samples_per_second": 726.807,
      "eval_steps_per_second": 12.164,
      "step": 136
    },
    {
      "epoch": 2.0294117647058822,
      "grad_norm": 12.88015365600586,
      "learning_rate": 4.632152588555859e-05,
      "loss": 0.6675,
      "step": 138
    },
    {
      "epoch": 2.0588235294117645,
      "grad_norm": 7.740164756774902,
      "learning_rate": 4.618528610354224e-05,
      "loss": 0.726,
      "step": 140
    },
    {
      "epoch": 2.088235294117647,
      "grad_norm": 6.169182777404785,
      "learning_rate": 4.604904632152589e-05,
      "loss": 0.5834,
      "step": 142
    },
    {
      "epoch": 2.1176470588235294,
      "grad_norm": 5.821132659912109,
      "learning_rate": 4.591280653950954e-05,
      "loss": 0.5636,
      "step": 144
    },
    {
      "epoch": 2.1470588235294117,
      "grad_norm": 7.872137069702148,
      "learning_rate": 4.577656675749319e-05,
      "loss": 0.566,
      "step": 146
    },
    {
      "epoch": 2.176470588235294,
      "grad_norm": 7.407035827636719,
      "learning_rate": 4.564032697547684e-05,
      "loss": 0.5023,
      "step": 148
    },
    {
      "epoch": 2.2058823529411766,
      "grad_norm": 8.054471015930176,
      "learning_rate": 4.550408719346049e-05,
      "loss": 0.4304,
      "step": 150
    },
    {
      "epoch": 2.235294117647059,
      "grad_norm": 20.935813903808594,
      "learning_rate": 4.5367847411444145e-05,
      "loss": 0.5301,
      "step": 152
    },
    {
      "epoch": 2.264705882352941,
      "grad_norm": 15.824753761291504,
      "learning_rate": 4.52316076294278e-05,
      "loss": 0.5815,
      "step": 154
    },
    {
      "epoch": 2.2941176470588234,
      "grad_norm": 11.326836585998535,
      "learning_rate": 4.509536784741145e-05,
      "loss": 0.7101,
      "step": 156
    },
    {
      "epoch": 2.323529411764706,
      "grad_norm": 27.99034309387207,
      "learning_rate": 4.4959128065395095e-05,
      "loss": 0.6138,
      "step": 158
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 7.2306623458862305,
      "learning_rate": 4.482288828337875e-05,
      "loss": 0.5515,
      "step": 160
    },
    {
      "epoch": 2.3823529411764706,
      "grad_norm": 15.374252319335938,
      "learning_rate": 4.46866485013624e-05,
      "loss": 0.7274,
      "step": 162
    },
    {
      "epoch": 2.411764705882353,
      "grad_norm": 11.15127182006836,
      "learning_rate": 4.4550408719346046e-05,
      "loss": 0.685,
      "step": 164
    },
    {
      "epoch": 2.4411764705882355,
      "grad_norm": 18.0255069732666,
      "learning_rate": 4.44141689373297e-05,
      "loss": 0.6403,
      "step": 166
    },
    {
      "epoch": 2.4705882352941178,
      "grad_norm": 15.681634902954102,
      "learning_rate": 4.4277929155313355e-05,
      "loss": 0.5377,
      "step": 168
    },
    {
      "epoch": 2.5,
      "grad_norm": 13.26118278503418,
      "learning_rate": 4.414168937329701e-05,
      "loss": 0.6531,
      "step": 170
    },
    {
      "epoch": 2.5294117647058822,
      "grad_norm": 5.877330780029297,
      "learning_rate": 4.400544959128066e-05,
      "loss": 0.4888,
      "step": 172
    },
    {
      "epoch": 2.5588235294117645,
      "grad_norm": 17.445058822631836,
      "learning_rate": 4.3869209809264305e-05,
      "loss": 0.5194,
      "step": 174
    },
    {
      "epoch": 2.588235294117647,
      "grad_norm": 11.56258487701416,
      "learning_rate": 4.373297002724796e-05,
      "loss": 0.3939,
      "step": 176
    },
    {
      "epoch": 2.6176470588235294,
      "grad_norm": 14.838981628417969,
      "learning_rate": 4.359673024523161e-05,
      "loss": 0.4646,
      "step": 178
    },
    {
      "epoch": 2.6470588235294117,
      "grad_norm": 16.686960220336914,
      "learning_rate": 4.3460490463215255e-05,
      "loss": 0.4943,
      "step": 180
    },
    {
      "epoch": 2.6764705882352944,
      "grad_norm": 13.008139610290527,
      "learning_rate": 4.332425068119891e-05,
      "loss": 0.6352,
      "step": 182
    },
    {
      "epoch": 2.7058823529411766,
      "grad_norm": 18.054786682128906,
      "learning_rate": 4.3188010899182565e-05,
      "loss": 0.4034,
      "step": 184
    },
    {
      "epoch": 2.735294117647059,
      "grad_norm": 9.771907806396484,
      "learning_rate": 4.305177111716621e-05,
      "loss": 0.3427,
      "step": 186
    },
    {
      "epoch": 2.764705882352941,
      "grad_norm": 8.055870056152344,
      "learning_rate": 4.291553133514987e-05,
      "loss": 0.3218,
      "step": 188
    },
    {
      "epoch": 2.7941176470588234,
      "grad_norm": 14.025898933410645,
      "learning_rate": 4.2779291553133515e-05,
      "loss": 0.5226,
      "step": 190
    },
    {
      "epoch": 2.8235294117647056,
      "grad_norm": 7.227923393249512,
      "learning_rate": 4.264305177111717e-05,
      "loss": 0.3266,
      "step": 192
    },
    {
      "epoch": 2.8529411764705883,
      "grad_norm": 15.04366397857666,
      "learning_rate": 4.250681198910082e-05,
      "loss": 0.6136,
      "step": 194
    },
    {
      "epoch": 2.8823529411764706,
      "grad_norm": 15.145903587341309,
      "learning_rate": 4.237057220708447e-05,
      "loss": 0.4222,
      "step": 196
    },
    {
      "epoch": 2.911764705882353,
      "grad_norm": 14.604679107666016,
      "learning_rate": 4.223433242506813e-05,
      "loss": 0.6989,
      "step": 198
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 10.518308639526367,
      "learning_rate": 4.2098092643051775e-05,
      "loss": 0.4435,
      "step": 200
    },
    {
      "epoch": 2.9705882352941178,
      "grad_norm": 16.961441040039062,
      "learning_rate": 4.196185286103542e-05,
      "loss": 0.6116,
      "step": 202
    },
    {
      "epoch": 3.0,
      "grad_norm": 14.586898803710938,
      "learning_rate": 4.182561307901908e-05,
      "loss": 0.4291,
      "step": 204
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.700836820083682,
      "eval_f1_macro": 0.6193851541399454,
      "eval_f1_micro": 0.700836820083682,
      "eval_f1_weighted": 0.6928921325883705,
      "eval_loss": 0.9263564944267273,
      "eval_precision_macro": 0.6728486680867634,
      "eval_precision_micro": 0.700836820083682,
      "eval_precision_weighted": 0.7079874543114232,
      "eval_recall_macro": 0.6147070748686357,
      "eval_recall_micro": 0.700836820083682,
      "eval_recall_weighted": 0.700836820083682,
      "eval_runtime": 0.6574,
      "eval_samples_per_second": 727.055,
      "eval_steps_per_second": 12.168,
      "step": 204
    }
  ],
  "logging_steps": 2,
  "max_steps": 816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 428613555234816.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}