| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6089309878213802, | |
| "eval_steps": 20, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_accuracy": 0.726605504587156, | |
| "eval_f1": 0.11834319526627218, | |
| "eval_loss": 0.6266470551490784, | |
| "eval_precision": 0.5, | |
| "eval_recall": 0.06711409395973154, | |
| "eval_runtime": 53.0676, | |
| "eval_samples_per_second": 5.615, | |
| "eval_steps_per_second": 0.188, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0006765899864682003, | |
| "grad_norm": 1.9097040891647339, | |
| "learning_rate": 1.3513513513513515e-07, | |
| "loss": 0.5346, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0013531799729364006, | |
| "grad_norm": 2.262101173400879, | |
| "learning_rate": 2.702702702702703e-07, | |
| "loss": 0.6432, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0020297699594046007, | |
| "grad_norm": 2.2351596355438232, | |
| "learning_rate": 4.0540540540540546e-07, | |
| "loss": 0.6418, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0027063599458728013, | |
| "grad_norm": 2.1407454013824463, | |
| "learning_rate": 5.405405405405406e-07, | |
| "loss": 0.681, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0033829499323410014, | |
| "grad_norm": 1.836843729019165, | |
| "learning_rate": 6.756756756756758e-07, | |
| "loss": 0.6663, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0040595399188092015, | |
| "grad_norm": 2.4660489559173584, | |
| "learning_rate": 8.108108108108109e-07, | |
| "loss": 0.6643, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.004736129905277402, | |
| "grad_norm": 2.2095065116882324, | |
| "learning_rate": 9.459459459459461e-07, | |
| "loss": 0.6107, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.005412719891745603, | |
| "grad_norm": 2.3385086059570312, | |
| "learning_rate": 1.0810810810810812e-06, | |
| "loss": 0.6332, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.006089309878213802, | |
| "grad_norm": 2.0470025539398193, | |
| "learning_rate": 1.2162162162162164e-06, | |
| "loss": 0.6645, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.006765899864682003, | |
| "grad_norm": 2.1129884719848633, | |
| "learning_rate": 1.3513513513513515e-06, | |
| "loss": 0.5937, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007442489851150203, | |
| "grad_norm": 2.343991994857788, | |
| "learning_rate": 1.4864864864864868e-06, | |
| "loss": 0.6274, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.008119079837618403, | |
| "grad_norm": 2.254518508911133, | |
| "learning_rate": 1.6216216216216219e-06, | |
| "loss": 0.6133, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.008795669824086604, | |
| "grad_norm": 2.3268182277679443, | |
| "learning_rate": 1.756756756756757e-06, | |
| "loss": 0.5994, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.009472259810554804, | |
| "grad_norm": 2.1147611141204834, | |
| "learning_rate": 1.8918918918918922e-06, | |
| "loss": 0.6043, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.010148849797023005, | |
| "grad_norm": 3.140791654586792, | |
| "learning_rate": 2.0270270270270273e-06, | |
| "loss": 0.6447, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.010825439783491205, | |
| "grad_norm": 2.154975175857544, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 0.6472, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.011502029769959404, | |
| "grad_norm": 2.405954599380493, | |
| "learning_rate": 2.297297297297298e-06, | |
| "loss": 0.6622, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.012178619756427604, | |
| "grad_norm": 1.8810043334960938, | |
| "learning_rate": 2.432432432432433e-06, | |
| "loss": 0.6463, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.012855209742895805, | |
| "grad_norm": 2.251763105392456, | |
| "learning_rate": 2.5675675675675675e-06, | |
| "loss": 0.6118, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.013531799729364006, | |
| "grad_norm": 2.2010996341705322, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 0.6566, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.013531799729364006, | |
| "eval_accuracy": 0.726605504587156, | |
| "eval_f1": 0.11834319526627218, | |
| "eval_loss": 0.6250319480895996, | |
| "eval_precision": 0.5, | |
| "eval_recall": 0.06711409395973154, | |
| "eval_runtime": 51.8026, | |
| "eval_samples_per_second": 5.753, | |
| "eval_steps_per_second": 0.193, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014208389715832206, | |
| "grad_norm": 2.1348178386688232, | |
| "learning_rate": 2.837837837837838e-06, | |
| "loss": 0.6593, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.014884979702300407, | |
| "grad_norm": 2.461346387863159, | |
| "learning_rate": 2.9729729729729736e-06, | |
| "loss": 0.5665, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.015561569688768605, | |
| "grad_norm": 1.7864395380020142, | |
| "learning_rate": 3.1081081081081082e-06, | |
| "loss": 0.6044, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.016238159675236806, | |
| "grad_norm": 2.120920419692993, | |
| "learning_rate": 3.2432432432432437e-06, | |
| "loss": 0.6494, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.016914749661705007, | |
| "grad_norm": 2.293957233428955, | |
| "learning_rate": 3.3783783783783788e-06, | |
| "loss": 0.6729, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.017591339648173207, | |
| "grad_norm": 1.9928455352783203, | |
| "learning_rate": 3.513513513513514e-06, | |
| "loss": 0.606, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.018267929634641408, | |
| "grad_norm": 1.8565198183059692, | |
| "learning_rate": 3.648648648648649e-06, | |
| "loss": 0.571, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.018944519621109608, | |
| "grad_norm": 1.8976123332977295, | |
| "learning_rate": 3.7837837837837844e-06, | |
| "loss": 0.5702, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.01962110960757781, | |
| "grad_norm": 2.2150862216949463, | |
| "learning_rate": 3.918918918918919e-06, | |
| "loss": 0.5535, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.02029769959404601, | |
| "grad_norm": 2.0916941165924072, | |
| "learning_rate": 4.0540540540540545e-06, | |
| "loss": 0.6707, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02097428958051421, | |
| "grad_norm": 2.0436134338378906, | |
| "learning_rate": 4.189189189189189e-06, | |
| "loss": 0.5966, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.02165087956698241, | |
| "grad_norm": 1.8890984058380127, | |
| "learning_rate": 4.324324324324325e-06, | |
| "loss": 0.5533, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.022327469553450607, | |
| "grad_norm": 2.0738587379455566, | |
| "learning_rate": 4.45945945945946e-06, | |
| "loss": 0.6128, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.023004059539918808, | |
| "grad_norm": 1.9424076080322266, | |
| "learning_rate": 4.594594594594596e-06, | |
| "loss": 0.5763, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02368064952638701, | |
| "grad_norm": 1.7840420007705688, | |
| "learning_rate": 4.72972972972973e-06, | |
| "loss": 0.5632, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02435723951285521, | |
| "grad_norm": 2.2191755771636963, | |
| "learning_rate": 4.864864864864866e-06, | |
| "loss": 0.6482, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.02503382949932341, | |
| "grad_norm": 1.925732970237732, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6266, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.02571041948579161, | |
| "grad_norm": 1.7854461669921875, | |
| "learning_rate": 5.135135135135135e-06, | |
| "loss": 0.5505, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.02638700947225981, | |
| "grad_norm": 1.9672614336013794, | |
| "learning_rate": 5.2702702702702705e-06, | |
| "loss": 0.5851, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.02706359945872801, | |
| "grad_norm": 1.713619589805603, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 0.5066, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02706359945872801, | |
| "eval_accuracy": 0.728440366972477, | |
| "eval_f1": 0.11904761904761904, | |
| "eval_loss": 0.6119223237037659, | |
| "eval_precision": 0.5263157894736842, | |
| "eval_recall": 0.06711409395973154, | |
| "eval_runtime": 52.1134, | |
| "eval_samples_per_second": 5.718, | |
| "eval_steps_per_second": 0.192, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02774018944519621, | |
| "grad_norm": 2.512800455093384, | |
| "learning_rate": 5.540540540540541e-06, | |
| "loss": 0.6359, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.028416779431664412, | |
| "grad_norm": 2.311678647994995, | |
| "learning_rate": 5.675675675675676e-06, | |
| "loss": 0.5823, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.029093369418132613, | |
| "grad_norm": 1.8111237287521362, | |
| "learning_rate": 5.810810810810811e-06, | |
| "loss": 0.5194, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.029769959404600813, | |
| "grad_norm": 2.3231632709503174, | |
| "learning_rate": 5.945945945945947e-06, | |
| "loss": 0.6335, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.030446549391069014, | |
| "grad_norm": 1.9767159223556519, | |
| "learning_rate": 6.081081081081082e-06, | |
| "loss": 0.5406, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03112313937753721, | |
| "grad_norm": 1.8099788427352905, | |
| "learning_rate": 6.2162162162162164e-06, | |
| "loss": 0.5191, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.031799729364005415, | |
| "grad_norm": 1.9105194807052612, | |
| "learning_rate": 6.351351351351351e-06, | |
| "loss": 0.5575, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.03247631935047361, | |
| "grad_norm": 1.8297271728515625, | |
| "learning_rate": 6.486486486486487e-06, | |
| "loss": 0.51, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.033152909336941816, | |
| "grad_norm": 1.8884862661361694, | |
| "learning_rate": 6.621621621621622e-06, | |
| "loss": 0.5755, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.03382949932341001, | |
| "grad_norm": 2.0803935527801514, | |
| "learning_rate": 6.7567567567567575e-06, | |
| "loss": 0.5915, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03450608930987822, | |
| "grad_norm": 2.036954164505005, | |
| "learning_rate": 6.891891891891892e-06, | |
| "loss": 0.5394, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.035182679296346414, | |
| "grad_norm": 2.0037217140197754, | |
| "learning_rate": 7.027027027027028e-06, | |
| "loss": 0.4967, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.03585926928281461, | |
| "grad_norm": 1.6572487354278564, | |
| "learning_rate": 7.162162162162163e-06, | |
| "loss": 0.5458, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.036535859269282815, | |
| "grad_norm": 1.8542054891586304, | |
| "learning_rate": 7.297297297297298e-06, | |
| "loss": 0.4571, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.03721244925575101, | |
| "grad_norm": 1.6970975399017334, | |
| "learning_rate": 7.4324324324324324e-06, | |
| "loss": 0.5125, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.037889039242219216, | |
| "grad_norm": 1.8225724697113037, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 0.549, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.03856562922868741, | |
| "grad_norm": 1.5912785530090332, | |
| "learning_rate": 7.702702702702704e-06, | |
| "loss": 0.4843, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.03924221921515562, | |
| "grad_norm": 1.694573998451233, | |
| "learning_rate": 7.837837837837838e-06, | |
| "loss": 0.5804, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.039918809201623814, | |
| "grad_norm": 1.6933585405349731, | |
| "learning_rate": 7.972972972972974e-06, | |
| "loss": 0.5306, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.04059539918809202, | |
| "grad_norm": 1.7225837707519531, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 0.4866, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04059539918809202, | |
| "eval_accuracy": 0.7376146788990826, | |
| "eval_f1": 0.2011173184357542, | |
| "eval_loss": 0.581759512424469, | |
| "eval_precision": 0.6, | |
| "eval_recall": 0.12080536912751678, | |
| "eval_runtime": 51.4731, | |
| "eval_samples_per_second": 5.789, | |
| "eval_steps_per_second": 0.194, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.041271989174560215, | |
| "grad_norm": 1.9804434776306152, | |
| "learning_rate": 8.243243243243245e-06, | |
| "loss": 0.5489, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.04194857916102842, | |
| "grad_norm": 2.3419950008392334, | |
| "learning_rate": 8.378378378378378e-06, | |
| "loss": 0.5551, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.04262516914749662, | |
| "grad_norm": 2.275982618331909, | |
| "learning_rate": 8.513513513513514e-06, | |
| "loss": 0.5127, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.04330175913396482, | |
| "grad_norm": 2.507098913192749, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 0.5736, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.04397834912043302, | |
| "grad_norm": 1.8046241998672485, | |
| "learning_rate": 8.783783783783785e-06, | |
| "loss": 0.4755, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.044654939106901215, | |
| "grad_norm": 1.8296290636062622, | |
| "learning_rate": 8.91891891891892e-06, | |
| "loss": 0.4999, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.04533152909336942, | |
| "grad_norm": 2.3316869735717773, | |
| "learning_rate": 9.054054054054054e-06, | |
| "loss": 0.4797, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.046008119079837616, | |
| "grad_norm": 1.6778762340545654, | |
| "learning_rate": 9.189189189189191e-06, | |
| "loss": 0.5238, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.04668470906630582, | |
| "grad_norm": 1.8217062950134277, | |
| "learning_rate": 9.324324324324325e-06, | |
| "loss": 0.526, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.04736129905277402, | |
| "grad_norm": 2.7135376930236816, | |
| "learning_rate": 9.45945945945946e-06, | |
| "loss": 0.5899, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04803788903924222, | |
| "grad_norm": 1.841891884803772, | |
| "learning_rate": 9.594594594594594e-06, | |
| "loss": 0.5312, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.04871447902571042, | |
| "grad_norm": 1.9096564054489136, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 0.5277, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04939106901217862, | |
| "grad_norm": 3.7141664028167725, | |
| "learning_rate": 9.864864864864865e-06, | |
| "loss": 0.5468, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.05006765899864682, | |
| "grad_norm": 2.147271156311035, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4658, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.05074424898511502, | |
| "grad_norm": 3.2354440689086914, | |
| "learning_rate": 1.0135135135135136e-05, | |
| "loss": 0.4915, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.05142083897158322, | |
| "grad_norm": 2.6529741287231445, | |
| "learning_rate": 1.027027027027027e-05, | |
| "loss": 0.5009, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.052097428958051424, | |
| "grad_norm": 1.9220309257507324, | |
| "learning_rate": 1.0405405405405407e-05, | |
| "loss": 0.4614, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.05277401894451962, | |
| "grad_norm": 2.6269216537475586, | |
| "learning_rate": 1.0540540540540541e-05, | |
| "loss": 0.4909, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.05345060893098782, | |
| "grad_norm": 2.8617451190948486, | |
| "learning_rate": 1.0675675675675677e-05, | |
| "loss": 0.5087, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.05412719891745602, | |
| "grad_norm": 2.258033275604248, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 0.4434, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05412719891745602, | |
| "eval_accuracy": 0.7412844036697248, | |
| "eval_f1": 0.3922413793103448, | |
| "eval_loss": 0.5494486689567566, | |
| "eval_precision": 0.5481927710843374, | |
| "eval_recall": 0.3053691275167785, | |
| "eval_runtime": 52.2043, | |
| "eval_samples_per_second": 5.708, | |
| "eval_steps_per_second": 0.192, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05480378890392422, | |
| "grad_norm": 3.6041858196258545, | |
| "learning_rate": 1.0945945945945946e-05, | |
| "loss": 0.4269, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.05548037889039242, | |
| "grad_norm": 2.4709510803222656, | |
| "learning_rate": 1.1081081081081081e-05, | |
| "loss": 0.5329, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.05615696887686062, | |
| "grad_norm": 2.8416366577148438, | |
| "learning_rate": 1.1216216216216219e-05, | |
| "loss": 0.4599, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.056833558863328824, | |
| "grad_norm": 2.6396408081054688, | |
| "learning_rate": 1.1351351351351352e-05, | |
| "loss": 0.4452, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.05751014884979702, | |
| "grad_norm": 1.7931419610977173, | |
| "learning_rate": 1.1486486486486488e-05, | |
| "loss": 0.4034, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.058186738836265225, | |
| "grad_norm": 2.2836318016052246, | |
| "learning_rate": 1.1621621621621622e-05, | |
| "loss": 0.3732, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.05886332882273342, | |
| "grad_norm": 2.0475215911865234, | |
| "learning_rate": 1.1756756756756757e-05, | |
| "loss": 0.4186, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.05953991880920163, | |
| "grad_norm": 2.0375993251800537, | |
| "learning_rate": 1.1891891891891894e-05, | |
| "loss": 0.3456, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.060216508795669824, | |
| "grad_norm": 3.458310604095459, | |
| "learning_rate": 1.2027027027027028e-05, | |
| "loss": 0.3599, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.06089309878213803, | |
| "grad_norm": 2.087979555130005, | |
| "learning_rate": 1.2162162162162164e-05, | |
| "loss": 0.3591, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.061569688768606225, | |
| "grad_norm": 2.4800474643707275, | |
| "learning_rate": 1.2297297297297299e-05, | |
| "loss": 0.3947, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.06224627875507442, | |
| "grad_norm": 3.9390594959259033, | |
| "learning_rate": 1.2432432432432433e-05, | |
| "loss": 0.4404, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.06292286874154263, | |
| "grad_norm": 3.231876850128174, | |
| "learning_rate": 1.2567567567567568e-05, | |
| "loss": 0.4116, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.06359945872801083, | |
| "grad_norm": 5.661862373352051, | |
| "learning_rate": 1.2702702702702702e-05, | |
| "loss": 0.4991, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.06427604871447902, | |
| "grad_norm": 3.7746121883392334, | |
| "learning_rate": 1.283783783783784e-05, | |
| "loss": 0.5173, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.06495263870094722, | |
| "grad_norm": 2.9691073894500732, | |
| "learning_rate": 1.2972972972972975e-05, | |
| "loss": 0.377, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.06562922868741543, | |
| "grad_norm": 2.5602574348449707, | |
| "learning_rate": 1.3108108108108109e-05, | |
| "loss": 0.3232, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.06630581867388363, | |
| "grad_norm": 3.1697347164154053, | |
| "learning_rate": 1.3243243243243244e-05, | |
| "loss": 0.3596, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.06698240866035182, | |
| "grad_norm": 5.4793877601623535, | |
| "learning_rate": 1.3378378378378381e-05, | |
| "loss": 0.3252, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.06765899864682003, | |
| "grad_norm": 3.7010715007781982, | |
| "learning_rate": 1.3513513513513515e-05, | |
| "loss": 0.264, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06765899864682003, | |
| "eval_accuracy": 0.7568807339449541, | |
| "eval_f1": 0.40979955456570155, | |
| "eval_loss": 0.5758041143417358, | |
| "eval_precision": 0.609271523178808, | |
| "eval_recall": 0.3087248322147651, | |
| "eval_runtime": 51.8245, | |
| "eval_samples_per_second": 5.75, | |
| "eval_steps_per_second": 0.193, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06833558863328823, | |
| "grad_norm": 2.3830792903900146, | |
| "learning_rate": 1.364864864864865e-05, | |
| "loss": 0.2756, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.06901217861975643, | |
| "grad_norm": 2.8554539680480957, | |
| "learning_rate": 1.3783783783783784e-05, | |
| "loss": 0.3233, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.06968876860622462, | |
| "grad_norm": 3.331234931945801, | |
| "learning_rate": 1.391891891891892e-05, | |
| "loss": 0.3524, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.07036535859269283, | |
| "grad_norm": 2.8779256343841553, | |
| "learning_rate": 1.4054054054054055e-05, | |
| "loss": 0.323, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.07104194857916103, | |
| "grad_norm": 2.8393092155456543, | |
| "learning_rate": 1.4189189189189189e-05, | |
| "loss": 0.328, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.07171853856562922, | |
| "grad_norm": 3.7622110843658447, | |
| "learning_rate": 1.4324324324324326e-05, | |
| "loss": 0.3449, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.07239512855209743, | |
| "grad_norm": 3.734447479248047, | |
| "learning_rate": 1.4459459459459462e-05, | |
| "loss": 0.3367, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.07307171853856563, | |
| "grad_norm": 4.105041980743408, | |
| "learning_rate": 1.4594594594594596e-05, | |
| "loss": 0.3038, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.07374830852503383, | |
| "grad_norm": 3.9254539012908936, | |
| "learning_rate": 1.4729729729729731e-05, | |
| "loss": 0.2617, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.07442489851150202, | |
| "grad_norm": 5.182884693145752, | |
| "learning_rate": 1.4864864864864865e-05, | |
| "loss": 0.3423, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07510148849797023, | |
| "grad_norm": 3.852728843688965, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.246, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.07577807848443843, | |
| "grad_norm": 3.291020631790161, | |
| "learning_rate": 1.5135135135135138e-05, | |
| "loss": 0.3383, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.07645466847090664, | |
| "grad_norm": 5.644819259643555, | |
| "learning_rate": 1.527027027027027e-05, | |
| "loss": 0.2452, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.07713125845737483, | |
| "grad_norm": 6.728042125701904, | |
| "learning_rate": 1.540540540540541e-05, | |
| "loss": 0.2767, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.07780784844384303, | |
| "grad_norm": 4.200859546661377, | |
| "learning_rate": 1.554054054054054e-05, | |
| "loss": 0.2707, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.07848443843031123, | |
| "grad_norm": 3.9574716091156006, | |
| "learning_rate": 1.5675675675675676e-05, | |
| "loss": 0.2733, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.07916102841677942, | |
| "grad_norm": 3.50284743309021, | |
| "learning_rate": 1.581081081081081e-05, | |
| "loss": 0.2615, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.07983761840324763, | |
| "grad_norm": 7.720501899719238, | |
| "learning_rate": 1.5945945945945947e-05, | |
| "loss": 0.2353, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.08051420838971583, | |
| "grad_norm": 5.794226169586182, | |
| "learning_rate": 1.6081081081081083e-05, | |
| "loss": 0.2454, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.08119079837618404, | |
| "grad_norm": 6.7274250984191895, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 0.2948, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08119079837618404, | |
| "eval_accuracy": 0.7678899082568807, | |
| "eval_f1": 0.3990498812351544, | |
| "eval_loss": 0.7271434664726257, | |
| "eval_precision": 0.6829268292682927, | |
| "eval_recall": 0.28187919463087246, | |
| "eval_runtime": 51.8636, | |
| "eval_samples_per_second": 5.746, | |
| "eval_steps_per_second": 0.193, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08186738836265223, | |
| "grad_norm": 4.321250915527344, | |
| "learning_rate": 1.6351351351351354e-05, | |
| "loss": 0.2774, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.08254397834912043, | |
| "grad_norm": 5.205666542053223, | |
| "learning_rate": 1.648648648648649e-05, | |
| "loss": 0.254, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.08322056833558863, | |
| "grad_norm": 4.166099548339844, | |
| "learning_rate": 1.662162162162162e-05, | |
| "loss": 0.2455, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.08389715832205684, | |
| "grad_norm": 5.376754283905029, | |
| "learning_rate": 1.6756756756756757e-05, | |
| "loss": 0.2982, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.08457374830852503, | |
| "grad_norm": 5.893986225128174, | |
| "learning_rate": 1.6891891891891896e-05, | |
| "loss": 0.2632, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08525033829499323, | |
| "grad_norm": 5.461335182189941, | |
| "learning_rate": 1.7027027027027028e-05, | |
| "loss": 0.1964, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.08592692828146144, | |
| "grad_norm": 8.870018005371094, | |
| "learning_rate": 1.7162162162162163e-05, | |
| "loss": 0.3057, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.08660351826792964, | |
| "grad_norm": 3.8947367668151855, | |
| "learning_rate": 1.72972972972973e-05, | |
| "loss": 0.2715, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.08728010825439783, | |
| "grad_norm": 4.829451084136963, | |
| "learning_rate": 1.7432432432432434e-05, | |
| "loss": 0.2395, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.08795669824086604, | |
| "grad_norm": 3.4110400676727295, | |
| "learning_rate": 1.756756756756757e-05, | |
| "loss": 0.2363, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08863328822733424, | |
| "grad_norm": 3.4218814373016357, | |
| "learning_rate": 1.7702702702702702e-05, | |
| "loss": 0.2343, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.08930987821380243, | |
| "grad_norm": 4.7118425369262695, | |
| "learning_rate": 1.783783783783784e-05, | |
| "loss": 0.2438, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.08998646820027063, | |
| "grad_norm": 5.201712608337402, | |
| "learning_rate": 1.7972972972972976e-05, | |
| "loss": 0.3213, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.09066305818673884, | |
| "grad_norm": 8.192056655883789, | |
| "learning_rate": 1.8108108108108108e-05, | |
| "loss": 0.3043, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.09133964817320704, | |
| "grad_norm": 4.3292694091796875, | |
| "learning_rate": 1.8243243243243244e-05, | |
| "loss": 0.3077, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.09201623815967523, | |
| "grad_norm": 9.865090370178223, | |
| "learning_rate": 1.8378378378378383e-05, | |
| "loss": 0.2584, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.09269282814614344, | |
| "grad_norm": 7.474611759185791, | |
| "learning_rate": 1.8513513513513515e-05, | |
| "loss": 0.2391, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.09336941813261164, | |
| "grad_norm": 3.1332149505615234, | |
| "learning_rate": 1.864864864864865e-05, | |
| "loss": 0.1335, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.09404600811907984, | |
| "grad_norm": 4.493014335632324, | |
| "learning_rate": 1.8783783783783786e-05, | |
| "loss": 0.2344, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.09472259810554803, | |
| "grad_norm": 5.90848970413208, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 0.2329, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09472259810554803, | |
| "eval_accuracy": 0.7697247706422018, | |
| "eval_f1": 0.35475578406169667, | |
| "eval_loss": 0.5965576767921448, | |
| "eval_precision": 0.7582417582417582, | |
| "eval_recall": 0.23154362416107382, | |
| "eval_runtime": 51.8498, | |
| "eval_samples_per_second": 5.747, | |
| "eval_steps_per_second": 0.193, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09539918809201624, | |
| "grad_norm": 3.0857784748077393, | |
| "learning_rate": 1.9054054054054057e-05, | |
| "loss": 0.1699, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.09607577807848444, | |
| "grad_norm": 3.591951370239258, | |
| "learning_rate": 1.918918918918919e-05, | |
| "loss": 0.2166, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.09675236806495263, | |
| "grad_norm": 5.5953826904296875, | |
| "learning_rate": 1.9324324324324328e-05, | |
| "loss": 0.1826, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.09742895805142084, | |
| "grad_norm": 4.522704601287842, | |
| "learning_rate": 1.9459459459459463e-05, | |
| "loss": 0.2288, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.09810554803788904, | |
| "grad_norm": 2.501812219619751, | |
| "learning_rate": 1.9594594594594595e-05, | |
| "loss": 0.1412, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09878213802435724, | |
| "grad_norm": 6.26653528213501, | |
| "learning_rate": 1.972972972972973e-05, | |
| "loss": 0.2609, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.09945872801082543, | |
| "grad_norm": 13.06122875213623, | |
| "learning_rate": 1.9864864864864866e-05, | |
| "loss": 0.3233, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.10013531799729364, | |
| "grad_norm": 4.477540493011475, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2679, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.10081190798376184, | |
| "grad_norm": 5.897082328796387, | |
| "learning_rate": 1.9999972102437076e-05, | |
| "loss": 0.2136, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.10148849797023005, | |
| "grad_norm": 4.226516246795654, | |
| "learning_rate": 1.9999888409903948e-05, | |
| "loss": 0.22, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10216508795669824, | |
| "grad_norm": 6.373837471008301, | |
| "learning_rate": 1.9999748922867592e-05, | |
| "loss": 0.2117, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.10284167794316644, | |
| "grad_norm": 4.057104110717773, | |
| "learning_rate": 1.9999553642106267e-05, | |
| "loss": 0.2398, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.10351826792963464, | |
| "grad_norm": 6.765925884246826, | |
| "learning_rate": 1.9999302568709548e-05, | |
| "loss": 0.2921, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.10419485791610285, | |
| "grad_norm": 11.143022537231445, | |
| "learning_rate": 1.9998995704078305e-05, | |
| "loss": 0.2496, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.10487144790257104, | |
| "grad_norm": 7.253014087677002, | |
| "learning_rate": 1.9998633049924693e-05, | |
| "loss": 0.1869, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.10554803788903924, | |
| "grad_norm": 9.102387428283691, | |
| "learning_rate": 1.9998214608272136e-05, | |
| "loss": 0.2344, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.10622462787550745, | |
| "grad_norm": 3.9514195919036865, | |
| "learning_rate": 1.9997740381455348e-05, | |
| "loss": 0.2364, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.10690121786197564, | |
| "grad_norm": 5.509130954742432, | |
| "learning_rate": 1.9997210372120276e-05, | |
| "loss": 0.2863, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.10757780784844384, | |
| "grad_norm": 3.954360246658325, | |
| "learning_rate": 1.9996624583224112e-05, | |
| "loss": 0.1248, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.10825439783491204, | |
| "grad_norm": 3.0605578422546387, | |
| "learning_rate": 1.999598301803528e-05, | |
| "loss": 0.1726, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10825439783491204, | |
| "eval_accuracy": 0.763302752293578, | |
| "eval_f1": 0.31382978723404253, | |
| "eval_loss": 0.5946537256240845, | |
| "eval_precision": 0.7564102564102564, | |
| "eval_recall": 0.19798657718120805, | |
| "eval_runtime": 51.9229, | |
| "eval_samples_per_second": 5.739, | |
| "eval_steps_per_second": 0.193, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10893098782138025, | |
| "grad_norm": 4.9909281730651855, | |
| "learning_rate": 1.9995285680133393e-05, | |
| "loss": 0.2449, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.10960757780784844, | |
| "grad_norm": 2.7885420322418213, | |
| "learning_rate": 1.999453257340926e-05, | |
| "loss": 0.1239, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.11028416779431664, | |
| "grad_norm": 4.381866931915283, | |
| "learning_rate": 1.9993723702064852e-05, | |
| "loss": 0.146, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.11096075778078485, | |
| "grad_norm": 7.0832109451293945, | |
| "learning_rate": 1.9992859070613275e-05, | |
| "loss": 0.2178, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.11163734776725305, | |
| "grad_norm": 4.502629280090332, | |
| "learning_rate": 1.9991938683878746e-05, | |
| "loss": 0.2039, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.11231393775372124, | |
| "grad_norm": 3.46604323387146, | |
| "learning_rate": 1.9990962546996583e-05, | |
| "loss": 0.1235, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.11299052774018944, | |
| "grad_norm": 2.314317464828491, | |
| "learning_rate": 1.9989930665413148e-05, | |
| "loss": 0.1033, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.11366711772665765, | |
| "grad_norm": 5.851840019226074, | |
| "learning_rate": 1.998884304488584e-05, | |
| "loss": 0.2414, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.11434370771312584, | |
| "grad_norm": 6.2724714279174805, | |
| "learning_rate": 1.998769969148305e-05, | |
| "loss": 0.2474, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.11502029769959404, | |
| "grad_norm": 3.0591259002685547, | |
| "learning_rate": 1.9986500611584133e-05, | |
| "loss": 0.1661, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11569688768606225, | |
| "grad_norm": 4.147556304931641, | |
| "learning_rate": 1.9985245811879372e-05, | |
| "loss": 0.1855, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.11637347767253045, | |
| "grad_norm": 4.872109413146973, | |
| "learning_rate": 1.9983935299369934e-05, | |
| "loss": 0.2505, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.11705006765899864, | |
| "grad_norm": 10.929080963134766, | |
| "learning_rate": 1.9982569081367844e-05, | |
| "loss": 0.238, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.11772665764546685, | |
| "grad_norm": 9.166586875915527, | |
| "learning_rate": 1.998114716549593e-05, | |
| "loss": 0.2415, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.11840324763193505, | |
| "grad_norm": 4.646167278289795, | |
| "learning_rate": 1.997966955968779e-05, | |
| "loss": 0.1264, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11907983761840325, | |
| "grad_norm": 4.666916847229004, | |
| "learning_rate": 1.9978136272187745e-05, | |
| "loss": 0.178, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.11975642760487144, | |
| "grad_norm": 7.303848743438721, | |
| "learning_rate": 1.9976547311550796e-05, | |
| "loss": 0.2303, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.12043301759133965, | |
| "grad_norm": 5.617541313171387, | |
| "learning_rate": 1.997490268664256e-05, | |
| "loss": 0.1295, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.12110960757780785, | |
| "grad_norm": 7.912723541259766, | |
| "learning_rate": 1.9973202406639247e-05, | |
| "loss": 0.2137, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.12178619756427606, | |
| "grad_norm": 3.9384965896606445, | |
| "learning_rate": 1.997144648102759e-05, | |
| "loss": 0.1085, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.12178619756427606, | |
| "eval_accuracy": 0.7678899082568807, | |
| "eval_f1": 0.3394255874673629, | |
| "eval_loss": 0.558770477771759, | |
| "eval_precision": 0.7647058823529411, | |
| "eval_recall": 0.2181208053691275, | |
| "eval_runtime": 52.2836, | |
| "eval_samples_per_second": 5.7, | |
| "eval_steps_per_second": 0.191, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.12246278755074425, | |
| "grad_norm": 4.896997928619385, | |
| "learning_rate": 1.99696349196048e-05, | |
| "loss": 0.2525, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.12313937753721245, | |
| "grad_norm": 2.2250826358795166, | |
| "learning_rate": 1.9967767732478506e-05, | |
| "loss": 0.1442, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.12381596752368065, | |
| "grad_norm": 5.748762607574463, | |
| "learning_rate": 1.99658449300667e-05, | |
| "loss": 0.3173, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.12449255751014884, | |
| "grad_norm": 3.4051263332366943, | |
| "learning_rate": 1.9963866523097683e-05, | |
| "loss": 0.2134, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.12516914749661706, | |
| "grad_norm": 3.8892011642456055, | |
| "learning_rate": 1.9961832522610004e-05, | |
| "loss": 0.2136, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.12584573748308525, | |
| "grad_norm": 5.042850017547607, | |
| "learning_rate": 1.9959742939952393e-05, | |
| "loss": 0.1986, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.12652232746955344, | |
| "grad_norm": 7.566000461578369, | |
| "learning_rate": 1.99575977867837e-05, | |
| "loss": 0.2481, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.12719891745602166, | |
| "grad_norm": 5.193778991699219, | |
| "learning_rate": 1.995539707507284e-05, | |
| "loss": 0.2304, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.12787550744248985, | |
| "grad_norm": 4.714810371398926, | |
| "learning_rate": 1.99531408170987e-05, | |
| "loss": 0.2234, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.12855209742895804, | |
| "grad_norm": 4.679834842681885, | |
| "learning_rate": 1.9950829025450116e-05, | |
| "loss": 0.2152, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12922868741542626, | |
| "grad_norm": 2.8689143657684326, | |
| "learning_rate": 1.994846171302575e-05, | |
| "loss": 0.1938, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.12990527740189445, | |
| "grad_norm": 3.1976468563079834, | |
| "learning_rate": 1.9946038893034045e-05, | |
| "loss": 0.1858, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.13058186738836267, | |
| "grad_norm": 3.2573113441467285, | |
| "learning_rate": 1.994356057899317e-05, | |
| "loss": 0.1333, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.13125845737483086, | |
| "grad_norm": 6.062759876251221, | |
| "learning_rate": 1.9941026784730898e-05, | |
| "loss": 0.2143, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.13193504736129905, | |
| "grad_norm": 3.474382162094116, | |
| "learning_rate": 1.9938437524384572e-05, | |
| "loss": 0.2385, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.13261163734776726, | |
| "grad_norm": 4.171142101287842, | |
| "learning_rate": 1.9935792812400997e-05, | |
| "loss": 0.2212, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.13328822733423545, | |
| "grad_norm": 2.72599720954895, | |
| "learning_rate": 1.9933092663536384e-05, | |
| "loss": 0.1579, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.13396481732070364, | |
| "grad_norm": 6.6125102043151855, | |
| "learning_rate": 1.9930337092856243e-05, | |
| "loss": 0.2187, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.13464140730717186, | |
| "grad_norm": 2.2951035499572754, | |
| "learning_rate": 1.9927526115735315e-05, | |
| "loss": 0.1567, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.13531799729364005, | |
| "grad_norm": 4.760623931884766, | |
| "learning_rate": 1.9924659747857485e-05, | |
| "loss": 0.2326, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13531799729364005, | |
| "eval_accuracy": 0.7623853211009174, | |
| "eval_f1": 0.3508771929824561, | |
| "eval_loss": 0.5019528865814209, | |
| "eval_precision": 0.693069306930693, | |
| "eval_recall": 0.2348993288590604, | |
| "eval_runtime": 51.9146, | |
| "eval_samples_per_second": 5.74, | |
| "eval_steps_per_second": 0.193, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13599458728010824, | |
| "grad_norm": 2.476130247116089, | |
| "learning_rate": 1.9921738005215687e-05, | |
| "loss": 0.0617, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.13667117726657646, | |
| "grad_norm": 7.735743045806885, | |
| "learning_rate": 1.9918760904111818e-05, | |
| "loss": 0.252, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.13734776725304465, | |
| "grad_norm": 3.769490957260132, | |
| "learning_rate": 1.991572846115666e-05, | |
| "loss": 0.1439, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.13802435723951287, | |
| "grad_norm": 4.381724834442139, | |
| "learning_rate": 1.9912640693269754e-05, | |
| "loss": 0.2143, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.13870094722598106, | |
| "grad_norm": 3.511615037918091, | |
| "learning_rate": 1.990949761767935e-05, | |
| "loss": 0.1747, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.13937753721244925, | |
| "grad_norm": 5.647243499755859, | |
| "learning_rate": 1.9906299251922273e-05, | |
| "loss": 0.1904, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.14005412719891747, | |
| "grad_norm": 3.9160757064819336, | |
| "learning_rate": 1.9903045613843844e-05, | |
| "loss": 0.1816, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.14073071718538566, | |
| "grad_norm": 5.033432483673096, | |
| "learning_rate": 1.9899736721597787e-05, | |
| "loss": 0.141, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.14140730717185385, | |
| "grad_norm": 4.042255401611328, | |
| "learning_rate": 1.9896372593646095e-05, | |
| "loss": 0.2043, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.14208389715832206, | |
| "grad_norm": 7.3899149894714355, | |
| "learning_rate": 1.989295324875897e-05, | |
| "loss": 0.2182, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.14276048714479025, | |
| "grad_norm": 9.090564727783203, | |
| "learning_rate": 1.9889478706014687e-05, | |
| "loss": 0.2333, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.14343707713125844, | |
| "grad_norm": 4.3934526443481445, | |
| "learning_rate": 1.9885948984799502e-05, | |
| "loss": 0.2836, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.14411366711772666, | |
| "grad_norm": 3.89416766166687, | |
| "learning_rate": 1.9882364104807536e-05, | |
| "loss": 0.2256, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.14479025710419485, | |
| "grad_norm": 6.6705241203308105, | |
| "learning_rate": 1.987872408604068e-05, | |
| "loss": 0.2007, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.14546684709066307, | |
| "grad_norm": 9.201639175415039, | |
| "learning_rate": 1.9875028948808457e-05, | |
| "loss": 0.217, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.14614343707713126, | |
| "grad_norm": 3.3696377277374268, | |
| "learning_rate": 1.9871278713727932e-05, | |
| "loss": 0.1132, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.14682002706359945, | |
| "grad_norm": 3.9588944911956787, | |
| "learning_rate": 1.9867473401723595e-05, | |
| "loss": 0.2086, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.14749661705006767, | |
| "grad_norm": 5.10556173324585, | |
| "learning_rate": 1.9863613034027224e-05, | |
| "loss": 0.1367, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.14817320703653586, | |
| "grad_norm": 3.8839104175567627, | |
| "learning_rate": 1.9859697632177796e-05, | |
| "loss": 0.1882, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.14884979702300405, | |
| "grad_norm": 3.7395753860473633, | |
| "learning_rate": 1.985572721802134e-05, | |
| "loss": 0.1228, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.14884979702300405, | |
| "eval_accuracy": 0.763302752293578, | |
| "eval_f1": 0.3316062176165803, | |
| "eval_loss": 0.49911028146743774, | |
| "eval_precision": 0.7272727272727273, | |
| "eval_recall": 0.21476510067114093, | |
| "eval_runtime": 53.4724, | |
| "eval_samples_per_second": 5.573, | |
| "eval_steps_per_second": 0.187, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.14952638700947227, | |
| "grad_norm": 2.52254056930542, | |
| "learning_rate": 1.9851701813710838e-05, | |
| "loss": 0.1429, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.15020297699594046, | |
| "grad_norm": 2.212614059448242, | |
| "learning_rate": 1.9847621441706076e-05, | |
| "loss": 0.0924, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.15087956698240865, | |
| "grad_norm": 5.361288547515869, | |
| "learning_rate": 1.9843486124773546e-05, | |
| "loss": 0.1915, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.15155615696887687, | |
| "grad_norm": 5.06777286529541, | |
| "learning_rate": 1.98392958859863e-05, | |
| "loss": 0.2049, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.15223274695534506, | |
| "grad_norm": 2.5112369060516357, | |
| "learning_rate": 1.9835050748723826e-05, | |
| "loss": 0.1132, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.15290933694181327, | |
| "grad_norm": 3.829697847366333, | |
| "learning_rate": 1.9830750736671923e-05, | |
| "loss": 0.1766, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.15358592692828146, | |
| "grad_norm": 6.517053604125977, | |
| "learning_rate": 1.982639587382256e-05, | |
| "loss": 0.2742, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.15426251691474965, | |
| "grad_norm": 3.8287339210510254, | |
| "learning_rate": 1.9821986184473757e-05, | |
| "loss": 0.1686, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.15493910690121787, | |
| "grad_norm": 3.6989524364471436, | |
| "learning_rate": 1.981752169322942e-05, | |
| "loss": 0.1286, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.15561569688768606, | |
| "grad_norm": 4.2301788330078125, | |
| "learning_rate": 1.981300242499924e-05, | |
| "loss": 0.1242, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.15629228687415425, | |
| "grad_norm": 5.749031066894531, | |
| "learning_rate": 1.9808428404998532e-05, | |
| "loss": 0.2348, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.15696887686062247, | |
| "grad_norm": 4.1858744621276855, | |
| "learning_rate": 1.9803799658748096e-05, | |
| "loss": 0.1809, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.15764546684709066, | |
| "grad_norm": 2.808894157409668, | |
| "learning_rate": 1.9799116212074077e-05, | |
| "loss": 0.1228, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.15832205683355885, | |
| "grad_norm": 4.898924350738525, | |
| "learning_rate": 1.9794378091107834e-05, | |
| "loss": 0.1964, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.15899864682002707, | |
| "grad_norm": 4.328680038452148, | |
| "learning_rate": 1.978958532228576e-05, | |
| "loss": 0.1566, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.15967523680649526, | |
| "grad_norm": 4.020467758178711, | |
| "learning_rate": 1.978473793234918e-05, | |
| "loss": 0.2254, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.16035182679296348, | |
| "grad_norm": 2.9529521465301514, | |
| "learning_rate": 1.977983594834416e-05, | |
| "loss": 0.1425, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.16102841677943167, | |
| "grad_norm": 3.5832724571228027, | |
| "learning_rate": 1.9774879397621387e-05, | |
| "loss": 0.1848, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.16170500676589986, | |
| "grad_norm": 6.061310768127441, | |
| "learning_rate": 1.9769868307835996e-05, | |
| "loss": 0.1344, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.16238159675236807, | |
| "grad_norm": 4.559755325317383, | |
| "learning_rate": 1.9764802706947423e-05, | |
| "loss": 0.1678, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.16238159675236807, | |
| "eval_accuracy": 0.763302752293578, | |
| "eval_f1": 0.32105263157894737, | |
| "eval_loss": 0.5306172966957092, | |
| "eval_precision": 0.7439024390243902, | |
| "eval_recall": 0.20469798657718122, | |
| "eval_runtime": 52.6664, | |
| "eval_samples_per_second": 5.658, | |
| "eval_steps_per_second": 0.19, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.16305818673883626, | |
| "grad_norm": 4.202253818511963, | |
| "learning_rate": 1.975968262321925e-05, | |
| "loss": 0.1828, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.16373477672530445, | |
| "grad_norm": 4.026851654052734, | |
| "learning_rate": 1.9754508085219057e-05, | |
| "loss": 0.2173, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.16441136671177267, | |
| "grad_norm": 3.136077404022217, | |
| "learning_rate": 1.9749279121818235e-05, | |
| "loss": 0.1614, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.16508795669824086, | |
| "grad_norm": 3.726810932159424, | |
| "learning_rate": 1.974399576219186e-05, | |
| "loss": 0.1667, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.16576454668470908, | |
| "grad_norm": 5.430721282958984, | |
| "learning_rate": 1.9738658035818495e-05, | |
| "loss": 0.2158, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.16644113667117727, | |
| "grad_norm": 2.940300226211548, | |
| "learning_rate": 1.973326597248006e-05, | |
| "loss": 0.1664, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.16711772665764546, | |
| "grad_norm": 3.436058282852173, | |
| "learning_rate": 1.972781960226163e-05, | |
| "loss": 0.1944, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.16779431664411368, | |
| "grad_norm": 3.708514928817749, | |
| "learning_rate": 1.9722318955551307e-05, | |
| "loss": 0.1869, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.16847090663058187, | |
| "grad_norm": 5.172970771789551, | |
| "learning_rate": 1.971676406304001e-05, | |
| "loss": 0.219, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.16914749661705006, | |
| "grad_norm": 4.198179721832275, | |
| "learning_rate": 1.9711154955721338e-05, | |
| "loss": 0.1606, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16982408660351828, | |
| "grad_norm": 4.068305969238281, | |
| "learning_rate": 1.9705491664891368e-05, | |
| "loss": 0.1952, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.17050067658998647, | |
| "grad_norm": 4.898702144622803, | |
| "learning_rate": 1.969977422214851e-05, | |
| "loss": 0.1904, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.17117726657645466, | |
| "grad_norm": 3.666128158569336, | |
| "learning_rate": 1.9694002659393306e-05, | |
| "loss": 0.177, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.17185385656292287, | |
| "grad_norm": 3.7936861515045166, | |
| "learning_rate": 1.968817700882826e-05, | |
| "loss": 0.1003, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.17253044654939106, | |
| "grad_norm": 3.3366808891296387, | |
| "learning_rate": 1.9682297302957666e-05, | |
| "loss": 0.1729, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.17320703653585928, | |
| "grad_norm": 4.625013828277588, | |
| "learning_rate": 1.9676363574587414e-05, | |
| "loss": 0.2212, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.17388362652232747, | |
| "grad_norm": 4.048298358917236, | |
| "learning_rate": 1.9670375856824823e-05, | |
| "loss": 0.1301, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.17456021650879566, | |
| "grad_norm": 3.388268232345581, | |
| "learning_rate": 1.966433418307843e-05, | |
| "loss": 0.091, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.17523680649526388, | |
| "grad_norm": 3.287910223007202, | |
| "learning_rate": 1.9658238587057832e-05, | |
| "loss": 0.1748, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.17591339648173207, | |
| "grad_norm": 4.814307689666748, | |
| "learning_rate": 1.9652089102773487e-05, | |
| "loss": 0.2111, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.17591339648173207, | |
| "eval_accuracy": 0.763302752293578, | |
| "eval_f1": 0.29120879120879123, | |
| "eval_loss": 0.5758374333381653, | |
| "eval_precision": 0.803030303030303, | |
| "eval_recall": 0.17785234899328858, | |
| "eval_runtime": 52.5901, | |
| "eval_samples_per_second": 5.666, | |
| "eval_steps_per_second": 0.19, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.17658998646820026, | |
| "grad_norm": 6.4831461906433105, | |
| "learning_rate": 1.9645885764536522e-05, | |
| "loss": 0.2182, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.17726657645466848, | |
| "grad_norm": 6.477516174316406, | |
| "learning_rate": 1.9639628606958535e-05, | |
| "loss": 0.2462, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.17794316644113667, | |
| "grad_norm": 3.73384690284729, | |
| "learning_rate": 1.9633317664951418e-05, | |
| "loss": 0.1425, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.17861975642760486, | |
| "grad_norm": 4.063915252685547, | |
| "learning_rate": 1.962695297372715e-05, | |
| "loss": 0.1388, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.17929634641407308, | |
| "grad_norm": 4.379425048828125, | |
| "learning_rate": 1.962053456879761e-05, | |
| "loss": 0.1929, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.17997293640054127, | |
| "grad_norm": 3.696601629257202, | |
| "learning_rate": 1.9614062485974364e-05, | |
| "loss": 0.1795, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.18064952638700948, | |
| "grad_norm": 4.814270973205566, | |
| "learning_rate": 1.9607536761368484e-05, | |
| "loss": 0.1906, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.18132611637347767, | |
| "grad_norm": 4.517858028411865, | |
| "learning_rate": 1.960095743139033e-05, | |
| "loss": 0.1902, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.18200270635994586, | |
| "grad_norm": 4.473437309265137, | |
| "learning_rate": 1.9594324532749353e-05, | |
| "loss": 0.1581, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.18267929634641408, | |
| "grad_norm": 6.359562873840332, | |
| "learning_rate": 1.95876381024539e-05, | |
| "loss": 0.2641, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.18335588633288227, | |
| "grad_norm": 5.020825386047363, | |
| "learning_rate": 1.958089817781099e-05, | |
| "loss": 0.1734, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.18403247631935046, | |
| "grad_norm": 5.725416660308838, | |
| "learning_rate": 1.9574104796426124e-05, | |
| "loss": 0.2771, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.18470906630581868, | |
| "grad_norm": 3.2466204166412354, | |
| "learning_rate": 1.956725799620305e-05, | |
| "loss": 0.1493, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.18538565629228687, | |
| "grad_norm": 4.315434455871582, | |
| "learning_rate": 1.9560357815343577e-05, | |
| "loss": 0.1879, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.18606224627875506, | |
| "grad_norm": 4.0321245193481445, | |
| "learning_rate": 1.9553404292347356e-05, | |
| "loss": 0.1276, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18673883626522328, | |
| "grad_norm": 3.7112905979156494, | |
| "learning_rate": 1.9546397466011654e-05, | |
| "loss": 0.173, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.18741542625169147, | |
| "grad_norm": 5.85778284072876, | |
| "learning_rate": 1.9539337375431144e-05, | |
| "loss": 0.3178, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.1880920162381597, | |
| "grad_norm": 5.455870151519775, | |
| "learning_rate": 1.9532224059997693e-05, | |
| "loss": 0.1971, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.18876860622462788, | |
| "grad_norm": 3.5191891193389893, | |
| "learning_rate": 1.9525057559400134e-05, | |
| "loss": 0.165, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.18944519621109607, | |
| "grad_norm": 4.080350399017334, | |
| "learning_rate": 1.9517837913624048e-05, | |
| "loss": 0.115, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.18944519621109607, | |
| "eval_accuracy": 0.7706422018348624, | |
| "eval_f1": 0.3315508021390374, | |
| "eval_loss": 0.49153777956962585, | |
| "eval_precision": 0.8157894736842105, | |
| "eval_recall": 0.2080536912751678, | |
| "eval_runtime": 53.3463, | |
| "eval_samples_per_second": 5.586, | |
| "eval_steps_per_second": 0.187, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.19012178619756429, | |
| "grad_norm": 3.4508748054504395, | |
| "learning_rate": 1.9510565162951538e-05, | |
| "loss": 0.1461, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.19079837618403248, | |
| "grad_norm": 5.258754253387451, | |
| "learning_rate": 1.9503239347961006e-05, | |
| "loss": 0.2396, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.19147496617050067, | |
| "grad_norm": 5.140385627746582, | |
| "learning_rate": 1.9495860509526935e-05, | |
| "loss": 0.1444, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.19215155615696888, | |
| "grad_norm": 1.9280897378921509, | |
| "learning_rate": 1.948842868881964e-05, | |
| "loss": 0.1426, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.19282814614343707, | |
| "grad_norm": 2.6845431327819824, | |
| "learning_rate": 1.948094392730506e-05, | |
| "loss": 0.1702, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.19350473612990526, | |
| "grad_norm": 2.7291038036346436, | |
| "learning_rate": 1.9473406266744518e-05, | |
| "loss": 0.1525, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.19418132611637348, | |
| "grad_norm": 2.8851161003112793, | |
| "learning_rate": 1.9465815749194482e-05, | |
| "loss": 0.1419, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.19485791610284167, | |
| "grad_norm": 3.973231554031372, | |
| "learning_rate": 1.9458172417006347e-05, | |
| "loss": 0.1782, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.1955345060893099, | |
| "grad_norm": 5.708676338195801, | |
| "learning_rate": 1.9450476312826178e-05, | |
| "loss": 0.1396, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.19621109607577808, | |
| "grad_norm": 3.4198830127716064, | |
| "learning_rate": 1.9442727479594486e-05, | |
| "loss": 0.1762, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.19688768606224627, | |
| "grad_norm": 5.898075103759766, | |
| "learning_rate": 1.9434925960545978e-05, | |
| "loss": 0.213, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.1975642760487145, | |
| "grad_norm": 5.121380805969238, | |
| "learning_rate": 1.9427071799209335e-05, | |
| "loss": 0.2684, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.19824086603518268, | |
| "grad_norm": 5.1736931800842285, | |
| "learning_rate": 1.941916503940694e-05, | |
| "loss": 0.2272, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.19891745602165087, | |
| "grad_norm": 3.988576650619507, | |
| "learning_rate": 1.941120572525467e-05, | |
| "loss": 0.2007, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.19959404600811909, | |
| "grad_norm": 6.444464683532715, | |
| "learning_rate": 1.9403193901161614e-05, | |
| "loss": 0.2243, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.20027063599458728, | |
| "grad_norm": 3.4448323249816895, | |
| "learning_rate": 1.9395129611829844e-05, | |
| "loss": 0.1175, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.2009472259810555, | |
| "grad_norm": 7.464962005615234, | |
| "learning_rate": 1.9387012902254165e-05, | |
| "loss": 0.2362, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.20162381596752368, | |
| "grad_norm": 4.778014183044434, | |
| "learning_rate": 1.9378843817721856e-05, | |
| "loss": 0.1657, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.20230040595399187, | |
| "grad_norm": 4.121883392333984, | |
| "learning_rate": 1.937062240381243e-05, | |
| "loss": 0.1339, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.2029769959404601, | |
| "grad_norm": 5.5182576179504395, | |
| "learning_rate": 1.9362348706397374e-05, | |
| "loss": 0.1785, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2029769959404601, | |
| "eval_accuracy": 0.773394495412844, | |
| "eval_f1": 0.35170603674540685, | |
| "eval_loss": 0.5283112525939941, | |
| "eval_precision": 0.8072289156626506, | |
| "eval_recall": 0.22483221476510068, | |
| "eval_runtime": 51.5451, | |
| "eval_samples_per_second": 5.781, | |
| "eval_steps_per_second": 0.194, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.20365358592692828, | |
| "grad_norm": 5.575674057006836, | |
| "learning_rate": 1.935402277163988e-05, | |
| "loss": 0.1904, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.20433017591339647, | |
| "grad_norm": 4.0572829246521, | |
| "learning_rate": 1.934564464599461e-05, | |
| "loss": 0.1882, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2050067658998647, | |
| "grad_norm": 3.6547298431396484, | |
| "learning_rate": 1.9337214376207417e-05, | |
| "loss": 0.2039, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.20568335588633288, | |
| "grad_norm": 4.347687721252441, | |
| "learning_rate": 1.9328732009315107e-05, | |
| "loss": 0.169, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.20635994587280107, | |
| "grad_norm": 4.556861400604248, | |
| "learning_rate": 1.932019759264514e-05, | |
| "loss": 0.203, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2070365358592693, | |
| "grad_norm": 5.285150527954102, | |
| "learning_rate": 1.931161117381541e-05, | |
| "loss": 0.1871, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.20771312584573748, | |
| "grad_norm": 3.5677342414855957, | |
| "learning_rate": 1.9302972800733945e-05, | |
| "loss": 0.1735, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.2083897158322057, | |
| "grad_norm": 2.768244743347168, | |
| "learning_rate": 1.929428252159866e-05, | |
| "loss": 0.1313, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.2090663058186739, | |
| "grad_norm": 3.212564706802368, | |
| "learning_rate": 1.9285540384897073e-05, | |
| "loss": 0.1884, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.20974289580514208, | |
| "grad_norm": 3.121872901916504, | |
| "learning_rate": 1.9276746439406046e-05, | |
| "loss": 0.2129, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2104194857916103, | |
| "grad_norm": 3.6964709758758545, | |
| "learning_rate": 1.9267900734191515e-05, | |
| "loss": 0.2355, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.21109607577807848, | |
| "grad_norm": 3.5150861740112305, | |
| "learning_rate": 1.9259003318608192e-05, | |
| "loss": 0.1878, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.21177266576454667, | |
| "grad_norm": 3.9340460300445557, | |
| "learning_rate": 1.925005424229933e-05, | |
| "loss": 0.15, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.2124492557510149, | |
| "grad_norm": 8.675088882446289, | |
| "learning_rate": 1.9241053555196405e-05, | |
| "loss": 0.2527, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.21312584573748308, | |
| "grad_norm": 7.502621173858643, | |
| "learning_rate": 1.923200130751887e-05, | |
| "loss": 0.2089, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.21380243572395127, | |
| "grad_norm": 2.9825868606567383, | |
| "learning_rate": 1.922289754977385e-05, | |
| "loss": 0.1341, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.2144790257104195, | |
| "grad_norm": 5.308103084564209, | |
| "learning_rate": 1.9213742332755877e-05, | |
| "loss": 0.1872, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.21515561569688768, | |
| "grad_norm": 5.801865577697754, | |
| "learning_rate": 1.9204535707546602e-05, | |
| "loss": 0.2068, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.2158322056833559, | |
| "grad_norm": 4.229133605957031, | |
| "learning_rate": 1.919527772551451e-05, | |
| "loss": 0.1741, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.2165087956698241, | |
| "grad_norm": 2.5876822471618652, | |
| "learning_rate": 1.918596843831462e-05, | |
| "loss": 0.1221, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2165087956698241, | |
| "eval_accuracy": 0.771559633027523, | |
| "eval_f1": 0.3178082191780822, | |
| "eval_loss": 0.49765169620513916, | |
| "eval_precision": 0.8656716417910447, | |
| "eval_recall": 0.19463087248322147, | |
| "eval_runtime": 51.4716, | |
| "eval_samples_per_second": 5.79, | |
| "eval_steps_per_second": 0.194, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.21718538565629228, | |
| "grad_norm": 3.950688600540161, | |
| "learning_rate": 1.9176607897888217e-05, | |
| "loss": 0.1415, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.2178619756427605, | |
| "grad_norm": 2.8493704795837402, | |
| "learning_rate": 1.916719615646256e-05, | |
| "loss": 0.1171, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2185385656292287, | |
| "grad_norm": 2.8084137439727783, | |
| "learning_rate": 1.9157733266550577e-05, | |
| "loss": 0.1774, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.21921515561569688, | |
| "grad_norm": 3.1478347778320312, | |
| "learning_rate": 1.914821928095058e-05, | |
| "loss": 0.1816, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.2198917456021651, | |
| "grad_norm": 3.8505070209503174, | |
| "learning_rate": 1.913865425274597e-05, | |
| "loss": 0.1413, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.22056833558863329, | |
| "grad_norm": 4.398627281188965, | |
| "learning_rate": 1.9129038235304946e-05, | |
| "loss": 0.1745, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.22124492557510148, | |
| "grad_norm": 4.207502841949463, | |
| "learning_rate": 1.9119371282280197e-05, | |
| "loss": 0.1996, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.2219215155615697, | |
| "grad_norm": 3.8576388359069824, | |
| "learning_rate": 1.9109653447608607e-05, | |
| "loss": 0.1522, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.22259810554803788, | |
| "grad_norm": 3.856382369995117, | |
| "learning_rate": 1.909988478551096e-05, | |
| "loss": 0.1642, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.2232746955345061, | |
| "grad_norm": 5.088944911956787, | |
| "learning_rate": 1.909006535049163e-05, | |
| "loss": 0.1496, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2239512855209743, | |
| "grad_norm": 3.050905704498291, | |
| "learning_rate": 1.908019519733827e-05, | |
| "loss": 0.1263, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.22462787550744248, | |
| "grad_norm": 2.3252899646759033, | |
| "learning_rate": 1.907027438112153e-05, | |
| "loss": 0.1323, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.2253044654939107, | |
| "grad_norm": 3.3663437366485596, | |
| "learning_rate": 1.9060302957194732e-05, | |
| "loss": 0.1085, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2259810554803789, | |
| "grad_norm": 6.574558258056641, | |
| "learning_rate": 1.9050280981193555e-05, | |
| "loss": 0.2637, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.22665764546684708, | |
| "grad_norm": 4.396321773529053, | |
| "learning_rate": 1.9040208509035745e-05, | |
| "loss": 0.1464, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2273342354533153, | |
| "grad_norm": 6.94221305847168, | |
| "learning_rate": 1.9030085596920786e-05, | |
| "loss": 0.1608, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2280108254397835, | |
| "grad_norm": 3.8373563289642334, | |
| "learning_rate": 1.9019912301329593e-05, | |
| "loss": 0.2373, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.22868741542625168, | |
| "grad_norm": 2.109513998031616, | |
| "learning_rate": 1.900968867902419e-05, | |
| "loss": 0.1219, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2293640054127199, | |
| "grad_norm": 2.0259997844696045, | |
| "learning_rate": 1.899941478704742e-05, | |
| "loss": 0.1093, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.23004059539918809, | |
| "grad_norm": 2.7739081382751465, | |
| "learning_rate": 1.8989090682722583e-05, | |
| "loss": 0.1666, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.23004059539918809, | |
| "eval_accuracy": 0.7779816513761468, | |
| "eval_f1": 0.36649214659685864, | |
| "eval_loss": 0.48961758613586426, | |
| "eval_precision": 0.8333333333333334, | |
| "eval_recall": 0.2348993288590604, | |
| "eval_runtime": 51.6267, | |
| "eval_samples_per_second": 5.772, | |
| "eval_steps_per_second": 0.194, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2307171853856563, | |
| "grad_norm": 2.2220234870910645, | |
| "learning_rate": 1.8978716423653153e-05, | |
| "loss": 0.1309, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2313937753721245, | |
| "grad_norm": 2.679076910018921, | |
| "learning_rate": 1.8968292067722433e-05, | |
| "loss": 0.1182, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.23207036535859268, | |
| "grad_norm": 2.6752357482910156, | |
| "learning_rate": 1.8957817673093258e-05, | |
| "loss": 0.0976, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.2327469553450609, | |
| "grad_norm": 3.9076900482177734, | |
| "learning_rate": 1.8947293298207637e-05, | |
| "loss": 0.2482, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2334235453315291, | |
| "grad_norm": 2.4209744930267334, | |
| "learning_rate": 1.8936719001786453e-05, | |
| "loss": 0.1294, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.23410013531799728, | |
| "grad_norm": 2.483504056930542, | |
| "learning_rate": 1.8926094842829128e-05, | |
| "loss": 0.1349, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2347767253044655, | |
| "grad_norm": 5.663891315460205, | |
| "learning_rate": 1.891542088061329e-05, | |
| "loss": 0.247, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.2354533152909337, | |
| "grad_norm": 3.8202829360961914, | |
| "learning_rate": 1.8904697174694447e-05, | |
| "loss": 0.1991, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2361299052774019, | |
| "grad_norm": 4.326652526855469, | |
| "learning_rate": 1.8893923784905647e-05, | |
| "loss": 0.2144, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2368064952638701, | |
| "grad_norm": 4.610856056213379, | |
| "learning_rate": 1.888310077135716e-05, | |
| "loss": 0.2581, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2374830852503383, | |
| "grad_norm": 1.77244234085083, | |
| "learning_rate": 1.887222819443612e-05, | |
| "loss": 0.1308, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.2381596752368065, | |
| "grad_norm": 3.1713812351226807, | |
| "learning_rate": 1.886130611480621e-05, | |
| "loss": 0.1067, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2388362652232747, | |
| "grad_norm": 3.379279136657715, | |
| "learning_rate": 1.885033459340731e-05, | |
| "loss": 0.1363, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.2395128552097429, | |
| "grad_norm": 3.649993896484375, | |
| "learning_rate": 1.8839313691455163e-05, | |
| "loss": 0.1521, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2401894451962111, | |
| "grad_norm": 3.849156379699707, | |
| "learning_rate": 1.8828243470441026e-05, | |
| "loss": 0.2396, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2408660351826793, | |
| "grad_norm": 2.089524745941162, | |
| "learning_rate": 1.8817123992131344e-05, | |
| "loss": 0.1016, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.24154262516914748, | |
| "grad_norm": 5.252878665924072, | |
| "learning_rate": 1.880595531856738e-05, | |
| "loss": 0.1606, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.2422192151556157, | |
| "grad_norm": 2.9322009086608887, | |
| "learning_rate": 1.879473751206489e-05, | |
| "loss": 0.1346, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2428958051420839, | |
| "grad_norm": 3.819847822189331, | |
| "learning_rate": 1.878347063521377e-05, | |
| "loss": 0.2052, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.2435723951285521, | |
| "grad_norm": 3.4725139141082764, | |
| "learning_rate": 1.8772154750877696e-05, | |
| "loss": 0.1843, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2435723951285521, | |
| "eval_accuracy": 0.8009174311926606, | |
| "eval_f1": 0.49417249417249415, | |
| "eval_loss": 0.4227532148361206, | |
| "eval_precision": 0.8091603053435115, | |
| "eval_recall": 0.35570469798657717, | |
| "eval_runtime": 51.8917, | |
| "eval_samples_per_second": 5.743, | |
| "eval_steps_per_second": 0.193, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2442489851150203, | |
| "grad_norm": 5.326034069061279, | |
| "learning_rate": 1.876078992219379e-05, | |
| "loss": 0.1945, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.2449255751014885, | |
| "grad_norm": 7.080038070678711, | |
| "learning_rate": 1.8749376212572254e-05, | |
| "loss": 0.2108, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2456021650879567, | |
| "grad_norm": 4.56862211227417, | |
| "learning_rate": 1.873791368569603e-05, | |
| "loss": 0.1117, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.2462787550744249, | |
| "grad_norm": 5.154612064361572, | |
| "learning_rate": 1.8726402405520425e-05, | |
| "loss": 0.1771, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.2469553450608931, | |
| "grad_norm": 4.204885005950928, | |
| "learning_rate": 1.8714842436272774e-05, | |
| "loss": 0.1878, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2476319350473613, | |
| "grad_norm": 3.4089162349700928, | |
| "learning_rate": 1.8703233842452072e-05, | |
| "loss": 0.1567, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2483085250338295, | |
| "grad_norm": 3.831226348876953, | |
| "learning_rate": 1.8691576688828613e-05, | |
| "loss": 0.1782, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.2489851150202977, | |
| "grad_norm": 7.267455577850342, | |
| "learning_rate": 1.8679871040443632e-05, | |
| "loss": 0.2087, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.2496617050067659, | |
| "grad_norm": 4.274538993835449, | |
| "learning_rate": 1.866811696260894e-05, | |
| "loss": 0.1906, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.2503382949932341, | |
| "grad_norm": 5.741366863250732, | |
| "learning_rate": 1.865631452090657e-05, | |
| "loss": 0.2119, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2510148849797023, | |
| "grad_norm": 3.3094899654388428, | |
| "learning_rate": 1.8644463781188387e-05, | |
| "loss": 0.1853, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2516914749661705, | |
| "grad_norm": 3.474271774291992, | |
| "learning_rate": 1.863256480957574e-05, | |
| "loss": 0.1849, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.2523680649526387, | |
| "grad_norm": 2.9809741973876953, | |
| "learning_rate": 1.8620617672459097e-05, | |
| "loss": 0.1298, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2530446549391069, | |
| "grad_norm": 6.16650915145874, | |
| "learning_rate": 1.8608622436497657e-05, | |
| "loss": 0.2405, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.25372124492557513, | |
| "grad_norm": 2.5345046520233154, | |
| "learning_rate": 1.859657916861899e-05, | |
| "loss": 0.1597, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2543978349120433, | |
| "grad_norm": 4.540293216705322, | |
| "learning_rate": 1.8584487936018663e-05, | |
| "loss": 0.2112, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.2550744248985115, | |
| "grad_norm": 1.9297409057617188, | |
| "learning_rate": 1.8572348806159857e-05, | |
| "loss": 0.181, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.2557510148849797, | |
| "grad_norm": 2.586928606033325, | |
| "learning_rate": 1.8560161846773002e-05, | |
| "loss": 0.1348, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.2564276048714479, | |
| "grad_norm": 2.4230494499206543, | |
| "learning_rate": 1.854792712585539e-05, | |
| "loss": 0.1307, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.2571041948579161, | |
| "grad_norm": 3.1270833015441895, | |
| "learning_rate": 1.8535644711670804e-05, | |
| "loss": 0.1995, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2571041948579161, | |
| "eval_accuracy": 0.773394495412844, | |
| "eval_f1": 0.35509138381201044, | |
| "eval_loss": 0.45505937933921814, | |
| "eval_precision": 0.8, | |
| "eval_recall": 0.22818791946308725, | |
| "eval_runtime": 51.9893, | |
| "eval_samples_per_second": 5.732, | |
| "eval_steps_per_second": 0.192, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2577807848443843, | |
| "grad_norm": 3.090973138809204, | |
| "learning_rate": 1.8523314672749123e-05, | |
| "loss": 0.1002, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.2584573748308525, | |
| "grad_norm": 2.197547197341919, | |
| "learning_rate": 1.851093707788596e-05, | |
| "loss": 0.1507, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.2591339648173207, | |
| "grad_norm": 3.1576650142669678, | |
| "learning_rate": 1.8498511996142255e-05, | |
| "loss": 0.1499, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.2598105548037889, | |
| "grad_norm": 4.379716396331787, | |
| "learning_rate": 1.848603949684391e-05, | |
| "loss": 0.1485, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.2604871447902571, | |
| "grad_norm": 3.0266523361206055, | |
| "learning_rate": 1.8473519649581396e-05, | |
| "loss": 0.1738, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.26116373477672533, | |
| "grad_norm": 4.019914627075195, | |
| "learning_rate": 1.8460952524209355e-05, | |
| "loss": 0.2077, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.2618403247631935, | |
| "grad_norm": 4.621005535125732, | |
| "learning_rate": 1.844833819084622e-05, | |
| "loss": 0.1384, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2625169147496617, | |
| "grad_norm": 3.632671594619751, | |
| "learning_rate": 1.8435676719873828e-05, | |
| "loss": 0.1214, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.2631935047361299, | |
| "grad_norm": 6.161576271057129, | |
| "learning_rate": 1.842296818193701e-05, | |
| "loss": 0.2652, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.2638700947225981, | |
| "grad_norm": 2.499661922454834, | |
| "learning_rate": 1.8410212647943215e-05, | |
| "loss": 0.1611, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2645466847090663, | |
| "grad_norm": 2.6793367862701416, | |
| "learning_rate": 1.8397410189062106e-05, | |
| "loss": 0.1301, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.2652232746955345, | |
| "grad_norm": 3.2850935459136963, | |
| "learning_rate": 1.8384560876725163e-05, | |
| "loss": 0.1692, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.2658998646820027, | |
| "grad_norm": 4.6707682609558105, | |
| "learning_rate": 1.8371664782625287e-05, | |
| "loss": 0.1819, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.2665764546684709, | |
| "grad_norm": 2.8113269805908203, | |
| "learning_rate": 1.8358721978716398e-05, | |
| "loss": 0.1587, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.2672530446549391, | |
| "grad_norm": 2.5062456130981445, | |
| "learning_rate": 1.834573253721303e-05, | |
| "loss": 0.1458, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2679296346414073, | |
| "grad_norm": 4.038453102111816, | |
| "learning_rate": 1.8332696530589936e-05, | |
| "loss": 0.1954, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.26860622462787553, | |
| "grad_norm": 4.07120418548584, | |
| "learning_rate": 1.831961403158168e-05, | |
| "loss": 0.1479, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.2692828146143437, | |
| "grad_norm": 3.7134037017822266, | |
| "learning_rate": 1.830648511318223e-05, | |
| "loss": 0.1507, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.2699594046008119, | |
| "grad_norm": 4.160938739776611, | |
| "learning_rate": 1.8293309848644554e-05, | |
| "loss": 0.1709, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.2706359945872801, | |
| "grad_norm": 3.3158607482910156, | |
| "learning_rate": 1.8280088311480203e-05, | |
| "loss": 0.0761, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2706359945872801, | |
| "eval_accuracy": 0.8073394495412844, | |
| "eval_f1": 0.5161290322580645, | |
| "eval_loss": 0.42147114872932434, | |
| "eval_precision": 0.8235294117647058, | |
| "eval_recall": 0.37583892617449666, | |
| "eval_runtime": 51.945, | |
| "eval_samples_per_second": 5.737, | |
| "eval_steps_per_second": 0.193, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2713125845737483, | |
| "grad_norm": 3.803469181060791, | |
| "learning_rate": 1.8266820575458908e-05, | |
| "loss": 0.1116, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2719891745602165, | |
| "grad_norm": 4.727139949798584, | |
| "learning_rate": 1.8253506714608176e-05, | |
| "loss": 0.1973, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.27266576454668473, | |
| "grad_norm": 4.788311004638672, | |
| "learning_rate": 1.8240146803212854e-05, | |
| "loss": 0.2294, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.2733423545331529, | |
| "grad_norm": 9.147326469421387, | |
| "learning_rate": 1.822674091581474e-05, | |
| "loss": 0.1194, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.2740189445196211, | |
| "grad_norm": 3.2791059017181396, | |
| "learning_rate": 1.8213289127212152e-05, | |
| "loss": 0.0931, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2746955345060893, | |
| "grad_norm": 4.282406330108643, | |
| "learning_rate": 1.8199791512459507e-05, | |
| "loss": 0.1893, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2753721244925575, | |
| "grad_norm": 5.307563781738281, | |
| "learning_rate": 1.8186248146866928e-05, | |
| "loss": 0.1627, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.27604871447902574, | |
| "grad_norm": 3.126235246658325, | |
| "learning_rate": 1.817265910599978e-05, | |
| "loss": 0.1254, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2767253044654939, | |
| "grad_norm": 3.751150369644165, | |
| "learning_rate": 1.81590244656783e-05, | |
| "loss": 0.1884, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2774018944519621, | |
| "grad_norm": 4.908536434173584, | |
| "learning_rate": 1.8145344301977126e-05, | |
| "loss": 0.1522, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2780784844384303, | |
| "grad_norm": 3.765190601348877, | |
| "learning_rate": 1.8131618691224916e-05, | |
| "loss": 0.1509, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.2787550744248985, | |
| "grad_norm": 4.558242321014404, | |
| "learning_rate": 1.811784771000387e-05, | |
| "loss": 0.1151, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.2794316644113667, | |
| "grad_norm": 3.2288334369659424, | |
| "learning_rate": 1.8104031435149366e-05, | |
| "loss": 0.1238, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.28010825439783493, | |
| "grad_norm": 3.91261625289917, | |
| "learning_rate": 1.8090169943749477e-05, | |
| "loss": 0.1979, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.2807848443843031, | |
| "grad_norm": 5.789203643798828, | |
| "learning_rate": 1.8076263313144568e-05, | |
| "loss": 0.1015, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.2814614343707713, | |
| "grad_norm": 3.4970619678497314, | |
| "learning_rate": 1.806231162092686e-05, | |
| "loss": 0.1626, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.2821380243572395, | |
| "grad_norm": 2.941303253173828, | |
| "learning_rate": 1.804831494494e-05, | |
| "loss": 0.1262, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.2828146143437077, | |
| "grad_norm": 3.1387312412261963, | |
| "learning_rate": 1.8034273363278615e-05, | |
| "loss": 0.112, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.28349120433017594, | |
| "grad_norm": 3.1637914180755615, | |
| "learning_rate": 1.8020186954287883e-05, | |
| "loss": 0.1387, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.28416779431664413, | |
| "grad_norm": 7.6281538009643555, | |
| "learning_rate": 1.8006055796563103e-05, | |
| "loss": 0.1498, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.28416779431664413, | |
| "eval_accuracy": 0.7880733944954128, | |
| "eval_f1": 0.43795620437956206, | |
| "eval_loss": 0.4631403982639313, | |
| "eval_precision": 0.7964601769911505, | |
| "eval_recall": 0.30201342281879195, | |
| "eval_runtime": 53.6246, | |
| "eval_samples_per_second": 5.557, | |
| "eval_steps_per_second": 0.186, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2848443843031123, | |
| "grad_norm": 7.874175071716309, | |
| "learning_rate": 1.7991879968949248e-05, | |
| "loss": 0.1542, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2855209742895805, | |
| "grad_norm": 2.5916264057159424, | |
| "learning_rate": 1.797765955054053e-05, | |
| "loss": 0.1319, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2861975642760487, | |
| "grad_norm": 9.027409553527832, | |
| "learning_rate": 1.7963394620679945e-05, | |
| "loss": 0.2224, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.2868741542625169, | |
| "grad_norm": 2.5223119258880615, | |
| "learning_rate": 1.7949085258958853e-05, | |
| "loss": 0.1183, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.28755074424898514, | |
| "grad_norm": 1.9095633029937744, | |
| "learning_rate": 1.7934731545216515e-05, | |
| "loss": 0.1178, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2882273342354533, | |
| "grad_norm": 3.547039031982422, | |
| "learning_rate": 1.792033355953966e-05, | |
| "loss": 0.1246, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2889039242219215, | |
| "grad_norm": 3.945955991744995, | |
| "learning_rate": 1.790589138226203e-05, | |
| "loss": 0.2155, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.2895805142083897, | |
| "grad_norm": 1.659956932067871, | |
| "learning_rate": 1.789140509396394e-05, | |
| "loss": 0.0721, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.2902571041948579, | |
| "grad_norm": 3.547576427459717, | |
| "learning_rate": 1.7876874775471806e-05, | |
| "loss": 0.1895, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.29093369418132614, | |
| "grad_norm": 5.191123008728027, | |
| "learning_rate": 1.7862300507857733e-05, | |
| "loss": 0.2105, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.29161028416779433, | |
| "grad_norm": 4.68615198135376, | |
| "learning_rate": 1.7847682372439024e-05, | |
| "loss": 0.2427, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.2922868741542625, | |
| "grad_norm": 7.467837333679199, | |
| "learning_rate": 1.7833020450777756e-05, | |
| "loss": 0.255, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2929634641407307, | |
| "grad_norm": 4.769316673278809, | |
| "learning_rate": 1.78183148246803e-05, | |
| "loss": 0.2349, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2936400541271989, | |
| "grad_norm": 2.3752694129943848, | |
| "learning_rate": 1.7803565576196884e-05, | |
| "loss": 0.1347, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.2943166441136671, | |
| "grad_norm": 2.9256367683410645, | |
| "learning_rate": 1.7788772787621126e-05, | |
| "loss": 0.19, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.29499323410013534, | |
| "grad_norm": 2.6127521991729736, | |
| "learning_rate": 1.7773936541489577e-05, | |
| "loss": 0.1579, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.2956698240866035, | |
| "grad_norm": 1.9983330965042114, | |
| "learning_rate": 1.7759056920581256e-05, | |
| "loss": 0.1109, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.2963464140730717, | |
| "grad_norm": 2.2543447017669678, | |
| "learning_rate": 1.7744134007917195e-05, | |
| "loss": 0.1244, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.2970230040595399, | |
| "grad_norm": 3.1791696548461914, | |
| "learning_rate": 1.7729167886759974e-05, | |
| "loss": 0.1867, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2976995940460081, | |
| "grad_norm": 2.7958037853240967, | |
| "learning_rate": 1.771415864061326e-05, | |
| "loss": 0.1344, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2976995940460081, | |
| "eval_accuracy": 0.7963302752293578, | |
| "eval_f1": 0.47641509433962265, | |
| "eval_loss": 0.42485642433166504, | |
| "eval_precision": 0.8015873015873016, | |
| "eval_recall": 0.3389261744966443, | |
| "eval_runtime": 52.4984, | |
| "eval_samples_per_second": 5.676, | |
| "eval_steps_per_second": 0.19, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.29837618403247634, | |
| "grad_norm": 2.20705509185791, | |
| "learning_rate": 1.7699106353221322e-05, | |
| "loss": 0.1233, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.29905277401894453, | |
| "grad_norm": 2.328334331512451, | |
| "learning_rate": 1.7684011108568593e-05, | |
| "loss": 0.1142, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.2997293640054127, | |
| "grad_norm": 3.256822109222412, | |
| "learning_rate": 1.7668872990879175e-05, | |
| "loss": 0.1556, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.3004059539918809, | |
| "grad_norm": 2.4061648845672607, | |
| "learning_rate": 1.765369208461639e-05, | |
| "loss": 0.0828, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.3010825439783491, | |
| "grad_norm": 5.99202299118042, | |
| "learning_rate": 1.7638468474482297e-05, | |
| "loss": 0.157, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.3017591339648173, | |
| "grad_norm": 3.7360379695892334, | |
| "learning_rate": 1.762320224541722e-05, | |
| "loss": 0.1257, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.30243572395128554, | |
| "grad_norm": 3.2651238441467285, | |
| "learning_rate": 1.760789348259927e-05, | |
| "loss": 0.1732, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.30311231393775373, | |
| "grad_norm": 3.5508763790130615, | |
| "learning_rate": 1.7592542271443888e-05, | |
| "loss": 0.1644, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3037889039242219, | |
| "grad_norm": 4.703643798828125, | |
| "learning_rate": 1.757714869760335e-05, | |
| "loss": 0.2103, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3044654939106901, | |
| "grad_norm": 5.588313102722168, | |
| "learning_rate": 1.756171284696629e-05, | |
| "loss": 0.187, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3051420838971583, | |
| "grad_norm": 1.9803051948547363, | |
| "learning_rate": 1.7546234805657235e-05, | |
| "loss": 0.0944, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.30581867388362655, | |
| "grad_norm": 3.1527740955352783, | |
| "learning_rate": 1.7530714660036112e-05, | |
| "loss": 0.1105, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.30649526387009474, | |
| "grad_norm": 4.078627109527588, | |
| "learning_rate": 1.7515152496697765e-05, | |
| "loss": 0.161, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3071718538565629, | |
| "grad_norm": 4.430943489074707, | |
| "learning_rate": 1.749954840247148e-05, | |
| "loss": 0.1883, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3078484438430311, | |
| "grad_norm": 3.115837335586548, | |
| "learning_rate": 1.7483902464420507e-05, | |
| "loss": 0.1122, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3085250338294993, | |
| "grad_norm": 3.008695602416992, | |
| "learning_rate": 1.7468214769841542e-05, | |
| "loss": 0.1034, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3092016238159675, | |
| "grad_norm": 6.273781776428223, | |
| "learning_rate": 1.7452485406264278e-05, | |
| "loss": 0.1709, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.30987821380243574, | |
| "grad_norm": 4.796054363250732, | |
| "learning_rate": 1.74367144614509e-05, | |
| "loss": 0.1932, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.31055480378890393, | |
| "grad_norm": 10.217569351196289, | |
| "learning_rate": 1.742090202339559e-05, | |
| "loss": 0.19, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.3112313937753721, | |
| "grad_norm": 3.481541395187378, | |
| "learning_rate": 1.7405048180324046e-05, | |
| "loss": 0.161, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3112313937753721, | |
| "eval_accuracy": 0.8091743119266055, | |
| "eval_f1": 0.5336322869955157, | |
| "eval_loss": 0.42575448751449585, | |
| "eval_precision": 0.8040540540540541, | |
| "eval_recall": 0.39932885906040266, | |
| "eval_runtime": 53.3596, | |
| "eval_samples_per_second": 5.585, | |
| "eval_steps_per_second": 0.187, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3119079837618403, | |
| "grad_norm": 3.4007368087768555, | |
| "learning_rate": 1.7389153020692985e-05, | |
| "loss": 0.1502, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.3125845737483085, | |
| "grad_norm": 3.0644993782043457, | |
| "learning_rate": 1.7373216633189653e-05, | |
| "loss": 0.1749, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.31326116373477675, | |
| "grad_norm": 4.407646179199219, | |
| "learning_rate": 1.735723910673132e-05, | |
| "loss": 0.1703, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.31393775372124494, | |
| "grad_norm": 3.6031856536865234, | |
| "learning_rate": 1.7341220530464796e-05, | |
| "loss": 0.1745, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.31461434370771313, | |
| "grad_norm": 5.204887390136719, | |
| "learning_rate": 1.7325160993765934e-05, | |
| "loss": 0.0987, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3152909336941813, | |
| "grad_norm": 2.864173173904419, | |
| "learning_rate": 1.7309060586239117e-05, | |
| "loss": 0.1985, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3159675236806495, | |
| "grad_norm": 3.125213861465454, | |
| "learning_rate": 1.7292919397716772e-05, | |
| "loss": 0.1482, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.3166441136671177, | |
| "grad_norm": 5.636457920074463, | |
| "learning_rate": 1.7276737518258865e-05, | |
| "loss": 0.1882, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.31732070365358594, | |
| "grad_norm": 4.034516334533691, | |
| "learning_rate": 1.7260515038152393e-05, | |
| "loss": 0.2319, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.31799729364005414, | |
| "grad_norm": 2.29288911819458, | |
| "learning_rate": 1.7244252047910893e-05, | |
| "loss": 0.0806, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3186738836265223, | |
| "grad_norm": 4.686462879180908, | |
| "learning_rate": 1.7227948638273918e-05, | |
| "loss": 0.192, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3193504736129905, | |
| "grad_norm": 3.879487991333008, | |
| "learning_rate": 1.7211604900206552e-05, | |
| "loss": 0.1904, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3200270635994587, | |
| "grad_norm": 4.023051738739014, | |
| "learning_rate": 1.7195220924898883e-05, | |
| "loss": 0.2034, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.32070365358592695, | |
| "grad_norm": 4.105659008026123, | |
| "learning_rate": 1.717879680376551e-05, | |
| "loss": 0.1803, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.32138024357239514, | |
| "grad_norm": 5.522044658660889, | |
| "learning_rate": 1.7162332628445024e-05, | |
| "loss": 0.2052, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.32205683355886333, | |
| "grad_norm": 4.441620349884033, | |
| "learning_rate": 1.7145828490799497e-05, | |
| "loss": 0.1982, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.3227334235453315, | |
| "grad_norm": 2.258070707321167, | |
| "learning_rate": 1.7129284482913973e-05, | |
| "loss": 0.1493, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3234100135317997, | |
| "grad_norm": 4.115694522857666, | |
| "learning_rate": 1.7112700697095955e-05, | |
| "loss": 0.1957, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.32408660351826796, | |
| "grad_norm": 4.366945743560791, | |
| "learning_rate": 1.709607722587488e-05, | |
| "loss": 0.2066, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.32476319350473615, | |
| "grad_norm": 3.625458240509033, | |
| "learning_rate": 1.7079414162001617e-05, | |
| "loss": 0.2087, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.32476319350473615, | |
| "eval_accuracy": 0.810091743119266, | |
| "eval_f1": 0.5152224824355972, | |
| "eval_loss": 0.4003700017929077, | |
| "eval_precision": 0.8527131782945736, | |
| "eval_recall": 0.3691275167785235, | |
| "eval_runtime": 52.4305, | |
| "eval_samples_per_second": 5.684, | |
| "eval_steps_per_second": 0.191, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.32543978349120434, | |
| "grad_norm": 4.504916667938232, | |
| "learning_rate": 1.7062711598447936e-05, | |
| "loss": 0.168, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.3261163734776725, | |
| "grad_norm": 3.6613380908966064, | |
| "learning_rate": 1.7045969628406013e-05, | |
| "loss": 0.1761, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3267929634641407, | |
| "grad_norm": 5.603222846984863, | |
| "learning_rate": 1.7029188345287868e-05, | |
| "loss": 0.1618, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.3274695534506089, | |
| "grad_norm": 5.035453796386719, | |
| "learning_rate": 1.7012367842724887e-05, | |
| "loss": 0.1485, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.32814614343707715, | |
| "grad_norm": 3.6518185138702393, | |
| "learning_rate": 1.6995508214567275e-05, | |
| "loss": 0.1497, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.32882273342354534, | |
| "grad_norm": 3.421865463256836, | |
| "learning_rate": 1.6978609554883544e-05, | |
| "loss": 0.1269, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.32949932341001353, | |
| "grad_norm": 1.6450062990188599, | |
| "learning_rate": 1.6961671957959967e-05, | |
| "loss": 0.0814, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.3301759133964817, | |
| "grad_norm": 3.649200201034546, | |
| "learning_rate": 1.6944695518300087e-05, | |
| "loss": 0.0999, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3308525033829499, | |
| "grad_norm": 5.043969631195068, | |
| "learning_rate": 1.6927680330624165e-05, | |
| "loss": 0.2421, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.33152909336941816, | |
| "grad_norm": 2.5395421981811523, | |
| "learning_rate": 1.691062648986865e-05, | |
| "loss": 0.1194, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.33220568335588635, | |
| "grad_norm": 3.3575992584228516, | |
| "learning_rate": 1.6893534091185658e-05, | |
| "loss": 0.1217, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.33288227334235454, | |
| "grad_norm": 3.968233823776245, | |
| "learning_rate": 1.6876403229942453e-05, | |
| "loss": 0.1636, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.33355886332882273, | |
| "grad_norm": 4.133052825927734, | |
| "learning_rate": 1.6859234001720882e-05, | |
| "loss": 0.0986, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.3342354533152909, | |
| "grad_norm": 6.324604034423828, | |
| "learning_rate": 1.6842026502316874e-05, | |
| "loss": 0.1145, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.3349120433017591, | |
| "grad_norm": 4.619051933288574, | |
| "learning_rate": 1.682478082773989e-05, | |
| "loss": 0.2143, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.33558863328822736, | |
| "grad_norm": 3.0241997241973877, | |
| "learning_rate": 1.680749707421238e-05, | |
| "loss": 0.1057, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.33626522327469555, | |
| "grad_norm": 1.7503517866134644, | |
| "learning_rate": 1.6790175338169277e-05, | |
| "loss": 0.0634, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.33694181326116374, | |
| "grad_norm": 4.2864990234375, | |
| "learning_rate": 1.6772815716257414e-05, | |
| "loss": 0.1524, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.3376184032476319, | |
| "grad_norm": 4.454866886138916, | |
| "learning_rate": 1.6755418305335026e-05, | |
| "loss": 0.1908, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.3382949932341001, | |
| "grad_norm": 5.34849739074707, | |
| "learning_rate": 1.673798320247118e-05, | |
| "loss": 0.1857, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3382949932341001, | |
| "eval_accuracy": 0.7862385321100918, | |
| "eval_f1": 0.391644908616188, | |
| "eval_loss": 0.5401991605758667, | |
| "eval_precision": 0.8823529411764706, | |
| "eval_recall": 0.2516778523489933, | |
| "eval_runtime": 52.9654, | |
| "eval_samples_per_second": 5.626, | |
| "eval_steps_per_second": 0.189, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.33897158322056836, | |
| "grad_norm": 4.322254180908203, | |
| "learning_rate": 1.672051050494526e-05, | |
| "loss": 0.2227, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.33964817320703655, | |
| "grad_norm": 3.6232991218566895, | |
| "learning_rate": 1.67030003102464e-05, | |
| "loss": 0.1609, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.34032476319350474, | |
| "grad_norm": 6.037874221801758, | |
| "learning_rate": 1.6685452716072946e-05, | |
| "loss": 0.144, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.34100135317997293, | |
| "grad_norm": 3.3179101943969727, | |
| "learning_rate": 1.6667867820331927e-05, | |
| "loss": 0.1325, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3416779431664411, | |
| "grad_norm": 3.1885428428649902, | |
| "learning_rate": 1.6650245721138483e-05, | |
| "loss": 0.1493, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3423545331529093, | |
| "grad_norm": 3.5949137210845947, | |
| "learning_rate": 1.6632586516815346e-05, | |
| "loss": 0.1273, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.34303112313937756, | |
| "grad_norm": 2.8679418563842773, | |
| "learning_rate": 1.6614890305892266e-05, | |
| "loss": 0.0887, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.34370771312584575, | |
| "grad_norm": 2.384528160095215, | |
| "learning_rate": 1.6597157187105475e-05, | |
| "loss": 0.0974, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.34438430311231394, | |
| "grad_norm": 3.2372498512268066, | |
| "learning_rate": 1.657938725939713e-05, | |
| "loss": 0.1175, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.34506089309878213, | |
| "grad_norm": 2.4635872840881348, | |
| "learning_rate": 1.6561580621914764e-05, | |
| "loss": 0.0602, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3457374830852503, | |
| "grad_norm": 4.7463531494140625, | |
| "learning_rate": 1.6543737374010742e-05, | |
| "loss": 0.1404, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.34641407307171856, | |
| "grad_norm": 3.910125255584717, | |
| "learning_rate": 1.6525857615241686e-05, | |
| "loss": 0.1732, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.34709066305818675, | |
| "grad_norm": 3.2249362468719482, | |
| "learning_rate": 1.6507941445367935e-05, | |
| "loss": 0.1706, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.34776725304465494, | |
| "grad_norm": 3.5670406818389893, | |
| "learning_rate": 1.648998896435299e-05, | |
| "loss": 0.1288, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.34844384303112313, | |
| "grad_norm": 2.954425096511841, | |
| "learning_rate": 1.6472000272362937e-05, | |
| "loss": 0.1691, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.3491204330175913, | |
| "grad_norm": 3.0924575328826904, | |
| "learning_rate": 1.6453975469765913e-05, | |
| "loss": 0.1445, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.3497970230040595, | |
| "grad_norm": 3.242204427719116, | |
| "learning_rate": 1.643591465713153e-05, | |
| "loss": 0.113, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.35047361299052776, | |
| "grad_norm": 3.513796806335449, | |
| "learning_rate": 1.6417817935230318e-05, | |
| "loss": 0.1342, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.35115020297699595, | |
| "grad_norm": 3.459606409072876, | |
| "learning_rate": 1.6399685405033168e-05, | |
| "loss": 0.167, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.35182679296346414, | |
| "grad_norm": 3.4279625415802, | |
| "learning_rate": 1.6381517167710757e-05, | |
| "loss": 0.1466, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.35182679296346414, | |
| "eval_accuracy": 0.8027522935779816, | |
| "eval_f1": 0.48687350835322196, | |
| "eval_loss": 0.4396270513534546, | |
| "eval_precision": 0.8429752066115702, | |
| "eval_recall": 0.3422818791946309, | |
| "eval_runtime": 53.1809, | |
| "eval_samples_per_second": 5.604, | |
| "eval_steps_per_second": 0.188, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.35250338294993233, | |
| "grad_norm": 3.181802988052368, | |
| "learning_rate": 1.6363313324632995e-05, | |
| "loss": 0.1381, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.3531799729364005, | |
| "grad_norm": 2.620626449584961, | |
| "learning_rate": 1.6345073977368455e-05, | |
| "loss": 0.1523, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.35385656292286877, | |
| "grad_norm": 4.116923809051514, | |
| "learning_rate": 1.6326799227683806e-05, | |
| "loss": 0.0602, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.35453315290933696, | |
| "grad_norm": 3.4836175441741943, | |
| "learning_rate": 1.630848917754324e-05, | |
| "loss": 0.0969, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.35520974289580515, | |
| "grad_norm": 3.9089815616607666, | |
| "learning_rate": 1.629014392910791e-05, | |
| "loss": 0.1509, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.35588633288227334, | |
| "grad_norm": 3.1335699558258057, | |
| "learning_rate": 1.6271763584735373e-05, | |
| "loss": 0.1366, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.3565629228687415, | |
| "grad_norm": 3.3636960983276367, | |
| "learning_rate": 1.625334824697898e-05, | |
| "loss": 0.1233, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.3572395128552097, | |
| "grad_norm": 3.7551486492156982, | |
| "learning_rate": 1.6234898018587336e-05, | |
| "loss": 0.1541, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.35791610284167796, | |
| "grad_norm": 6.929388046264648, | |
| "learning_rate": 1.6216413002503736e-05, | |
| "loss": 0.2057, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.35859269282814615, | |
| "grad_norm": 4.0752763748168945, | |
| "learning_rate": 1.619789330186555e-05, | |
| "loss": 0.1008, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.35926928281461434, | |
| "grad_norm": 3.1588234901428223, | |
| "learning_rate": 1.6179339020003685e-05, | |
| "loss": 0.1454, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.35994587280108253, | |
| "grad_norm": 6.536987781524658, | |
| "learning_rate": 1.616075026044199e-05, | |
| "loss": 0.1461, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.3606224627875507, | |
| "grad_norm": 3.1867458820343018, | |
| "learning_rate": 1.6142127126896682e-05, | |
| "loss": 0.182, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.36129905277401897, | |
| "grad_norm": 3.9853105545043945, | |
| "learning_rate": 1.6123469723275766e-05, | |
| "loss": 0.1525, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.36197564276048716, | |
| "grad_norm": 2.4770116806030273, | |
| "learning_rate": 1.6104778153678467e-05, | |
| "loss": 0.1789, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.36265223274695535, | |
| "grad_norm": 4.895524024963379, | |
| "learning_rate": 1.6086052522394625e-05, | |
| "loss": 0.1909, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.36332882273342354, | |
| "grad_norm": 7.819604873657227, | |
| "learning_rate": 1.6067292933904144e-05, | |
| "loss": 0.1793, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.36400541271989173, | |
| "grad_norm": 5.251774311065674, | |
| "learning_rate": 1.6048499492876378e-05, | |
| "loss": 0.1829, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3646820027063599, | |
| "grad_norm": 5.605532646179199, | |
| "learning_rate": 1.602967230416957e-05, | |
| "loss": 0.273, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.36535859269282817, | |
| "grad_norm": 3.302903175354004, | |
| "learning_rate": 1.6010811472830253e-05, | |
| "loss": 0.1608, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.36535859269282817, | |
| "eval_accuracy": 0.8165137614678899, | |
| "eval_f1": 0.5412844036697247, | |
| "eval_loss": 0.4029388427734375, | |
| "eval_precision": 0.855072463768116, | |
| "eval_recall": 0.3959731543624161, | |
| "eval_runtime": 54.3008, | |
| "eval_samples_per_second": 5.488, | |
| "eval_steps_per_second": 0.184, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.36603518267929636, | |
| "grad_norm": 3.057288408279419, | |
| "learning_rate": 1.5991917104092677e-05, | |
| "loss": 0.1671, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.36671177266576455, | |
| "grad_norm": 4.837218761444092, | |
| "learning_rate": 1.5972989303378207e-05, | |
| "loss": 0.1425, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.36738836265223274, | |
| "grad_norm": 2.922201633453369, | |
| "learning_rate": 1.595402817629475e-05, | |
| "loss": 0.2097, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.3680649526387009, | |
| "grad_norm": 8.20699691772461, | |
| "learning_rate": 1.593503382863615e-05, | |
| "loss": 0.1657, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.36874154262516917, | |
| "grad_norm": 3.043370246887207, | |
| "learning_rate": 1.591600636638161e-05, | |
| "loss": 0.1568, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.36941813261163736, | |
| "grad_norm": 6.523357391357422, | |
| "learning_rate": 1.589694589569509e-05, | |
| "loss": 0.1299, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.37009472259810555, | |
| "grad_norm": 3.4266302585601807, | |
| "learning_rate": 1.5877852522924733e-05, | |
| "loss": 0.1608, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.37077131258457374, | |
| "grad_norm": 4.111809253692627, | |
| "learning_rate": 1.5858726354602248e-05, | |
| "loss": 0.1975, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.37144790257104193, | |
| "grad_norm": 3.3651816844940186, | |
| "learning_rate": 1.5839567497442338e-05, | |
| "loss": 0.171, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3721244925575101, | |
| "grad_norm": 3.0030105113983154, | |
| "learning_rate": 1.5820376058342077e-05, | |
| "loss": 0.1365, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.37280108254397837, | |
| "grad_norm": 3.5873923301696777, | |
| "learning_rate": 1.5801152144380353e-05, | |
| "loss": 0.1745, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.37347767253044656, | |
| "grad_norm": 3.0994861125946045, | |
| "learning_rate": 1.578189586281723e-05, | |
| "loss": 0.1407, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.37415426251691475, | |
| "grad_norm": 2.788184642791748, | |
| "learning_rate": 1.5762607321093368e-05, | |
| "loss": 0.134, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.37483085250338294, | |
| "grad_norm": 2.5775389671325684, | |
| "learning_rate": 1.5743286626829437e-05, | |
| "loss": 0.1075, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.37550744248985113, | |
| "grad_norm": 3.5334537029266357, | |
| "learning_rate": 1.5723933887825492e-05, | |
| "loss": 0.1165, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3761840324763194, | |
| "grad_norm": 3.544222116470337, | |
| "learning_rate": 1.5704549212060383e-05, | |
| "loss": 0.1739, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.37686062246278756, | |
| "grad_norm": 3.89497709274292, | |
| "learning_rate": 1.568513270769115e-05, | |
| "loss": 0.1549, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.37753721244925575, | |
| "grad_norm": 2.996244192123413, | |
| "learning_rate": 1.5665684483052425e-05, | |
| "loss": 0.1742, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.37821380243572394, | |
| "grad_norm": 4.149686336517334, | |
| "learning_rate": 1.564620464665582e-05, | |
| "loss": 0.1655, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.37889039242219213, | |
| "grad_norm": 3.9218225479125977, | |
| "learning_rate": 1.5626693307189334e-05, | |
| "loss": 0.1472, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.37889039242219213, | |
| "eval_accuracy": 0.8064220183486238, | |
| "eval_f1": 0.5104408352668214, | |
| "eval_loss": 0.44546324014663696, | |
| "eval_precision": 0.8270676691729323, | |
| "eval_recall": 0.3691275167785235, | |
| "eval_runtime": 52.4956, | |
| "eval_samples_per_second": 5.677, | |
| "eval_steps_per_second": 0.19, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3795669824086603, | |
| "grad_norm": 7.877570629119873, | |
| "learning_rate": 1.560715057351673e-05, | |
| "loss": 0.0964, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.38024357239512857, | |
| "grad_norm": 3.73523211479187, | |
| "learning_rate": 1.5587576554676927e-05, | |
| "loss": 0.1444, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.38092016238159676, | |
| "grad_norm": 10.825215339660645, | |
| "learning_rate": 1.556797135988342e-05, | |
| "loss": 0.151, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.38159675236806495, | |
| "grad_norm": 6.863844871520996, | |
| "learning_rate": 1.5548335098523634e-05, | |
| "loss": 0.2423, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.38227334235453314, | |
| "grad_norm": 3.01707124710083, | |
| "learning_rate": 1.5528667880158338e-05, | |
| "loss": 0.1483, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.38294993234100133, | |
| "grad_norm": 3.50577712059021, | |
| "learning_rate": 1.5508969814521026e-05, | |
| "loss": 0.1359, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.3836265223274696, | |
| "grad_norm": 3.558225631713867, | |
| "learning_rate": 1.5489241011517303e-05, | |
| "loss": 0.0951, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.38430311231393777, | |
| "grad_norm": 7.069665431976318, | |
| "learning_rate": 1.5469481581224274e-05, | |
| "loss": 0.0979, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.38497970230040596, | |
| "grad_norm": 4.208998680114746, | |
| "learning_rate": 1.5449691633889924e-05, | |
| "loss": 0.1451, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.38565629228687415, | |
| "grad_norm": 3.042346477508545, | |
| "learning_rate": 1.5429871279932514e-05, | |
| "loss": 0.1555, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.38633288227334234, | |
| "grad_norm": 6.0393595695495605, | |
| "learning_rate": 1.5410020629939966e-05, | |
| "loss": 0.1965, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.3870094722598105, | |
| "grad_norm": 3.022724151611328, | |
| "learning_rate": 1.5390139794669225e-05, | |
| "loss": 0.1219, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.3876860622462788, | |
| "grad_norm": 2.8513102531433105, | |
| "learning_rate": 1.5370228885045662e-05, | |
| "loss": 0.1634, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.38836265223274696, | |
| "grad_norm": 3.42635440826416, | |
| "learning_rate": 1.535028801216245e-05, | |
| "loss": 0.1874, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.38903924221921515, | |
| "grad_norm": 6.154781341552734, | |
| "learning_rate": 1.533031728727994e-05, | |
| "loss": 0.1979, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.38971583220568334, | |
| "grad_norm": 4.227107524871826, | |
| "learning_rate": 1.531031682182504e-05, | |
| "loss": 0.2125, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.39039242219215153, | |
| "grad_norm": 3.389040946960449, | |
| "learning_rate": 1.5290286727390604e-05, | |
| "loss": 0.1554, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.3910690121786198, | |
| "grad_norm": 3.34859299659729, | |
| "learning_rate": 1.527022711573479e-05, | |
| "loss": 0.1859, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.39174560216508797, | |
| "grad_norm": 2.8738038539886475, | |
| "learning_rate": 1.5250138098780456e-05, | |
| "loss": 0.105, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.39242219215155616, | |
| "grad_norm": 3.0525081157684326, | |
| "learning_rate": 1.5230019788614527e-05, | |
| "loss": 0.1437, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.39242219215155616, | |
| "eval_accuracy": 0.8036697247706422, | |
| "eval_f1": 0.4928909952606635, | |
| "eval_loss": 0.4023875296115875, | |
| "eval_precision": 0.8387096774193549, | |
| "eval_recall": 0.348993288590604, | |
| "eval_runtime": 53.2583, | |
| "eval_samples_per_second": 5.595, | |
| "eval_steps_per_second": 0.188, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.39309878213802435, | |
| "grad_norm": 2.1808993816375732, | |
| "learning_rate": 1.5209872297487365e-05, | |
| "loss": 0.158, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.39377537212449254, | |
| "grad_norm": 2.2843339443206787, | |
| "learning_rate": 1.5189695737812153e-05, | |
| "loss": 0.0944, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.3944519621109608, | |
| "grad_norm": 3.0277621746063232, | |
| "learning_rate": 1.5169490222164255e-05, | |
| "loss": 0.1253, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.395128552097429, | |
| "grad_norm": 3.6869609355926514, | |
| "learning_rate": 1.5149255863280607e-05, | |
| "loss": 0.1293, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.39580514208389717, | |
| "grad_norm": 2.9757912158966064, | |
| "learning_rate": 1.5128992774059063e-05, | |
| "loss": 0.1344, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.39648173207036536, | |
| "grad_norm": 3.4651436805725098, | |
| "learning_rate": 1.5108701067557787e-05, | |
| "loss": 0.1131, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.39715832205683355, | |
| "grad_norm": 3.6160385608673096, | |
| "learning_rate": 1.5088380856994608e-05, | |
| "loss": 0.1679, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.39783491204330174, | |
| "grad_norm": 2.459801197052002, | |
| "learning_rate": 1.50680322557464e-05, | |
| "loss": 0.0886, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.39851150202977, | |
| "grad_norm": 3.2416129112243652, | |
| "learning_rate": 1.504765537734844e-05, | |
| "loss": 0.1534, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.39918809201623817, | |
| "grad_norm": 2.9892609119415283, | |
| "learning_rate": 1.5027250335493771e-05, | |
| "loss": 0.0943, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.39986468200270636, | |
| "grad_norm": 5.113293647766113, | |
| "learning_rate": 1.5006817244032589e-05, | |
| "loss": 0.1717, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.40054127198917455, | |
| "grad_norm": 6.7918195724487305, | |
| "learning_rate": 1.4986356216971583e-05, | |
| "loss": 0.1747, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.40121786197564274, | |
| "grad_norm": 3.726599931716919, | |
| "learning_rate": 1.4965867368473308e-05, | |
| "loss": 0.1416, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.401894451962111, | |
| "grad_norm": 6.388960361480713, | |
| "learning_rate": 1.4945350812855555e-05, | |
| "loss": 0.1946, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.4025710419485792, | |
| "grad_norm": 3.8928306102752686, | |
| "learning_rate": 1.4924806664590702e-05, | |
| "loss": 0.1622, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.40324763193504737, | |
| "grad_norm": 3.4860315322875977, | |
| "learning_rate": 1.4904235038305084e-05, | |
| "loss": 0.1549, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.40392422192151556, | |
| "grad_norm": 4.641502857208252, | |
| "learning_rate": 1.4883636048778347e-05, | |
| "loss": 0.1789, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.40460081190798375, | |
| "grad_norm": 4.543725967407227, | |
| "learning_rate": 1.4863009810942814e-05, | |
| "loss": 0.1651, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.40527740189445194, | |
| "grad_norm": 10.844779014587402, | |
| "learning_rate": 1.4842356439882841e-05, | |
| "loss": 0.2244, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.4059539918809202, | |
| "grad_norm": 7.078522205352783, | |
| "learning_rate": 1.4821676050834166e-05, | |
| "loss": 0.2055, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4059539918809202, | |
| "eval_accuracy": 0.8229357798165138, | |
| "eval_f1": 0.5758241758241758, | |
| "eval_loss": 0.40693244338035583, | |
| "eval_precision": 0.8343949044585988, | |
| "eval_recall": 0.4395973154362416, | |
| "eval_runtime": 53.515, | |
| "eval_samples_per_second": 5.569, | |
| "eval_steps_per_second": 0.187, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4066305818673884, | |
| "grad_norm": 5.193844795227051, | |
| "learning_rate": 1.4800968759183288e-05, | |
| "loss": 0.1437, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.40730717185385656, | |
| "grad_norm": 4.853598117828369, | |
| "learning_rate": 1.4780234680466792e-05, | |
| "loss": 0.2045, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.40798376184032475, | |
| "grad_norm": 2.771559476852417, | |
| "learning_rate": 1.4759473930370738e-05, | |
| "loss": 0.0885, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.40866035182679294, | |
| "grad_norm": 2.854980945587158, | |
| "learning_rate": 1.4738686624729987e-05, | |
| "loss": 0.0996, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.4093369418132612, | |
| "grad_norm": 3.643080949783325, | |
| "learning_rate": 1.4717872879527578e-05, | |
| "loss": 0.1512, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.4100135317997294, | |
| "grad_norm": 3.83292555809021, | |
| "learning_rate": 1.4697032810894062e-05, | |
| "loss": 0.1761, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.41069012178619757, | |
| "grad_norm": 2.26672101020813, | |
| "learning_rate": 1.4676166535106866e-05, | |
| "loss": 0.116, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.41136671177266576, | |
| "grad_norm": 3.7530345916748047, | |
| "learning_rate": 1.4655274168589635e-05, | |
| "loss": 0.1392, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.41204330175913395, | |
| "grad_norm": 3.018079996109009, | |
| "learning_rate": 1.463435582791159e-05, | |
| "loss": 0.1159, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.41271989174560214, | |
| "grad_norm": 3.2969181537628174, | |
| "learning_rate": 1.461341162978688e-05, | |
| "loss": 0.2222, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4133964817320704, | |
| "grad_norm": 4.386163711547852, | |
| "learning_rate": 1.459244169107392e-05, | |
| "loss": 0.1679, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.4140730717185386, | |
| "grad_norm": 3.1107709407806396, | |
| "learning_rate": 1.4571446128774746e-05, | |
| "loss": 0.0994, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.41474966170500677, | |
| "grad_norm": 4.727970600128174, | |
| "learning_rate": 1.4550425060034367e-05, | |
| "loss": 0.2336, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.41542625169147496, | |
| "grad_norm": 2.739393949508667, | |
| "learning_rate": 1.4529378602140096e-05, | |
| "loss": 0.1315, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.41610284167794315, | |
| "grad_norm": 3.5588788986206055, | |
| "learning_rate": 1.4508306872520912e-05, | |
| "loss": 0.128, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4167794316644114, | |
| "grad_norm": 3.703800916671753, | |
| "learning_rate": 1.4487209988746791e-05, | |
| "loss": 0.1546, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.4174560216508796, | |
| "grad_norm": 4.310748100280762, | |
| "learning_rate": 1.4466088068528068e-05, | |
| "loss": 0.1225, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.4181326116373478, | |
| "grad_norm": 4.539455413818359, | |
| "learning_rate": 1.444494122971476e-05, | |
| "loss": 0.0978, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.41880920162381596, | |
| "grad_norm": 4.7135210037231445, | |
| "learning_rate": 1.4423769590295917e-05, | |
| "loss": 0.1628, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.41948579161028415, | |
| "grad_norm": 2.408241033554077, | |
| "learning_rate": 1.4402573268398969e-05, | |
| "loss": 0.1135, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.41948579161028415, | |
| "eval_accuracy": 0.8146788990825689, | |
| "eval_f1": 0.5388127853881278, | |
| "eval_loss": 0.4136140048503876, | |
| "eval_precision": 0.8428571428571429, | |
| "eval_recall": 0.3959731543624161, | |
| "eval_runtime": 52.6653, | |
| "eval_samples_per_second": 5.658, | |
| "eval_steps_per_second": 0.19, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.42016238159675234, | |
| "grad_norm": 7.886865139007568, | |
| "learning_rate": 1.4381352382289064e-05, | |
| "loss": 0.2124, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.4208389715832206, | |
| "grad_norm": 4.534457206726074, | |
| "learning_rate": 1.4360107050368403e-05, | |
| "loss": 0.2076, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4215155615696888, | |
| "grad_norm": 4.329330921173096, | |
| "learning_rate": 1.4338837391175582e-05, | |
| "loss": 0.1768, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.42219215155615697, | |
| "grad_norm": 4.343597412109375, | |
| "learning_rate": 1.4317543523384928e-05, | |
| "loss": 0.1652, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.42286874154262516, | |
| "grad_norm": 3.2291901111602783, | |
| "learning_rate": 1.4296225565805854e-05, | |
| "loss": 0.1808, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.42354533152909335, | |
| "grad_norm": 3.0291831493377686, | |
| "learning_rate": 1.4274883637382162e-05, | |
| "loss": 0.1375, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.4242219215155616, | |
| "grad_norm": 4.428736686706543, | |
| "learning_rate": 1.4253517857191415e-05, | |
| "loss": 0.125, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.4248985115020298, | |
| "grad_norm": 4.2908616065979, | |
| "learning_rate": 1.4232128344444251e-05, | |
| "loss": 0.2029, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.425575101488498, | |
| "grad_norm": 3.273651123046875, | |
| "learning_rate": 1.4210715218483726e-05, | |
| "loss": 0.156, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.42625169147496617, | |
| "grad_norm": 4.760056495666504, | |
| "learning_rate": 1.4189278598784648e-05, | |
| "loss": 0.1802, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.42692828146143436, | |
| "grad_norm": 4.106754302978516, | |
| "learning_rate": 1.4167818604952906e-05, | |
| "loss": 0.1104, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.42760487144790255, | |
| "grad_norm": 3.9572083950042725, | |
| "learning_rate": 1.4146335356724803e-05, | |
| "loss": 0.1584, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4282814614343708, | |
| "grad_norm": 4.0582194328308105, | |
| "learning_rate": 1.4124828973966393e-05, | |
| "loss": 0.1678, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.428958051420839, | |
| "grad_norm": 3.1031177043914795, | |
| "learning_rate": 1.410329957667281e-05, | |
| "loss": 0.119, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.42963464140730717, | |
| "grad_norm": 6.854620456695557, | |
| "learning_rate": 1.4081747284967602e-05, | |
| "loss": 0.2084, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.43031123139377536, | |
| "grad_norm": 3.733327627182007, | |
| "learning_rate": 1.4060172219102046e-05, | |
| "loss": 0.0581, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.43098782138024355, | |
| "grad_norm": 6.749164581298828, | |
| "learning_rate": 1.4038574499454496e-05, | |
| "loss": 0.1663, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.4316644113667118, | |
| "grad_norm": 4.190524101257324, | |
| "learning_rate": 1.4016954246529697e-05, | |
| "loss": 0.1851, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.43234100135318, | |
| "grad_norm": 5.539178371429443, | |
| "learning_rate": 1.3995311580958124e-05, | |
| "loss": 0.1292, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.4330175913396482, | |
| "grad_norm": 3.6414852142333984, | |
| "learning_rate": 1.3973646623495305e-05, | |
| "loss": 0.1331, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4330175913396482, | |
| "eval_accuracy": 0.791743119266055, | |
| "eval_f1": 0.44226044226044225, | |
| "eval_loss": 0.46029743552207947, | |
| "eval_precision": 0.8256880733944955, | |
| "eval_recall": 0.30201342281879195, | |
| "eval_runtime": 52.6165, | |
| "eval_samples_per_second": 5.664, | |
| "eval_steps_per_second": 0.19, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.43369418132611637, | |
| "grad_norm": 2.1139845848083496, | |
| "learning_rate": 1.395195949502114e-05, | |
| "loss": 0.1142, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.43437077131258456, | |
| "grad_norm": 4.319032669067383, | |
| "learning_rate": 1.3930250316539237e-05, | |
| "loss": 0.1261, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.43504736129905275, | |
| "grad_norm": 6.74521017074585, | |
| "learning_rate": 1.3908519209176227e-05, | |
| "loss": 0.1988, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.435723951285521, | |
| "grad_norm": 4.900043487548828, | |
| "learning_rate": 1.3886766294181105e-05, | |
| "loss": 0.1694, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.4364005412719892, | |
| "grad_norm": 3.822930097579956, | |
| "learning_rate": 1.3864991692924524e-05, | |
| "loss": 0.1607, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4370771312584574, | |
| "grad_norm": 6.831581115722656, | |
| "learning_rate": 1.384319552689815e-05, | |
| "loss": 0.1969, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.43775372124492556, | |
| "grad_norm": 4.031900405883789, | |
| "learning_rate": 1.3821377917713969e-05, | |
| "loss": 0.1413, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.43843031123139375, | |
| "grad_norm": 4.6610565185546875, | |
| "learning_rate": 1.37995389871036e-05, | |
| "loss": 0.1502, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.439106901217862, | |
| "grad_norm": 5.273025035858154, | |
| "learning_rate": 1.3777678856917637e-05, | |
| "loss": 0.1584, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.4397834912043302, | |
| "grad_norm": 4.015101909637451, | |
| "learning_rate": 1.3755797649124944e-05, | |
| "loss": 0.1968, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4404600811907984, | |
| "grad_norm": 3.669017791748047, | |
| "learning_rate": 1.3733895485812005e-05, | |
| "loss": 0.1168, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.44113667117726657, | |
| "grad_norm": 4.213320732116699, | |
| "learning_rate": 1.3711972489182208e-05, | |
| "loss": 0.1717, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.44181326116373476, | |
| "grad_norm": 4.028888702392578, | |
| "learning_rate": 1.369002878155519e-05, | |
| "loss": 0.1231, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.44248985115020295, | |
| "grad_norm": 4.561563968658447, | |
| "learning_rate": 1.366806448536615e-05, | |
| "loss": 0.1529, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.4431664411366712, | |
| "grad_norm": 5.331336975097656, | |
| "learning_rate": 1.3646079723165148e-05, | |
| "loss": 0.1654, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.4438430311231394, | |
| "grad_norm": 3.651341199874878, | |
| "learning_rate": 1.3624074617616443e-05, | |
| "loss": 0.1898, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.4445196211096076, | |
| "grad_norm": 3.460707187652588, | |
| "learning_rate": 1.3602049291497798e-05, | |
| "loss": 0.132, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.44519621109607577, | |
| "grad_norm": 4.810662269592285, | |
| "learning_rate": 1.3580003867699801e-05, | |
| "loss": 0.2468, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.44587280108254396, | |
| "grad_norm": 3.721719264984131, | |
| "learning_rate": 1.3557938469225167e-05, | |
| "loss": 0.1419, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.4465493910690122, | |
| "grad_norm": 2.96408748626709, | |
| "learning_rate": 1.3535853219188064e-05, | |
| "loss": 0.1433, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4465493910690122, | |
| "eval_accuracy": 0.7779816513761468, | |
| "eval_f1": 0.35978835978835977, | |
| "eval_loss": 0.45713141560554504, | |
| "eval_precision": 0.85, | |
| "eval_recall": 0.22818791946308725, | |
| "eval_runtime": 52.7449, | |
| "eval_samples_per_second": 5.65, | |
| "eval_steps_per_second": 0.19, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4472259810554804, | |
| "grad_norm": 4.556414604187012, | |
| "learning_rate": 1.3513748240813429e-05, | |
| "loss": 0.2081, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.4479025710419486, | |
| "grad_norm": 3.337388515472412, | |
| "learning_rate": 1.349162365743626e-05, | |
| "loss": 0.1469, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.4485791610284168, | |
| "grad_norm": 2.080909252166748, | |
| "learning_rate": 1.3469479592500954e-05, | |
| "loss": 0.084, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.44925575101488496, | |
| "grad_norm": 2.1243245601654053, | |
| "learning_rate": 1.3447316169560593e-05, | |
| "loss": 0.1212, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.44993234100135315, | |
| "grad_norm": 6.11072301864624, | |
| "learning_rate": 1.3425133512276284e-05, | |
| "loss": 0.18, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.4506089309878214, | |
| "grad_norm": 3.8702616691589355, | |
| "learning_rate": 1.3402931744416432e-05, | |
| "loss": 0.1654, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.4512855209742896, | |
| "grad_norm": 2.126553535461426, | |
| "learning_rate": 1.3380710989856086e-05, | |
| "loss": 0.0749, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.4519621109607578, | |
| "grad_norm": 3.0334889888763428, | |
| "learning_rate": 1.3358471372576229e-05, | |
| "loss": 0.1168, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.45263870094722597, | |
| "grad_norm": 4.436944484710693, | |
| "learning_rate": 1.3336213016663078e-05, | |
| "loss": 0.1849, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.45331529093369416, | |
| "grad_norm": 2.813225030899048, | |
| "learning_rate": 1.3313936046307411e-05, | |
| "loss": 0.0991, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4539918809201624, | |
| "grad_norm": 4.604645729064941, | |
| "learning_rate": 1.3291640585803869e-05, | |
| "loss": 0.181, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.4546684709066306, | |
| "grad_norm": 4.593608379364014, | |
| "learning_rate": 1.3269326759550252e-05, | |
| "loss": 0.1916, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.4553450608930988, | |
| "grad_norm": 4.45982551574707, | |
| "learning_rate": 1.3246994692046837e-05, | |
| "loss": 0.1576, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.456021650879567, | |
| "grad_norm": 3.094189167022705, | |
| "learning_rate": 1.3224644507895672e-05, | |
| "loss": 0.137, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.45669824086603517, | |
| "grad_norm": 4.386197090148926, | |
| "learning_rate": 1.320227633179989e-05, | |
| "loss": 0.1798, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.45737483085250336, | |
| "grad_norm": 6.814380168914795, | |
| "learning_rate": 1.3179890288563015e-05, | |
| "loss": 0.1869, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.4580514208389716, | |
| "grad_norm": 3.6425650119781494, | |
| "learning_rate": 1.3157486503088255e-05, | |
| "loss": 0.0971, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.4587280108254398, | |
| "grad_norm": 4.681551933288574, | |
| "learning_rate": 1.3135065100377816e-05, | |
| "loss": 0.1818, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.459404600811908, | |
| "grad_norm": 2.930051803588867, | |
| "learning_rate": 1.3112626205532189e-05, | |
| "loss": 0.1051, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.46008119079837617, | |
| "grad_norm": 4.322526454925537, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.1372, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.46008119079837617, | |
| "eval_accuracy": 0.8018348623853211, | |
| "eval_f1": 0.4881516587677725, | |
| "eval_loss": 0.4219379127025604, | |
| "eval_precision": 0.8306451612903226, | |
| "eval_recall": 0.34563758389261745, | |
| "eval_runtime": 53.1579, | |
| "eval_samples_per_second": 5.606, | |
| "eval_steps_per_second": 0.188, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.46075778078484436, | |
| "grad_norm": 6.767197132110596, | |
| "learning_rate": 1.3067696440324671e-05, | |
| "loss": 0.2199, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.4614343707713126, | |
| "grad_norm": 6.519789218902588, | |
| "learning_rate": 1.3045205820648969e-05, | |
| "loss": 0.1482, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.4621109607577808, | |
| "grad_norm": 5.150829792022705, | |
| "learning_rate": 1.3022698210209069e-05, | |
| "loss": 0.1252, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.462787550744249, | |
| "grad_norm": 3.6090028285980225, | |
| "learning_rate": 1.3000173734586461e-05, | |
| "loss": 0.1423, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.4634641407307172, | |
| "grad_norm": 3.7701938152313232, | |
| "learning_rate": 1.2977632519456745e-05, | |
| "loss": 0.1346, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.46414073071718537, | |
| "grad_norm": 2.650648355484009, | |
| "learning_rate": 1.295507469058891e-05, | |
| "loss": 0.1563, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.4648173207036536, | |
| "grad_norm": 3.0055038928985596, | |
| "learning_rate": 1.293250037384465e-05, | |
| "loss": 0.12, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.4654939106901218, | |
| "grad_norm": 3.10675311088562, | |
| "learning_rate": 1.2909909695177647e-05, | |
| "loss": 0.1294, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.46617050067659, | |
| "grad_norm": 3.340036630630493, | |
| "learning_rate": 1.2887302780632876e-05, | |
| "loss": 0.1208, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.4668470906630582, | |
| "grad_norm": 3.540969133377075, | |
| "learning_rate": 1.2864679756345905e-05, | |
| "loss": 0.1568, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4675236806495264, | |
| "grad_norm": 2.290696620941162, | |
| "learning_rate": 1.284204074854218e-05, | |
| "loss": 0.0659, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.46820027063599456, | |
| "grad_norm": 2.736776351928711, | |
| "learning_rate": 1.2819385883536332e-05, | |
| "loss": 0.1017, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4688768606224628, | |
| "grad_norm": 4.793468952178955, | |
| "learning_rate": 1.2796715287731463e-05, | |
| "loss": 0.1693, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.469553450608931, | |
| "grad_norm": 5.424515724182129, | |
| "learning_rate": 1.2774029087618448e-05, | |
| "loss": 0.2241, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.4702300405953992, | |
| "grad_norm": 3.8648910522460938, | |
| "learning_rate": 1.2751327409775227e-05, | |
| "loss": 0.1706, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.4709066305818674, | |
| "grad_norm": 5.154799461364746, | |
| "learning_rate": 1.2728610380866097e-05, | |
| "loss": 0.1878, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.47158322056833557, | |
| "grad_norm": 4.5547261238098145, | |
| "learning_rate": 1.2705878127641007e-05, | |
| "loss": 0.1794, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4722598105548038, | |
| "grad_norm": 4.59852409362793, | |
| "learning_rate": 1.268313077693485e-05, | |
| "loss": 0.1402, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.472936400541272, | |
| "grad_norm": 3.0397305488586426, | |
| "learning_rate": 1.2660368455666752e-05, | |
| "loss": 0.1126, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4736129905277402, | |
| "grad_norm": 2.698749303817749, | |
| "learning_rate": 1.2637591290839377e-05, | |
| "loss": 0.0677, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4736129905277402, | |
| "eval_accuracy": 0.7990825688073394, | |
| "eval_f1": 0.46973365617433416, | |
| "eval_loss": 0.442281574010849, | |
| "eval_precision": 0.8434782608695652, | |
| "eval_recall": 0.32550335570469796, | |
| "eval_runtime": 52.9641, | |
| "eval_samples_per_second": 5.626, | |
| "eval_steps_per_second": 0.189, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4742895805142084, | |
| "grad_norm": 3.1701228618621826, | |
| "learning_rate": 1.26147994095382e-05, | |
| "loss": 0.1402, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.4749661705006766, | |
| "grad_norm": 3.6727280616760254, | |
| "learning_rate": 1.2591992938930808e-05, | |
| "loss": 0.1482, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.47564276048714477, | |
| "grad_norm": 2.606227159500122, | |
| "learning_rate": 1.2569172006266192e-05, | |
| "loss": 0.141, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.476319350473613, | |
| "grad_norm": 3.620513677597046, | |
| "learning_rate": 1.2546336738874037e-05, | |
| "loss": 0.1777, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4769959404600812, | |
| "grad_norm": 4.079351902008057, | |
| "learning_rate": 1.2523487264163997e-05, | |
| "loss": 0.1614, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.4776725304465494, | |
| "grad_norm": 3.8010270595550537, | |
| "learning_rate": 1.2500623709625008e-05, | |
| "loss": 0.1359, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.4783491204330176, | |
| "grad_norm": 3.370544672012329, | |
| "learning_rate": 1.2477746202824563e-05, | |
| "loss": 0.1554, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.4790257104194858, | |
| "grad_norm": 5.698882579803467, | |
| "learning_rate": 1.2454854871407993e-05, | |
| "loss": 0.1387, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.479702300405954, | |
| "grad_norm": 3.655916213989258, | |
| "learning_rate": 1.2431949843097776e-05, | |
| "loss": 0.0942, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.4803788903924222, | |
| "grad_norm": 4.965806484222412, | |
| "learning_rate": 1.2409031245692798e-05, | |
| "loss": 0.1671, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4810554803788904, | |
| "grad_norm": 4.127477645874023, | |
| "learning_rate": 1.238609920706767e-05, | |
| "loss": 0.1012, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.4817320703653586, | |
| "grad_norm": 4.219476222991943, | |
| "learning_rate": 1.2363153855171985e-05, | |
| "loss": 0.1406, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4824086603518268, | |
| "grad_norm": 4.781118392944336, | |
| "learning_rate": 1.2340195318029623e-05, | |
| "loss": 0.1519, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.48308525033829497, | |
| "grad_norm": 2.8674471378326416, | |
| "learning_rate": 1.2317223723738036e-05, | |
| "loss": 0.0691, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4837618403247632, | |
| "grad_norm": 3.4862661361694336, | |
| "learning_rate": 1.2294239200467516e-05, | |
| "loss": 0.1282, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.4844384303112314, | |
| "grad_norm": 2.642241954803467, | |
| "learning_rate": 1.2271241876460507e-05, | |
| "loss": 0.1144, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.4851150202976996, | |
| "grad_norm": 5.012102127075195, | |
| "learning_rate": 1.2248231880030861e-05, | |
| "loss": 0.1781, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4857916102841678, | |
| "grad_norm": 3.223320484161377, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.103, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.486468200270636, | |
| "grad_norm": 3.7974560260772705, | |
| "learning_rate": 1.2202174383511916e-05, | |
| "loss": 0.1277, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.4871447902571042, | |
| "grad_norm": 8.01950454711914, | |
| "learning_rate": 1.2179127140400997e-05, | |
| "loss": 0.2256, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4871447902571042, | |
| "eval_accuracy": 0.7798165137614679, | |
| "eval_f1": 0.375, | |
| "eval_loss": 0.49802127480506897, | |
| "eval_precision": 0.8372093023255814, | |
| "eval_recall": 0.24161073825503357, | |
| "eval_runtime": 52.7234, | |
| "eval_samples_per_second": 5.652, | |
| "eval_steps_per_second": 0.19, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4878213802435724, | |
| "grad_norm": 5.34826135635376, | |
| "learning_rate": 1.2156067738822777e-05, | |
| "loss": 0.173, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.4884979702300406, | |
| "grad_norm": 2.6310982704162598, | |
| "learning_rate": 1.213299630743747e-05, | |
| "loss": 0.0958, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4891745602165088, | |
| "grad_norm": 4.56181526184082, | |
| "learning_rate": 1.2109912974972424e-05, | |
| "loss": 0.1279, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.489851150202977, | |
| "grad_norm": 5.082941055297852, | |
| "learning_rate": 1.2086817870221376e-05, | |
| "loss": 0.158, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.49052774018944517, | |
| "grad_norm": 6.134848117828369, | |
| "learning_rate": 1.2063711122043759e-05, | |
| "loss": 0.1063, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4912043301759134, | |
| "grad_norm": 3.6247506141662598, | |
| "learning_rate": 1.204059285936397e-05, | |
| "loss": 0.1078, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4918809201623816, | |
| "grad_norm": 4.745319366455078, | |
| "learning_rate": 1.2017463211170635e-05, | |
| "loss": 0.1094, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.4925575101488498, | |
| "grad_norm": 5.845713138580322, | |
| "learning_rate": 1.1994322306515926e-05, | |
| "loss": 0.1373, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.493234100135318, | |
| "grad_norm": 3.4521119594573975, | |
| "learning_rate": 1.1971170274514802e-05, | |
| "loss": 0.0997, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.4939106901217862, | |
| "grad_norm": 4.090993404388428, | |
| "learning_rate": 1.1948007244344334e-05, | |
| "loss": 0.1578, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4945872801082544, | |
| "grad_norm": 5.571890830993652, | |
| "learning_rate": 1.1924833345242921e-05, | |
| "loss": 0.1614, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.4952638700947226, | |
| "grad_norm": 4.286579132080078, | |
| "learning_rate": 1.1901648706509637e-05, | |
| "loss": 0.1557, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4959404600811908, | |
| "grad_norm": 6.809506893157959, | |
| "learning_rate": 1.1878453457503465e-05, | |
| "loss": 0.1479, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.496617050067659, | |
| "grad_norm": 3.8739116191864014, | |
| "learning_rate": 1.185524772764258e-05, | |
| "loss": 0.1504, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4972936400541272, | |
| "grad_norm": 3.7848074436187744, | |
| "learning_rate": 1.1832031646403654e-05, | |
| "loss": 0.1236, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4979702300405954, | |
| "grad_norm": 4.305004596710205, | |
| "learning_rate": 1.1808805343321102e-05, | |
| "loss": 0.1505, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.4986468200270636, | |
| "grad_norm": 3.052428722381592, | |
| "learning_rate": 1.1785568947986368e-05, | |
| "loss": 0.1454, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.4993234100135318, | |
| "grad_norm": 7.71512508392334, | |
| "learning_rate": 1.176232259004722e-05, | |
| "loss": 0.2538, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 5.634016990661621, | |
| "learning_rate": 1.1739066399206997e-05, | |
| "loss": 0.1768, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5006765899864682, | |
| "grad_norm": 4.39807653427124, | |
| "learning_rate": 1.1715800505223918e-05, | |
| "loss": 0.2035, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5006765899864682, | |
| "eval_accuracy": 0.7871559633027523, | |
| "eval_f1": 0.40816326530612246, | |
| "eval_loss": 0.4451703131198883, | |
| "eval_precision": 0.851063829787234, | |
| "eval_recall": 0.2684563758389262, | |
| "eval_runtime": 53.0418, | |
| "eval_samples_per_second": 5.618, | |
| "eval_steps_per_second": 0.189, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5013531799729364, | |
| "grad_norm": 4.298307418823242, | |
| "learning_rate": 1.1692525037910325e-05, | |
| "loss": 0.155, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5020297699594046, | |
| "grad_norm": 3.8376667499542236, | |
| "learning_rate": 1.166924012713198e-05, | |
| "loss": 0.1715, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5027063599458728, | |
| "grad_norm": 4.111176013946533, | |
| "learning_rate": 1.164594590280734e-05, | |
| "loss": 0.139, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.503382949932341, | |
| "grad_norm": 3.5634899139404297, | |
| "learning_rate": 1.1622642494906819e-05, | |
| "loss": 0.1315, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5040595399188093, | |
| "grad_norm": 3.7423675060272217, | |
| "learning_rate": 1.1599330033452078e-05, | |
| "loss": 0.1789, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5047361299052774, | |
| "grad_norm": 7.1124958992004395, | |
| "learning_rate": 1.1576008648515286e-05, | |
| "loss": 0.1674, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.5054127198917456, | |
| "grad_norm": 3.564636468887329, | |
| "learning_rate": 1.1552678470218406e-05, | |
| "loss": 0.1415, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.5060893098782138, | |
| "grad_norm": 7.098050117492676, | |
| "learning_rate": 1.1529339628732462e-05, | |
| "loss": 0.1789, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.506765899864682, | |
| "grad_norm": 7.915879726409912, | |
| "learning_rate": 1.1505992254276808e-05, | |
| "loss": 0.184, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5074424898511503, | |
| "grad_norm": 3.962040424346924, | |
| "learning_rate": 1.148263647711842e-05, | |
| "loss": 0.1447, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5081190798376184, | |
| "grad_norm": 2.833312749862671, | |
| "learning_rate": 1.1459272427571148e-05, | |
| "loss": 0.1153, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.5087956698240866, | |
| "grad_norm": 3.094414234161377, | |
| "learning_rate": 1.1435900235995004e-05, | |
| "loss": 0.1555, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5094722598105548, | |
| "grad_norm": 3.7343735694885254, | |
| "learning_rate": 1.141252003279542e-05, | |
| "loss": 0.1781, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.510148849797023, | |
| "grad_norm": 2.3985371589660645, | |
| "learning_rate": 1.1389131948422534e-05, | |
| "loss": 0.1266, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5108254397834912, | |
| "grad_norm": 2.8854312896728516, | |
| "learning_rate": 1.1365736113370463e-05, | |
| "loss": 0.1198, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5115020297699594, | |
| "grad_norm": 3.6787984371185303, | |
| "learning_rate": 1.1342332658176556e-05, | |
| "loss": 0.1701, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5121786197564276, | |
| "grad_norm": 3.9833555221557617, | |
| "learning_rate": 1.1318921713420691e-05, | |
| "loss": 0.1883, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5128552097428958, | |
| "grad_norm": 5.8127031326293945, | |
| "learning_rate": 1.1295503409724526e-05, | |
| "loss": 0.19, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.513531799729364, | |
| "grad_norm": 3.7036736011505127, | |
| "learning_rate": 1.1272077877750782e-05, | |
| "loss": 0.1532, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.5142083897158322, | |
| "grad_norm": 3.996150255203247, | |
| "learning_rate": 1.124864524820251e-05, | |
| "loss": 0.1412, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5142083897158322, | |
| "eval_accuracy": 0.7761467889908257, | |
| "eval_f1": 0.34408602150537637, | |
| "eval_loss": 0.4691689610481262, | |
| "eval_precision": 0.8648648648648649, | |
| "eval_recall": 0.21476510067114093, | |
| "eval_runtime": 52.6721, | |
| "eval_samples_per_second": 5.658, | |
| "eval_steps_per_second": 0.19, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5148849797023004, | |
| "grad_norm": 6.409296989440918, | |
| "learning_rate": 1.1225205651822359e-05, | |
| "loss": 0.264, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.5155615696887687, | |
| "grad_norm": 3.7257473468780518, | |
| "learning_rate": 1.1201759219391858e-05, | |
| "loss": 0.1302, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.5162381596752368, | |
| "grad_norm": 2.880531072616577, | |
| "learning_rate": 1.1178306081730666e-05, | |
| "loss": 0.1321, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.516914749661705, | |
| "grad_norm": 3.284871816635132, | |
| "learning_rate": 1.1154846369695864e-05, | |
| "loss": 0.1171, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.5175913396481732, | |
| "grad_norm": 3.0467426776885986, | |
| "learning_rate": 1.1131380214181205e-05, | |
| "loss": 0.1664, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.5182679296346414, | |
| "grad_norm": 4.38018274307251, | |
| "learning_rate": 1.1107907746116402e-05, | |
| "loss": 0.167, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.5189445196211097, | |
| "grad_norm": 2.9475748538970947, | |
| "learning_rate": 1.108442909646639e-05, | |
| "loss": 0.1161, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.5196211096075778, | |
| "grad_norm": 5.2659687995910645, | |
| "learning_rate": 1.1060944396230583e-05, | |
| "loss": 0.1163, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.520297699594046, | |
| "grad_norm": 3.119966983795166, | |
| "learning_rate": 1.1037453776442164e-05, | |
| "loss": 0.1693, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.5209742895805142, | |
| "grad_norm": 3.546898603439331, | |
| "learning_rate": 1.1013957368167343e-05, | |
| "loss": 0.1362, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5216508795669824, | |
| "grad_norm": 3.2547402381896973, | |
| "learning_rate": 1.099045530250463e-05, | |
| "loss": 0.1175, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.5223274695534507, | |
| "grad_norm": 2.995699882507324, | |
| "learning_rate": 1.0966947710584086e-05, | |
| "loss": 0.1131, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5230040595399188, | |
| "grad_norm": 2.557124614715576, | |
| "learning_rate": 1.0943434723566624e-05, | |
| "loss": 0.139, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.523680649526387, | |
| "grad_norm": 3.192425489425659, | |
| "learning_rate": 1.091991647264325e-05, | |
| "loss": 0.1644, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5243572395128552, | |
| "grad_norm": 5.2245025634765625, | |
| "learning_rate": 1.0896393089034336e-05, | |
| "loss": 0.1449, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5250338294993234, | |
| "grad_norm": 3.094622850418091, | |
| "learning_rate": 1.0872864703988903e-05, | |
| "loss": 0.1066, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.5257104194857916, | |
| "grad_norm": 2.3677878379821777, | |
| "learning_rate": 1.0849331448783869e-05, | |
| "loss": 0.1244, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.5263870094722598, | |
| "grad_norm": 5.578123569488525, | |
| "learning_rate": 1.0825793454723325e-05, | |
| "loss": 0.1779, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.527063599458728, | |
| "grad_norm": 5.073922157287598, | |
| "learning_rate": 1.0802250853137808e-05, | |
| "loss": 0.2355, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.5277401894451962, | |
| "grad_norm": 2.925035238265991, | |
| "learning_rate": 1.0778703775383559e-05, | |
| "loss": 0.1346, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5277401894451962, | |
| "eval_accuracy": 0.8119266055045872, | |
| "eval_f1": 0.5287356321839081, | |
| "eval_loss": 0.40138694643974304, | |
| "eval_precision": 0.8394160583941606, | |
| "eval_recall": 0.3859060402684564, | |
| "eval_runtime": 52.7212, | |
| "eval_samples_per_second": 5.652, | |
| "eval_steps_per_second": 0.19, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5284167794316644, | |
| "grad_norm": 4.117710113525391, | |
| "learning_rate": 1.0755152352841798e-05, | |
| "loss": 0.1435, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.5290933694181326, | |
| "grad_norm": 4.172342777252197, | |
| "learning_rate": 1.0731596716917978e-05, | |
| "loss": 0.1428, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5297699594046008, | |
| "grad_norm": 2.936666250228882, | |
| "learning_rate": 1.0708036999041072e-05, | |
| "loss": 0.1051, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.530446549391069, | |
| "grad_norm": 2.487924098968506, | |
| "learning_rate": 1.0684473330662815e-05, | |
| "loss": 0.0967, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.5311231393775372, | |
| "grad_norm": 3.894505739212036, | |
| "learning_rate": 1.0660905843256995e-05, | |
| "loss": 0.1088, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5317997293640054, | |
| "grad_norm": 5.016809463500977, | |
| "learning_rate": 1.0637334668318708e-05, | |
| "loss": 0.1301, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.5324763193504736, | |
| "grad_norm": 3.1163365840911865, | |
| "learning_rate": 1.0613759937363617e-05, | |
| "loss": 0.125, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.5331529093369418, | |
| "grad_norm": 3.6140894889831543, | |
| "learning_rate": 1.0590181781927229e-05, | |
| "loss": 0.1339, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5338294993234101, | |
| "grad_norm": 4.263391971588135, | |
| "learning_rate": 1.0566600333564163e-05, | |
| "loss": 0.2378, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5345060893098782, | |
| "grad_norm": 5.283838748931885, | |
| "learning_rate": 1.0543015723847402e-05, | |
| "loss": 0.1762, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5351826792963464, | |
| "grad_norm": 4.460554599761963, | |
| "learning_rate": 1.0519428084367583e-05, | |
| "loss": 0.1446, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5358592692828146, | |
| "grad_norm": 3.4405527114868164, | |
| "learning_rate": 1.0495837546732224e-05, | |
| "loss": 0.0647, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.5365358592692828, | |
| "grad_norm": 2.5219104290008545, | |
| "learning_rate": 1.0472244242565035e-05, | |
| "loss": 0.0866, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.5372124492557511, | |
| "grad_norm": 4.9833221435546875, | |
| "learning_rate": 1.044864830350515e-05, | |
| "loss": 0.1009, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5378890392422192, | |
| "grad_norm": 3.444195032119751, | |
| "learning_rate": 1.042504986120641e-05, | |
| "loss": 0.0941, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5385656292286874, | |
| "grad_norm": 5.0867509841918945, | |
| "learning_rate": 1.0401449047336622e-05, | |
| "loss": 0.1377, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.5392422192151556, | |
| "grad_norm": 4.2269606590271, | |
| "learning_rate": 1.0377845993576819e-05, | |
| "loss": 0.1651, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5399188092016238, | |
| "grad_norm": 5.579936504364014, | |
| "learning_rate": 1.0354240831620542e-05, | |
| "loss": 0.2036, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.540595399188092, | |
| "grad_norm": 3.619593858718872, | |
| "learning_rate": 1.0330633693173083e-05, | |
| "loss": 0.0519, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5412719891745602, | |
| "grad_norm": 5.691805362701416, | |
| "learning_rate": 1.0307024709950775e-05, | |
| "loss": 0.1691, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5412719891745602, | |
| "eval_accuracy": 0.7926605504587156, | |
| "eval_f1": 0.4321608040201005, | |
| "eval_loss": 0.48544931411743164, | |
| "eval_precision": 0.86, | |
| "eval_recall": 0.28859060402684567, | |
| "eval_runtime": 53.0047, | |
| "eval_samples_per_second": 5.622, | |
| "eval_steps_per_second": 0.189, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5419485791610285, | |
| "grad_norm": 4.048157691955566, | |
| "learning_rate": 1.0283414013680233e-05, | |
| "loss": 0.1629, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.5426251691474966, | |
| "grad_norm": 8.180608749389648, | |
| "learning_rate": 1.0259801736097634e-05, | |
| "loss": 0.2779, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.5433017591339648, | |
| "grad_norm": 2.2637126445770264, | |
| "learning_rate": 1.023618800894798e-05, | |
| "loss": 0.0888, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.543978349120433, | |
| "grad_norm": 3.4222660064697266, | |
| "learning_rate": 1.0212572963984358e-05, | |
| "loss": 0.1072, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5446549391069012, | |
| "grad_norm": 5.149094104766846, | |
| "learning_rate": 1.0188956732967208e-05, | |
| "loss": 0.1834, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5453315290933695, | |
| "grad_norm": 4.928592681884766, | |
| "learning_rate": 1.0165339447663586e-05, | |
| "loss": 0.1065, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5460081190798376, | |
| "grad_norm": 3.3745458126068115, | |
| "learning_rate": 1.0141721239846436e-05, | |
| "loss": 0.1105, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5466847090663058, | |
| "grad_norm": 3.140056848526001, | |
| "learning_rate": 1.0118102241293848e-05, | |
| "loss": 0.1057, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.547361299052774, | |
| "grad_norm": 2.9225945472717285, | |
| "learning_rate": 1.0094482583788311e-05, | |
| "loss": 0.1409, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5480378890392422, | |
| "grad_norm": 5.8072333335876465, | |
| "learning_rate": 1.0070862399116016e-05, | |
| "loss": 0.1697, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5487144790257105, | |
| "grad_norm": 2.9896950721740723, | |
| "learning_rate": 1.0047241819066069e-05, | |
| "loss": 0.0893, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.5493910690121786, | |
| "grad_norm": 9.982451438903809, | |
| "learning_rate": 1.0023620975429803e-05, | |
| "loss": 0.2119, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.5500676589986468, | |
| "grad_norm": 5.057304382324219, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1417, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.550744248985115, | |
| "grad_norm": 8.598350524902344, | |
| "learning_rate": 9.976379024570202e-06, | |
| "loss": 0.2122, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5514208389715832, | |
| "grad_norm": 2.8650100231170654, | |
| "learning_rate": 9.952758180933933e-06, | |
| "loss": 0.0901, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5520974289580515, | |
| "grad_norm": 5.409826278686523, | |
| "learning_rate": 9.929137600883986e-06, | |
| "loss": 0.1613, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.5527740189445196, | |
| "grad_norm": 2.837448835372925, | |
| "learning_rate": 9.90551741621169e-06, | |
| "loss": 0.0946, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5534506089309879, | |
| "grad_norm": 3.9413063526153564, | |
| "learning_rate": 9.881897758706155e-06, | |
| "loss": 0.1099, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.554127198917456, | |
| "grad_norm": 2.284583330154419, | |
| "learning_rate": 9.858278760153567e-06, | |
| "loss": 0.0653, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5548037889039242, | |
| "grad_norm": 6.636195182800293, | |
| "learning_rate": 9.834660552336415e-06, | |
| "loss": 0.1583, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5548037889039242, | |
| "eval_accuracy": 0.7788990825688074, | |
| "eval_f1": 0.3674540682414698, | |
| "eval_loss": 0.47594916820526123, | |
| "eval_precision": 0.8433734939759037, | |
| "eval_recall": 0.2348993288590604, | |
| "eval_runtime": 53.6382, | |
| "eval_samples_per_second": 5.556, | |
| "eval_steps_per_second": 0.186, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5554803788903924, | |
| "grad_norm": 3.6005120277404785, | |
| "learning_rate": 9.811043267032797e-06, | |
| "loss": 0.0887, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.5561569688768606, | |
| "grad_norm": 3.6898558139801025, | |
| "learning_rate": 9.787427036015647e-06, | |
| "loss": 0.147, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5568335588633289, | |
| "grad_norm": 6.481770992279053, | |
| "learning_rate": 9.763811991052021e-06, | |
| "loss": 0.1877, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.557510148849797, | |
| "grad_norm": 2.4457807540893555, | |
| "learning_rate": 9.74019826390237e-06, | |
| "loss": 0.1004, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.5581867388362652, | |
| "grad_norm": 2.3346476554870605, | |
| "learning_rate": 9.716585986319769e-06, | |
| "loss": 0.1199, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5588633288227334, | |
| "grad_norm": 5.753274917602539, | |
| "learning_rate": 9.692975290049228e-06, | |
| "loss": 0.1646, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5595399188092016, | |
| "grad_norm": 4.540411949157715, | |
| "learning_rate": 9.669366306826919e-06, | |
| "loss": 0.1275, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.5602165087956699, | |
| "grad_norm": 4.377498149871826, | |
| "learning_rate": 9.645759168379463e-06, | |
| "loss": 0.1508, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.560893098782138, | |
| "grad_norm": 4.894872188568115, | |
| "learning_rate": 9.622154006423185e-06, | |
| "loss": 0.1608, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.5615696887686062, | |
| "grad_norm": 3.9579596519470215, | |
| "learning_rate": 9.598550952663383e-06, | |
| "loss": 0.0748, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5622462787550744, | |
| "grad_norm": 3.1920807361602783, | |
| "learning_rate": 9.574950138793593e-06, | |
| "loss": 0.0958, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.5629228687415426, | |
| "grad_norm": 2.8056745529174805, | |
| "learning_rate": 9.551351696494854e-06, | |
| "loss": 0.1528, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5635994587280109, | |
| "grad_norm": 2.493863105773926, | |
| "learning_rate": 9.527755757434968e-06, | |
| "loss": 0.0693, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.564276048714479, | |
| "grad_norm": 3.3314931392669678, | |
| "learning_rate": 9.504162453267776e-06, | |
| "loss": 0.1227, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.5649526387009473, | |
| "grad_norm": 3.9033989906311035, | |
| "learning_rate": 9.480571915632422e-06, | |
| "loss": 0.1199, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5656292286874154, | |
| "grad_norm": 3.6395678520202637, | |
| "learning_rate": 9.456984276152598e-06, | |
| "loss": 0.1057, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5663058186738836, | |
| "grad_norm": 6.916732311248779, | |
| "learning_rate": 9.43339966643584e-06, | |
| "loss": 0.1741, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5669824086603519, | |
| "grad_norm": 3.8561432361602783, | |
| "learning_rate": 9.409818218072774e-06, | |
| "loss": 0.1654, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.56765899864682, | |
| "grad_norm": 3.962113380432129, | |
| "learning_rate": 9.386240062636388e-06, | |
| "loss": 0.1459, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.5683355886332883, | |
| "grad_norm": 2.5661449432373047, | |
| "learning_rate": 9.362665331681294e-06, | |
| "loss": 0.1363, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5683355886332883, | |
| "eval_accuracy": 0.8009174311926606, | |
| "eval_f1": 0.4668304668304668, | |
| "eval_loss": 0.43011632561683655, | |
| "eval_precision": 0.8715596330275229, | |
| "eval_recall": 0.3187919463087248, | |
| "eval_runtime": 54.0976, | |
| "eval_samples_per_second": 5.509, | |
| "eval_steps_per_second": 0.185, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5690121786197564, | |
| "grad_norm": 3.792685031890869, | |
| "learning_rate": 9.339094156743007e-06, | |
| "loss": 0.1408, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5696887686062246, | |
| "grad_norm": 4.538841247558594, | |
| "learning_rate": 9.315526669337189e-06, | |
| "loss": 0.1399, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5703653585926928, | |
| "grad_norm": 5.182969570159912, | |
| "learning_rate": 9.291963000958932e-06, | |
| "loss": 0.1753, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.571041948579161, | |
| "grad_norm": 3.905219554901123, | |
| "learning_rate": 9.268403283082025e-06, | |
| "loss": 0.143, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.5717185385656293, | |
| "grad_norm": 3.7634634971618652, | |
| "learning_rate": 9.244847647158203e-06, | |
| "loss": 0.1469, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5723951285520974, | |
| "grad_norm": 3.5530450344085693, | |
| "learning_rate": 9.221296224616443e-06, | |
| "loss": 0.1334, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5730717185385656, | |
| "grad_norm": 6.1246161460876465, | |
| "learning_rate": 9.197749146862193e-06, | |
| "loss": 0.1216, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.5737483085250338, | |
| "grad_norm": 7.231658458709717, | |
| "learning_rate": 9.174206545276678e-06, | |
| "loss": 0.2128, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.574424898511502, | |
| "grad_norm": 6.129051685333252, | |
| "learning_rate": 9.150668551216134e-06, | |
| "loss": 0.2178, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5751014884979703, | |
| "grad_norm": 4.892454624176025, | |
| "learning_rate": 9.127135296011102e-06, | |
| "loss": 0.1496, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5757780784844384, | |
| "grad_norm": 6.8777337074279785, | |
| "learning_rate": 9.103606910965666e-06, | |
| "loss": 0.2008, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.5764546684709067, | |
| "grad_norm": 3.538118600845337, | |
| "learning_rate": 9.080083527356755e-06, | |
| "loss": 0.1232, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5771312584573748, | |
| "grad_norm": 5.2440080642700195, | |
| "learning_rate": 9.056565276433378e-06, | |
| "loss": 0.1973, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.577807848443843, | |
| "grad_norm": 2.282479763031006, | |
| "learning_rate": 9.033052289415914e-06, | |
| "loss": 0.0696, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5784844384303113, | |
| "grad_norm": 3.643191337585449, | |
| "learning_rate": 9.009544697495373e-06, | |
| "loss": 0.1378, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5791610284167794, | |
| "grad_norm": 3.0240986347198486, | |
| "learning_rate": 8.986042631832656e-06, | |
| "loss": 0.1579, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5798376184032477, | |
| "grad_norm": 3.921018362045288, | |
| "learning_rate": 8.962546223557838e-06, | |
| "loss": 0.1194, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.5805142083897158, | |
| "grad_norm": 3.4192543029785156, | |
| "learning_rate": 8.93905560376942e-06, | |
| "loss": 0.1817, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.581190798376184, | |
| "grad_norm": 4.1514363288879395, | |
| "learning_rate": 8.915570903533615e-06, | |
| "loss": 0.1489, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5818673883626523, | |
| "grad_norm": 3.885377883911133, | |
| "learning_rate": 8.892092253883602e-06, | |
| "loss": 0.1456, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5818673883626523, | |
| "eval_accuracy": 0.7926605504587156, | |
| "eval_f1": 0.42346938775510207, | |
| "eval_loss": 0.4394099712371826, | |
| "eval_precision": 0.8829787234042553, | |
| "eval_recall": 0.2785234899328859, | |
| "eval_runtime": 53.3745, | |
| "eval_samples_per_second": 5.583, | |
| "eval_steps_per_second": 0.187, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5825439783491204, | |
| "grad_norm": 3.6462066173553467, | |
| "learning_rate": 8.8686197858188e-06, | |
| "loss": 0.1782, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.5832205683355887, | |
| "grad_norm": 3.2633800506591797, | |
| "learning_rate": 8.84515363030414e-06, | |
| "loss": 0.1656, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5838971583220568, | |
| "grad_norm": 5.255461692810059, | |
| "learning_rate": 8.821693918269334e-06, | |
| "loss": 0.1306, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.584573748308525, | |
| "grad_norm": 4.525811672210693, | |
| "learning_rate": 8.798240780608143e-06, | |
| "loss": 0.1684, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5852503382949933, | |
| "grad_norm": 2.788585901260376, | |
| "learning_rate": 8.774794348177641e-06, | |
| "loss": 0.1456, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5859269282814614, | |
| "grad_norm": 3.1500301361083984, | |
| "learning_rate": 8.751354751797492e-06, | |
| "loss": 0.1347, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.5866035182679297, | |
| "grad_norm": 3.487180471420288, | |
| "learning_rate": 8.727922122249221e-06, | |
| "loss": 0.1393, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.5872801082543978, | |
| "grad_norm": 2.1133573055267334, | |
| "learning_rate": 8.704496590275479e-06, | |
| "loss": 0.0814, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.587956698240866, | |
| "grad_norm": 3.227505922317505, | |
| "learning_rate": 8.68107828657931e-06, | |
| "loss": 0.1104, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.5886332882273342, | |
| "grad_norm": 2.8195204734802246, | |
| "learning_rate": 8.657667341823449e-06, | |
| "loss": 0.1073, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5893098782138024, | |
| "grad_norm": 6.85077428817749, | |
| "learning_rate": 8.63426388662954e-06, | |
| "loss": 0.2117, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.5899864682002707, | |
| "grad_norm": 3.027806043624878, | |
| "learning_rate": 8.61086805157747e-06, | |
| "loss": 0.13, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5906630581867388, | |
| "grad_norm": 3.608955144882202, | |
| "learning_rate": 8.587479967204584e-06, | |
| "loss": 0.1323, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.591339648173207, | |
| "grad_norm": 3.6784167289733887, | |
| "learning_rate": 8.564099764004998e-06, | |
| "loss": 0.1205, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5920162381596752, | |
| "grad_norm": 3.6753430366516113, | |
| "learning_rate": 8.540727572428854e-06, | |
| "loss": 0.1728, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5926928281461434, | |
| "grad_norm": 3.4869165420532227, | |
| "learning_rate": 8.51736352288158e-06, | |
| "loss": 0.1363, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5933694181326117, | |
| "grad_norm": 6.327773571014404, | |
| "learning_rate": 8.494007745723197e-06, | |
| "loss": 0.1723, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5940460081190798, | |
| "grad_norm": 4.366674423217773, | |
| "learning_rate": 8.47066037126754e-06, | |
| "loss": 0.1557, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5947225981055481, | |
| "grad_norm": 3.28305721282959, | |
| "learning_rate": 8.447321529781597e-06, | |
| "loss": 0.1253, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5953991880920162, | |
| "grad_norm": 3.0015041828155518, | |
| "learning_rate": 8.423991351484715e-06, | |
| "loss": 0.1318, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5953991880920162, | |
| "eval_accuracy": 0.8192660550458716, | |
| "eval_f1": 0.5553047404063205, | |
| "eval_loss": 0.3900049328804016, | |
| "eval_precision": 0.8482758620689655, | |
| "eval_recall": 0.412751677852349, | |
| "eval_runtime": 53.5058, | |
| "eval_samples_per_second": 5.569, | |
| "eval_steps_per_second": 0.187, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5960757780784844, | |
| "grad_norm": 2.9270071983337402, | |
| "learning_rate": 8.400669966547925e-06, | |
| "loss": 0.1256, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.5967523680649527, | |
| "grad_norm": 2.6574175357818604, | |
| "learning_rate": 8.377357505093183e-06, | |
| "loss": 0.0761, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5974289580514208, | |
| "grad_norm": 3.648263692855835, | |
| "learning_rate": 8.35405409719266e-06, | |
| "loss": 0.124, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5981055480378891, | |
| "grad_norm": 4.690035820007324, | |
| "learning_rate": 8.330759872868022e-06, | |
| "loss": 0.182, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5987821380243572, | |
| "grad_norm": 3.0360960960388184, | |
| "learning_rate": 8.307474962089676e-06, | |
| "loss": 0.1437, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5994587280108254, | |
| "grad_norm": 3.8773977756500244, | |
| "learning_rate": 8.284199494776083e-06, | |
| "loss": 0.0975, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6001353179972937, | |
| "grad_norm": 3.7407238483428955, | |
| "learning_rate": 8.260933600793003e-06, | |
| "loss": 0.1422, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6008119079837618, | |
| "grad_norm": 4.789558410644531, | |
| "learning_rate": 8.237677409952784e-06, | |
| "loss": 0.1737, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6014884979702301, | |
| "grad_norm": 5.8444929122924805, | |
| "learning_rate": 8.214431052013636e-06, | |
| "loss": 0.1658, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.6021650879566982, | |
| "grad_norm": 2.8387224674224854, | |
| "learning_rate": 8.191194656678905e-06, | |
| "loss": 0.1317, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6028416779431665, | |
| "grad_norm": 3.5490684509277344, | |
| "learning_rate": 8.16796835359635e-06, | |
| "loss": 0.0796, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.6035182679296346, | |
| "grad_norm": 3.600038766860962, | |
| "learning_rate": 8.144752272357424e-06, | |
| "loss": 0.1059, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.6041948579161028, | |
| "grad_norm": 6.19486665725708, | |
| "learning_rate": 8.12154654249654e-06, | |
| "loss": 0.2211, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.6048714479025711, | |
| "grad_norm": 3.217571973800659, | |
| "learning_rate": 8.098351293490365e-06, | |
| "loss": 0.0893, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.6055480378890392, | |
| "grad_norm": 3.447753667831421, | |
| "learning_rate": 8.07516665475708e-06, | |
| "loss": 0.1373, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.6062246278755075, | |
| "grad_norm": 4.001631259918213, | |
| "learning_rate": 8.051992755655672e-06, | |
| "loss": 0.1635, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.6069012178619756, | |
| "grad_norm": 3.870436191558838, | |
| "learning_rate": 8.0288297254852e-06, | |
| "loss": 0.1659, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.6075778078484438, | |
| "grad_norm": 7.8299479484558105, | |
| "learning_rate": 8.005677693484077e-06, | |
| "loss": 0.2432, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.6082543978349121, | |
| "grad_norm": 2.8993029594421387, | |
| "learning_rate": 7.98253678882937e-06, | |
| "loss": 0.0963, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.6089309878213802, | |
| "grad_norm": 3.3452529907226562, | |
| "learning_rate": 7.959407140636034e-06, | |
| "loss": 0.1336, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6089309878213802, | |
| "eval_accuracy": 0.8, | |
| "eval_f1": 0.47342995169082125, | |
| "eval_loss": 0.42485949397087097, | |
| "eval_precision": 0.8448275862068966, | |
| "eval_recall": 0.3288590604026846, | |
| "eval_runtime": 53.6449, | |
| "eval_samples_per_second": 5.555, | |
| "eval_steps_per_second": 0.186, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1478, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.732359159822418e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
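
The state above follows the Hugging Face Trainer `trainer_state.json` layout (note the `log_history`, `stateful_callbacks`/`TrainerControl`, and `total_flos` keys): per-step training entries carry `loss`, `grad_norm`, and `learning_rate`, while periodic evaluation entries (every 20 steps here, per `eval_steps`) carry the `eval_*` metrics instead. A minimal sketch of how such a file could be loaded and the two loss curves separated and plotted, assuming it is saved under the Trainer's conventional `trainer_state.json` name inside a checkpoint directory (adjust the path as needed):

```python
import json

import matplotlib.pyplot as plt

# Load the serialized Trainer state; the path is an assumption and should
# point at the checkpoint directory's trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]

# Per-step training entries have a plain "loss" key; evaluation entries
# have "eval_loss" (plus eval_accuracy, eval_f1, etc.) and no "loss".
train_entries = [e for e in history if "loss" in e]
eval_entries = [e for e in history if "eval_loss" in e]

plt.plot([e["step"] for e in train_entries],
         [e["loss"] for e in train_entries], label="train loss")
plt.plot([e["step"] for e in eval_entries],
         [e["eval_loss"] for e in eval_entries], marker="o", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.show()
```

The same split works for any of the logged scalars, e.g. replacing `eval_loss` with `eval_f1` to inspect the F1 trajectory across the step-860/880/900 evaluations recorded above.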