| { | |
| "best_metric": 1.2568100690841675, | |
| "best_model_checkpoint": "square_run_32_batch/checkpoint-240", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 6.848705291748047, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 2.0193, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 5.4228925704956055, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 2.1013, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 4.7422590255737305, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.9569, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 5.0756025314331055, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 1.9677, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 7.1845269203186035, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 1.884, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 4.051375865936279, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.9017, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 5.293440341949463, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 1.9373, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.1893939393939394, | |
| "eval_f1_macro": 0.04638218923933209, | |
| "eval_f1_micro": 0.1893939393939394, | |
| "eval_f1_weighted": 0.06149153876426603, | |
| "eval_loss": 1.8818118572235107, | |
| "eval_precision_macro": 0.027685492801771874, | |
| "eval_precision_micro": 0.1893939393939394, | |
| "eval_precision_weighted": 0.03670425182053089, | |
| "eval_recall_macro": 0.14285714285714285, | |
| "eval_recall_micro": 0.1893939393939394, | |
| "eval_recall_weighted": 0.1893939393939394, | |
| "eval_runtime": 2.2684, | |
| "eval_samples_per_second": 58.192, | |
| "eval_steps_per_second": 2.204, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 3.4230170249938965, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 1.9179, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 3.4030396938323975, | |
| "learning_rate": 4e-05, | |
| "loss": 1.8139, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 4.195278167724609, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 1.8941, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.4666666666666668, | |
| "grad_norm": 3.2356927394866943, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 1.8915, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 3.322704315185547, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 1.917, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 1.7333333333333334, | |
| "grad_norm": 3.293910026550293, | |
| "learning_rate": 5.7777777777777776e-05, | |
| "loss": 1.8943, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 4.803905487060547, | |
| "learning_rate": 6.222222222222222e-05, | |
| "loss": 1.8841, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 4.006722927093506, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 1.869, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.26515151515151514, | |
| "eval_f1_macro": 0.10998877665544333, | |
| "eval_f1_micro": 0.26515151515151514, | |
| "eval_f1_weighted": 0.14177124783185388, | |
| "eval_loss": 1.864223599433899, | |
| "eval_precision_macro": 0.075, | |
| "eval_precision_micro": 0.26515151515151514, | |
| "eval_precision_weighted": 0.09678030303030305, | |
| "eval_recall_macro": 0.20634920634920634, | |
| "eval_recall_micro": 0.26515151515151514, | |
| "eval_recall_weighted": 0.26515151515151514, | |
| "eval_runtime": 1.8754, | |
| "eval_samples_per_second": 70.384, | |
| "eval_steps_per_second": 2.666, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 2.15875506401062, | |
| "learning_rate": 7.111111111111112e-05, | |
| "loss": 1.8686, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 2.2666666666666666, | |
| "grad_norm": 2.8864150047302246, | |
| "learning_rate": 7.555555555555556e-05, | |
| "loss": 1.8652, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 3.819974899291992, | |
| "learning_rate": 8e-05, | |
| "loss": 1.818, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 2.533333333333333, | |
| "grad_norm": 7.35491418838501, | |
| "learning_rate": 8.444444444444444e-05, | |
| "loss": 1.9347, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 5.84605598449707, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 1.8508, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 2.4050137996673584, | |
| "learning_rate": 9.333333333333334e-05, | |
| "loss": 1.8884, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 5.182938575744629, | |
| "learning_rate": 9.777777777777778e-05, | |
| "loss": 1.9218, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.25757575757575757, | |
| "eval_f1_macro": 0.11628985865833667, | |
| "eval_f1_micro": 0.25757575757575757, | |
| "eval_f1_weighted": 0.1459780747505363, | |
| "eval_loss": 1.8754385709762573, | |
| "eval_precision_macro": 0.131615925058548, | |
| "eval_precision_micro": 0.25757575757575757, | |
| "eval_precision_weighted": 0.15663189269746647, | |
| "eval_recall_macro": 0.19047619047619047, | |
| "eval_recall_micro": 0.25757575757575757, | |
| "eval_recall_weighted": 0.25757575757575757, | |
| "eval_runtime": 1.9051, | |
| "eval_samples_per_second": 69.286, | |
| "eval_steps_per_second": 2.624, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 3.066666666666667, | |
| "grad_norm": 5.7963128089904785, | |
| "learning_rate": 9.97530864197531e-05, | |
| "loss": 1.9754, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 5.1605048179626465, | |
| "learning_rate": 9.925925925925926e-05, | |
| "loss": 1.8577, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 3.756355047225952, | |
| "learning_rate": 9.876543209876543e-05, | |
| "loss": 1.8707, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 2.5353710651397705, | |
| "learning_rate": 9.827160493827162e-05, | |
| "loss": 1.7918, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 4.181753635406494, | |
| "learning_rate": 9.777777777777778e-05, | |
| "loss": 1.8251, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "grad_norm": 2.4634644985198975, | |
| "learning_rate": 9.728395061728396e-05, | |
| "loss": 1.7713, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 3.8666666666666667, | |
| "grad_norm": 8.700553894042969, | |
| "learning_rate": 9.679012345679013e-05, | |
| "loss": 1.8962, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 5.921916484832764, | |
| "learning_rate": 9.62962962962963e-05, | |
| "loss": 1.6733, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.38636363636363635, | |
| "eval_f1_macro": 0.2445293836598184, | |
| "eval_f1_micro": 0.38636363636363635, | |
| "eval_f1_weighted": 0.3052538765582244, | |
| "eval_loss": 1.6881486177444458, | |
| "eval_precision_macro": 0.24274221103966703, | |
| "eval_precision_micro": 0.38636363636363635, | |
| "eval_precision_weighted": 0.2917426054412356, | |
| "eval_recall_macro": 0.2992441421012849, | |
| "eval_recall_micro": 0.38636363636363635, | |
| "eval_recall_weighted": 0.38636363636363635, | |
| "eval_runtime": 1.8719, | |
| "eval_samples_per_second": 70.516, | |
| "eval_steps_per_second": 2.671, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.133333333333334, | |
| "grad_norm": 3.3924758434295654, | |
| "learning_rate": 9.580246913580247e-05, | |
| "loss": 1.5941, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 6.785348415374756, | |
| "learning_rate": 9.530864197530865e-05, | |
| "loss": 1.582, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 4.813143730163574, | |
| "learning_rate": 9.481481481481483e-05, | |
| "loss": 1.649, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 5.351255893707275, | |
| "learning_rate": 9.432098765432099e-05, | |
| "loss": 1.6271, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 11.194862365722656, | |
| "learning_rate": 9.382716049382717e-05, | |
| "loss": 1.7395, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 6.263866424560547, | |
| "learning_rate": 9.333333333333334e-05, | |
| "loss": 1.4422, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 4.933333333333334, | |
| "grad_norm": 8.602386474609375, | |
| "learning_rate": 9.28395061728395e-05, | |
| "loss": 1.54, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.42424242424242425, | |
| "eval_f1_macro": 0.32515713851372885, | |
| "eval_f1_micro": 0.42424242424242425, | |
| "eval_f1_weighted": 0.38558697740383735, | |
| "eval_loss": 1.5528110265731812, | |
| "eval_precision_macro": 0.34291374508765815, | |
| "eval_precision_micro": 0.42424242424242425, | |
| "eval_precision_weighted": 0.4100833883442579, | |
| "eval_recall_macro": 0.35698412698412696, | |
| "eval_recall_micro": 0.42424242424242425, | |
| "eval_recall_weighted": 0.42424242424242425, | |
| "eval_runtime": 1.9443, | |
| "eval_samples_per_second": 67.891, | |
| "eval_steps_per_second": 2.572, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 5.066666666666666, | |
| "grad_norm": 7.738183498382568, | |
| "learning_rate": 9.234567901234568e-05, | |
| "loss": 1.6152, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 7.564102649688721, | |
| "learning_rate": 9.185185185185186e-05, | |
| "loss": 1.4993, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 8.335043907165527, | |
| "learning_rate": 9.135802469135802e-05, | |
| "loss": 1.494, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 5.466666666666667, | |
| "grad_norm": 6.382967948913574, | |
| "learning_rate": 9.08641975308642e-05, | |
| "loss": 1.4944, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 7.259094715118408, | |
| "learning_rate": 9.037037037037038e-05, | |
| "loss": 1.3191, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 5.733333333333333, | |
| "grad_norm": 4.972009658813477, | |
| "learning_rate": 8.987654320987655e-05, | |
| "loss": 1.3894, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 5.866666666666667, | |
| "grad_norm": 9.250694274902344, | |
| "learning_rate": 8.938271604938272e-05, | |
| "loss": 1.3979, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 7.207069396972656, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 1.4418, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.38636363636363635, | |
| "eval_f1_macro": 0.285838283865586, | |
| "eval_f1_micro": 0.38636363636363635, | |
| "eval_f1_weighted": 0.3212562379097532, | |
| "eval_loss": 1.5736558437347412, | |
| "eval_precision_macro": 0.284608858206039, | |
| "eval_precision_micro": 0.38636363636363635, | |
| "eval_precision_weighted": 0.3242631096693543, | |
| "eval_recall_macro": 0.3398034769463341, | |
| "eval_recall_micro": 0.38636363636363635, | |
| "eval_recall_weighted": 0.38636363636363635, | |
| "eval_runtime": 1.8593, | |
| "eval_samples_per_second": 70.995, | |
| "eval_steps_per_second": 2.689, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.133333333333334, | |
| "grad_norm": 4.753687381744385, | |
| "learning_rate": 8.839506172839507e-05, | |
| "loss": 1.3218, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 6.266666666666667, | |
| "grad_norm": 5.942229747772217, | |
| "learning_rate": 8.790123456790123e-05, | |
| "loss": 1.3995, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 4.026015281677246, | |
| "learning_rate": 8.740740740740741e-05, | |
| "loss": 1.3155, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 6.533333333333333, | |
| "grad_norm": 4.893887042999268, | |
| "learning_rate": 8.691358024691359e-05, | |
| "loss": 1.2009, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 3.904926061630249, | |
| "learning_rate": 8.641975308641975e-05, | |
| "loss": 0.98, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 4.266864776611328, | |
| "learning_rate": 8.592592592592593e-05, | |
| "loss": 1.0803, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 6.933333333333334, | |
| "grad_norm": 5.24403190612793, | |
| "learning_rate": 8.54320987654321e-05, | |
| "loss": 0.8592, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.4393939393939394, | |
| "eval_f1_macro": 0.3443599467808913, | |
| "eval_f1_micro": 0.4393939393939394, | |
| "eval_f1_weighted": 0.39645908811500513, | |
| "eval_loss": 1.5408130884170532, | |
| "eval_precision_macro": 0.32083233878346656, | |
| "eval_precision_micro": 0.4393939393939394, | |
| "eval_precision_weighted": 0.36735850041771095, | |
| "eval_recall_macro": 0.37913832199546477, | |
| "eval_recall_micro": 0.4393939393939394, | |
| "eval_recall_weighted": 0.4393939393939394, | |
| "eval_runtime": 1.9485, | |
| "eval_samples_per_second": 67.743, | |
| "eval_steps_per_second": 2.566, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 7.066666666666666, | |
| "grad_norm": 5.595825672149658, | |
| "learning_rate": 8.493827160493828e-05, | |
| "loss": 1.0203, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 5.34617805480957, | |
| "learning_rate": 8.444444444444444e-05, | |
| "loss": 1.0819, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 7.333333333333333, | |
| "grad_norm": 6.987905025482178, | |
| "learning_rate": 8.395061728395062e-05, | |
| "loss": 1.1165, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 7.466666666666667, | |
| "grad_norm": 6.039572715759277, | |
| "learning_rate": 8.34567901234568e-05, | |
| "loss": 1.0403, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 6.031858444213867, | |
| "learning_rate": 8.296296296296296e-05, | |
| "loss": 0.9709, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 7.733333333333333, | |
| "grad_norm": 6.656283855438232, | |
| "learning_rate": 8.246913580246915e-05, | |
| "loss": 0.8358, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 7.866666666666667, | |
| "grad_norm": 6.286685943603516, | |
| "learning_rate": 8.197530864197531e-05, | |
| "loss": 1.146, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 9.892986297607422, | |
| "learning_rate": 8.148148148148148e-05, | |
| "loss": 1.1427, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5606060606060606, | |
| "eval_f1_macro": 0.46377203827822905, | |
| "eval_f1_micro": 0.5606060606060606, | |
| "eval_f1_weighted": 0.5317054176401353, | |
| "eval_loss": 1.2803829908370972, | |
| "eval_precision_macro": 0.469819473380193, | |
| "eval_precision_micro": 0.5606060606060606, | |
| "eval_precision_weighted": 0.5280005916463256, | |
| "eval_recall_macro": 0.4830687830687831, | |
| "eval_recall_micro": 0.5606060606060606, | |
| "eval_recall_weighted": 0.5606060606060606, | |
| "eval_runtime": 1.9474, | |
| "eval_samples_per_second": 67.784, | |
| "eval_steps_per_second": 2.568, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.133333333333333, | |
| "grad_norm": 4.904130458831787, | |
| "learning_rate": 8.098765432098767e-05, | |
| "loss": 0.8933, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 8.266666666666667, | |
| "grad_norm": 4.419686794281006, | |
| "learning_rate": 8.049382716049383e-05, | |
| "loss": 0.9245, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 8.33668041229248, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8385, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 8.533333333333333, | |
| "grad_norm": 8.35203742980957, | |
| "learning_rate": 7.950617283950618e-05, | |
| "loss": 0.9428, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 8.666666666666666, | |
| "grad_norm": 5.724539279937744, | |
| "learning_rate": 7.901234567901235e-05, | |
| "loss": 0.7591, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 8.662413597106934, | |
| "learning_rate": 7.851851851851852e-05, | |
| "loss": 0.995, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 8.933333333333334, | |
| "grad_norm": 5.197690010070801, | |
| "learning_rate": 7.802469135802469e-05, | |
| "loss": 0.7849, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.553030303030303, | |
| "eval_f1_macro": 0.46486536691732006, | |
| "eval_f1_micro": 0.553030303030303, | |
| "eval_f1_weighted": 0.529141811901771, | |
| "eval_loss": 1.2879999876022339, | |
| "eval_precision_macro": 0.48036078903674717, | |
| "eval_precision_micro": 0.553030303030303, | |
| "eval_precision_weighted": 0.540143107077697, | |
| "eval_recall_macro": 0.4822675736961451, | |
| "eval_recall_micro": 0.553030303030303, | |
| "eval_recall_weighted": 0.553030303030303, | |
| "eval_runtime": 1.9912, | |
| "eval_samples_per_second": 66.292, | |
| "eval_steps_per_second": 2.511, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 9.066666666666666, | |
| "grad_norm": 7.03670597076416, | |
| "learning_rate": 7.753086419753088e-05, | |
| "loss": 0.8049, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 5.591729640960693, | |
| "learning_rate": 7.703703703703704e-05, | |
| "loss": 0.9341, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 9.333333333333334, | |
| "grad_norm": 6.677962303161621, | |
| "learning_rate": 7.65432098765432e-05, | |
| "loss": 0.7679, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 9.466666666666667, | |
| "grad_norm": 5.4789934158325195, | |
| "learning_rate": 7.60493827160494e-05, | |
| "loss": 0.7773, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 5.957266330718994, | |
| "learning_rate": 7.555555555555556e-05, | |
| "loss": 0.638, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 9.733333333333333, | |
| "grad_norm": 5.691118240356445, | |
| "learning_rate": 7.506172839506173e-05, | |
| "loss": 0.7762, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 9.866666666666667, | |
| "grad_norm": 6.8899827003479, | |
| "learning_rate": 7.456790123456791e-05, | |
| "loss": 0.9012, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 7.408969402313232, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 0.6846, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5151515151515151, | |
| "eval_f1_macro": 0.42983280392444223, | |
| "eval_f1_micro": 0.5151515151515151, | |
| "eval_f1_weighted": 0.48105498068393227, | |
| "eval_loss": 1.3130199909210205, | |
| "eval_precision_macro": 0.4404005812415951, | |
| "eval_precision_micro": 0.5151515151515151, | |
| "eval_precision_weighted": 0.5005354338015628, | |
| "eval_recall_macro": 0.4670748299319728, | |
| "eval_recall_micro": 0.5151515151515151, | |
| "eval_recall_weighted": 0.5151515151515151, | |
| "eval_runtime": 1.9855, | |
| "eval_samples_per_second": 66.482, | |
| "eval_steps_per_second": 2.518, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.133333333333333, | |
| "grad_norm": 5.070552825927734, | |
| "learning_rate": 7.358024691358025e-05, | |
| "loss": 0.6116, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 10.266666666666667, | |
| "grad_norm": 4.844223499298096, | |
| "learning_rate": 7.308641975308643e-05, | |
| "loss": 0.6517, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "grad_norm": 3.965522289276123, | |
| "learning_rate": 7.25925925925926e-05, | |
| "loss": 0.5573, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 10.533333333333333, | |
| "grad_norm": 7.53262996673584, | |
| "learning_rate": 7.209876543209877e-05, | |
| "loss": 0.7258, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 10.666666666666666, | |
| "grad_norm": 6.725161552429199, | |
| "learning_rate": 7.160493827160494e-05, | |
| "loss": 0.8109, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 8.250865936279297, | |
| "learning_rate": 7.111111111111112e-05, | |
| "loss": 0.8596, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 10.933333333333334, | |
| "grad_norm": 4.163515567779541, | |
| "learning_rate": 7.061728395061728e-05, | |
| "loss": 0.4006, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5833333333333334, | |
| "eval_f1_macro": 0.49308835780529725, | |
| "eval_f1_micro": 0.5833333333333334, | |
| "eval_f1_weighted": 0.5597960736751899, | |
| "eval_loss": 1.295769214630127, | |
| "eval_precision_macro": 0.498317425896604, | |
| "eval_precision_micro": 0.5833333333333334, | |
| "eval_precision_weighted": 0.5756076561299337, | |
| "eval_recall_macro": 0.5229024943310657, | |
| "eval_recall_micro": 0.5833333333333334, | |
| "eval_recall_weighted": 0.5833333333333334, | |
| "eval_runtime": 1.9133, | |
| "eval_samples_per_second": 68.991, | |
| "eval_steps_per_second": 2.613, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 11.066666666666666, | |
| "grad_norm": 4.829576015472412, | |
| "learning_rate": 7.012345679012346e-05, | |
| "loss": 0.6355, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "grad_norm": 5.353898525238037, | |
| "learning_rate": 6.962962962962964e-05, | |
| "loss": 0.4955, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 11.333333333333334, | |
| "grad_norm": 5.44912052154541, | |
| "learning_rate": 6.91358024691358e-05, | |
| "loss": 0.4833, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 11.466666666666667, | |
| "grad_norm": 5.900742530822754, | |
| "learning_rate": 6.864197530864198e-05, | |
| "loss": 0.5752, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "grad_norm": 6.004303455352783, | |
| "learning_rate": 6.814814814814815e-05, | |
| "loss": 0.5738, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 11.733333333333333, | |
| "grad_norm": 3.937319040298462, | |
| "learning_rate": 6.765432098765433e-05, | |
| "loss": 0.4661, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 11.866666666666667, | |
| "grad_norm": 4.814683437347412, | |
| "learning_rate": 6.716049382716049e-05, | |
| "loss": 0.5694, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 6.7769880294799805, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.4329, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.553030303030303, | |
| "eval_f1_macro": 0.506246746427407, | |
| "eval_f1_micro": 0.553030303030303, | |
| "eval_f1_weighted": 0.5561970515744254, | |
| "eval_loss": 1.299007773399353, | |
| "eval_precision_macro": 0.5314684490530354, | |
| "eval_precision_micro": 0.553030303030303, | |
| "eval_precision_weighted": 0.5874290165244113, | |
| "eval_recall_macro": 0.5133106575963718, | |
| "eval_recall_micro": 0.553030303030303, | |
| "eval_recall_weighted": 0.553030303030303, | |
| "eval_runtime": 2.0372, | |
| "eval_samples_per_second": 64.793, | |
| "eval_steps_per_second": 2.454, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 12.133333333333333, | |
| "grad_norm": 5.787886619567871, | |
| "learning_rate": 6.617283950617285e-05, | |
| "loss": 0.5719, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 12.266666666666667, | |
| "grad_norm": 2.843268632888794, | |
| "learning_rate": 6.567901234567901e-05, | |
| "loss": 0.4646, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "grad_norm": 4.530274391174316, | |
| "learning_rate": 6.51851851851852e-05, | |
| "loss": 0.3544, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 12.533333333333333, | |
| "grad_norm": 5.348933696746826, | |
| "learning_rate": 6.469135802469136e-05, | |
| "loss": 0.3957, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 12.666666666666666, | |
| "grad_norm": 7.746328830718994, | |
| "learning_rate": 6.419753086419753e-05, | |
| "loss": 0.4989, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 6.134746074676514, | |
| "learning_rate": 6.37037037037037e-05, | |
| "loss": 0.7035, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 12.933333333333334, | |
| "grad_norm": 5.567310810089111, | |
| "learning_rate": 6.320987654320988e-05, | |
| "loss": 0.482, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5151515151515151, | |
| "eval_f1_macro": 0.4842067834885892, | |
| "eval_f1_micro": 0.5151515151515151, | |
| "eval_f1_weighted": 0.5233183119383529, | |
| "eval_loss": 1.3830989599227905, | |
| "eval_precision_macro": 0.5517290249433106, | |
| "eval_precision_micro": 0.5151515151515151, | |
| "eval_precision_weighted": 0.5803270803270804, | |
| "eval_recall_macro": 0.48390778533635675, | |
| "eval_recall_micro": 0.5151515151515151, | |
| "eval_recall_weighted": 0.5151515151515151, | |
| "eval_runtime": 2.882, | |
| "eval_samples_per_second": 45.801, | |
| "eval_steps_per_second": 1.735, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 13.066666666666666, | |
| "grad_norm": 6.7704386711120605, | |
| "learning_rate": 6.271604938271606e-05, | |
| "loss": 0.5136, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "grad_norm": 5.41668701171875, | |
| "learning_rate": 6.222222222222222e-05, | |
| "loss": 0.4843, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "grad_norm": 4.7562150955200195, | |
| "learning_rate": 6.17283950617284e-05, | |
| "loss": 0.3338, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 13.466666666666667, | |
| "grad_norm": 4.077147960662842, | |
| "learning_rate": 6.123456790123457e-05, | |
| "loss": 0.2694, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 4.678223609924316, | |
| "learning_rate": 6.074074074074074e-05, | |
| "loss": 0.2965, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 13.733333333333333, | |
| "grad_norm": 6.246657371520996, | |
| "learning_rate": 6.024691358024692e-05, | |
| "loss": 0.489, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 13.866666666666667, | |
| "grad_norm": 4.0403971672058105, | |
| "learning_rate": 5.975308641975309e-05, | |
| "loss": 0.3524, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 11.723469734191895, | |
| "learning_rate": 5.925925925925926e-05, | |
| "loss": 0.6409, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5984848484848485, | |
| "eval_f1_macro": 0.5080833548412379, | |
| "eval_f1_micro": 0.5984848484848485, | |
| "eval_f1_weighted": 0.576454835403795, | |
| "eval_loss": 1.4066194295883179, | |
| "eval_precision_macro": 0.5193577256077255, | |
| "eval_precision_micro": 0.5984848484848485, | |
| "eval_precision_weighted": 0.5819911307127215, | |
| "eval_recall_macro": 0.5231594860166289, | |
| "eval_recall_micro": 0.5984848484848485, | |
| "eval_recall_weighted": 0.5984848484848485, | |
| "eval_runtime": 4.8101, | |
| "eval_samples_per_second": 27.442, | |
| "eval_steps_per_second": 1.039, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 14.133333333333333, | |
| "grad_norm": 4.278630256652832, | |
| "learning_rate": 5.8765432098765437e-05, | |
| "loss": 0.1963, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 14.266666666666667, | |
| "grad_norm": 5.803009510040283, | |
| "learning_rate": 5.8271604938271607e-05, | |
| "loss": 0.4284, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "grad_norm": 4.886916160583496, | |
| "learning_rate": 5.7777777777777776e-05, | |
| "loss": 0.3091, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 14.533333333333333, | |
| "grad_norm": 6.119672775268555, | |
| "learning_rate": 5.728395061728395e-05, | |
| "loss": 0.3287, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 14.666666666666666, | |
| "grad_norm": 7.14682149887085, | |
| "learning_rate": 5.679012345679012e-05, | |
| "loss": 0.2819, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "grad_norm": 5.075103282928467, | |
| "learning_rate": 5.62962962962963e-05, | |
| "loss": 0.2101, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 14.933333333333334, | |
| "grad_norm": 4.5539045333862305, | |
| "learning_rate": 5.580246913580247e-05, | |
| "loss": 0.3206, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.5606060606060606, | |
| "eval_f1_macro": 0.5154896879386676, | |
| "eval_f1_micro": 0.5606060606060606, | |
| "eval_f1_weighted": 0.5520090359376074, | |
| "eval_loss": 1.3689966201782227, | |
| "eval_precision_macro": 0.6158199643493761, | |
| "eval_precision_micro": 0.5606060606060606, | |
| "eval_precision_weighted": 0.5889932074758278, | |
| "eval_recall_macro": 0.5170219198790628, | |
| "eval_recall_micro": 0.5606060606060606, | |
| "eval_recall_weighted": 0.5606060606060606, | |
| "eval_runtime": 2.0949, | |
| "eval_samples_per_second": 63.009, | |
| "eval_steps_per_second": 2.387, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 15.066666666666666, | |
| "grad_norm": 4.093947887420654, | |
| "learning_rate": 5.530864197530864e-05, | |
| "loss": 0.3352, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 5.242745876312256, | |
| "learning_rate": 5.4814814814814817e-05, | |
| "loss": 0.2066, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 15.333333333333334, | |
| "grad_norm": 5.613947868347168, | |
| "learning_rate": 5.4320987654320986e-05, | |
| "loss": 0.3504, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 15.466666666666667, | |
| "grad_norm": 3.4319839477539062, | |
| "learning_rate": 5.382716049382717e-05, | |
| "loss": 0.2294, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "grad_norm": 6.01231575012207, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.2498, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 15.733333333333333, | |
| "grad_norm": 3.9071357250213623, | |
| "learning_rate": 5.28395061728395e-05, | |
| "loss": 0.2092, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 15.866666666666667, | |
| "grad_norm": 5.718769550323486, | |
| "learning_rate": 5.234567901234568e-05, | |
| "loss": 0.2223, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 4.071746349334717, | |
| "learning_rate": 5.185185185185185e-05, | |
| "loss": 0.1773, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6515151515151515, | |
| "eval_f1_macro": 0.592019301793738, | |
| "eval_f1_micro": 0.6515151515151515, | |
| "eval_f1_weighted": 0.6407837434153223, | |
| "eval_loss": 1.2568100690841675, | |
| "eval_precision_macro": 0.6893528941196284, | |
| "eval_precision_micro": 0.6515151515151515, | |
| "eval_precision_weighted": 0.6623135907365115, | |
| "eval_recall_macro": 0.5842857142857143, | |
| "eval_recall_micro": 0.6515151515151515, | |
| "eval_recall_weighted": 0.6515151515151515, | |
| "eval_runtime": 1.9927, | |
| "eval_samples_per_second": 66.24, | |
| "eval_steps_per_second": 2.509, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 16.133333333333333, | |
| "grad_norm": 3.561516761779785, | |
| "learning_rate": 5.135802469135803e-05, | |
| "loss": 0.1696, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 16.266666666666666, | |
| "grad_norm": 1.3526779413223267, | |
| "learning_rate": 5.0864197530864197e-05, | |
| "loss": 0.0665, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "grad_norm": 4.29080057144165, | |
| "learning_rate": 5.0370370370370366e-05, | |
| "loss": 0.195, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 16.533333333333335, | |
| "grad_norm": 6.229769706726074, | |
| "learning_rate": 4.987654320987655e-05, | |
| "loss": 0.2993, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 16.666666666666668, | |
| "grad_norm": 4.949665546417236, | |
| "learning_rate": 4.938271604938271e-05, | |
| "loss": 0.2081, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "grad_norm": 6.123852252960205, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 0.212, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 16.933333333333334, | |
| "grad_norm": 4.0239105224609375, | |
| "learning_rate": 4.8395061728395067e-05, | |
| "loss": 0.3259, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.6060606060606061, | |
| "eval_f1_macro": 0.5467242234296787, | |
| "eval_f1_micro": 0.6060606060606061, | |
| "eval_f1_weighted": 0.5961390083174005, | |
| "eval_loss": 1.3405537605285645, | |
| "eval_precision_macro": 0.5614736217067472, | |
| "eval_precision_micro": 0.6060606060606061, | |
| "eval_precision_weighted": 0.6033042542530208, | |
| "eval_recall_macro": 0.5466817838246409, | |
| "eval_recall_micro": 0.6060606060606061, | |
| "eval_recall_weighted": 0.6060606060606061, | |
| "eval_runtime": 2.0502, | |
| "eval_samples_per_second": 64.382, | |
| "eval_steps_per_second": 2.439, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 17.066666666666666, | |
| "grad_norm": 1.4321271181106567, | |
| "learning_rate": 4.7901234567901237e-05, | |
| "loss": 0.055, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "grad_norm": 2.13454008102417, | |
| "learning_rate": 4.740740740740741e-05, | |
| "loss": 0.1221, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 17.333333333333332, | |
| "grad_norm": 5.276524066925049, | |
| "learning_rate": 4.691358024691358e-05, | |
| "loss": 0.1417, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 17.466666666666665, | |
| "grad_norm": 3.8555052280426025, | |
| "learning_rate": 4.641975308641975e-05, | |
| "loss": 0.2943, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "grad_norm": 4.094534873962402, | |
| "learning_rate": 4.592592592592593e-05, | |
| "loss": 0.2206, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 17.733333333333334, | |
| "grad_norm": 4.184159278869629, | |
| "learning_rate": 4.54320987654321e-05, | |
| "loss": 0.1565, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 17.866666666666667, | |
| "grad_norm": 5.283144474029541, | |
| "learning_rate": 4.493827160493828e-05, | |
| "loss": 0.1427, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 3.6470813751220703, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.1123, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6363636363636364, | |
| "eval_f1_macro": 0.5867719657675725, | |
| "eval_f1_micro": 0.6363636363636364, | |
| "eval_f1_weighted": 0.6305501232595613, | |
| "eval_loss": 1.376707911491394, | |
| "eval_precision_macro": 0.6257631257631259, | |
| "eval_precision_micro": 0.6363636363636364, | |
| "eval_precision_weighted": 0.6413447663447664, | |
| "eval_recall_macro": 0.5785109599395314, | |
| "eval_recall_micro": 0.6363636363636364, | |
| "eval_recall_weighted": 0.6363636363636364, | |
| "eval_runtime": 1.992, | |
| "eval_samples_per_second": 66.266, | |
| "eval_steps_per_second": 2.51, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 18.133333333333333, | |
| "grad_norm": 3.1710643768310547, | |
| "learning_rate": 4.3950617283950617e-05, | |
| "loss": 0.1219, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 18.266666666666666, | |
| "grad_norm": 7.098196506500244, | |
| "learning_rate": 4.345679012345679e-05, | |
| "loss": 0.1588, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "grad_norm": 1.8567241430282593, | |
| "learning_rate": 4.296296296296296e-05, | |
| "loss": 0.1043, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 18.533333333333335, | |
| "grad_norm": 2.1221156120300293, | |
| "learning_rate": 4.246913580246914e-05, | |
| "loss": 0.0748, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 18.666666666666668, | |
| "grad_norm": 3.03196120262146, | |
| "learning_rate": 4.197530864197531e-05, | |
| "loss": 0.1148, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "grad_norm": 1.7942876815795898, | |
| "learning_rate": 4.148148148148148e-05, | |
| "loss": 0.0679, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 18.933333333333334, | |
| "grad_norm": 4.499013900756836, | |
| "learning_rate": 4.0987654320987657e-05, | |
| "loss": 0.1129, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6439393939393939, | |
| "eval_f1_macro": 0.587916778045086, | |
| "eval_f1_micro": 0.6439393939393939, | |
| "eval_f1_weighted": 0.6305576751206262, | |
| "eval_loss": 1.4679865837097168, | |
| "eval_precision_macro": 0.6809288563910413, | |
| "eval_precision_micro": 0.6439393939393939, | |
| "eval_precision_weighted": 0.6932697872537444, | |
| "eval_recall_macro": 0.5806046863189721, | |
| "eval_recall_micro": 0.6439393939393939, | |
| "eval_recall_weighted": 0.6439393939393939, | |
| "eval_runtime": 1.9847, | |
| "eval_samples_per_second": 66.508, | |
| "eval_steps_per_second": 2.519, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 19.066666666666666, | |
| "grad_norm": 2.631176233291626, | |
| "learning_rate": 4.049382716049383e-05, | |
| "loss": 0.1028, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "grad_norm": 4.930914402008057, | |
| "learning_rate": 4e-05, | |
| "loss": 0.2555, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 19.333333333333332, | |
| "grad_norm": 3.355149745941162, | |
| "learning_rate": 3.950617283950617e-05, | |
| "loss": 0.0792, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 19.466666666666665, | |
| "grad_norm": 2.2780933380126953, | |
| "learning_rate": 3.901234567901234e-05, | |
| "loss": 0.0595, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "grad_norm": 4.880768299102783, | |
| "learning_rate": 3.851851851851852e-05, | |
| "loss": 0.0756, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 19.733333333333334, | |
| "grad_norm": 2.175165891647339, | |
| "learning_rate": 3.80246913580247e-05, | |
| "loss": 0.1077, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 19.866666666666667, | |
| "grad_norm": 2.6557981967926025, | |
| "learning_rate": 3.7530864197530867e-05, | |
| "loss": 0.1094, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.2508912086486816, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.0651, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.6893939393939394, | |
| "eval_f1_macro": 0.6655257312106627, | |
| "eval_f1_micro": 0.6893939393939394, | |
| "eval_f1_weighted": 0.687595503348928, | |
| "eval_loss": 1.4981398582458496, | |
| "eval_precision_macro": 0.7114991648833447, | |
| "eval_precision_micro": 0.6893939393939394, | |
| "eval_precision_weighted": 0.7224498247915767, | |
| "eval_recall_macro": 0.6510808767951625, | |
| "eval_recall_micro": 0.6893939393939394, | |
| "eval_recall_weighted": 0.6893939393939394, | |
| "eval_runtime": 1.9861, | |
| "eval_samples_per_second": 66.462, | |
| "eval_steps_per_second": 2.517, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 20.133333333333333, | |
| "grad_norm": 5.263727188110352, | |
| "learning_rate": 3.654320987654321e-05, | |
| "loss": 0.075, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 20.266666666666666, | |
| "grad_norm": 4.619281768798828, | |
| "learning_rate": 3.604938271604938e-05, | |
| "loss": 0.1319, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "grad_norm": 1.0995675325393677, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 0.0366, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 20.533333333333335, | |
| "grad_norm": 4.2385663986206055, | |
| "learning_rate": 3.506172839506173e-05, | |
| "loss": 0.1331, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 20.666666666666668, | |
| "grad_norm": 2.6913745403289795, | |
| "learning_rate": 3.45679012345679e-05, | |
| "loss": 0.0894, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "grad_norm": 4.785970687866211, | |
| "learning_rate": 3.4074074074074077e-05, | |
| "loss": 0.0756, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 20.933333333333334, | |
| "grad_norm": 1.5702877044677734, | |
| "learning_rate": 3.3580246913580247e-05, | |
| "loss": 0.0685, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.6515151515151515, | |
| "eval_f1_macro": 0.6091138915880551, | |
| "eval_f1_micro": 0.6515151515151515, | |
| "eval_f1_weighted": 0.6494256262321655, | |
| "eval_loss": 1.4620611667633057, | |
| "eval_precision_macro": 0.630280884283538, | |
| "eval_precision_micro": 0.6515151515151515, | |
| "eval_precision_weighted": 0.664075183502428, | |
| "eval_recall_macro": 0.6039682539682539, | |
| "eval_recall_micro": 0.6515151515151515, | |
| "eval_recall_weighted": 0.6515151515151515, | |
| "eval_runtime": 2.0276, | |
| "eval_samples_per_second": 65.103, | |
| "eval_steps_per_second": 2.466, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 21.066666666666666, | |
| "grad_norm": 0.6037698984146118, | |
| "learning_rate": 3.308641975308642e-05, | |
| "loss": 0.0537, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "grad_norm": 0.877955436706543, | |
| "learning_rate": 3.25925925925926e-05, | |
| "loss": 0.0283, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 21.333333333333332, | |
| "grad_norm": 4.185865879058838, | |
| "learning_rate": 3.209876543209876e-05, | |
| "loss": 0.1153, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 21.466666666666665, | |
| "grad_norm": 0.7465834021568298, | |
| "learning_rate": 3.160493827160494e-05, | |
| "loss": 0.0311, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "grad_norm": 1.4049850702285767, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 0.0641, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 21.733333333333334, | |
| "grad_norm": 1.6191234588623047, | |
| "learning_rate": 3.061728395061729e-05, | |
| "loss": 0.0417, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 21.866666666666667, | |
| "grad_norm": 1.2088876962661743, | |
| "learning_rate": 3.012345679012346e-05, | |
| "loss": 0.0314, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.7652052640914917, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.1469, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6212121212121212, | |
| "eval_f1_macro": 0.5330299221627766, | |
| "eval_f1_micro": 0.6212121212121212, | |
| "eval_f1_weighted": 0.604041002442862, | |
| "eval_loss": 1.534732699394226, | |
| "eval_precision_macro": 0.5476940619507992, | |
| "eval_precision_micro": 0.6212121212121212, | |
| "eval_precision_weighted": 0.6148931558944467, | |
| "eval_recall_macro": 0.5439984882842026, | |
| "eval_recall_micro": 0.6212121212121212, | |
| "eval_recall_weighted": 0.6212121212121212, | |
| "eval_runtime": 1.9747, | |
| "eval_samples_per_second": 66.844, | |
| "eval_steps_per_second": 2.532, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 22.133333333333333, | |
| "grad_norm": 3.304185152053833, | |
| "learning_rate": 2.9135802469135803e-05, | |
| "loss": 0.0456, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 22.266666666666666, | |
| "grad_norm": 2.3118255138397217, | |
| "learning_rate": 2.8641975308641977e-05, | |
| "loss": 0.0377, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "grad_norm": 2.3639698028564453, | |
| "learning_rate": 2.814814814814815e-05, | |
| "loss": 0.0708, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 22.533333333333335, | |
| "grad_norm": 1.741746187210083, | |
| "learning_rate": 2.765432098765432e-05, | |
| "loss": 0.0353, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 22.666666666666668, | |
| "grad_norm": 0.6108101010322571, | |
| "learning_rate": 2.7160493827160493e-05, | |
| "loss": 0.0531, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "grad_norm": 2.961045503616333, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.0394, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 22.933333333333334, | |
| "grad_norm": 1.7298003435134888, | |
| "learning_rate": 2.617283950617284e-05, | |
| "loss": 0.0289, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.6287878787878788, | |
| "eval_f1_macro": 0.5465784005632545, | |
| "eval_f1_micro": 0.6287878787878788, | |
| "eval_f1_weighted": 0.6179920372130975, | |
| "eval_loss": 1.5417176485061646, | |
| "eval_precision_macro": 0.5409361471861472, | |
| "eval_precision_micro": 0.6287878787878788, | |
| "eval_precision_weighted": 0.610816976584022, | |
| "eval_recall_macro": 0.5549206349206349, | |
| "eval_recall_micro": 0.6287878787878788, | |
| "eval_recall_weighted": 0.6287878787878788, | |
| "eval_runtime": 1.9902, | |
| "eval_samples_per_second": 66.326, | |
| "eval_steps_per_second": 2.512, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 23.066666666666666, | |
| "grad_norm": 0.7690654397010803, | |
| "learning_rate": 2.5679012345679017e-05, | |
| "loss": 0.0458, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "grad_norm": 3.320651054382324, | |
| "learning_rate": 2.5185185185185183e-05, | |
| "loss": 0.0804, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 23.333333333333332, | |
| "grad_norm": 2.0301012992858887, | |
| "learning_rate": 2.4691358024691357e-05, | |
| "loss": 0.0279, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 23.466666666666665, | |
| "grad_norm": 0.4531901478767395, | |
| "learning_rate": 2.4197530864197533e-05, | |
| "loss": 0.0139, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "grad_norm": 2.56703519821167, | |
| "learning_rate": 2.3703703703703707e-05, | |
| "loss": 0.0783, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 23.733333333333334, | |
| "grad_norm": 0.20635652542114258, | |
| "learning_rate": 2.3209876543209877e-05, | |
| "loss": 0.012, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 23.866666666666667, | |
| "grad_norm": 0.5930025577545166, | |
| "learning_rate": 2.271604938271605e-05, | |
| "loss": 0.0145, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.24041523039340973, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.01, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6363636363636364, | |
| "eval_f1_macro": 0.5474889044983636, | |
| "eval_f1_micro": 0.6363636363636364, | |
| "eval_f1_weighted": 0.6187343775995573, | |
| "eval_loss": 1.5670151710510254, | |
| "eval_precision_macro": 0.5434552419168567, | |
| "eval_precision_micro": 0.6363636363636364, | |
| "eval_precision_weighted": 0.6103857259761386, | |
| "eval_recall_macro": 0.5594179894179894, | |
| "eval_recall_micro": 0.6363636363636364, | |
| "eval_recall_weighted": 0.6363636363636364, | |
| "eval_runtime": 1.9405, | |
| "eval_samples_per_second": 68.023, | |
| "eval_steps_per_second": 2.577, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 24.133333333333333, | |
| "grad_norm": 0.21558411419391632, | |
| "learning_rate": 2.1728395061728397e-05, | |
| "loss": 0.0102, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 24.266666666666666, | |
| "grad_norm": 3.2394814491271973, | |
| "learning_rate": 2.123456790123457e-05, | |
| "loss": 0.0218, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "grad_norm": 3.6115405559539795, | |
| "learning_rate": 2.074074074074074e-05, | |
| "loss": 0.1149, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 24.533333333333335, | |
| "grad_norm": 0.1589735597372055, | |
| "learning_rate": 2.0246913580246917e-05, | |
| "loss": 0.0082, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 24.666666666666668, | |
| "grad_norm": 1.3840848207473755, | |
| "learning_rate": 1.9753086419753087e-05, | |
| "loss": 0.0174, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "grad_norm": 3.772754192352295, | |
| "learning_rate": 1.925925925925926e-05, | |
| "loss": 0.043, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 24.933333333333334, | |
| "grad_norm": 0.41601723432540894, | |
| "learning_rate": 1.8765432098765433e-05, | |
| "loss": 0.035, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.6363636363636364, | |
| "eval_f1_macro": 0.5529395694676043, | |
| "eval_f1_micro": 0.6363636363636364, | |
| "eval_f1_weighted": 0.6209326623035122, | |
| "eval_loss": 1.6037245988845825, | |
| "eval_precision_macro": 0.5470247238680418, | |
| "eval_precision_micro": 0.6363636363636364, | |
| "eval_precision_weighted": 0.6156263091746962, | |
| "eval_recall_macro": 0.5679213907785335, | |
| "eval_recall_micro": 0.6363636363636364, | |
| "eval_recall_weighted": 0.6363636363636364, | |
| "eval_runtime": 1.9551, | |
| "eval_samples_per_second": 67.515, | |
| "eval_steps_per_second": 2.557, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 25.066666666666666, | |
| "grad_norm": 0.4554837942123413, | |
| "learning_rate": 1.8271604938271607e-05, | |
| "loss": 0.0636, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "grad_norm": 4.667645454406738, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 0.0685, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 25.333333333333332, | |
| "grad_norm": 6.68064022064209, | |
| "learning_rate": 1.728395061728395e-05, | |
| "loss": 0.0919, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 25.466666666666665, | |
| "grad_norm": 0.2510056793689728, | |
| "learning_rate": 1.6790123456790123e-05, | |
| "loss": 0.0111, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "grad_norm": 0.6245520114898682, | |
| "learning_rate": 1.62962962962963e-05, | |
| "loss": 0.0134, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 25.733333333333334, | |
| "grad_norm": 2.165201187133789, | |
| "learning_rate": 1.580246913580247e-05, | |
| "loss": 0.0271, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 25.866666666666667, | |
| "grad_norm": 0.24112091958522797, | |
| "learning_rate": 1.5308641975308643e-05, | |
| "loss": 0.0105, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.377363383769989, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.0109, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.6212121212121212, | |
| "eval_f1_macro": 0.5896814040471776, | |
| "eval_f1_micro": 0.6212121212121212, | |
| "eval_f1_weighted": 0.6203213160225189, | |
| "eval_loss": 1.6751586198806763, | |
| "eval_precision_macro": 0.6144605795534588, | |
| "eval_precision_micro": 0.6212121212121212, | |
| "eval_precision_weighted": 0.6527441598649029, | |
| "eval_recall_macro": 0.5999697656840514, | |
| "eval_recall_micro": 0.6212121212121212, | |
| "eval_recall_weighted": 0.6212121212121212, | |
| "eval_runtime": 1.9656, | |
| "eval_samples_per_second": 67.154, | |
| "eval_steps_per_second": 2.544, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 26.133333333333333, | |
| "grad_norm": 0.3774866461753845, | |
| "learning_rate": 1.4320987654320988e-05, | |
| "loss": 0.0097, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 26.266666666666666, | |
| "grad_norm": 3.956695079803467, | |
| "learning_rate": 1.382716049382716e-05, | |
| "loss": 0.0233, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "grad_norm": 0.5877533555030823, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0157, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 26.533333333333335, | |
| "grad_norm": 1.2962318658828735, | |
| "learning_rate": 1.2839506172839508e-05, | |
| "loss": 0.0249, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "grad_norm": 2.2431485652923584, | |
| "learning_rate": 1.2345679012345678e-05, | |
| "loss": 0.0224, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "grad_norm": 0.21492817997932434, | |
| "learning_rate": 1.1851851851851853e-05, | |
| "loss": 0.0117, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 26.933333333333334, | |
| "grad_norm": 0.4237399697303772, | |
| "learning_rate": 1.1358024691358025e-05, | |
| "loss": 0.038, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.6136363636363636, | |
| "eval_f1_macro": 0.5343822919199936, | |
| "eval_f1_micro": 0.6136363636363636, | |
| "eval_f1_weighted": 0.6008425380028616, | |
| "eval_loss": 1.672375202178955, | |
| "eval_precision_macro": 0.5331553830282576, | |
| "eval_precision_micro": 0.6136363636363636, | |
| "eval_precision_weighted": 0.6004965634415023, | |
| "eval_recall_macro": 0.546832955404384, | |
| "eval_recall_micro": 0.6136363636363636, | |
| "eval_recall_weighted": 0.6136363636363636, | |
| "eval_runtime": 1.9801, | |
| "eval_samples_per_second": 66.662, | |
| "eval_steps_per_second": 2.525, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 27.066666666666666, | |
| "grad_norm": 1.5725435018539429, | |
| "learning_rate": 1.0864197530864198e-05, | |
| "loss": 0.0149, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "grad_norm": 0.13784648478031158, | |
| "learning_rate": 1.037037037037037e-05, | |
| "loss": 0.0092, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 27.333333333333332, | |
| "grad_norm": 0.09840863198041916, | |
| "learning_rate": 9.876543209876543e-06, | |
| "loss": 0.008, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 27.466666666666665, | |
| "grad_norm": 0.8349915146827698, | |
| "learning_rate": 9.382716049382717e-06, | |
| "loss": 0.0206, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "grad_norm": 0.33149102330207825, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 0.0173, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 27.733333333333334, | |
| "grad_norm": 0.3867279589176178, | |
| "learning_rate": 8.395061728395062e-06, | |
| "loss": 0.0093, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 27.866666666666667, | |
| "grad_norm": 1.726897120475769, | |
| "learning_rate": 7.901234567901235e-06, | |
| "loss": 0.0214, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.19306233525276184, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.0116, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6212121212121212, | |
| "eval_f1_macro": 0.5383730158730159, | |
| "eval_f1_micro": 0.6212121212121212, | |
| "eval_f1_weighted": 0.609029280904281, | |
| "eval_loss": 1.6251877546310425, | |
| "eval_precision_macro": 0.533696432596027, | |
| "eval_precision_micro": 0.6212121212121212, | |
| "eval_precision_weighted": 0.6033010121498966, | |
| "eval_recall_macro": 0.5490778533635676, | |
| "eval_recall_micro": 0.6212121212121212, | |
| "eval_recall_weighted": 0.6212121212121212, | |
| "eval_runtime": 1.9285, | |
| "eval_samples_per_second": 68.448, | |
| "eval_steps_per_second": 2.593, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 28.133333333333333, | |
| "grad_norm": 1.669783115386963, | |
| "learning_rate": 6.91358024691358e-06, | |
| "loss": 0.0318, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 28.266666666666666, | |
| "grad_norm": 0.6250646114349365, | |
| "learning_rate": 6.419753086419754e-06, | |
| "loss": 0.0195, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 28.4, | |
| "grad_norm": 0.4752732813358307, | |
| "learning_rate": 5.925925925925927e-06, | |
| "loss": 0.0124, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 28.533333333333335, | |
| "grad_norm": 0.16341274976730347, | |
| "learning_rate": 5.432098765432099e-06, | |
| "loss": 0.0065, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 28.666666666666668, | |
| "grad_norm": 0.08904340863227844, | |
| "learning_rate": 4.938271604938272e-06, | |
| "loss": 0.0062, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "grad_norm": 0.24332502484321594, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 0.0055, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 28.933333333333334, | |
| "grad_norm": 0.47205692529678345, | |
| "learning_rate": 3.9506172839506175e-06, | |
| "loss": 0.006, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.6363636363636364, | |
| "eval_f1_macro": 0.557191887992969, | |
| "eval_f1_micro": 0.6363636363636364, | |
| "eval_f1_weighted": 0.6294141170899599, | |
| "eval_loss": 1.597952961921692, | |
| "eval_precision_macro": 0.5529214559386972, | |
| "eval_precision_micro": 0.6363636363636364, | |
| "eval_precision_weighted": 0.6245954516428655, | |
| "eval_recall_macro": 0.563363567649282, | |
| "eval_recall_micro": 0.6363636363636364, | |
| "eval_recall_weighted": 0.6363636363636364, | |
| "eval_runtime": 1.9083, | |
| "eval_samples_per_second": 69.172, | |
| "eval_steps_per_second": 2.62, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 29.066666666666666, | |
| "grad_norm": 0.27642032504081726, | |
| "learning_rate": 3.45679012345679e-06, | |
| "loss": 0.0162, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "grad_norm": 0.9449041485786438, | |
| "learning_rate": 2.9629629629629633e-06, | |
| "loss": 0.0088, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 29.333333333333332, | |
| "grad_norm": 0.14337310194969177, | |
| "learning_rate": 2.469135802469136e-06, | |
| "loss": 0.0193, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 29.466666666666665, | |
| "grad_norm": 0.17881515622138977, | |
| "learning_rate": 1.9753086419753087e-06, | |
| "loss": 0.0191, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "grad_norm": 0.15386801958084106, | |
| "learning_rate": 1.4814814814814817e-06, | |
| "loss": 0.005, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 29.733333333333334, | |
| "grad_norm": 0.32567164301872253, | |
| "learning_rate": 9.876543209876544e-07, | |
| "loss": 0.0077, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 29.866666666666667, | |
| "grad_norm": 0.6249086260795593, | |
| "learning_rate": 4.938271604938272e-07, | |
| "loss": 0.0158, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.24103443324565887, | |
| "learning_rate": 0.0, | |
| "loss": 0.0046, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.6439393939393939, | |
| "eval_f1_macro": 0.5605037390491809, | |
| "eval_f1_micro": 0.6439393939393939, | |
| "eval_f1_weighted": 0.634156085647718, | |
| "eval_loss": 1.593876838684082, | |
| "eval_precision_macro": 0.5545634920634921, | |
| "eval_precision_micro": 0.6439393939393939, | |
| "eval_precision_weighted": 0.6269465488215488, | |
| "eval_recall_macro": 0.5686545729402873, | |
| "eval_recall_micro": 0.6439393939393939, | |
| "eval_recall_weighted": 0.6439393939393939, | |
| "eval_runtime": 2.0538, | |
| "eval_samples_per_second": 64.27, | |
| "eval_steps_per_second": 2.434, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 450, | |
| "total_flos": 1.0740871074163507e+18, | |
| "train_loss": 0.6009381743893027, | |
| "train_runtime": 318.0836, | |
| "train_samples_per_second": 43.573, | |
| "train_steps_per_second": 1.415 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0740871074163507e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |