{ "best_metric": 1.0249524116516113, "best_model_checkpoint": "AST-vocal-disorder-classification/checkpoint-570", "epoch": 10.0, "eval_steps": 1, "global_step": 570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017543859649122806, "grad_norm": null, "learning_rate": 0.0, "loss": 2.988, "step": 1 }, { "epoch": 0.03508771929824561, "grad_norm": null, "learning_rate": 0.0, "loss": 2.6954, "step": 2 }, { "epoch": 0.05263157894736842, "grad_norm": null, "learning_rate": 0.0, "loss": 2.2632, "step": 3 }, { "epoch": 0.07017543859649122, "grad_norm": 165.02626037597656, "learning_rate": 2e-08, "loss": 3.195, "step": 4 }, { "epoch": 0.08771929824561403, "grad_norm": 168.1376495361328, "learning_rate": 4e-08, "loss": 2.8908, "step": 5 }, { "epoch": 0.10526315789473684, "grad_norm": null, "learning_rate": 4e-08, "loss": 2.7552, "step": 6 }, { "epoch": 0.12280701754385964, "grad_norm": 158.0127716064453, "learning_rate": 6e-08, "loss": 3.1245, "step": 7 }, { "epoch": 0.14035087719298245, "grad_norm": 150.22560119628906, "learning_rate": 8e-08, "loss": 2.8198, "step": 8 }, { "epoch": 0.15789473684210525, "grad_norm": 133.4413299560547, "learning_rate": 1e-07, "loss": 2.4031, "step": 9 }, { "epoch": 0.17543859649122806, "grad_norm": 99.31004333496094, "learning_rate": 1.2e-07, "loss": 2.457, "step": 10 }, { "epoch": 0.19298245614035087, "grad_norm": 118.67373657226562, "learning_rate": 1.4e-07, "loss": 2.5123, "step": 11 }, { "epoch": 0.21052631578947367, "grad_norm": 132.25393676757812, "learning_rate": 1.6e-07, "loss": 2.5434, "step": 12 }, { "epoch": 0.22807017543859648, "grad_norm": 94.40389251708984, "learning_rate": 1.8e-07, "loss": 2.3049, "step": 13 }, { "epoch": 0.24561403508771928, "grad_norm": 125.89042663574219, "learning_rate": 2e-07, "loss": 2.5478, "step": 14 }, { "epoch": 0.2631578947368421, "grad_norm": 142.48435974121094, "learning_rate": 2.1999999999999998e-07, 
"loss": 2.8247, "step": 15 }, { "epoch": 0.2807017543859649, "grad_norm": 126.35132598876953, "learning_rate": 2.4e-07, "loss": 2.4798, "step": 16 }, { "epoch": 0.2982456140350877, "grad_norm": 151.13067626953125, "learning_rate": 2.6e-07, "loss": 2.7472, "step": 17 }, { "epoch": 0.3157894736842105, "grad_norm": 91.00186920166016, "learning_rate": 2.8e-07, "loss": 2.2366, "step": 18 }, { "epoch": 0.3333333333333333, "grad_norm": 81.35576629638672, "learning_rate": 3e-07, "loss": 2.3093, "step": 19 }, { "epoch": 0.3508771929824561, "grad_norm": 164.45582580566406, "learning_rate": 3.2e-07, "loss": 2.9604, "step": 20 }, { "epoch": 0.3684210526315789, "grad_norm": 118.8172378540039, "learning_rate": 3.4000000000000003e-07, "loss": 2.3687, "step": 21 }, { "epoch": 0.38596491228070173, "grad_norm": 136.17398071289062, "learning_rate": 3.6e-07, "loss": 2.3948, "step": 22 }, { "epoch": 0.40350877192982454, "grad_norm": 118.8042221069336, "learning_rate": 3.7999999999999996e-07, "loss": 2.2426, "step": 23 }, { "epoch": 0.42105263157894735, "grad_norm": 107.0354995727539, "learning_rate": 4e-07, "loss": 2.3865, "step": 24 }, { "epoch": 0.43859649122807015, "grad_norm": 77.19119262695312, "learning_rate": 4.1999999999999995e-07, "loss": 1.9904, "step": 25 }, { "epoch": 0.45614035087719296, "grad_norm": 140.62896728515625, "learning_rate": 4.3999999999999997e-07, "loss": 2.7652, "step": 26 }, { "epoch": 0.47368421052631576, "grad_norm": 75.05490112304688, "learning_rate": 4.6e-07, "loss": 2.075, "step": 27 }, { "epoch": 0.49122807017543857, "grad_norm": 117.8460922241211, "learning_rate": 4.8e-07, "loss": 2.5994, "step": 28 }, { "epoch": 0.5087719298245614, "grad_norm": 113.06562042236328, "learning_rate": 5e-07, "loss": 1.8539, "step": 29 }, { "epoch": 0.5263157894736842, "grad_norm": 110.89696502685547, "learning_rate": 5.2e-07, "loss": 2.1113, "step": 30 }, { "epoch": 0.543859649122807, "grad_norm": 98.19458770751953, "learning_rate": 5.4e-07, "loss": 2.1587, "step": 31 }, 
{ "epoch": 0.5614035087719298, "grad_norm": 66.95838928222656, "learning_rate": 5.6e-07, "loss": 1.8982, "step": 32 }, { "epoch": 0.5789473684210527, "grad_norm": 52.46982192993164, "learning_rate": 5.8e-07, "loss": 1.6926, "step": 33 }, { "epoch": 0.5964912280701754, "grad_norm": 79.77543640136719, "learning_rate": 6e-07, "loss": 1.9471, "step": 34 }, { "epoch": 0.6140350877192983, "grad_norm": 116.57611846923828, "learning_rate": 6.2e-07, "loss": 1.799, "step": 35 }, { "epoch": 0.631578947368421, "grad_norm": 91.27916717529297, "learning_rate": 6.4e-07, "loss": 2.2582, "step": 36 }, { "epoch": 0.6491228070175439, "grad_norm": 79.36409759521484, "learning_rate": 6.6e-07, "loss": 2.1673, "step": 37 }, { "epoch": 0.6666666666666666, "grad_norm": 65.35518646240234, "learning_rate": 6.800000000000001e-07, "loss": 1.7037, "step": 38 }, { "epoch": 0.6842105263157895, "grad_norm": 42.132991790771484, "learning_rate": 7e-07, "loss": 1.4278, "step": 39 }, { "epoch": 0.7017543859649122, "grad_norm": 51.01911544799805, "learning_rate": 7.2e-07, "loss": 1.7739, "step": 40 }, { "epoch": 0.7192982456140351, "grad_norm": 65.60211181640625, "learning_rate": 7.4e-07, "loss": 1.7796, "step": 41 }, { "epoch": 0.7368421052631579, "grad_norm": 48.78630828857422, "learning_rate": 7.599999999999999e-07, "loss": 1.5501, "step": 42 }, { "epoch": 0.7543859649122807, "grad_norm": 29.828662872314453, "learning_rate": 7.799999999999999e-07, "loss": 1.56, "step": 43 }, { "epoch": 0.7719298245614035, "grad_norm": 50.522647857666016, "learning_rate": 8e-07, "loss": 1.4566, "step": 44 }, { "epoch": 0.7894736842105263, "grad_norm": 52.197025299072266, "learning_rate": 8.199999999999999e-07, "loss": 1.5521, "step": 45 }, { "epoch": 0.8070175438596491, "grad_norm": 41.581092834472656, "learning_rate": 8.399999999999999e-07, "loss": 1.6864, "step": 46 }, { "epoch": 0.8245614035087719, "grad_norm": 61.20100021362305, "learning_rate": 8.599999999999999e-07, "loss": 1.3111, "step": 47 }, { "epoch": 
0.8421052631578947, "grad_norm": 39.471736907958984, "learning_rate": 8.799999999999999e-07, "loss": 1.6241, "step": 48 }, { "epoch": 0.8596491228070176, "grad_norm": 39.578338623046875, "learning_rate": 9e-07, "loss": 1.4799, "step": 49 }, { "epoch": 0.8771929824561403, "grad_norm": 57.32265090942383, "learning_rate": 9.2e-07, "loss": 1.8162, "step": 50 }, { "epoch": 0.8947368421052632, "grad_norm": 37.478973388671875, "learning_rate": 9.399999999999999e-07, "loss": 1.5859, "step": 51 }, { "epoch": 0.9122807017543859, "grad_norm": 45.59309387207031, "learning_rate": 9.6e-07, "loss": 1.3365, "step": 52 }, { "epoch": 0.9298245614035088, "grad_norm": 47.914791107177734, "learning_rate": 9.8e-07, "loss": 1.7568, "step": 53 }, { "epoch": 0.9473684210526315, "grad_norm": 31.730653762817383, "learning_rate": 1e-06, "loss": 1.6654, "step": 54 }, { "epoch": 0.9649122807017544, "grad_norm": 47.1168327331543, "learning_rate": 9.98076923076923e-07, "loss": 1.2154, "step": 55 }, { "epoch": 0.9824561403508771, "grad_norm": 34.07067108154297, "learning_rate": 9.961538461538461e-07, "loss": 1.296, "step": 56 }, { "epoch": 1.0, "grad_norm": 32.9833869934082, "learning_rate": 9.942307692307691e-07, "loss": 1.5427, "step": 57 }, { "epoch": 1.0, "eval_accuracy": 0.3831417624521073, "eval_loss": 1.5033824443817139, "eval_roc_auc": 0.5521317875556301, "eval_runtime": 24.2654, "eval_samples_per_second": 10.756, "eval_steps_per_second": 0.701, "step": 57 }, { "epoch": 1.0175438596491229, "grad_norm": 21.84532356262207, "learning_rate": 9.923076923076923e-07, "loss": 1.3469, "step": 58 }, { "epoch": 1.0350877192982457, "grad_norm": 54.183753967285156, "learning_rate": 9.903846153846153e-07, "loss": 1.5014, "step": 59 }, { "epoch": 1.0526315789473684, "grad_norm": 45.070030212402344, "learning_rate": 9.884615384615385e-07, "loss": 1.6736, "step": 60 }, { "epoch": 1.0701754385964912, "grad_norm": 54.34022903442383, "learning_rate": 9.865384615384615e-07, "loss": 1.7181, "step": 61 }, { 
"epoch": 1.087719298245614, "grad_norm": 61.96958923339844, "learning_rate": 9.846153846153847e-07, "loss": 1.7463, "step": 62 }, { "epoch": 1.1052631578947367, "grad_norm": 38.20405960083008, "learning_rate": 9.826923076923076e-07, "loss": 1.516, "step": 63 }, { "epoch": 1.1228070175438596, "grad_norm": 31.333251953125, "learning_rate": 9.807692307692306e-07, "loss": 1.6652, "step": 64 }, { "epoch": 1.1403508771929824, "grad_norm": 33.82837677001953, "learning_rate": 9.788461538461538e-07, "loss": 1.3185, "step": 65 }, { "epoch": 1.1578947368421053, "grad_norm": 44.01963424682617, "learning_rate": 9.769230769230768e-07, "loss": 1.2599, "step": 66 }, { "epoch": 1.1754385964912282, "grad_norm": 41.35686492919922, "learning_rate": 9.75e-07, "loss": 1.4831, "step": 67 }, { "epoch": 1.1929824561403508, "grad_norm": 37.442283630371094, "learning_rate": 9.73076923076923e-07, "loss": 1.6914, "step": 68 }, { "epoch": 1.2105263157894737, "grad_norm": 45.21449661254883, "learning_rate": 9.711538461538462e-07, "loss": 1.0162, "step": 69 }, { "epoch": 1.2280701754385965, "grad_norm": 37.826507568359375, "learning_rate": 9.692307692307691e-07, "loss": 1.5321, "step": 70 }, { "epoch": 1.2456140350877192, "grad_norm": 81.76914978027344, "learning_rate": 9.673076923076923e-07, "loss": 1.7058, "step": 71 }, { "epoch": 1.263157894736842, "grad_norm": 29.697542190551758, "learning_rate": 9.653846153846153e-07, "loss": 1.1953, "step": 72 }, { "epoch": 1.280701754385965, "grad_norm": 25.966896057128906, "learning_rate": 9.634615384615385e-07, "loss": 1.2336, "step": 73 }, { "epoch": 1.2982456140350878, "grad_norm": 27.649991989135742, "learning_rate": 9.615384615384615e-07, "loss": 1.4547, "step": 74 }, { "epoch": 1.3157894736842106, "grad_norm": 85.1219482421875, "learning_rate": 9.596153846153847e-07, "loss": 1.8594, "step": 75 }, { "epoch": 1.3333333333333333, "grad_norm": 45.845516204833984, "learning_rate": 9.576923076923077e-07, "loss": 1.4723, "step": 76 }, { "epoch": 
1.3508771929824561, "grad_norm": 33.39055633544922, "learning_rate": 9.557692307692309e-07, "loss": 1.3105, "step": 77 }, { "epoch": 1.368421052631579, "grad_norm": 32.90497970581055, "learning_rate": 9.538461538461538e-07, "loss": 1.551, "step": 78 }, { "epoch": 1.3859649122807016, "grad_norm": 18.763742446899414, "learning_rate": 9.519230769230768e-07, "loss": 1.1766, "step": 79 }, { "epoch": 1.4035087719298245, "grad_norm": 38.28258514404297, "learning_rate": 9.499999999999999e-07, "loss": 1.3663, "step": 80 }, { "epoch": 1.4210526315789473, "grad_norm": 44.95620346069336, "learning_rate": 9.48076923076923e-07, "loss": 1.4312, "step": 81 }, { "epoch": 1.4385964912280702, "grad_norm": 46.334720611572266, "learning_rate": 9.461538461538461e-07, "loss": 1.5499, "step": 82 }, { "epoch": 1.456140350877193, "grad_norm": 24.238452911376953, "learning_rate": 9.442307692307692e-07, "loss": 1.4905, "step": 83 }, { "epoch": 1.4736842105263157, "grad_norm": 33.23431396484375, "learning_rate": 9.423076923076923e-07, "loss": 1.3647, "step": 84 }, { "epoch": 1.4912280701754386, "grad_norm": 46.839908599853516, "learning_rate": 9.403846153846153e-07, "loss": 1.7358, "step": 85 }, { "epoch": 1.5087719298245614, "grad_norm": 27.96128273010254, "learning_rate": 9.384615384615384e-07, "loss": 1.4824, "step": 86 }, { "epoch": 1.526315789473684, "grad_norm": 47.32505416870117, "learning_rate": 9.365384615384615e-07, "loss": 1.5127, "step": 87 }, { "epoch": 1.543859649122807, "grad_norm": 31.273122787475586, "learning_rate": 9.346153846153846e-07, "loss": 1.1443, "step": 88 }, { "epoch": 1.5614035087719298, "grad_norm": 47.530086517333984, "learning_rate": 9.326923076923077e-07, "loss": 1.4594, "step": 89 }, { "epoch": 1.5789473684210527, "grad_norm": 52.47103500366211, "learning_rate": 9.307692307692308e-07, "loss": 1.3066, "step": 90 }, { "epoch": 1.5964912280701755, "grad_norm": 48.503936767578125, "learning_rate": 9.288461538461539e-07, "loss": 1.2926, "step": 91 }, { "epoch": 
1.6140350877192984, "grad_norm": 37.38385009765625, "learning_rate": 9.26923076923077e-07, "loss": 1.2452, "step": 92 }, { "epoch": 1.631578947368421, "grad_norm": 69.00371551513672, "learning_rate": 9.25e-07, "loss": 1.7103, "step": 93 }, { "epoch": 1.6491228070175439, "grad_norm": 20.759923934936523, "learning_rate": 9.230769230769231e-07, "loss": 1.4222, "step": 94 }, { "epoch": 1.6666666666666665, "grad_norm": 17.494487762451172, "learning_rate": 9.211538461538461e-07, "loss": 1.3884, "step": 95 }, { "epoch": 1.6842105263157894, "grad_norm": 72.35301971435547, "learning_rate": 9.192307692307692e-07, "loss": 1.7956, "step": 96 }, { "epoch": 1.7017543859649122, "grad_norm": 58.388648986816406, "learning_rate": 9.173076923076922e-07, "loss": 1.3706, "step": 97 }, { "epoch": 1.719298245614035, "grad_norm": 32.08078384399414, "learning_rate": 9.153846153846153e-07, "loss": 1.3232, "step": 98 }, { "epoch": 1.736842105263158, "grad_norm": 43.424861907958984, "learning_rate": 9.134615384615383e-07, "loss": 1.0919, "step": 99 }, { "epoch": 1.7543859649122808, "grad_norm": 43.55315017700195, "learning_rate": 9.115384615384614e-07, "loss": 1.3153, "step": 100 }, { "epoch": 1.7719298245614035, "grad_norm": 12.423308372497559, "learning_rate": 9.096153846153845e-07, "loss": 1.2841, "step": 101 }, { "epoch": 1.7894736842105263, "grad_norm": 32.44642639160156, "learning_rate": 9.076923076923076e-07, "loss": 1.2104, "step": 102 }, { "epoch": 1.807017543859649, "grad_norm": 74.81303405761719, "learning_rate": 9.057692307692307e-07, "loss": 0.9296, "step": 103 }, { "epoch": 1.8245614035087718, "grad_norm": 62.9661979675293, "learning_rate": 9.038461538461538e-07, "loss": 1.1964, "step": 104 }, { "epoch": 1.8421052631578947, "grad_norm": 52.237205505371094, "learning_rate": 9.019230769230769e-07, "loss": 1.3407, "step": 105 }, { "epoch": 1.8596491228070176, "grad_norm": 26.808528900146484, "learning_rate": 9e-07, "loss": 1.1248, "step": 106 }, { "epoch": 1.8771929824561404, 
"grad_norm": 28.834348678588867, "learning_rate": 8.98076923076923e-07, "loss": 1.1314, "step": 107 }, { "epoch": 1.8947368421052633, "grad_norm": 58.954833984375, "learning_rate": 8.961538461538461e-07, "loss": 1.1691, "step": 108 }, { "epoch": 1.912280701754386, "grad_norm": 24.54899024963379, "learning_rate": 8.942307692307692e-07, "loss": 1.3596, "step": 109 }, { "epoch": 1.9298245614035088, "grad_norm": 38.63195037841797, "learning_rate": 8.923076923076923e-07, "loss": 1.1972, "step": 110 }, { "epoch": 1.9473684210526314, "grad_norm": 40.493141174316406, "learning_rate": 8.903846153846153e-07, "loss": 1.4167, "step": 111 }, { "epoch": 1.9649122807017543, "grad_norm": 25.149085998535156, "learning_rate": 8.884615384615384e-07, "loss": 1.1808, "step": 112 }, { "epoch": 1.9824561403508771, "grad_norm": 26.5919132232666, "learning_rate": 8.865384615384615e-07, "loss": 1.2026, "step": 113 }, { "epoch": 2.0, "grad_norm": 27.22132682800293, "learning_rate": 8.846153846153846e-07, "loss": 1.1829, "step": 114 }, { "epoch": 2.0, "eval_accuracy": 0.44061302681992337, "eval_loss": 1.3389430046081543, "eval_roc_auc": 0.7114645372622986, "eval_runtime": 22.9802, "eval_samples_per_second": 11.358, "eval_steps_per_second": 0.74, "step": 114 }, { "epoch": 2.017543859649123, "grad_norm": 45.17961883544922, "learning_rate": 8.826923076923076e-07, "loss": 1.2122, "step": 115 }, { "epoch": 2.0350877192982457, "grad_norm": 39.010257720947266, "learning_rate": 8.807692307692307e-07, "loss": 1.143, "step": 116 }, { "epoch": 2.0526315789473686, "grad_norm": 45.62030029296875, "learning_rate": 8.788461538461538e-07, "loss": 1.4336, "step": 117 }, { "epoch": 2.0701754385964914, "grad_norm": 39.72404098510742, "learning_rate": 8.769230769230769e-07, "loss": 1.2491, "step": 118 }, { "epoch": 2.087719298245614, "grad_norm": 60.91448211669922, "learning_rate": 8.75e-07, "loss": 1.3879, "step": 119 }, { "epoch": 2.1052631578947367, "grad_norm": 22.340192794799805, "learning_rate": 
8.730769230769231e-07, "loss": 1.1815, "step": 120 }, { "epoch": 2.1228070175438596, "grad_norm": 51.66433334350586, "learning_rate": 8.711538461538462e-07, "loss": 1.3663, "step": 121 }, { "epoch": 2.1403508771929824, "grad_norm": 34.69876480102539, "learning_rate": 8.692307692307692e-07, "loss": 0.8601, "step": 122 }, { "epoch": 2.1578947368421053, "grad_norm": 31.769758224487305, "learning_rate": 8.673076923076923e-07, "loss": 1.067, "step": 123 }, { "epoch": 2.175438596491228, "grad_norm": 15.30595588684082, "learning_rate": 8.653846153846154e-07, "loss": 1.2825, "step": 124 }, { "epoch": 2.192982456140351, "grad_norm": 50.3326416015625, "learning_rate": 8.634615384615385e-07, "loss": 1.2839, "step": 125 }, { "epoch": 2.2105263157894735, "grad_norm": 39.54274368286133, "learning_rate": 8.615384615384616e-07, "loss": 1.2719, "step": 126 }, { "epoch": 2.2280701754385963, "grad_norm": 40.03786087036133, "learning_rate": 8.596153846153846e-07, "loss": 1.2902, "step": 127 }, { "epoch": 2.245614035087719, "grad_norm": 25.278501510620117, "learning_rate": 8.576923076923076e-07, "loss": 0.9576, "step": 128 }, { "epoch": 2.263157894736842, "grad_norm": 74.27449035644531, "learning_rate": 8.557692307692306e-07, "loss": 1.6626, "step": 129 }, { "epoch": 2.280701754385965, "grad_norm": 49.429100036621094, "learning_rate": 8.538461538461537e-07, "loss": 1.1279, "step": 130 }, { "epoch": 2.2982456140350878, "grad_norm": 66.35237121582031, "learning_rate": 8.519230769230768e-07, "loss": 1.3382, "step": 131 }, { "epoch": 2.3157894736842106, "grad_norm": 54.39750671386719, "learning_rate": 8.499999999999999e-07, "loss": 1.1413, "step": 132 }, { "epoch": 2.3333333333333335, "grad_norm": 39.203617095947266, "learning_rate": 8.48076923076923e-07, "loss": 1.646, "step": 133 }, { "epoch": 2.3508771929824563, "grad_norm": 40.577457427978516, "learning_rate": 8.461538461538461e-07, "loss": 1.5547, "step": 134 }, { "epoch": 2.3684210526315788, "grad_norm": 61.2403564453125, 
"learning_rate": 8.442307692307692e-07, "loss": 1.4927, "step": 135 }, { "epoch": 2.3859649122807016, "grad_norm": 27.469097137451172, "learning_rate": 8.423076923076923e-07, "loss": 1.2704, "step": 136 }, { "epoch": 2.4035087719298245, "grad_norm": 41.40801239013672, "learning_rate": 8.403846153846153e-07, "loss": 1.12, "step": 137 }, { "epoch": 2.4210526315789473, "grad_norm": 31.15001106262207, "learning_rate": 8.384615384615384e-07, "loss": 1.1188, "step": 138 }, { "epoch": 2.43859649122807, "grad_norm": 48.32929611206055, "learning_rate": 8.365384615384615e-07, "loss": 0.9936, "step": 139 }, { "epoch": 2.456140350877193, "grad_norm": 43.33560562133789, "learning_rate": 8.346153846153846e-07, "loss": 1.5216, "step": 140 }, { "epoch": 2.473684210526316, "grad_norm": 34.28059387207031, "learning_rate": 8.326923076923077e-07, "loss": 1.328, "step": 141 }, { "epoch": 2.4912280701754383, "grad_norm": 55.07752227783203, "learning_rate": 8.307692307692308e-07, "loss": 1.3941, "step": 142 }, { "epoch": 2.5087719298245617, "grad_norm": 33.26469421386719, "learning_rate": 8.288461538461539e-07, "loss": 1.0795, "step": 143 }, { "epoch": 2.526315789473684, "grad_norm": 41.03183364868164, "learning_rate": 8.269230769230768e-07, "loss": 1.1216, "step": 144 }, { "epoch": 2.543859649122807, "grad_norm": 27.419574737548828, "learning_rate": 8.249999999999999e-07, "loss": 1.2125, "step": 145 }, { "epoch": 2.56140350877193, "grad_norm": 34.500667572021484, "learning_rate": 8.23076923076923e-07, "loss": 0.9645, "step": 146 }, { "epoch": 2.5789473684210527, "grad_norm": 33.50322341918945, "learning_rate": 8.211538461538461e-07, "loss": 1.1995, "step": 147 }, { "epoch": 2.5964912280701755, "grad_norm": 73.93108367919922, "learning_rate": 8.192307692307692e-07, "loss": 1.7082, "step": 148 }, { "epoch": 2.6140350877192984, "grad_norm": 14.201051712036133, "learning_rate": 8.173076923076923e-07, "loss": 1.1467, "step": 149 }, { "epoch": 2.6315789473684212, "grad_norm": 
23.20082664489746, "learning_rate": 8.153846153846154e-07, "loss": 1.2548, "step": 150 }, { "epoch": 2.6491228070175437, "grad_norm": 37.417213439941406, "learning_rate": 8.134615384615385e-07, "loss": 0.9575, "step": 151 }, { "epoch": 2.6666666666666665, "grad_norm": 69.82878112792969, "learning_rate": 8.115384615384615e-07, "loss": 1.3022, "step": 152 }, { "epoch": 2.6842105263157894, "grad_norm": 45.68886184692383, "learning_rate": 8.096153846153846e-07, "loss": 1.1804, "step": 153 }, { "epoch": 2.7017543859649122, "grad_norm": 51.7285270690918, "learning_rate": 8.076923076923077e-07, "loss": 1.3008, "step": 154 }, { "epoch": 2.719298245614035, "grad_norm": 51.00165939331055, "learning_rate": 8.057692307692308e-07, "loss": 1.2146, "step": 155 }, { "epoch": 2.736842105263158, "grad_norm": 56.5025634765625, "learning_rate": 8.038461538461538e-07, "loss": 1.7964, "step": 156 }, { "epoch": 2.754385964912281, "grad_norm": 29.800735473632812, "learning_rate": 8.019230769230769e-07, "loss": 1.0893, "step": 157 }, { "epoch": 2.7719298245614032, "grad_norm": 35.997886657714844, "learning_rate": 8e-07, "loss": 1.2403, "step": 158 }, { "epoch": 2.7894736842105265, "grad_norm": 65.59085083007812, "learning_rate": 7.98076923076923e-07, "loss": 1.0296, "step": 159 }, { "epoch": 2.807017543859649, "grad_norm": 57.628726959228516, "learning_rate": 7.96153846153846e-07, "loss": 1.3895, "step": 160 }, { "epoch": 2.824561403508772, "grad_norm": 51.50771713256836, "learning_rate": 7.942307692307691e-07, "loss": 0.7671, "step": 161 }, { "epoch": 2.8421052631578947, "grad_norm": 17.941665649414062, "learning_rate": 7.923076923076922e-07, "loss": 1.0704, "step": 162 }, { "epoch": 2.8596491228070176, "grad_norm": 52.35181427001953, "learning_rate": 7.903846153846153e-07, "loss": 1.13, "step": 163 }, { "epoch": 2.8771929824561404, "grad_norm": 42.02775955200195, "learning_rate": 7.884615384615384e-07, "loss": 0.8805, "step": 164 }, { "epoch": 2.8947368421052633, "grad_norm": 
58.180484771728516, "learning_rate": 7.865384615384615e-07, "loss": 1.1904, "step": 165 }, { "epoch": 2.912280701754386, "grad_norm": 30.56048583984375, "learning_rate": 7.846153846153846e-07, "loss": 1.1506, "step": 166 }, { "epoch": 2.9298245614035086, "grad_norm": 39.470184326171875, "learning_rate": 7.826923076923076e-07, "loss": 0.741, "step": 167 }, { "epoch": 2.9473684210526314, "grad_norm": 72.42061614990234, "learning_rate": 7.807692307692307e-07, "loss": 1.3581, "step": 168 }, { "epoch": 2.9649122807017543, "grad_norm": 42.25473403930664, "learning_rate": 7.788461538461538e-07, "loss": 1.3318, "step": 169 }, { "epoch": 2.982456140350877, "grad_norm": 28.385595321655273, "learning_rate": 7.769230769230769e-07, "loss": 1.0435, "step": 170 }, { "epoch": 3.0, "grad_norm": 44.694984436035156, "learning_rate": 7.75e-07, "loss": 1.2793, "step": 171 }, { "epoch": 3.0, "eval_accuracy": 0.5019157088122606, "eval_loss": 1.2615339756011963, "eval_roc_auc": 0.7588809381350943, "eval_runtime": 24.6917, "eval_samples_per_second": 10.57, "eval_steps_per_second": 0.688, "step": 171 }, { "epoch": 3.017543859649123, "grad_norm": 42.023494720458984, "learning_rate": 7.730769230769231e-07, "loss": 1.1126, "step": 172 }, { "epoch": 3.0350877192982457, "grad_norm": 79.69966125488281, "learning_rate": 7.711538461538462e-07, "loss": 1.2264, "step": 173 }, { "epoch": 3.0526315789473686, "grad_norm": 33.83009719848633, "learning_rate": 7.692307692307693e-07, "loss": 1.0632, "step": 174 }, { "epoch": 3.0701754385964914, "grad_norm": 42.74213409423828, "learning_rate": 7.673076923076923e-07, "loss": 0.7554, "step": 175 }, { "epoch": 3.087719298245614, "grad_norm": 29.90761375427246, "learning_rate": 7.653846153846153e-07, "loss": 1.1182, "step": 176 }, { "epoch": 3.1052631578947367, "grad_norm": 34.67997741699219, "learning_rate": 7.634615384615384e-07, "loss": 1.2303, "step": 177 }, { "epoch": 3.1228070175438596, "grad_norm": 28.055559158325195, "learning_rate": 
7.615384615384615e-07, "loss": 0.9286, "step": 178 }, { "epoch": 3.1403508771929824, "grad_norm": 48.56884765625, "learning_rate": 7.596153846153846e-07, "loss": 0.9129, "step": 179 }, { "epoch": 3.1578947368421053, "grad_norm": 23.18881607055664, "learning_rate": 7.576923076923077e-07, "loss": 1.1422, "step": 180 }, { "epoch": 3.175438596491228, "grad_norm": 19.716148376464844, "learning_rate": 7.557692307692308e-07, "loss": 1.1811, "step": 181 }, { "epoch": 3.192982456140351, "grad_norm": 23.179607391357422, "learning_rate": 7.538461538461538e-07, "loss": 0.9284, "step": 182 }, { "epoch": 3.2105263157894735, "grad_norm": 21.30498504638672, "learning_rate": 7.519230769230769e-07, "loss": 0.909, "step": 183 }, { "epoch": 3.2280701754385963, "grad_norm": 39.3275260925293, "learning_rate": 7.5e-07, "loss": 1.1318, "step": 184 }, { "epoch": 3.245614035087719, "grad_norm": 52.91360855102539, "learning_rate": 7.48076923076923e-07, "loss": 1.2176, "step": 185 }, { "epoch": 3.263157894736842, "grad_norm": 40.860450744628906, "learning_rate": 7.461538461538461e-07, "loss": 1.2245, "step": 186 }, { "epoch": 3.280701754385965, "grad_norm": 28.36117935180664, "learning_rate": 7.442307692307692e-07, "loss": 1.0225, "step": 187 }, { "epoch": 3.2982456140350878, "grad_norm": 40.48318099975586, "learning_rate": 7.423076923076923e-07, "loss": 1.289, "step": 188 }, { "epoch": 3.3157894736842106, "grad_norm": 27.249305725097656, "learning_rate": 7.403846153846153e-07, "loss": 1.2829, "step": 189 }, { "epoch": 3.3333333333333335, "grad_norm": 23.647472381591797, "learning_rate": 7.384615384615384e-07, "loss": 1.223, "step": 190 }, { "epoch": 3.3508771929824563, "grad_norm": 21.608871459960938, "learning_rate": 7.365384615384615e-07, "loss": 1.0349, "step": 191 }, { "epoch": 3.3684210526315788, "grad_norm": 49.232421875, "learning_rate": 7.346153846153846e-07, "loss": 1.1577, "step": 192 }, { "epoch": 3.3859649122807016, "grad_norm": 35.15854263305664, "learning_rate": 
7.326923076923076e-07, "loss": 0.8111, "step": 193 }, { "epoch": 3.4035087719298245, "grad_norm": 33.47184371948242, "learning_rate": 7.307692307692307e-07, "loss": 1.4127, "step": 194 }, { "epoch": 3.4210526315789473, "grad_norm": 38.76816940307617, "learning_rate": 7.288461538461538e-07, "loss": 1.2329, "step": 195 }, { "epoch": 3.43859649122807, "grad_norm": 64.3719253540039, "learning_rate": 7.269230769230769e-07, "loss": 1.4848, "step": 196 }, { "epoch": 3.456140350877193, "grad_norm": 26.278711318969727, "learning_rate": 7.249999999999999e-07, "loss": 1.1606, "step": 197 }, { "epoch": 3.473684210526316, "grad_norm": 33.774658203125, "learning_rate": 7.23076923076923e-07, "loss": 1.1549, "step": 198 }, { "epoch": 3.4912280701754383, "grad_norm": 27.449785232543945, "learning_rate": 7.211538461538461e-07, "loss": 0.7551, "step": 199 }, { "epoch": 3.5087719298245617, "grad_norm": 18.257455825805664, "learning_rate": 7.192307692307692e-07, "loss": 0.9322, "step": 200 }, { "epoch": 3.526315789473684, "grad_norm": 54.30673599243164, "learning_rate": 7.173076923076923e-07, "loss": 0.7284, "step": 201 }, { "epoch": 3.543859649122807, "grad_norm": 56.77223587036133, "learning_rate": 7.153846153846154e-07, "loss": 1.0322, "step": 202 }, { "epoch": 3.56140350877193, "grad_norm": 23.362255096435547, "learning_rate": 7.134615384615385e-07, "loss": 0.9139, "step": 203 }, { "epoch": 3.5789473684210527, "grad_norm": 46.12820816040039, "learning_rate": 7.115384615384616e-07, "loss": 0.9289, "step": 204 }, { "epoch": 3.5964912280701755, "grad_norm": 38.374629974365234, "learning_rate": 7.096153846153846e-07, "loss": 0.9759, "step": 205 }, { "epoch": 3.6140350877192984, "grad_norm": 40.8470458984375, "learning_rate": 7.076923076923077e-07, "loss": 0.9255, "step": 206 }, { "epoch": 3.6315789473684212, "grad_norm": 51.18167495727539, "learning_rate": 7.057692307692308e-07, "loss": 1.6224, "step": 207 }, { "epoch": 3.6491228070175437, "grad_norm": 65.96550750732422, 
"learning_rate": 7.038461538461539e-07, "loss": 1.4234, "step": 208 }, { "epoch": 3.6666666666666665, "grad_norm": 46.82773208618164, "learning_rate": 7.019230769230769e-07, "loss": 1.4326, "step": 209 }, { "epoch": 3.6842105263157894, "grad_norm": 29.189741134643555, "learning_rate": 7e-07, "loss": 0.9554, "step": 210 }, { "epoch": 3.7017543859649122, "grad_norm": 21.677045822143555, "learning_rate": 6.980769230769231e-07, "loss": 1.2323, "step": 211 }, { "epoch": 3.719298245614035, "grad_norm": 55.265682220458984, "learning_rate": 6.961538461538461e-07, "loss": 1.211, "step": 212 }, { "epoch": 3.736842105263158, "grad_norm": 31.52195167541504, "learning_rate": 6.942307692307691e-07, "loss": 0.9928, "step": 213 }, { "epoch": 3.754385964912281, "grad_norm": 70.80876159667969, "learning_rate": 6.923076923076922e-07, "loss": 1.4359, "step": 214 }, { "epoch": 3.7719298245614032, "grad_norm": 40.67531967163086, "learning_rate": 6.903846153846153e-07, "loss": 1.1827, "step": 215 }, { "epoch": 3.7894736842105265, "grad_norm": 33.12160873413086, "learning_rate": 6.884615384615384e-07, "loss": 1.0607, "step": 216 }, { "epoch": 3.807017543859649, "grad_norm": 47.94017791748047, "learning_rate": 6.865384615384615e-07, "loss": 0.8461, "step": 217 }, { "epoch": 3.824561403508772, "grad_norm": 46.735321044921875, "learning_rate": 6.846153846153846e-07, "loss": 0.8335, "step": 218 }, { "epoch": 3.8421052631578947, "grad_norm": 44.989681243896484, "learning_rate": 6.826923076923076e-07, "loss": 1.0917, "step": 219 }, { "epoch": 3.8596491228070176, "grad_norm": 33.04579162597656, "learning_rate": 6.807692307692307e-07, "loss": 0.8094, "step": 220 }, { "epoch": 3.8771929824561404, "grad_norm": 26.442249298095703, "learning_rate": 6.788461538461538e-07, "loss": 1.3672, "step": 221 }, { "epoch": 3.8947368421052633, "grad_norm": 47.97966003417969, "learning_rate": 6.769230769230769e-07, "loss": 1.1112, "step": 222 }, { "epoch": 3.912280701754386, "grad_norm": 49.454437255859375, 
"learning_rate": 6.75e-07, "loss": 1.2617, "step": 223 }, { "epoch": 3.9298245614035086, "grad_norm": 39.12055587768555, "learning_rate": 6.730769230769231e-07, "loss": 1.3417, "step": 224 }, { "epoch": 3.9473684210526314, "grad_norm": 38.112548828125, "learning_rate": 6.711538461538461e-07, "loss": 1.1137, "step": 225 }, { "epoch": 3.9649122807017543, "grad_norm": 18.497282028198242, "learning_rate": 6.692307692307692e-07, "loss": 0.636, "step": 226 }, { "epoch": 3.982456140350877, "grad_norm": 39.13663101196289, "learning_rate": 6.673076923076922e-07, "loss": 1.1667, "step": 227 }, { "epoch": 4.0, "grad_norm": 27.05844497680664, "learning_rate": 6.653846153846153e-07, "loss": 0.9226, "step": 228 }, { "epoch": 4.0, "eval_accuracy": 0.5363984674329502, "eval_loss": 1.1785928010940552, "eval_roc_auc": 0.7825664232388221, "eval_runtime": 24.813, "eval_samples_per_second": 10.519, "eval_steps_per_second": 0.685, "step": 228 }, { "epoch": 4.017543859649122, "grad_norm": 40.03508758544922, "learning_rate": 6.634615384615384e-07, "loss": 0.6566, "step": 229 }, { "epoch": 4.035087719298246, "grad_norm": 31.557764053344727, "learning_rate": 6.615384615384615e-07, "loss": 0.9575, "step": 230 }, { "epoch": 4.052631578947368, "grad_norm": 25.278390884399414, "learning_rate": 6.596153846153846e-07, "loss": 0.7892, "step": 231 }, { "epoch": 4.0701754385964914, "grad_norm": 30.69614028930664, "learning_rate": 6.576923076923077e-07, "loss": 1.0433, "step": 232 }, { "epoch": 4.087719298245614, "grad_norm": 58.29319381713867, "learning_rate": 6.557692307692308e-07, "loss": 1.5216, "step": 233 }, { "epoch": 4.105263157894737, "grad_norm": 42.853153228759766, "learning_rate": 6.538461538461538e-07, "loss": 1.22, "step": 234 }, { "epoch": 4.12280701754386, "grad_norm": 44.47975540161133, "learning_rate": 6.519230769230769e-07, "loss": 1.0883, "step": 235 }, { "epoch": 4.140350877192983, "grad_norm": 32.618743896484375, "learning_rate": 6.5e-07, "loss": 0.8169, "step": 236 }, { 
"epoch": 4.157894736842105, "grad_norm": 45.881961822509766, "learning_rate": 6.480769230769231e-07, "loss": 1.0063, "step": 237 }, { "epoch": 4.175438596491228, "grad_norm": 34.217777252197266, "learning_rate": 6.461538461538462e-07, "loss": 1.3464, "step": 238 }, { "epoch": 4.192982456140351, "grad_norm": 26.423919677734375, "learning_rate": 6.442307692307693e-07, "loss": 1.001, "step": 239 }, { "epoch": 4.2105263157894735, "grad_norm": 29.074453353881836, "learning_rate": 6.423076923076924e-07, "loss": 1.0627, "step": 240 }, { "epoch": 4.228070175438597, "grad_norm": 38.35462188720703, "learning_rate": 6.403846153846154e-07, "loss": 0.8004, "step": 241 }, { "epoch": 4.245614035087719, "grad_norm": 21.466026306152344, "learning_rate": 6.384615384615383e-07, "loss": 1.1149, "step": 242 }, { "epoch": 4.2631578947368425, "grad_norm": 34.41309356689453, "learning_rate": 6.365384615384614e-07, "loss": 1.1782, "step": 243 }, { "epoch": 4.280701754385965, "grad_norm": 34.77060317993164, "learning_rate": 6.346153846153845e-07, "loss": 0.9231, "step": 244 }, { "epoch": 4.298245614035087, "grad_norm": 26.94576072692871, "learning_rate": 6.326923076923076e-07, "loss": 1.1394, "step": 245 }, { "epoch": 4.315789473684211, "grad_norm": 19.103694915771484, "learning_rate": 6.307692307692307e-07, "loss": 0.939, "step": 246 }, { "epoch": 4.333333333333333, "grad_norm": 40.02509307861328, "learning_rate": 6.288461538461538e-07, "loss": 1.1394, "step": 247 }, { "epoch": 4.350877192982456, "grad_norm": 62.130611419677734, "learning_rate": 6.269230769230769e-07, "loss": 1.1179, "step": 248 }, { "epoch": 4.368421052631579, "grad_norm": 45.432708740234375, "learning_rate": 6.249999999999999e-07, "loss": 0.8211, "step": 249 }, { "epoch": 4.385964912280702, "grad_norm": 52.313697814941406, "learning_rate": 6.23076923076923e-07, "loss": 1.1714, "step": 250 }, { "epoch": 4.4035087719298245, "grad_norm": 24.41583824157715, "learning_rate": 6.211538461538461e-07, "loss": 1.0351, "step": 251 
}, { "epoch": 4.421052631578947, "grad_norm": 45.19395446777344, "learning_rate": 6.192307692307692e-07, "loss": 0.6095, "step": 252 }, { "epoch": 4.43859649122807, "grad_norm": 31.14399528503418, "learning_rate": 6.173076923076923e-07, "loss": 1.138, "step": 253 }, { "epoch": 4.456140350877193, "grad_norm": 48.5388298034668, "learning_rate": 6.153846153846154e-07, "loss": 1.2723, "step": 254 }, { "epoch": 4.473684210526316, "grad_norm": 63.80923843383789, "learning_rate": 6.134615384615385e-07, "loss": 1.272, "step": 255 }, { "epoch": 4.491228070175438, "grad_norm": 24.565670013427734, "learning_rate": 6.115384615384616e-07, "loss": 0.8632, "step": 256 }, { "epoch": 4.508771929824562, "grad_norm": 27.799039840698242, "learning_rate": 6.096153846153846e-07, "loss": 0.9728, "step": 257 }, { "epoch": 4.526315789473684, "grad_norm": 32.81442642211914, "learning_rate": 6.076923076923076e-07, "loss": 1.0024, "step": 258 }, { "epoch": 4.543859649122807, "grad_norm": 19.44063949584961, "learning_rate": 6.057692307692307e-07, "loss": 1.1237, "step": 259 }, { "epoch": 4.56140350877193, "grad_norm": 50.00696563720703, "learning_rate": 6.038461538461538e-07, "loss": 0.9283, "step": 260 }, { "epoch": 4.578947368421053, "grad_norm": 35.66808319091797, "learning_rate": 6.019230769230769e-07, "loss": 0.7856, "step": 261 }, { "epoch": 4.5964912280701755, "grad_norm": 36.49198913574219, "learning_rate": 6e-07, "loss": 1.0933, "step": 262 }, { "epoch": 4.614035087719298, "grad_norm": 29.842639923095703, "learning_rate": 5.980769230769231e-07, "loss": 0.9233, "step": 263 }, { "epoch": 4.631578947368421, "grad_norm": 80.13423156738281, "learning_rate": 5.961538461538461e-07, "loss": 1.2123, "step": 264 }, { "epoch": 4.649122807017544, "grad_norm": 43.099300384521484, "learning_rate": 5.942307692307692e-07, "loss": 1.0768, "step": 265 }, { "epoch": 4.666666666666667, "grad_norm": 32.86907196044922, "learning_rate": 5.923076923076923e-07, "loss": 0.8827, "step": 266 }, { "epoch": 
4.684210526315789, "grad_norm": 51.32353973388672, "learning_rate": 5.903846153846154e-07, "loss": 1.2962, "step": 267 }, { "epoch": 4.701754385964913, "grad_norm": 38.76735305786133, "learning_rate": 5.884615384615385e-07, "loss": 0.5998, "step": 268 }, { "epoch": 4.719298245614035, "grad_norm": 29.20305824279785, "learning_rate": 5.865384615384616e-07, "loss": 0.8401, "step": 269 }, { "epoch": 4.7368421052631575, "grad_norm": 37.6125373840332, "learning_rate": 5.846153846153847e-07, "loss": 1.0015, "step": 270 }, { "epoch": 4.754385964912281, "grad_norm": 60.04852294921875, "learning_rate": 5.826923076923078e-07, "loss": 1.052, "step": 271 }, { "epoch": 4.771929824561403, "grad_norm": 48.63616943359375, "learning_rate": 5.807692307692307e-07, "loss": 1.0901, "step": 272 }, { "epoch": 4.7894736842105265, "grad_norm": 42.764347076416016, "learning_rate": 5.788461538461538e-07, "loss": 0.5775, "step": 273 }, { "epoch": 4.807017543859649, "grad_norm": 38.956268310546875, "learning_rate": 5.769230769230768e-07, "loss": 1.13, "step": 274 }, { "epoch": 4.824561403508772, "grad_norm": 39.21760940551758, "learning_rate": 5.749999999999999e-07, "loss": 1.0837, "step": 275 }, { "epoch": 4.842105263157895, "grad_norm": 28.150279998779297, "learning_rate": 5.73076923076923e-07, "loss": 0.7626, "step": 276 }, { "epoch": 4.859649122807017, "grad_norm": 37.51382827758789, "learning_rate": 5.711538461538461e-07, "loss": 0.8416, "step": 277 }, { "epoch": 4.87719298245614, "grad_norm": 62.94125747680664, "learning_rate": 5.692307692307692e-07, "loss": 0.9175, "step": 278 }, { "epoch": 4.894736842105263, "grad_norm": 26.954492568969727, "learning_rate": 5.673076923076922e-07, "loss": 1.233, "step": 279 }, { "epoch": 4.912280701754386, "grad_norm": 35.33868408203125, "learning_rate": 5.653846153846153e-07, "loss": 0.8353, "step": 280 }, { "epoch": 4.9298245614035086, "grad_norm": 43.24800491333008, "learning_rate": 5.634615384615384e-07, "loss": 0.876, "step": 281 }, { "epoch": 
4.947368421052632, "grad_norm": 36.65079879760742, "learning_rate": 5.615384615384615e-07, "loss": 1.1462, "step": 282 }, { "epoch": 4.964912280701754, "grad_norm": 20.44588851928711, "learning_rate": 5.596153846153846e-07, "loss": 0.9175, "step": 283 }, { "epoch": 4.982456140350877, "grad_norm": 29.738140106201172, "learning_rate": 5.576923076923077e-07, "loss": 0.878, "step": 284 }, { "epoch": 5.0, "grad_norm": 69.77880859375, "learning_rate": 5.557692307692308e-07, "loss": 1.176, "step": 285 }, { "epoch": 5.0, "eval_accuracy": 0.5363984674329502, "eval_loss": 1.1245133876800537, "eval_roc_auc": 0.8024073042505275, "eval_runtime": 24.2409, "eval_samples_per_second": 10.767, "eval_steps_per_second": 0.701, "step": 285 }, { "epoch": 5.017543859649122, "grad_norm": 43.9262580871582, "learning_rate": 5.538461538461539e-07, "loss": 0.9786, "step": 286 }, { "epoch": 5.035087719298246, "grad_norm": 42.07478713989258, "learning_rate": 5.519230769230769e-07, "loss": 0.8725, "step": 287 }, { "epoch": 5.052631578947368, "grad_norm": 18.638660430908203, "learning_rate": 5.5e-07, "loss": 0.5895, "step": 288 }, { "epoch": 5.0701754385964914, "grad_norm": 29.218503952026367, "learning_rate": 5.480769230769231e-07, "loss": 0.8934, "step": 289 }, { "epoch": 5.087719298245614, "grad_norm": 72.49881744384766, "learning_rate": 5.461538461538461e-07, "loss": 1.0794, "step": 290 }, { "epoch": 5.105263157894737, "grad_norm": 47.327239990234375, "learning_rate": 5.442307692307692e-07, "loss": 0.9808, "step": 291 }, { "epoch": 5.12280701754386, "grad_norm": 49.39778137207031, "learning_rate": 5.423076923076923e-07, "loss": 1.0015, "step": 292 }, { "epoch": 5.140350877192983, "grad_norm": 36.040836334228516, "learning_rate": 5.403846153846154e-07, "loss": 1.0248, "step": 293 }, { "epoch": 5.157894736842105, "grad_norm": 33.901615142822266, "learning_rate": 5.384615384615384e-07, "loss": 0.908, "step": 294 }, { "epoch": 5.175438596491228, "grad_norm": 32.479549407958984, "learning_rate": 
5.365384615384615e-07, "loss": 0.7743, "step": 295 }, { "epoch": 5.192982456140351, "grad_norm": 73.39112854003906, "learning_rate": 5.346153846153846e-07, "loss": 0.911, "step": 296 }, { "epoch": 5.2105263157894735, "grad_norm": 50.840206146240234, "learning_rate": 5.326923076923077e-07, "loss": 1.1771, "step": 297 }, { "epoch": 5.228070175438597, "grad_norm": 29.241947174072266, "learning_rate": 5.307692307692308e-07, "loss": 1.0472, "step": 298 }, { "epoch": 5.245614035087719, "grad_norm": 32.0538215637207, "learning_rate": 5.288461538461539e-07, "loss": 0.9781, "step": 299 }, { "epoch": 5.2631578947368425, "grad_norm": 39.43052291870117, "learning_rate": 5.269230769230769e-07, "loss": 0.9577, "step": 300 }, { "epoch": 5.280701754385965, "grad_norm": 23.0631160736084, "learning_rate": 5.25e-07, "loss": 1.0764, "step": 301 }, { "epoch": 5.298245614035087, "grad_norm": 46.16770553588867, "learning_rate": 5.23076923076923e-07, "loss": 0.792, "step": 302 }, { "epoch": 5.315789473684211, "grad_norm": 51.57275390625, "learning_rate": 5.211538461538461e-07, "loss": 0.9721, "step": 303 }, { "epoch": 5.333333333333333, "grad_norm": 30.06395721435547, "learning_rate": 5.192307692307692e-07, "loss": 1.0893, "step": 304 }, { "epoch": 5.350877192982456, "grad_norm": 71.85147094726562, "learning_rate": 5.173076923076923e-07, "loss": 1.2034, "step": 305 }, { "epoch": 5.368421052631579, "grad_norm": 45.4450569152832, "learning_rate": 5.153846153846153e-07, "loss": 0.7588, "step": 306 }, { "epoch": 5.385964912280702, "grad_norm": 40.09158706665039, "learning_rate": 5.134615384615384e-07, "loss": 0.9968, "step": 307 }, { "epoch": 5.4035087719298245, "grad_norm": 62.48497772216797, "learning_rate": 5.115384615384615e-07, "loss": 1.2783, "step": 308 }, { "epoch": 5.421052631578947, "grad_norm": 59.85103988647461, "learning_rate": 5.096153846153845e-07, "loss": 1.2234, "step": 309 }, { "epoch": 5.43859649122807, "grad_norm": 41.75504684448242, "learning_rate": 5.076923076923076e-07, 
"loss": 1.0972, "step": 310 }, { "epoch": 5.456140350877193, "grad_norm": 25.908830642700195, "learning_rate": 5.057692307692307e-07, "loss": 1.0729, "step": 311 }, { "epoch": 5.473684210526316, "grad_norm": 52.866825103759766, "learning_rate": 5.038461538461538e-07, "loss": 0.7172, "step": 312 }, { "epoch": 5.491228070175438, "grad_norm": 62.79011917114258, "learning_rate": 5.019230769230769e-07, "loss": 0.8971, "step": 313 }, { "epoch": 5.508771929824562, "grad_norm": 61.6699333190918, "learning_rate": 5e-07, "loss": 1.1901, "step": 314 }, { "epoch": 5.526315789473684, "grad_norm": 39.02582550048828, "learning_rate": 4.980769230769231e-07, "loss": 0.9623, "step": 315 }, { "epoch": 5.543859649122807, "grad_norm": 20.786083221435547, "learning_rate": 4.961538461538462e-07, "loss": 0.9772, "step": 316 }, { "epoch": 5.56140350877193, "grad_norm": 29.874135971069336, "learning_rate": 4.942307692307692e-07, "loss": 0.5292, "step": 317 }, { "epoch": 5.578947368421053, "grad_norm": 73.50117492675781, "learning_rate": 4.923076923076923e-07, "loss": 1.4202, "step": 318 }, { "epoch": 5.5964912280701755, "grad_norm": 28.74661636352539, "learning_rate": 4.903846153846153e-07, "loss": 0.6696, "step": 319 }, { "epoch": 5.614035087719298, "grad_norm": 38.96842956542969, "learning_rate": 4.884615384615384e-07, "loss": 0.9159, "step": 320 }, { "epoch": 5.631578947368421, "grad_norm": 36.87344741821289, "learning_rate": 4.865384615384615e-07, "loss": 0.9728, "step": 321 }, { "epoch": 5.649122807017544, "grad_norm": 38.103668212890625, "learning_rate": 4.846153846153846e-07, "loss": 0.9663, "step": 322 }, { "epoch": 5.666666666666667, "grad_norm": 24.839292526245117, "learning_rate": 4.826923076923077e-07, "loss": 0.8722, "step": 323 }, { "epoch": 5.684210526315789, "grad_norm": 27.795520782470703, "learning_rate": 4.807692307692307e-07, "loss": 0.7313, "step": 324 }, { "epoch": 5.701754385964913, "grad_norm": 48.86296844482422, "learning_rate": 4.788461538461538e-07, "loss": 
0.7879, "step": 325 }, { "epoch": 5.719298245614035, "grad_norm": 43.693058013916016, "learning_rate": 4.769230769230769e-07, "loss": 0.944, "step": 326 }, { "epoch": 5.7368421052631575, "grad_norm": 26.41690444946289, "learning_rate": 4.7499999999999995e-07, "loss": 0.6766, "step": 327 }, { "epoch": 5.754385964912281, "grad_norm": 51.20581817626953, "learning_rate": 4.7307692307692304e-07, "loss": 0.8956, "step": 328 }, { "epoch": 5.771929824561403, "grad_norm": 42.70225143432617, "learning_rate": 4.711538461538461e-07, "loss": 0.9747, "step": 329 }, { "epoch": 5.7894736842105265, "grad_norm": 32.69301986694336, "learning_rate": 4.692307692307692e-07, "loss": 0.5307, "step": 330 }, { "epoch": 5.807017543859649, "grad_norm": 47.880863189697266, "learning_rate": 4.673076923076923e-07, "loss": 0.7341, "step": 331 }, { "epoch": 5.824561403508772, "grad_norm": 37.469017028808594, "learning_rate": 4.653846153846154e-07, "loss": 0.8207, "step": 332 }, { "epoch": 5.842105263157895, "grad_norm": 44.53699493408203, "learning_rate": 4.634615384615385e-07, "loss": 1.0366, "step": 333 }, { "epoch": 5.859649122807017, "grad_norm": 34.84064483642578, "learning_rate": 4.6153846153846156e-07, "loss": 0.9788, "step": 334 }, { "epoch": 5.87719298245614, "grad_norm": 35.885215759277344, "learning_rate": 4.596153846153846e-07, "loss": 0.8453, "step": 335 }, { "epoch": 5.894736842105263, "grad_norm": 33.525081634521484, "learning_rate": 4.5769230769230763e-07, "loss": 0.6823, "step": 336 }, { "epoch": 5.912280701754386, "grad_norm": 46.2805290222168, "learning_rate": 4.557692307692307e-07, "loss": 0.9438, "step": 337 }, { "epoch": 5.9298245614035086, "grad_norm": 56.342018127441406, "learning_rate": 4.538461538461538e-07, "loss": 1.2671, "step": 338 }, { "epoch": 5.947368421052632, "grad_norm": 32.246089935302734, "learning_rate": 4.519230769230769e-07, "loss": 0.9635, "step": 339 }, { "epoch": 5.964912280701754, "grad_norm": 39.31554412841797, "learning_rate": 4.5e-07, "loss": 0.8637, 
"step": 340 }, { "epoch": 5.982456140350877, "grad_norm": 32.075775146484375, "learning_rate": 4.4807692307692307e-07, "loss": 1.0645, "step": 341 }, { "epoch": 6.0, "grad_norm": 33.591068267822266, "learning_rate": 4.4615384615384615e-07, "loss": 0.7403, "step": 342 }, { "epoch": 6.0, "eval_accuracy": 0.5708812260536399, "eval_loss": 1.0752440690994263, "eval_roc_auc": 0.8091941568824714, "eval_runtime": 25.1571, "eval_samples_per_second": 10.375, "eval_steps_per_second": 0.676, "step": 342 }, { "epoch": 6.017543859649122, "grad_norm": 23.306591033935547, "learning_rate": 4.442307692307692e-07, "loss": 0.5686, "step": 343 }, { "epoch": 6.035087719298246, "grad_norm": 44.798919677734375, "learning_rate": 4.423076923076923e-07, "loss": 1.203, "step": 344 }, { "epoch": 6.052631578947368, "grad_norm": 23.386505126953125, "learning_rate": 4.4038461538461536e-07, "loss": 0.7083, "step": 345 }, { "epoch": 6.0701754385964914, "grad_norm": 49.338531494140625, "learning_rate": 4.3846153846153845e-07, "loss": 0.9921, "step": 346 }, { "epoch": 6.087719298245614, "grad_norm": 26.851451873779297, "learning_rate": 4.3653846153846154e-07, "loss": 0.7737, "step": 347 }, { "epoch": 6.105263157894737, "grad_norm": 41.42471694946289, "learning_rate": 4.346153846153846e-07, "loss": 1.1449, "step": 348 }, { "epoch": 6.12280701754386, "grad_norm": 30.84017562866211, "learning_rate": 4.326923076923077e-07, "loss": 1.0377, "step": 349 }, { "epoch": 6.140350877192983, "grad_norm": 21.94278335571289, "learning_rate": 4.307692307692308e-07, "loss": 0.7842, "step": 350 }, { "epoch": 6.157894736842105, "grad_norm": 54.80387496948242, "learning_rate": 4.288461538461538e-07, "loss": 0.6877, "step": 351 }, { "epoch": 6.175438596491228, "grad_norm": 52.03696060180664, "learning_rate": 4.2692307692307687e-07, "loss": 0.819, "step": 352 }, { "epoch": 6.192982456140351, "grad_norm": 47.98991775512695, "learning_rate": 4.2499999999999995e-07, "loss": 1.2738, "step": 353 }, { "epoch": 
6.2105263157894735, "grad_norm": 31.361557006835938, "learning_rate": 4.2307692307692304e-07, "loss": 1.0351, "step": 354 }, { "epoch": 6.228070175438597, "grad_norm": 58.75925827026367, "learning_rate": 4.2115384615384613e-07, "loss": 0.9005, "step": 355 }, { "epoch": 6.245614035087719, "grad_norm": 61.34735107421875, "learning_rate": 4.192307692307692e-07, "loss": 1.335, "step": 356 }, { "epoch": 6.2631578947368425, "grad_norm": 38.18897247314453, "learning_rate": 4.173076923076923e-07, "loss": 0.8518, "step": 357 }, { "epoch": 6.280701754385965, "grad_norm": 47.511199951171875, "learning_rate": 4.153846153846154e-07, "loss": 1.0651, "step": 358 }, { "epoch": 6.298245614035087, "grad_norm": 52.5938720703125, "learning_rate": 4.134615384615384e-07, "loss": 1.2098, "step": 359 }, { "epoch": 6.315789473684211, "grad_norm": 46.33355712890625, "learning_rate": 4.115384615384615e-07, "loss": 0.6994, "step": 360 }, { "epoch": 6.333333333333333, "grad_norm": 43.15740203857422, "learning_rate": 4.096153846153846e-07, "loss": 1.0389, "step": 361 }, { "epoch": 6.350877192982456, "grad_norm": 33.72315216064453, "learning_rate": 4.076923076923077e-07, "loss": 0.862, "step": 362 }, { "epoch": 6.368421052631579, "grad_norm": 50.00696563720703, "learning_rate": 4.0576923076923077e-07, "loss": 0.9237, "step": 363 }, { "epoch": 6.385964912280702, "grad_norm": 22.04112434387207, "learning_rate": 4.0384615384615386e-07, "loss": 0.7235, "step": 364 }, { "epoch": 6.4035087719298245, "grad_norm": 56.314048767089844, "learning_rate": 4.019230769230769e-07, "loss": 1.048, "step": 365 }, { "epoch": 6.421052631578947, "grad_norm": 26.58925437927246, "learning_rate": 4e-07, "loss": 0.6484, "step": 366 }, { "epoch": 6.43859649122807, "grad_norm": 55.187747955322266, "learning_rate": 3.98076923076923e-07, "loss": 0.9495, "step": 367 }, { "epoch": 6.456140350877193, "grad_norm": 45.539772033691406, "learning_rate": 3.961538461538461e-07, "loss": 0.6283, "step": 368 }, { "epoch": 
6.473684210526316, "grad_norm": 40.78237533569336, "learning_rate": 3.942307692307692e-07, "loss": 0.5378, "step": 369 }, { "epoch": 6.491228070175438, "grad_norm": 24.31471824645996, "learning_rate": 3.923076923076923e-07, "loss": 0.7294, "step": 370 }, { "epoch": 6.508771929824562, "grad_norm": 30.035335540771484, "learning_rate": 3.9038461538461536e-07, "loss": 0.8559, "step": 371 }, { "epoch": 6.526315789473684, "grad_norm": 31.007173538208008, "learning_rate": 3.8846153846153845e-07, "loss": 0.802, "step": 372 }, { "epoch": 6.543859649122807, "grad_norm": 43.71822738647461, "learning_rate": 3.8653846153846154e-07, "loss": 0.748, "step": 373 }, { "epoch": 6.56140350877193, "grad_norm": 21.99559783935547, "learning_rate": 3.8461538461538463e-07, "loss": 0.6553, "step": 374 }, { "epoch": 6.578947368421053, "grad_norm": 44.91862869262695, "learning_rate": 3.8269230769230766e-07, "loss": 0.8155, "step": 375 }, { "epoch": 6.5964912280701755, "grad_norm": 28.355440139770508, "learning_rate": 3.8076923076923075e-07, "loss": 1.1146, "step": 376 }, { "epoch": 6.614035087719298, "grad_norm": 28.48116111755371, "learning_rate": 3.7884615384615384e-07, "loss": 1.1448, "step": 377 }, { "epoch": 6.631578947368421, "grad_norm": 50.27273941040039, "learning_rate": 3.769230769230769e-07, "loss": 1.0349, "step": 378 }, { "epoch": 6.649122807017544, "grad_norm": 32.425384521484375, "learning_rate": 3.75e-07, "loss": 0.8526, "step": 379 }, { "epoch": 6.666666666666667, "grad_norm": 73.90445709228516, "learning_rate": 3.7307692307692304e-07, "loss": 1.2825, "step": 380 }, { "epoch": 6.684210526315789, "grad_norm": 48.79448699951172, "learning_rate": 3.7115384615384613e-07, "loss": 0.6351, "step": 381 }, { "epoch": 6.701754385964913, "grad_norm": 24.40697479248047, "learning_rate": 3.692307692307692e-07, "loss": 0.7081, "step": 382 }, { "epoch": 6.719298245614035, "grad_norm": 47.5269660949707, "learning_rate": 3.673076923076923e-07, "loss": 0.671, "step": 383 }, { "epoch": 
6.7368421052631575, "grad_norm": 37.11399841308594, "learning_rate": 3.6538461538461534e-07, "loss": 0.6574, "step": 384 }, { "epoch": 6.754385964912281, "grad_norm": 43.93232345581055, "learning_rate": 3.6346153846153843e-07, "loss": 1.0039, "step": 385 }, { "epoch": 6.771929824561403, "grad_norm": 35.62379455566406, "learning_rate": 3.615384615384615e-07, "loss": 0.7748, "step": 386 }, { "epoch": 6.7894736842105265, "grad_norm": 68.19344329833984, "learning_rate": 3.596153846153846e-07, "loss": 1.033, "step": 387 }, { "epoch": 6.807017543859649, "grad_norm": 39.3653678894043, "learning_rate": 3.576923076923077e-07, "loss": 0.5758, "step": 388 }, { "epoch": 6.824561403508772, "grad_norm": 30.772722244262695, "learning_rate": 3.557692307692308e-07, "loss": 0.8695, "step": 389 }, { "epoch": 6.842105263157895, "grad_norm": 32.83871078491211, "learning_rate": 3.5384615384615386e-07, "loss": 1.0549, "step": 390 }, { "epoch": 6.859649122807017, "grad_norm": 62.99473190307617, "learning_rate": 3.5192307692307695e-07, "loss": 0.9173, "step": 391 }, { "epoch": 6.87719298245614, "grad_norm": 42.774044036865234, "learning_rate": 3.5e-07, "loss": 1.0925, "step": 392 }, { "epoch": 6.894736842105263, "grad_norm": 45.01066589355469, "learning_rate": 3.4807692307692307e-07, "loss": 1.0266, "step": 393 }, { "epoch": 6.912280701754386, "grad_norm": 30.733928680419922, "learning_rate": 3.461538461538461e-07, "loss": 0.7034, "step": 394 }, { "epoch": 6.9298245614035086, "grad_norm": 19.22307014465332, "learning_rate": 3.442307692307692e-07, "loss": 0.9147, "step": 395 }, { "epoch": 6.947368421052632, "grad_norm": 68.61734008789062, "learning_rate": 3.423076923076923e-07, "loss": 1.1881, "step": 396 }, { "epoch": 6.964912280701754, "grad_norm": 33.16205978393555, "learning_rate": 3.4038461538461537e-07, "loss": 0.7483, "step": 397 }, { "epoch": 6.982456140350877, "grad_norm": 19.65161895751953, "learning_rate": 3.3846153846153845e-07, "loss": 0.6954, "step": 398 }, { "epoch": 7.0, 
"grad_norm": 92.69136810302734, "learning_rate": 3.3653846153846154e-07, "loss": 1.1487, "step": 399 }, { "epoch": 7.0, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.069606065750122, "eval_roc_auc": 0.8184030717743787, "eval_runtime": 24.405, "eval_samples_per_second": 10.695, "eval_steps_per_second": 0.697, "step": 399 }, { "epoch": 7.017543859649122, "grad_norm": 87.78544616699219, "learning_rate": 3.346153846153846e-07, "loss": 0.976, "step": 400 }, { "epoch": 7.035087719298246, "grad_norm": 27.48891258239746, "learning_rate": 3.3269230769230766e-07, "loss": 0.5747, "step": 401 }, { "epoch": 7.052631578947368, "grad_norm": 45.76131057739258, "learning_rate": 3.3076923076923075e-07, "loss": 0.9605, "step": 402 }, { "epoch": 7.0701754385964914, "grad_norm": 20.498069763183594, "learning_rate": 3.2884615384615384e-07, "loss": 0.5053, "step": 403 }, { "epoch": 7.087719298245614, "grad_norm": 22.58955192565918, "learning_rate": 3.269230769230769e-07, "loss": 0.5368, "step": 404 }, { "epoch": 7.105263157894737, "grad_norm": 22.225399017333984, "learning_rate": 3.25e-07, "loss": 0.777, "step": 405 }, { "epoch": 7.12280701754386, "grad_norm": 39.7629508972168, "learning_rate": 3.230769230769231e-07, "loss": 0.7354, "step": 406 }, { "epoch": 7.140350877192983, "grad_norm": 58.76552200317383, "learning_rate": 3.211538461538462e-07, "loss": 1.1802, "step": 407 }, { "epoch": 7.157894736842105, "grad_norm": 22.695573806762695, "learning_rate": 3.1923076923076917e-07, "loss": 0.8024, "step": 408 }, { "epoch": 7.175438596491228, "grad_norm": 33.9383430480957, "learning_rate": 3.1730769230769225e-07, "loss": 1.0508, "step": 409 }, { "epoch": 7.192982456140351, "grad_norm": 30.876842498779297, "learning_rate": 3.1538461538461534e-07, "loss": 0.7214, "step": 410 }, { "epoch": 7.2105263157894735, "grad_norm": 36.76365280151367, "learning_rate": 3.1346153846153843e-07, "loss": 0.7907, "step": 411 }, { "epoch": 7.228070175438597, "grad_norm": 65.87890625, "learning_rate": 
3.115384615384615e-07, "loss": 1.2976, "step": 412 }, { "epoch": 7.245614035087719, "grad_norm": 43.14383316040039, "learning_rate": 3.096153846153846e-07, "loss": 1.1367, "step": 413 }, { "epoch": 7.2631578947368425, "grad_norm": 27.98732566833496, "learning_rate": 3.076923076923077e-07, "loss": 1.0924, "step": 414 }, { "epoch": 7.280701754385965, "grad_norm": 48.689537048339844, "learning_rate": 3.057692307692308e-07, "loss": 1.0298, "step": 415 }, { "epoch": 7.298245614035087, "grad_norm": 27.990142822265625, "learning_rate": 3.038461538461538e-07, "loss": 0.9779, "step": 416 }, { "epoch": 7.315789473684211, "grad_norm": 29.33123016357422, "learning_rate": 3.019230769230769e-07, "loss": 0.7683, "step": 417 }, { "epoch": 7.333333333333333, "grad_norm": 26.44052505493164, "learning_rate": 3e-07, "loss": 0.8771, "step": 418 }, { "epoch": 7.350877192982456, "grad_norm": 28.419343948364258, "learning_rate": 2.980769230769231e-07, "loss": 0.9028, "step": 419 }, { "epoch": 7.368421052631579, "grad_norm": 38.75981521606445, "learning_rate": 2.9615384615384616e-07, "loss": 0.9058, "step": 420 }, { "epoch": 7.385964912280702, "grad_norm": 27.943565368652344, "learning_rate": 2.9423076923076925e-07, "loss": 0.5811, "step": 421 }, { "epoch": 7.4035087719298245, "grad_norm": 38.048736572265625, "learning_rate": 2.9230769230769234e-07, "loss": 1.0206, "step": 422 }, { "epoch": 7.421052631578947, "grad_norm": 30.790170669555664, "learning_rate": 2.9038461538461537e-07, "loss": 0.8268, "step": 423 }, { "epoch": 7.43859649122807, "grad_norm": 52.75239562988281, "learning_rate": 2.884615384615384e-07, "loss": 1.0024, "step": 424 }, { "epoch": 7.456140350877193, "grad_norm": 30.304975509643555, "learning_rate": 2.865384615384615e-07, "loss": 0.7764, "step": 425 }, { "epoch": 7.473684210526316, "grad_norm": 44.08205795288086, "learning_rate": 2.846153846153846e-07, "loss": 0.6279, "step": 426 }, { "epoch": 7.491228070175438, "grad_norm": 29.00324058532715, "learning_rate": 
2.8269230769230767e-07, "loss": 0.7732, "step": 427 }, { "epoch": 7.508771929824562, "grad_norm": 44.4395637512207, "learning_rate": 2.8076923076923075e-07, "loss": 0.5693, "step": 428 }, { "epoch": 7.526315789473684, "grad_norm": 28.682804107666016, "learning_rate": 2.7884615384615384e-07, "loss": 0.8483, "step": 429 }, { "epoch": 7.543859649122807, "grad_norm": 71.88604736328125, "learning_rate": 2.7692307692307693e-07, "loss": 1.0121, "step": 430 }, { "epoch": 7.56140350877193, "grad_norm": 37.93716049194336, "learning_rate": 2.75e-07, "loss": 0.9846, "step": 431 }, { "epoch": 7.578947368421053, "grad_norm": 68.40493774414062, "learning_rate": 2.7307692307692305e-07, "loss": 0.7968, "step": 432 }, { "epoch": 7.5964912280701755, "grad_norm": 34.130680084228516, "learning_rate": 2.7115384615384614e-07, "loss": 0.6804, "step": 433 }, { "epoch": 7.614035087719298, "grad_norm": 66.54443359375, "learning_rate": 2.692307692307692e-07, "loss": 0.8493, "step": 434 }, { "epoch": 7.631578947368421, "grad_norm": 30.927793502807617, "learning_rate": 2.673076923076923e-07, "loss": 0.8261, "step": 435 }, { "epoch": 7.649122807017544, "grad_norm": 31.034412384033203, "learning_rate": 2.653846153846154e-07, "loss": 1.0382, "step": 436 }, { "epoch": 7.666666666666667, "grad_norm": 28.199138641357422, "learning_rate": 2.6346153846153843e-07, "loss": 0.6613, "step": 437 }, { "epoch": 7.684210526315789, "grad_norm": 34.426605224609375, "learning_rate": 2.615384615384615e-07, "loss": 0.8676, "step": 438 }, { "epoch": 7.701754385964913, "grad_norm": 26.49811553955078, "learning_rate": 2.596153846153846e-07, "loss": 0.6587, "step": 439 }, { "epoch": 7.719298245614035, "grad_norm": 28.70757293701172, "learning_rate": 2.5769230769230764e-07, "loss": 0.7954, "step": 440 }, { "epoch": 7.7368421052631575, "grad_norm": 38.85836410522461, "learning_rate": 2.5576923076923073e-07, "loss": 0.8062, "step": 441 }, { "epoch": 7.754385964912281, "grad_norm": 26.44515609741211, "learning_rate": 
2.538461538461538e-07, "loss": 1.0693, "step": 442 }, { "epoch": 7.771929824561403, "grad_norm": 55.86748504638672, "learning_rate": 2.519230769230769e-07, "loss": 0.9166, "step": 443 }, { "epoch": 7.7894736842105265, "grad_norm": 28.77703857421875, "learning_rate": 2.5e-07, "loss": 0.8507, "step": 444 }, { "epoch": 7.807017543859649, "grad_norm": 39.333343505859375, "learning_rate": 2.480769230769231e-07, "loss": 1.02, "step": 445 }, { "epoch": 7.824561403508772, "grad_norm": 44.16427230834961, "learning_rate": 2.4615384615384616e-07, "loss": 1.0713, "step": 446 }, { "epoch": 7.842105263157895, "grad_norm": 22.932966232299805, "learning_rate": 2.442307692307692e-07, "loss": 0.4415, "step": 447 }, { "epoch": 7.859649122807017, "grad_norm": 48.378841400146484, "learning_rate": 2.423076923076923e-07, "loss": 0.7085, "step": 448 }, { "epoch": 7.87719298245614, "grad_norm": 50.131019592285156, "learning_rate": 2.4038461538461537e-07, "loss": 0.9735, "step": 449 }, { "epoch": 7.894736842105263, "grad_norm": 20.94889259338379, "learning_rate": 2.3846153846153846e-07, "loss": 0.6935, "step": 450 }, { "epoch": 7.912280701754386, "grad_norm": 30.711423873901367, "learning_rate": 2.3653846153846152e-07, "loss": 0.7382, "step": 451 }, { "epoch": 7.9298245614035086, "grad_norm": 37.16843795776367, "learning_rate": 2.346153846153846e-07, "loss": 1.0025, "step": 452 }, { "epoch": 7.947368421052632, "grad_norm": 61.31568908691406, "learning_rate": 2.326923076923077e-07, "loss": 0.7435, "step": 453 }, { "epoch": 7.964912280701754, "grad_norm": 30.279325485229492, "learning_rate": 2.3076923076923078e-07, "loss": 0.8862, "step": 454 }, { "epoch": 7.982456140350877, "grad_norm": 22.893749237060547, "learning_rate": 2.2884615384615382e-07, "loss": 0.8411, "step": 455 }, { "epoch": 8.0, "grad_norm": 33.30168533325195, "learning_rate": 2.269230769230769e-07, "loss": 0.521, "step": 456 }, { "epoch": 8.0, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.0445302724838257, 
"eval_roc_auc": 0.821241054744927, "eval_runtime": 24.8722, "eval_samples_per_second": 10.494, "eval_steps_per_second": 0.683, "step": 456 }, { "epoch": 8.017543859649123, "grad_norm": 32.153465270996094, "learning_rate": 2.25e-07, "loss": 0.8686, "step": 457 }, { "epoch": 8.035087719298245, "grad_norm": 26.060014724731445, "learning_rate": 2.2307692307692308e-07, "loss": 0.7634, "step": 458 }, { "epoch": 8.052631578947368, "grad_norm": 60.5893440246582, "learning_rate": 2.2115384615384614e-07, "loss": 1.3411, "step": 459 }, { "epoch": 8.070175438596491, "grad_norm": 28.305891036987305, "learning_rate": 2.1923076923076922e-07, "loss": 0.6241, "step": 460 }, { "epoch": 8.087719298245615, "grad_norm": 20.124664306640625, "learning_rate": 2.173076923076923e-07, "loss": 0.591, "step": 461 }, { "epoch": 8.105263157894736, "grad_norm": 87.66210174560547, "learning_rate": 2.153846153846154e-07, "loss": 1.2101, "step": 462 }, { "epoch": 8.12280701754386, "grad_norm": 35.75629425048828, "learning_rate": 2.1346153846153843e-07, "loss": 0.8357, "step": 463 }, { "epoch": 8.140350877192983, "grad_norm": 58.722328186035156, "learning_rate": 2.1153846153846152e-07, "loss": 1.0812, "step": 464 }, { "epoch": 8.157894736842104, "grad_norm": 45.29573059082031, "learning_rate": 2.096153846153846e-07, "loss": 0.7323, "step": 465 }, { "epoch": 8.175438596491228, "grad_norm": 29.968141555786133, "learning_rate": 2.076923076923077e-07, "loss": 0.6938, "step": 466 }, { "epoch": 8.192982456140351, "grad_norm": 37.96227264404297, "learning_rate": 2.0576923076923076e-07, "loss": 0.9054, "step": 467 }, { "epoch": 8.210526315789474, "grad_norm": 66.0616455078125, "learning_rate": 2.0384615384615384e-07, "loss": 0.9688, "step": 468 }, { "epoch": 8.228070175438596, "grad_norm": 70.00723266601562, "learning_rate": 2.0192307692307693e-07, "loss": 1.1553, "step": 469 }, { "epoch": 8.24561403508772, "grad_norm": 39.04280090332031, "learning_rate": 2e-07, "loss": 0.9341, "step": 470 }, { "epoch": 
8.263157894736842, "grad_norm": 34.20655059814453, "learning_rate": 1.9807692307692305e-07, "loss": 0.9593, "step": 471 }, { "epoch": 8.280701754385966, "grad_norm": 63.928348541259766, "learning_rate": 1.9615384615384614e-07, "loss": 0.9055, "step": 472 }, { "epoch": 8.298245614035087, "grad_norm": 71.03302001953125, "learning_rate": 1.9423076923076923e-07, "loss": 0.921, "step": 473 }, { "epoch": 8.31578947368421, "grad_norm": 37.35957717895508, "learning_rate": 1.9230769230769231e-07, "loss": 0.6336, "step": 474 }, { "epoch": 8.333333333333334, "grad_norm": 17.601655960083008, "learning_rate": 1.9038461538461537e-07, "loss": 0.4762, "step": 475 }, { "epoch": 8.350877192982455, "grad_norm": 45.87922286987305, "learning_rate": 1.8846153846153846e-07, "loss": 0.7384, "step": 476 }, { "epoch": 8.368421052631579, "grad_norm": 35.179988861083984, "learning_rate": 1.8653846153846152e-07, "loss": 0.6582, "step": 477 }, { "epoch": 8.385964912280702, "grad_norm": 29.4314022064209, "learning_rate": 1.846153846153846e-07, "loss": 0.6557, "step": 478 }, { "epoch": 8.403508771929825, "grad_norm": 39.96945571899414, "learning_rate": 1.8269230769230767e-07, "loss": 0.9508, "step": 479 }, { "epoch": 8.421052631578947, "grad_norm": 41.48187255859375, "learning_rate": 1.8076923076923076e-07, "loss": 0.6461, "step": 480 }, { "epoch": 8.43859649122807, "grad_norm": 46.457733154296875, "learning_rate": 1.7884615384615384e-07, "loss": 0.7903, "step": 481 }, { "epoch": 8.456140350877194, "grad_norm": 60.275028228759766, "learning_rate": 1.7692307692307693e-07, "loss": 0.9536, "step": 482 }, { "epoch": 8.473684210526315, "grad_norm": 32.316471099853516, "learning_rate": 1.75e-07, "loss": 0.7564, "step": 483 }, { "epoch": 8.491228070175438, "grad_norm": 30.793420791625977, "learning_rate": 1.7307692307692305e-07, "loss": 0.5605, "step": 484 }, { "epoch": 8.508771929824562, "grad_norm": 19.355497360229492, "learning_rate": 1.7115384615384614e-07, "loss": 0.5652, "step": 485 }, { "epoch": 
8.526315789473685, "grad_norm": 24.5403995513916, "learning_rate": 1.6923076923076923e-07, "loss": 0.6776, "step": 486 }, { "epoch": 8.543859649122806, "grad_norm": 35.900184631347656, "learning_rate": 1.673076923076923e-07, "loss": 0.7501, "step": 487 }, { "epoch": 8.56140350877193, "grad_norm": 28.108530044555664, "learning_rate": 1.6538461538461538e-07, "loss": 0.5605, "step": 488 }, { "epoch": 8.578947368421053, "grad_norm": 39.321144104003906, "learning_rate": 1.6346153846153846e-07, "loss": 0.9753, "step": 489 }, { "epoch": 8.596491228070175, "grad_norm": 24.50644874572754, "learning_rate": 1.6153846153846155e-07, "loss": 0.9194, "step": 490 }, { "epoch": 8.614035087719298, "grad_norm": 50.84926986694336, "learning_rate": 1.5961538461538458e-07, "loss": 0.9425, "step": 491 }, { "epoch": 8.631578947368421, "grad_norm": 38.82880401611328, "learning_rate": 1.5769230769230767e-07, "loss": 0.8447, "step": 492 }, { "epoch": 8.649122807017545, "grad_norm": 48.61659240722656, "learning_rate": 1.5576923076923076e-07, "loss": 0.9379, "step": 493 }, { "epoch": 8.666666666666666, "grad_norm": 41.58673095703125, "learning_rate": 1.5384615384615385e-07, "loss": 0.8007, "step": 494 }, { "epoch": 8.68421052631579, "grad_norm": 49.88103485107422, "learning_rate": 1.519230769230769e-07, "loss": 1.2287, "step": 495 }, { "epoch": 8.701754385964913, "grad_norm": 32.42145538330078, "learning_rate": 1.5e-07, "loss": 0.655, "step": 496 }, { "epoch": 8.719298245614034, "grad_norm": 31.83061981201172, "learning_rate": 1.4807692307692308e-07, "loss": 0.7932, "step": 497 }, { "epoch": 8.736842105263158, "grad_norm": 45.73976516723633, "learning_rate": 1.4615384615384617e-07, "loss": 0.7517, "step": 498 }, { "epoch": 8.75438596491228, "grad_norm": 24.301603317260742, "learning_rate": 1.442307692307692e-07, "loss": 0.6368, "step": 499 }, { "epoch": 8.771929824561404, "grad_norm": 43.589569091796875, "learning_rate": 1.423076923076923e-07, "loss": 0.7572, "step": 500 }, { "epoch": 
8.789473684210526, "grad_norm": 55.28351974487305, "learning_rate": 1.4038461538461538e-07, "loss": 0.8073, "step": 501 }, { "epoch": 8.807017543859649, "grad_norm": 29.774131774902344, "learning_rate": 1.3846153846153846e-07, "loss": 0.7737, "step": 502 }, { "epoch": 8.824561403508772, "grad_norm": 62.9353141784668, "learning_rate": 1.3653846153846152e-07, "loss": 0.7529, "step": 503 }, { "epoch": 8.842105263157894, "grad_norm": 38.0306510925293, "learning_rate": 1.346153846153846e-07, "loss": 0.7406, "step": 504 }, { "epoch": 8.859649122807017, "grad_norm": 41.864105224609375, "learning_rate": 1.326923076923077e-07, "loss": 0.9119, "step": 505 }, { "epoch": 8.87719298245614, "grad_norm": 67.60685729980469, "learning_rate": 1.3076923076923076e-07, "loss": 0.828, "step": 506 }, { "epoch": 8.894736842105264, "grad_norm": 37.95588302612305, "learning_rate": 1.2884615384615382e-07, "loss": 0.8381, "step": 507 }, { "epoch": 8.912280701754385, "grad_norm": 40.29434585571289, "learning_rate": 1.269230769230769e-07, "loss": 0.8085, "step": 508 }, { "epoch": 8.929824561403509, "grad_norm": 68.67707061767578, "learning_rate": 1.25e-07, "loss": 1.0056, "step": 509 }, { "epoch": 8.947368421052632, "grad_norm": 45.547428131103516, "learning_rate": 1.2307692307692308e-07, "loss": 0.7981, "step": 510 }, { "epoch": 8.964912280701755, "grad_norm": 39.541439056396484, "learning_rate": 1.2115384615384614e-07, "loss": 0.6487, "step": 511 }, { "epoch": 8.982456140350877, "grad_norm": 28.73076820373535, "learning_rate": 1.1923076923076923e-07, "loss": 0.5069, "step": 512 }, { "epoch": 9.0, "grad_norm": 31.97428321838379, "learning_rate": 1.173076923076923e-07, "loss": 0.9922, "step": 513 }, { "epoch": 9.0, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.0299025774002075, "eval_roc_auc": 0.8216890808633837, "eval_runtime": 34.5614, "eval_samples_per_second": 7.552, "eval_steps_per_second": 0.492, "step": 513 }, { "epoch": 9.017543859649123, "grad_norm": 48.0136833190918, 
"learning_rate": 1.1538461538461539e-07, "loss": 1.0235, "step": 514 }, { "epoch": 9.035087719298245, "grad_norm": 28.533130645751953, "learning_rate": 1.1346153846153845e-07, "loss": 0.5459, "step": 515 }, { "epoch": 9.052631578947368, "grad_norm": 41.17901611328125, "learning_rate": 1.1153846153846154e-07, "loss": 0.5279, "step": 516 }, { "epoch": 9.070175438596491, "grad_norm": 32.64225387573242, "learning_rate": 1.0961538461538461e-07, "loss": 1.0252, "step": 517 }, { "epoch": 9.087719298245615, "grad_norm": 32.45205307006836, "learning_rate": 1.076923076923077e-07, "loss": 0.6554, "step": 518 }, { "epoch": 9.105263157894736, "grad_norm": 35.420379638671875, "learning_rate": 1.0576923076923076e-07, "loss": 0.6954, "step": 519 }, { "epoch": 9.12280701754386, "grad_norm": 53.34769058227539, "learning_rate": 1.0384615384615385e-07, "loss": 0.7513, "step": 520 }, { "epoch": 9.140350877192983, "grad_norm": 28.763874053955078, "learning_rate": 1.0192307692307692e-07, "loss": 0.5913, "step": 521 }, { "epoch": 9.157894736842104, "grad_norm": 24.717885971069336, "learning_rate": 1e-07, "loss": 0.6692, "step": 522 }, { "epoch": 9.175438596491228, "grad_norm": 25.547611236572266, "learning_rate": 9.807692307692307e-08, "loss": 0.75, "step": 523 }, { "epoch": 9.192982456140351, "grad_norm": 41.97018051147461, "learning_rate": 9.615384615384616e-08, "loss": 0.6604, "step": 524 }, { "epoch": 9.210526315789474, "grad_norm": 80.47174835205078, "learning_rate": 9.423076923076923e-08, "loss": 1.2781, "step": 525 }, { "epoch": 9.228070175438596, "grad_norm": 27.064115524291992, "learning_rate": 9.23076923076923e-08, "loss": 0.7449, "step": 526 }, { "epoch": 9.24561403508772, "grad_norm": 34.857425689697266, "learning_rate": 9.038461538461538e-08, "loss": 0.5484, "step": 527 }, { "epoch": 9.263157894736842, "grad_norm": 63.48345184326172, "learning_rate": 8.846153846153847e-08, "loss": 0.8263, "step": 528 }, { "epoch": 9.280701754385966, "grad_norm": 36.6185417175293, 
"learning_rate": 8.653846153846153e-08, "loss": 0.8279, "step": 529 }, { "epoch": 9.298245614035087, "grad_norm": 45.01316833496094, "learning_rate": 8.461538461538461e-08, "loss": 0.9115, "step": 530 }, { "epoch": 9.31578947368421, "grad_norm": 19.511629104614258, "learning_rate": 8.269230769230769e-08, "loss": 0.4552, "step": 531 }, { "epoch": 9.333333333333334, "grad_norm": 42.41179275512695, "learning_rate": 8.076923076923077e-08, "loss": 1.0983, "step": 532 }, { "epoch": 9.350877192982455, "grad_norm": 26.47096061706543, "learning_rate": 7.884615384615384e-08, "loss": 0.5845, "step": 533 }, { "epoch": 9.368421052631579, "grad_norm": 30.966955184936523, "learning_rate": 7.692307692307692e-08, "loss": 0.6616, "step": 534 }, { "epoch": 9.385964912280702, "grad_norm": 40.17842102050781, "learning_rate": 7.5e-08, "loss": 0.854, "step": 535 }, { "epoch": 9.403508771929825, "grad_norm": 37.50959396362305, "learning_rate": 7.307692307692308e-08, "loss": 0.9193, "step": 536 }, { "epoch": 9.421052631578947, "grad_norm": 32.44236755371094, "learning_rate": 7.115384615384614e-08, "loss": 0.855, "step": 537 }, { "epoch": 9.43859649122807, "grad_norm": 37.18986511230469, "learning_rate": 6.923076923076923e-08, "loss": 0.9304, "step": 538 }, { "epoch": 9.456140350877194, "grad_norm": 36.91654586791992, "learning_rate": 6.73076923076923e-08, "loss": 0.8258, "step": 539 }, { "epoch": 9.473684210526315, "grad_norm": 27.073163986206055, "learning_rate": 6.538461538461538e-08, "loss": 0.4538, "step": 540 }, { "epoch": 9.491228070175438, "grad_norm": 33.02398681640625, "learning_rate": 6.346153846153845e-08, "loss": 0.8084, "step": 541 }, { "epoch": 9.508771929824562, "grad_norm": 25.321035385131836, "learning_rate": 6.153846153846154e-08, "loss": 0.6776, "step": 542 }, { "epoch": 9.526315789473685, "grad_norm": 35.03627395629883, "learning_rate": 5.961538461538461e-08, "loss": 0.7051, "step": 543 }, { "epoch": 9.543859649122806, "grad_norm": 32.04972457885742, "learning_rate": 
5.7692307692307695e-08, "loss": 1.1416, "step": 544 }, { "epoch": 9.56140350877193, "grad_norm": 32.8387451171875, "learning_rate": 5.576923076923077e-08, "loss": 0.7075, "step": 545 }, { "epoch": 9.578947368421053, "grad_norm": 53.04689407348633, "learning_rate": 5.384615384615385e-08, "loss": 0.8701, "step": 546 }, { "epoch": 9.596491228070175, "grad_norm": 23.254966735839844, "learning_rate": 5.1923076923076924e-08, "loss": 0.4444, "step": 547 }, { "epoch": 9.614035087719298, "grad_norm": 42.885440826416016, "learning_rate": 5e-08, "loss": 0.8811, "step": 548 }, { "epoch": 9.631578947368421, "grad_norm": 29.11221694946289, "learning_rate": 4.807692307692308e-08, "loss": 0.7966, "step": 549 }, { "epoch": 9.649122807017545, "grad_norm": 62.373077392578125, "learning_rate": 4.615384615384615e-08, "loss": 0.9317, "step": 550 }, { "epoch": 9.666666666666666, "grad_norm": 43.64496612548828, "learning_rate": 4.423076923076923e-08, "loss": 0.7716, "step": 551 }, { "epoch": 9.68421052631579, "grad_norm": 30.535717010498047, "learning_rate": 4.230769230769231e-08, "loss": 1.0664, "step": 552 }, { "epoch": 9.701754385964913, "grad_norm": 24.165536880493164, "learning_rate": 4.038461538461539e-08, "loss": 0.8169, "step": 553 }, { "epoch": 9.719298245614034, "grad_norm": 47.13362503051758, "learning_rate": 3.846153846153846e-08, "loss": 0.7841, "step": 554 }, { "epoch": 9.736842105263158, "grad_norm": 28.94150161743164, "learning_rate": 3.653846153846154e-08, "loss": 0.7046, "step": 555 }, { "epoch": 9.75438596491228, "grad_norm": 56.8846549987793, "learning_rate": 3.4615384615384616e-08, "loss": 0.8988, "step": 556 }, { "epoch": 9.771929824561404, "grad_norm": 30.389190673828125, "learning_rate": 3.269230769230769e-08, "loss": 0.7905, "step": 557 }, { "epoch": 9.789473684210526, "grad_norm": 30.133394241333008, "learning_rate": 3.076923076923077e-08, "loss": 0.5083, "step": 558 }, { "epoch": 9.807017543859649, "grad_norm": 46.20474624633789, "learning_rate": 
2.8846153846153848e-08, "loss": 0.9206, "step": 559 }, { "epoch": 9.824561403508772, "grad_norm": 38.54856872558594, "learning_rate": 2.6923076923076925e-08, "loss": 0.9944, "step": 560 }, { "epoch": 9.842105263157894, "grad_norm": 73.813232421875, "learning_rate": 2.5e-08, "loss": 0.9167, "step": 561 }, { "epoch": 9.859649122807017, "grad_norm": 42.26284408569336, "learning_rate": 2.3076923076923076e-08, "loss": 0.8989, "step": 562 }, { "epoch": 9.87719298245614, "grad_norm": 36.5334587097168, "learning_rate": 2.1153846153846153e-08, "loss": 0.6554, "step": 563 }, { "epoch": 9.894736842105264, "grad_norm": 30.85855484008789, "learning_rate": 1.923076923076923e-08, "loss": 0.4981, "step": 564 }, { "epoch": 9.912280701754385, "grad_norm": 38.74631881713867, "learning_rate": 1.7307692307692308e-08, "loss": 0.8997, "step": 565 }, { "epoch": 9.929824561403509, "grad_norm": 50.60679244995117, "learning_rate": 1.5384615384615385e-08, "loss": 0.9265, "step": 566 }, { "epoch": 9.947368421052632, "grad_norm": 36.15625762939453, "learning_rate": 1.3461538461538462e-08, "loss": 1.0373, "step": 567 }, { "epoch": 9.964912280701755, "grad_norm": 64.9268798828125, "learning_rate": 1.1538461538461538e-08, "loss": 1.0077, "step": 568 }, { "epoch": 9.982456140350877, "grad_norm": 28.213552474975586, "learning_rate": 9.615384615384615e-09, "loss": 1.102, "step": 569 }, { "epoch": 10.0, "grad_norm": 47.85600280761719, "learning_rate": 7.692307692307693e-09, "loss": 0.555, "step": 570 }, { "epoch": 10.0, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.0249524116516113, "eval_roc_auc": 0.8248013488853114, "eval_runtime": 23.6967, "eval_samples_per_second": 11.014, "eval_steps_per_second": 0.717, "step": 570 } ], "logging_steps": 1, "max_steps": 570, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, 
"should_training_stop": true }, "attributes": {} } }, "total_flos": 6.204020265084518e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }