{
  "best_metric": 1.0249524116516113,
  "best_model_checkpoint": "AST-vocal-disorder-classification/checkpoint-570",
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 570,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017543859649122806,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 2.988,
      "step": 1
    },
    {
      "epoch": 0.03508771929824561,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 2.6954,
      "step": 2
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 2.2632,
      "step": 3
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 165.02626037597656,
      "learning_rate": 2e-08,
      "loss": 3.195,
      "step": 4
    },
    {
      "epoch": 0.08771929824561403,
      "grad_norm": 168.1376495361328,
      "learning_rate": 4e-08,
      "loss": 2.8908,
      "step": 5
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": null,
      "learning_rate": 4e-08,
      "loss": 2.7552,
      "step": 6
    },
| { | |
| "epoch": 0.12280701754385964, | |
| "grad_norm": 158.0127716064453, | |
| "learning_rate": 6e-08, | |
| "loss": 3.1245, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.14035087719298245, | |
| "grad_norm": 150.22560119628906, | |
| "learning_rate": 8e-08, | |
| "loss": 2.8198, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 133.4413299560547, | |
| "learning_rate": 1e-07, | |
| "loss": 2.4031, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.17543859649122806, | |
| "grad_norm": 99.31004333496094, | |
| "learning_rate": 1.2e-07, | |
| "loss": 2.457, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19298245614035087, | |
| "grad_norm": 118.67373657226562, | |
| "learning_rate": 1.4e-07, | |
| "loss": 2.5123, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 132.25393676757812, | |
| "learning_rate": 1.6e-07, | |
| "loss": 2.5434, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.22807017543859648, | |
| "grad_norm": 94.40389251708984, | |
| "learning_rate": 1.8e-07, | |
| "loss": 2.3049, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.24561403508771928, | |
| "grad_norm": 125.89042663574219, | |
| "learning_rate": 2e-07, | |
| "loss": 2.5478, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 142.48435974121094, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "loss": 2.8247, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2807017543859649, | |
| "grad_norm": 126.35132598876953, | |
| "learning_rate": 2.4e-07, | |
| "loss": 2.4798, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.2982456140350877, | |
| "grad_norm": 151.13067626953125, | |
| "learning_rate": 2.6e-07, | |
| "loss": 2.7472, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 91.00186920166016, | |
| "learning_rate": 2.8e-07, | |
| "loss": 2.2366, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 81.35576629638672, | |
| "learning_rate": 3e-07, | |
| "loss": 2.3093, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.3508771929824561, | |
| "grad_norm": 164.45582580566406, | |
| "learning_rate": 3.2e-07, | |
| "loss": 2.9604, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3684210526315789, | |
| "grad_norm": 118.8172378540039, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "loss": 2.3687, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.38596491228070173, | |
| "grad_norm": 136.17398071289062, | |
| "learning_rate": 3.6e-07, | |
| "loss": 2.3948, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.40350877192982454, | |
| "grad_norm": 118.8042221069336, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "loss": 2.2426, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 107.0354995727539, | |
| "learning_rate": 4e-07, | |
| "loss": 2.3865, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.43859649122807015, | |
| "grad_norm": 77.19119262695312, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "loss": 1.9904, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.45614035087719296, | |
| "grad_norm": 140.62896728515625, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "loss": 2.7652, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.47368421052631576, | |
| "grad_norm": 75.05490112304688, | |
| "learning_rate": 4.6e-07, | |
| "loss": 2.075, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.49122807017543857, | |
| "grad_norm": 117.8460922241211, | |
| "learning_rate": 4.8e-07, | |
| "loss": 2.5994, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.5087719298245614, | |
| "grad_norm": 113.06562042236328, | |
| "learning_rate": 5e-07, | |
| "loss": 1.8539, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 110.89696502685547, | |
| "learning_rate": 5.2e-07, | |
| "loss": 2.1113, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.543859649122807, | |
| "grad_norm": 98.19458770751953, | |
| "learning_rate": 5.4e-07, | |
| "loss": 2.1587, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.5614035087719298, | |
| "grad_norm": 66.95838928222656, | |
| "learning_rate": 5.6e-07, | |
| "loss": 1.8982, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.5789473684210527, | |
| "grad_norm": 52.46982192993164, | |
| "learning_rate": 5.8e-07, | |
| "loss": 1.6926, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.5964912280701754, | |
| "grad_norm": 79.77543640136719, | |
| "learning_rate": 6e-07, | |
| "loss": 1.9471, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.6140350877192983, | |
| "grad_norm": 116.57611846923828, | |
| "learning_rate": 6.2e-07, | |
| "loss": 1.799, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 91.27916717529297, | |
| "learning_rate": 6.4e-07, | |
| "loss": 2.2582, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.6491228070175439, | |
| "grad_norm": 79.36409759521484, | |
| "learning_rate": 6.6e-07, | |
| "loss": 2.1673, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 65.35518646240234, | |
| "learning_rate": 6.800000000000001e-07, | |
| "loss": 1.7037, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.6842105263157895, | |
| "grad_norm": 42.132991790771484, | |
| "learning_rate": 7e-07, | |
| "loss": 1.4278, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7017543859649122, | |
| "grad_norm": 51.01911544799805, | |
| "learning_rate": 7.2e-07, | |
| "loss": 1.7739, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7192982456140351, | |
| "grad_norm": 65.60211181640625, | |
| "learning_rate": 7.4e-07, | |
| "loss": 1.7796, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 48.78630828857422, | |
| "learning_rate": 7.599999999999999e-07, | |
| "loss": 1.5501, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.7543859649122807, | |
| "grad_norm": 29.828662872314453, | |
| "learning_rate": 7.799999999999999e-07, | |
| "loss": 1.56, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.7719298245614035, | |
| "grad_norm": 50.522647857666016, | |
| "learning_rate": 8e-07, | |
| "loss": 1.4566, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 52.197025299072266, | |
| "learning_rate": 8.199999999999999e-07, | |
| "loss": 1.5521, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.8070175438596491, | |
| "grad_norm": 41.581092834472656, | |
| "learning_rate": 8.399999999999999e-07, | |
| "loss": 1.6864, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.8245614035087719, | |
| "grad_norm": 61.20100021362305, | |
| "learning_rate": 8.599999999999999e-07, | |
| "loss": 1.3111, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 39.471736907958984, | |
| "learning_rate": 8.799999999999999e-07, | |
| "loss": 1.6241, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.8596491228070176, | |
| "grad_norm": 39.578338623046875, | |
| "learning_rate": 9e-07, | |
| "loss": 1.4799, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.8771929824561403, | |
| "grad_norm": 57.32265090942383, | |
| "learning_rate": 9.2e-07, | |
| "loss": 1.8162, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.8947368421052632, | |
| "grad_norm": 37.478973388671875, | |
| "learning_rate": 9.399999999999999e-07, | |
| "loss": 1.5859, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.9122807017543859, | |
| "grad_norm": 45.59309387207031, | |
| "learning_rate": 9.6e-07, | |
| "loss": 1.3365, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.9298245614035088, | |
| "grad_norm": 47.914791107177734, | |
| "learning_rate": 9.8e-07, | |
| "loss": 1.7568, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 31.730653762817383, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6654, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.9649122807017544, | |
| "grad_norm": 47.1168327331543, | |
| "learning_rate": 9.98076923076923e-07, | |
| "loss": 1.2154, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.9824561403508771, | |
| "grad_norm": 34.07067108154297, | |
| "learning_rate": 9.961538461538461e-07, | |
| "loss": 1.296, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 32.9833869934082, | |
| "learning_rate": 9.942307692307691e-07, | |
| "loss": 1.5427, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3831417624521073, | |
| "eval_loss": 1.5033824443817139, | |
| "eval_roc_auc": 0.5521317875556301, | |
| "eval_runtime": 24.2654, | |
| "eval_samples_per_second": 10.756, | |
| "eval_steps_per_second": 0.701, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.0175438596491229, | |
| "grad_norm": 21.84532356262207, | |
| "learning_rate": 9.923076923076923e-07, | |
| "loss": 1.3469, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.0350877192982457, | |
| "grad_norm": 54.183753967285156, | |
| "learning_rate": 9.903846153846153e-07, | |
| "loss": 1.5014, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 45.070030212402344, | |
| "learning_rate": 9.884615384615385e-07, | |
| "loss": 1.6736, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.0701754385964912, | |
| "grad_norm": 54.34022903442383, | |
| "learning_rate": 9.865384615384615e-07, | |
| "loss": 1.7181, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.087719298245614, | |
| "grad_norm": 61.96958923339844, | |
| "learning_rate": 9.846153846153847e-07, | |
| "loss": 1.7463, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.1052631578947367, | |
| "grad_norm": 38.20405960083008, | |
| "learning_rate": 9.826923076923076e-07, | |
| "loss": 1.516, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.1228070175438596, | |
| "grad_norm": 31.333251953125, | |
| "learning_rate": 9.807692307692306e-07, | |
| "loss": 1.6652, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.1403508771929824, | |
| "grad_norm": 33.82837677001953, | |
| "learning_rate": 9.788461538461538e-07, | |
| "loss": 1.3185, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 44.01963424682617, | |
| "learning_rate": 9.769230769230768e-07, | |
| "loss": 1.2599, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.1754385964912282, | |
| "grad_norm": 41.35686492919922, | |
| "learning_rate": 9.75e-07, | |
| "loss": 1.4831, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.1929824561403508, | |
| "grad_norm": 37.442283630371094, | |
| "learning_rate": 9.73076923076923e-07, | |
| "loss": 1.6914, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 45.21449661254883, | |
| "learning_rate": 9.711538461538462e-07, | |
| "loss": 1.0162, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.2280701754385965, | |
| "grad_norm": 37.826507568359375, | |
| "learning_rate": 9.692307692307691e-07, | |
| "loss": 1.5321, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2456140350877192, | |
| "grad_norm": 81.76914978027344, | |
| "learning_rate": 9.673076923076923e-07, | |
| "loss": 1.7058, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 29.697542190551758, | |
| "learning_rate": 9.653846153846153e-07, | |
| "loss": 1.1953, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.280701754385965, | |
| "grad_norm": 25.966896057128906, | |
| "learning_rate": 9.634615384615385e-07, | |
| "loss": 1.2336, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.2982456140350878, | |
| "grad_norm": 27.649991989135742, | |
| "learning_rate": 9.615384615384615e-07, | |
| "loss": 1.4547, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 85.1219482421875, | |
| "learning_rate": 9.596153846153847e-07, | |
| "loss": 1.8594, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 45.845516204833984, | |
| "learning_rate": 9.576923076923077e-07, | |
| "loss": 1.4723, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.3508771929824561, | |
| "grad_norm": 33.39055633544922, | |
| "learning_rate": 9.557692307692309e-07, | |
| "loss": 1.3105, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 32.90497970581055, | |
| "learning_rate": 9.538461538461538e-07, | |
| "loss": 1.551, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.3859649122807016, | |
| "grad_norm": 18.763742446899414, | |
| "learning_rate": 9.519230769230768e-07, | |
| "loss": 1.1766, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.4035087719298245, | |
| "grad_norm": 38.28258514404297, | |
| "learning_rate": 9.499999999999999e-07, | |
| "loss": 1.3663, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.4210526315789473, | |
| "grad_norm": 44.95620346069336, | |
| "learning_rate": 9.48076923076923e-07, | |
| "loss": 1.4312, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.4385964912280702, | |
| "grad_norm": 46.334720611572266, | |
| "learning_rate": 9.461538461538461e-07, | |
| "loss": 1.5499, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.456140350877193, | |
| "grad_norm": 24.238452911376953, | |
| "learning_rate": 9.442307692307692e-07, | |
| "loss": 1.4905, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 33.23431396484375, | |
| "learning_rate": 9.423076923076923e-07, | |
| "loss": 1.3647, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.4912280701754386, | |
| "grad_norm": 46.839908599853516, | |
| "learning_rate": 9.403846153846153e-07, | |
| "loss": 1.7358, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.5087719298245614, | |
| "grad_norm": 27.96128273010254, | |
| "learning_rate": 9.384615384615384e-07, | |
| "loss": 1.4824, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.526315789473684, | |
| "grad_norm": 47.32505416870117, | |
| "learning_rate": 9.365384615384615e-07, | |
| "loss": 1.5127, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.543859649122807, | |
| "grad_norm": 31.273122787475586, | |
| "learning_rate": 9.346153846153846e-07, | |
| "loss": 1.1443, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.5614035087719298, | |
| "grad_norm": 47.530086517333984, | |
| "learning_rate": 9.326923076923077e-07, | |
| "loss": 1.4594, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 52.47103500366211, | |
| "learning_rate": 9.307692307692308e-07, | |
| "loss": 1.3066, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.5964912280701755, | |
| "grad_norm": 48.503936767578125, | |
| "learning_rate": 9.288461538461539e-07, | |
| "loss": 1.2926, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.6140350877192984, | |
| "grad_norm": 37.38385009765625, | |
| "learning_rate": 9.26923076923077e-07, | |
| "loss": 1.2452, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.631578947368421, | |
| "grad_norm": 69.00371551513672, | |
| "learning_rate": 9.25e-07, | |
| "loss": 1.7103, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.6491228070175439, | |
| "grad_norm": 20.759923934936523, | |
| "learning_rate": 9.230769230769231e-07, | |
| "loss": 1.4222, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 17.494487762451172, | |
| "learning_rate": 9.211538461538461e-07, | |
| "loss": 1.3884, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 72.35301971435547, | |
| "learning_rate": 9.192307692307692e-07, | |
| "loss": 1.7956, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.7017543859649122, | |
| "grad_norm": 58.388648986816406, | |
| "learning_rate": 9.173076923076922e-07, | |
| "loss": 1.3706, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.719298245614035, | |
| "grad_norm": 32.08078384399414, | |
| "learning_rate": 9.153846153846153e-07, | |
| "loss": 1.3232, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.736842105263158, | |
| "grad_norm": 43.424861907958984, | |
| "learning_rate": 9.134615384615383e-07, | |
| "loss": 1.0919, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.7543859649122808, | |
| "grad_norm": 43.55315017700195, | |
| "learning_rate": 9.115384615384614e-07, | |
| "loss": 1.3153, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.7719298245614035, | |
| "grad_norm": 12.423308372497559, | |
| "learning_rate": 9.096153846153845e-07, | |
| "loss": 1.2841, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 32.44642639160156, | |
| "learning_rate": 9.076923076923076e-07, | |
| "loss": 1.2104, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.807017543859649, | |
| "grad_norm": 74.81303405761719, | |
| "learning_rate": 9.057692307692307e-07, | |
| "loss": 0.9296, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.8245614035087718, | |
| "grad_norm": 62.9661979675293, | |
| "learning_rate": 9.038461538461538e-07, | |
| "loss": 1.1964, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.8421052631578947, | |
| "grad_norm": 52.237205505371094, | |
| "learning_rate": 9.019230769230769e-07, | |
| "loss": 1.3407, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.8596491228070176, | |
| "grad_norm": 26.808528900146484, | |
| "learning_rate": 9e-07, | |
| "loss": 1.1248, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.8771929824561404, | |
| "grad_norm": 28.834348678588867, | |
| "learning_rate": 8.98076923076923e-07, | |
| "loss": 1.1314, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 58.954833984375, | |
| "learning_rate": 8.961538461538461e-07, | |
| "loss": 1.1691, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.912280701754386, | |
| "grad_norm": 24.54899024963379, | |
| "learning_rate": 8.942307692307692e-07, | |
| "loss": 1.3596, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.9298245614035088, | |
| "grad_norm": 38.63195037841797, | |
| "learning_rate": 8.923076923076923e-07, | |
| "loss": 1.1972, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.9473684210526314, | |
| "grad_norm": 40.493141174316406, | |
| "learning_rate": 8.903846153846153e-07, | |
| "loss": 1.4167, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.9649122807017543, | |
| "grad_norm": 25.149085998535156, | |
| "learning_rate": 8.884615384615384e-07, | |
| "loss": 1.1808, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.9824561403508771, | |
| "grad_norm": 26.5919132232666, | |
| "learning_rate": 8.865384615384615e-07, | |
| "loss": 1.2026, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 27.22132682800293, | |
| "learning_rate": 8.846153846153846e-07, | |
| "loss": 1.1829, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.44061302681992337, | |
| "eval_loss": 1.3389430046081543, | |
| "eval_roc_auc": 0.7114645372622986, | |
| "eval_runtime": 22.9802, | |
| "eval_samples_per_second": 11.358, | |
| "eval_steps_per_second": 0.74, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.017543859649123, | |
| "grad_norm": 45.17961883544922, | |
| "learning_rate": 8.826923076923076e-07, | |
| "loss": 1.2122, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.0350877192982457, | |
| "grad_norm": 39.010257720947266, | |
| "learning_rate": 8.807692307692307e-07, | |
| "loss": 1.143, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.0526315789473686, | |
| "grad_norm": 45.62030029296875, | |
| "learning_rate": 8.788461538461538e-07, | |
| "loss": 1.4336, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.0701754385964914, | |
| "grad_norm": 39.72404098510742, | |
| "learning_rate": 8.769230769230769e-07, | |
| "loss": 1.2491, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.087719298245614, | |
| "grad_norm": 60.91448211669922, | |
| "learning_rate": 8.75e-07, | |
| "loss": 1.3879, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 22.340192794799805, | |
| "learning_rate": 8.730769230769231e-07, | |
| "loss": 1.1815, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.1228070175438596, | |
| "grad_norm": 51.66433334350586, | |
| "learning_rate": 8.711538461538462e-07, | |
| "loss": 1.3663, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.1403508771929824, | |
| "grad_norm": 34.69876480102539, | |
| "learning_rate": 8.692307692307692e-07, | |
| "loss": 0.8601, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.1578947368421053, | |
| "grad_norm": 31.769758224487305, | |
| "learning_rate": 8.673076923076923e-07, | |
| "loss": 1.067, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.175438596491228, | |
| "grad_norm": 15.30595588684082, | |
| "learning_rate": 8.653846153846154e-07, | |
| "loss": 1.2825, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.192982456140351, | |
| "grad_norm": 50.3326416015625, | |
| "learning_rate": 8.634615384615385e-07, | |
| "loss": 1.2839, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 39.54274368286133, | |
| "learning_rate": 8.615384615384616e-07, | |
| "loss": 1.2719, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.2280701754385963, | |
| "grad_norm": 40.03786087036133, | |
| "learning_rate": 8.596153846153846e-07, | |
| "loss": 1.2902, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.245614035087719, | |
| "grad_norm": 25.278501510620117, | |
| "learning_rate": 8.576923076923076e-07, | |
| "loss": 0.9576, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.263157894736842, | |
| "grad_norm": 74.27449035644531, | |
| "learning_rate": 8.557692307692306e-07, | |
| "loss": 1.6626, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.280701754385965, | |
| "grad_norm": 49.429100036621094, | |
| "learning_rate": 8.538461538461537e-07, | |
| "loss": 1.1279, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.2982456140350878, | |
| "grad_norm": 66.35237121582031, | |
| "learning_rate": 8.519230769230768e-07, | |
| "loss": 1.3382, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 54.39750671386719, | |
| "learning_rate": 8.499999999999999e-07, | |
| "loss": 1.1413, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 39.203617095947266, | |
| "learning_rate": 8.48076923076923e-07, | |
| "loss": 1.646, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.3508771929824563, | |
| "grad_norm": 40.577457427978516, | |
| "learning_rate": 8.461538461538461e-07, | |
| "loss": 1.5547, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.3684210526315788, | |
| "grad_norm": 61.2403564453125, | |
| "learning_rate": 8.442307692307692e-07, | |
| "loss": 1.4927, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.3859649122807016, | |
| "grad_norm": 27.469097137451172, | |
| "learning_rate": 8.423076923076923e-07, | |
| "loss": 1.2704, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.4035087719298245, | |
| "grad_norm": 41.40801239013672, | |
| "learning_rate": 8.403846153846153e-07, | |
| "loss": 1.12, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 31.15001106262207, | |
| "learning_rate": 8.384615384615384e-07, | |
| "loss": 1.1188, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 2.43859649122807, | |
| "grad_norm": 48.32929611206055, | |
| "learning_rate": 8.365384615384615e-07, | |
| "loss": 0.9936, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 2.456140350877193, | |
| "grad_norm": 43.33560562133789, | |
| "learning_rate": 8.346153846153846e-07, | |
| "loss": 1.5216, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.473684210526316, | |
| "grad_norm": 34.28059387207031, | |
| "learning_rate": 8.326923076923077e-07, | |
| "loss": 1.328, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 2.4912280701754383, | |
| "grad_norm": 55.07752227783203, | |
| "learning_rate": 8.307692307692308e-07, | |
| "loss": 1.3941, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.5087719298245617, | |
| "grad_norm": 33.26469421386719, | |
| "learning_rate": 8.288461538461539e-07, | |
| "loss": 1.0795, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 41.03183364868164, | |
| "learning_rate": 8.269230769230768e-07, | |
| "loss": 1.1216, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.543859649122807, | |
| "grad_norm": 27.419574737548828, | |
| "learning_rate": 8.249999999999999e-07, | |
| "loss": 1.2125, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.56140350877193, | |
| "grad_norm": 34.500667572021484, | |
| "learning_rate": 8.23076923076923e-07, | |
| "loss": 0.9645, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 2.5789473684210527, | |
| "grad_norm": 33.50322341918945, | |
| "learning_rate": 8.211538461538461e-07, | |
| "loss": 1.1995, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 2.5964912280701755, | |
| "grad_norm": 73.93108367919922, | |
| "learning_rate": 8.192307692307692e-07, | |
| "loss": 1.7082, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 2.6140350877192984, | |
| "grad_norm": 14.201051712036133, | |
| "learning_rate": 8.173076923076923e-07, | |
| "loss": 1.1467, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 23.20082664489746, | |
| "learning_rate": 8.153846153846154e-07, | |
| "loss": 1.2548, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.6491228070175437, | |
| "grad_norm": 37.417213439941406, | |
| "learning_rate": 8.134615384615385e-07, | |
| "loss": 0.9575, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 69.82878112792969, | |
| "learning_rate": 8.115384615384615e-07, | |
| "loss": 1.3022, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.6842105263157894, | |
| "grad_norm": 45.68886184692383, | |
| "learning_rate": 8.096153846153846e-07, | |
| "loss": 1.1804, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.7017543859649122, | |
| "grad_norm": 51.7285270690918, | |
| "learning_rate": 8.076923076923077e-07, | |
| "loss": 1.3008, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.719298245614035, | |
| "grad_norm": 51.00165939331055, | |
| "learning_rate": 8.057692307692308e-07, | |
| "loss": 1.2146, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 56.5025634765625, | |
| "learning_rate": 8.038461538461538e-07, | |
| "loss": 1.7964, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.754385964912281, | |
| "grad_norm": 29.800735473632812, | |
| "learning_rate": 8.019230769230769e-07, | |
| "loss": 1.0893, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.7719298245614032, | |
| "grad_norm": 35.997886657714844, | |
| "learning_rate": 8e-07, | |
| "loss": 1.2403, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.7894736842105265, | |
| "grad_norm": 65.59085083007812, | |
| "learning_rate": 7.98076923076923e-07, | |
| "loss": 1.0296, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.807017543859649, | |
| "grad_norm": 57.628726959228516, | |
| "learning_rate": 7.96153846153846e-07, | |
| "loss": 1.3895, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.824561403508772, | |
| "grad_norm": 51.50771713256836, | |
| "learning_rate": 7.942307692307691e-07, | |
| "loss": 0.7671, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 17.941665649414062, | |
| "learning_rate": 7.923076923076922e-07, | |
| "loss": 1.0704, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.8596491228070176, | |
| "grad_norm": 52.35181427001953, | |
| "learning_rate": 7.903846153846153e-07, | |
| "loss": 1.13, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.8771929824561404, | |
| "grad_norm": 42.02775955200195, | |
| "learning_rate": 7.884615384615384e-07, | |
| "loss": 0.8805, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.8947368421052633, | |
| "grad_norm": 58.180484771728516, | |
| "learning_rate": 7.865384615384615e-07, | |
| "loss": 1.1904, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.912280701754386, | |
| "grad_norm": 30.56048583984375, | |
| "learning_rate": 7.846153846153846e-07, | |
| "loss": 1.1506, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.9298245614035086, | |
| "grad_norm": 39.470184326171875, | |
| "learning_rate": 7.826923076923076e-07, | |
| "loss": 0.741, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 72.42061614990234, | |
| "learning_rate": 7.807692307692307e-07, | |
| "loss": 1.3581, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.9649122807017543, | |
| "grad_norm": 42.25473403930664, | |
| "learning_rate": 7.788461538461538e-07, | |
| "loss": 1.3318, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 2.982456140350877, | |
| "grad_norm": 28.385595321655273, | |
| "learning_rate": 7.769230769230769e-07, | |
| "loss": 1.0435, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 44.694984436035156, | |
| "learning_rate": 7.75e-07, | |
| "loss": 1.2793, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5019157088122606, | |
| "eval_loss": 1.2615339756011963, | |
| "eval_roc_auc": 0.7588809381350943, | |
| "eval_runtime": 24.6917, | |
| "eval_samples_per_second": 10.57, | |
| "eval_steps_per_second": 0.688, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 3.017543859649123, | |
| "grad_norm": 42.023494720458984, | |
| "learning_rate": 7.730769230769231e-07, | |
| "loss": 1.1126, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 3.0350877192982457, | |
| "grad_norm": 79.69966125488281, | |
| "learning_rate": 7.711538461538462e-07, | |
| "loss": 1.2264, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 3.0526315789473686, | |
| "grad_norm": 33.83009719848633, | |
| "learning_rate": 7.692307692307693e-07, | |
| "loss": 1.0632, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 3.0701754385964914, | |
| "grad_norm": 42.74213409423828, | |
| "learning_rate": 7.673076923076923e-07, | |
| "loss": 0.7554, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.087719298245614, | |
| "grad_norm": 29.90761375427246, | |
| "learning_rate": 7.653846153846153e-07, | |
| "loss": 1.1182, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 3.1052631578947367, | |
| "grad_norm": 34.67997741699219, | |
| "learning_rate": 7.634615384615384e-07, | |
| "loss": 1.2303, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 3.1228070175438596, | |
| "grad_norm": 28.055559158325195, | |
| "learning_rate": 7.615384615384615e-07, | |
| "loss": 0.9286, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.1403508771929824, | |
| "grad_norm": 48.56884765625, | |
| "learning_rate": 7.596153846153846e-07, | |
| "loss": 0.9129, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 23.18881607055664, | |
| "learning_rate": 7.576923076923077e-07, | |
| "loss": 1.1422, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.175438596491228, | |
| "grad_norm": 19.716148376464844, | |
| "learning_rate": 7.557692307692308e-07, | |
| "loss": 1.1811, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 3.192982456140351, | |
| "grad_norm": 23.179607391357422, | |
| "learning_rate": 7.538461538461538e-07, | |
| "loss": 0.9284, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.2105263157894735, | |
| "grad_norm": 21.30498504638672, | |
| "learning_rate": 7.519230769230769e-07, | |
| "loss": 0.909, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 3.2280701754385963, | |
| "grad_norm": 39.3275260925293, | |
| "learning_rate": 7.5e-07, | |
| "loss": 1.1318, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 3.245614035087719, | |
| "grad_norm": 52.91360855102539, | |
| "learning_rate": 7.48076923076923e-07, | |
| "loss": 1.2176, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 40.860450744628906, | |
| "learning_rate": 7.461538461538461e-07, | |
| "loss": 1.2245, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 3.280701754385965, | |
| "grad_norm": 28.36117935180664, | |
| "learning_rate": 7.442307692307692e-07, | |
| "loss": 1.0225, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 3.2982456140350878, | |
| "grad_norm": 40.48318099975586, | |
| "learning_rate": 7.423076923076923e-07, | |
| "loss": 1.289, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 3.3157894736842106, | |
| "grad_norm": 27.249305725097656, | |
| "learning_rate": 7.403846153846153e-07, | |
| "loss": 1.2829, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 23.647472381591797, | |
| "learning_rate": 7.384615384615384e-07, | |
| "loss": 1.223, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.3508771929824563, | |
| "grad_norm": 21.608871459960938, | |
| "learning_rate": 7.365384615384615e-07, | |
| "loss": 1.0349, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 3.3684210526315788, | |
| "grad_norm": 49.232421875, | |
| "learning_rate": 7.346153846153846e-07, | |
| "loss": 1.1577, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.3859649122807016, | |
| "grad_norm": 35.15854263305664, | |
| "learning_rate": 7.326923076923076e-07, | |
| "loss": 0.8111, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 3.4035087719298245, | |
| "grad_norm": 33.47184371948242, | |
| "learning_rate": 7.307692307692307e-07, | |
| "loss": 1.4127, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 3.4210526315789473, | |
| "grad_norm": 38.76816940307617, | |
| "learning_rate": 7.288461538461538e-07, | |
| "loss": 1.2329, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 3.43859649122807, | |
| "grad_norm": 64.3719253540039, | |
| "learning_rate": 7.269230769230769e-07, | |
| "loss": 1.4848, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 3.456140350877193, | |
| "grad_norm": 26.278711318969727, | |
| "learning_rate": 7.249999999999999e-07, | |
| "loss": 1.1606, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 3.473684210526316, | |
| "grad_norm": 33.774658203125, | |
| "learning_rate": 7.23076923076923e-07, | |
| "loss": 1.1549, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 3.4912280701754383, | |
| "grad_norm": 27.449785232543945, | |
| "learning_rate": 7.211538461538461e-07, | |
| "loss": 0.7551, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 3.5087719298245617, | |
| "grad_norm": 18.257455825805664, | |
| "learning_rate": 7.192307692307692e-07, | |
| "loss": 0.9322, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.526315789473684, | |
| "grad_norm": 54.30673599243164, | |
| "learning_rate": 7.173076923076923e-07, | |
| "loss": 0.7284, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 3.543859649122807, | |
| "grad_norm": 56.77223587036133, | |
| "learning_rate": 7.153846153846154e-07, | |
| "loss": 1.0322, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 3.56140350877193, | |
| "grad_norm": 23.362255096435547, | |
| "learning_rate": 7.134615384615385e-07, | |
| "loss": 0.9139, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 3.5789473684210527, | |
| "grad_norm": 46.12820816040039, | |
| "learning_rate": 7.115384615384616e-07, | |
| "loss": 0.9289, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 3.5964912280701755, | |
| "grad_norm": 38.374629974365234, | |
| "learning_rate": 7.096153846153846e-07, | |
| "loss": 0.9759, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 3.6140350877192984, | |
| "grad_norm": 40.8470458984375, | |
| "learning_rate": 7.076923076923077e-07, | |
| "loss": 0.9255, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 3.6315789473684212, | |
| "grad_norm": 51.18167495727539, | |
| "learning_rate": 7.057692307692308e-07, | |
| "loss": 1.6224, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 3.6491228070175437, | |
| "grad_norm": 65.96550750732422, | |
| "learning_rate": 7.038461538461539e-07, | |
| "loss": 1.4234, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 46.82773208618164, | |
| "learning_rate": 7.019230769230769e-07, | |
| "loss": 1.4326, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 3.6842105263157894, | |
| "grad_norm": 29.189741134643555, | |
| "learning_rate": 7e-07, | |
| "loss": 0.9554, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 3.7017543859649122, | |
| "grad_norm": 21.677045822143555, | |
| "learning_rate": 6.980769230769231e-07, | |
| "loss": 1.2323, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 3.719298245614035, | |
| "grad_norm": 55.265682220458984, | |
| "learning_rate": 6.961538461538461e-07, | |
| "loss": 1.211, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.736842105263158, | |
| "grad_norm": 31.52195167541504, | |
| "learning_rate": 6.942307692307691e-07, | |
| "loss": 0.9928, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 3.754385964912281, | |
| "grad_norm": 70.80876159667969, | |
| "learning_rate": 6.923076923076922e-07, | |
| "loss": 1.4359, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 3.7719298245614032, | |
| "grad_norm": 40.67531967163086, | |
| "learning_rate": 6.903846153846153e-07, | |
| "loss": 1.1827, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 3.7894736842105265, | |
| "grad_norm": 33.12160873413086, | |
| "learning_rate": 6.884615384615384e-07, | |
| "loss": 1.0607, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 3.807017543859649, | |
| "grad_norm": 47.94017791748047, | |
| "learning_rate": 6.865384615384615e-07, | |
| "loss": 0.8461, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 3.824561403508772, | |
| "grad_norm": 46.735321044921875, | |
| "learning_rate": 6.846153846153846e-07, | |
| "loss": 0.8335, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 3.8421052631578947, | |
| "grad_norm": 44.989681243896484, | |
| "learning_rate": 6.826923076923076e-07, | |
| "loss": 1.0917, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 3.8596491228070176, | |
| "grad_norm": 33.04579162597656, | |
| "learning_rate": 6.807692307692307e-07, | |
| "loss": 0.8094, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 3.8771929824561404, | |
| "grad_norm": 26.442249298095703, | |
| "learning_rate": 6.788461538461538e-07, | |
| "loss": 1.3672, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 3.8947368421052633, | |
| "grad_norm": 47.97966003417969, | |
| "learning_rate": 6.769230769230769e-07, | |
| "loss": 1.1112, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 3.912280701754386, | |
| "grad_norm": 49.454437255859375, | |
| "learning_rate": 6.75e-07, | |
| "loss": 1.2617, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 3.9298245614035086, | |
| "grad_norm": 39.12055587768555, | |
| "learning_rate": 6.730769230769231e-07, | |
| "loss": 1.3417, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 3.9473684210526314, | |
| "grad_norm": 38.112548828125, | |
| "learning_rate": 6.711538461538461e-07, | |
| "loss": 1.1137, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 3.9649122807017543, | |
| "grad_norm": 18.497282028198242, | |
| "learning_rate": 6.692307692307692e-07, | |
| "loss": 0.636, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 3.982456140350877, | |
| "grad_norm": 39.13663101196289, | |
| "learning_rate": 6.673076923076922e-07, | |
| "loss": 1.1667, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 27.05844497680664, | |
| "learning_rate": 6.653846153846153e-07, | |
| "loss": 0.9226, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5363984674329502, | |
| "eval_loss": 1.1785928010940552, | |
| "eval_roc_auc": 0.7825664232388221, | |
| "eval_runtime": 24.813, | |
| "eval_samples_per_second": 10.519, | |
| "eval_steps_per_second": 0.685, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 4.017543859649122, | |
| "grad_norm": 40.03508758544922, | |
| "learning_rate": 6.634615384615384e-07, | |
| "loss": 0.6566, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 4.035087719298246, | |
| "grad_norm": 31.557764053344727, | |
| "learning_rate": 6.615384615384615e-07, | |
| "loss": 0.9575, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 4.052631578947368, | |
| "grad_norm": 25.278390884399414, | |
| "learning_rate": 6.596153846153846e-07, | |
| "loss": 0.7892, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 4.0701754385964914, | |
| "grad_norm": 30.69614028930664, | |
| "learning_rate": 6.576923076923077e-07, | |
| "loss": 1.0433, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 4.087719298245614, | |
| "grad_norm": 58.29319381713867, | |
| "learning_rate": 6.557692307692308e-07, | |
| "loss": 1.5216, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 4.105263157894737, | |
| "grad_norm": 42.853153228759766, | |
| "learning_rate": 6.538461538461538e-07, | |
| "loss": 1.22, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 4.12280701754386, | |
| "grad_norm": 44.47975540161133, | |
| "learning_rate": 6.519230769230769e-07, | |
| "loss": 1.0883, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 4.140350877192983, | |
| "grad_norm": 32.618743896484375, | |
| "learning_rate": 6.5e-07, | |
| "loss": 0.8169, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 4.157894736842105, | |
| "grad_norm": 45.881961822509766, | |
| "learning_rate": 6.480769230769231e-07, | |
| "loss": 1.0063, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 4.175438596491228, | |
| "grad_norm": 34.217777252197266, | |
| "learning_rate": 6.461538461538462e-07, | |
| "loss": 1.3464, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 4.192982456140351, | |
| "grad_norm": 26.423919677734375, | |
| "learning_rate": 6.442307692307693e-07, | |
| "loss": 1.001, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 4.2105263157894735, | |
| "grad_norm": 29.074453353881836, | |
| "learning_rate": 6.423076923076924e-07, | |
| "loss": 1.0627, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.228070175438597, | |
| "grad_norm": 38.35462188720703, | |
| "learning_rate": 6.403846153846154e-07, | |
| "loss": 0.8004, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 4.245614035087719, | |
| "grad_norm": 21.466026306152344, | |
| "learning_rate": 6.384615384615383e-07, | |
| "loss": 1.1149, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 4.2631578947368425, | |
| "grad_norm": 34.41309356689453, | |
| "learning_rate": 6.365384615384614e-07, | |
| "loss": 1.1782, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 4.280701754385965, | |
| "grad_norm": 34.77060317993164, | |
| "learning_rate": 6.346153846153845e-07, | |
| "loss": 0.9231, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 4.298245614035087, | |
| "grad_norm": 26.94576072692871, | |
| "learning_rate": 6.326923076923076e-07, | |
| "loss": 1.1394, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 4.315789473684211, | |
| "grad_norm": 19.103694915771484, | |
| "learning_rate": 6.307692307692307e-07, | |
| "loss": 0.939, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 4.333333333333333, | |
| "grad_norm": 40.02509307861328, | |
| "learning_rate": 6.288461538461538e-07, | |
| "loss": 1.1394, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 4.350877192982456, | |
| "grad_norm": 62.130611419677734, | |
| "learning_rate": 6.269230769230769e-07, | |
| "loss": 1.1179, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 4.368421052631579, | |
| "grad_norm": 45.432708740234375, | |
| "learning_rate": 6.249999999999999e-07, | |
| "loss": 0.8211, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 4.385964912280702, | |
| "grad_norm": 52.313697814941406, | |
| "learning_rate": 6.23076923076923e-07, | |
| "loss": 1.1714, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.4035087719298245, | |
| "grad_norm": 24.41583824157715, | |
| "learning_rate": 6.211538461538461e-07, | |
| "loss": 1.0351, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 4.421052631578947, | |
| "grad_norm": 45.19395446777344, | |
| "learning_rate": 6.192307692307692e-07, | |
| "loss": 0.6095, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 4.43859649122807, | |
| "grad_norm": 31.14399528503418, | |
| "learning_rate": 6.173076923076923e-07, | |
| "loss": 1.138, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 4.456140350877193, | |
| "grad_norm": 48.5388298034668, | |
| "learning_rate": 6.153846153846154e-07, | |
| "loss": 1.2723, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 4.473684210526316, | |
| "grad_norm": 63.80923843383789, | |
| "learning_rate": 6.134615384615385e-07, | |
| "loss": 1.272, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 4.491228070175438, | |
| "grad_norm": 24.565670013427734, | |
| "learning_rate": 6.115384615384616e-07, | |
| "loss": 0.8632, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 4.508771929824562, | |
| "grad_norm": 27.799039840698242, | |
| "learning_rate": 6.096153846153846e-07, | |
| "loss": 0.9728, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 4.526315789473684, | |
| "grad_norm": 32.81442642211914, | |
| "learning_rate": 6.076923076923076e-07, | |
| "loss": 1.0024, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 4.543859649122807, | |
| "grad_norm": 19.44063949584961, | |
| "learning_rate": 6.057692307692307e-07, | |
| "loss": 1.1237, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 4.56140350877193, | |
| "grad_norm": 50.00696563720703, | |
| "learning_rate": 6.038461538461538e-07, | |
| "loss": 0.9283, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.578947368421053, | |
| "grad_norm": 35.66808319091797, | |
| "learning_rate": 6.019230769230769e-07, | |
| "loss": 0.7856, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 4.5964912280701755, | |
| "grad_norm": 36.49198913574219, | |
| "learning_rate": 6e-07, | |
| "loss": 1.0933, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 4.614035087719298, | |
| "grad_norm": 29.842639923095703, | |
| "learning_rate": 5.980769230769231e-07, | |
| "loss": 0.9233, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 4.631578947368421, | |
| "grad_norm": 80.13423156738281, | |
| "learning_rate": 5.961538461538461e-07, | |
| "loss": 1.2123, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 4.649122807017544, | |
| "grad_norm": 43.099300384521484, | |
| "learning_rate": 5.942307692307692e-07, | |
| "loss": 1.0768, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 32.86907196044922, | |
| "learning_rate": 5.923076923076923e-07, | |
| "loss": 0.8827, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 4.684210526315789, | |
| "grad_norm": 51.32353973388672, | |
| "learning_rate": 5.903846153846154e-07, | |
| "loss": 1.2962, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 4.701754385964913, | |
| "grad_norm": 38.76735305786133, | |
| "learning_rate": 5.884615384615385e-07, | |
| "loss": 0.5998, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 4.719298245614035, | |
| "grad_norm": 29.20305824279785, | |
| "learning_rate": 5.865384615384616e-07, | |
| "loss": 0.8401, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 4.7368421052631575, | |
| "grad_norm": 37.6125373840332, | |
| "learning_rate": 5.846153846153847e-07, | |
| "loss": 1.0015, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 4.754385964912281, | |
| "grad_norm": 60.04852294921875, | |
| "learning_rate": 5.826923076923078e-07, | |
| "loss": 1.052, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 4.771929824561403, | |
| "grad_norm": 48.63616943359375, | |
| "learning_rate": 5.807692307692307e-07, | |
| "loss": 1.0901, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 4.7894736842105265, | |
| "grad_norm": 42.764347076416016, | |
| "learning_rate": 5.788461538461538e-07, | |
| "loss": 0.5775, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 4.807017543859649, | |
| "grad_norm": 38.956268310546875, | |
| "learning_rate": 5.769230769230768e-07, | |
| "loss": 1.13, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 4.824561403508772, | |
| "grad_norm": 39.21760940551758, | |
| "learning_rate": 5.749999999999999e-07, | |
| "loss": 1.0837, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 4.842105263157895, | |
| "grad_norm": 28.150279998779297, | |
| "learning_rate": 5.73076923076923e-07, | |
| "loss": 0.7626, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 4.859649122807017, | |
| "grad_norm": 37.51382827758789, | |
| "learning_rate": 5.711538461538461e-07, | |
| "loss": 0.8416, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 4.87719298245614, | |
| "grad_norm": 62.94125747680664, | |
| "learning_rate": 5.692307692307692e-07, | |
| "loss": 0.9175, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 4.894736842105263, | |
| "grad_norm": 26.954492568969727, | |
| "learning_rate": 5.673076923076922e-07, | |
| "loss": 1.233, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 4.912280701754386, | |
| "grad_norm": 35.33868408203125, | |
| "learning_rate": 5.653846153846153e-07, | |
| "loss": 0.8353, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 4.9298245614035086, | |
| "grad_norm": 43.24800491333008, | |
| "learning_rate": 5.634615384615384e-07, | |
| "loss": 0.876, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 4.947368421052632, | |
| "grad_norm": 36.65079879760742, | |
| "learning_rate": 5.615384615384615e-07, | |
| "loss": 1.1462, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 4.964912280701754, | |
| "grad_norm": 20.44588851928711, | |
| "learning_rate": 5.596153846153846e-07, | |
| "loss": 0.9175, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 4.982456140350877, | |
| "grad_norm": 29.738140106201172, | |
| "learning_rate": 5.576923076923077e-07, | |
| "loss": 0.878, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 69.77880859375, | |
| "learning_rate": 5.557692307692308e-07, | |
| "loss": 1.176, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5363984674329502, | |
| "eval_loss": 1.1245133876800537, | |
| "eval_roc_auc": 0.8024073042505275, | |
| "eval_runtime": 24.2409, | |
| "eval_samples_per_second": 10.767, | |
| "eval_steps_per_second": 0.701, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 5.017543859649122, | |
| "grad_norm": 43.9262580871582, | |
| "learning_rate": 5.538461538461539e-07, | |
| "loss": 0.9786, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 5.035087719298246, | |
| "grad_norm": 42.07478713989258, | |
| "learning_rate": 5.519230769230769e-07, | |
| "loss": 0.8725, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 5.052631578947368, | |
| "grad_norm": 18.638660430908203, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.5895, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 5.0701754385964914, | |
| "grad_norm": 29.218503952026367, | |
| "learning_rate": 5.480769230769231e-07, | |
| "loss": 0.8934, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 5.087719298245614, | |
| "grad_norm": 72.49881744384766, | |
| "learning_rate": 5.461538461538461e-07, | |
| "loss": 1.0794, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 5.105263157894737, | |
| "grad_norm": 47.327239990234375, | |
| "learning_rate": 5.442307692307692e-07, | |
| "loss": 0.9808, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 5.12280701754386, | |
| "grad_norm": 49.39778137207031, | |
| "learning_rate": 5.423076923076923e-07, | |
| "loss": 1.0015, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 5.140350877192983, | |
| "grad_norm": 36.040836334228516, | |
| "learning_rate": 5.403846153846154e-07, | |
| "loss": 1.0248, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 5.157894736842105, | |
| "grad_norm": 33.901615142822266, | |
| "learning_rate": 5.384615384615384e-07, | |
| "loss": 0.908, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 5.175438596491228, | |
| "grad_norm": 32.479549407958984, | |
| "learning_rate": 5.365384615384615e-07, | |
| "loss": 0.7743, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 5.192982456140351, | |
| "grad_norm": 73.39112854003906, | |
| "learning_rate": 5.346153846153846e-07, | |
| "loss": 0.911, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 5.2105263157894735, | |
| "grad_norm": 50.840206146240234, | |
| "learning_rate": 5.326923076923077e-07, | |
| "loss": 1.1771, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 5.228070175438597, | |
| "grad_norm": 29.241947174072266, | |
| "learning_rate": 5.307692307692308e-07, | |
| "loss": 1.0472, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 5.245614035087719, | |
| "grad_norm": 32.0538215637207, | |
| "learning_rate": 5.288461538461539e-07, | |
| "loss": 0.9781, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 5.2631578947368425, | |
| "grad_norm": 39.43052291870117, | |
| "learning_rate": 5.269230769230769e-07, | |
| "loss": 0.9577, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.280701754385965, | |
| "grad_norm": 23.0631160736084, | |
| "learning_rate": 5.25e-07, | |
| "loss": 1.0764, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 5.298245614035087, | |
| "grad_norm": 46.16770553588867, | |
| "learning_rate": 5.23076923076923e-07, | |
| "loss": 0.792, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 5.315789473684211, | |
| "grad_norm": 51.57275390625, | |
| "learning_rate": 5.211538461538461e-07, | |
| "loss": 0.9721, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 30.06395721435547, | |
| "learning_rate": 5.192307692307692e-07, | |
| "loss": 1.0893, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 5.350877192982456, | |
| "grad_norm": 71.85147094726562, | |
| "learning_rate": 5.173076923076923e-07, | |
| "loss": 1.2034, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 5.368421052631579, | |
| "grad_norm": 45.4450569152832, | |
| "learning_rate": 5.153846153846153e-07, | |
| "loss": 0.7588, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 5.385964912280702, | |
| "grad_norm": 40.09158706665039, | |
| "learning_rate": 5.134615384615384e-07, | |
| "loss": 0.9968, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 5.4035087719298245, | |
| "grad_norm": 62.48497772216797, | |
| "learning_rate": 5.115384615384615e-07, | |
| "loss": 1.2783, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 5.421052631578947, | |
| "grad_norm": 59.85103988647461, | |
| "learning_rate": 5.096153846153845e-07, | |
| "loss": 1.2234, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 5.43859649122807, | |
| "grad_norm": 41.75504684448242, | |
| "learning_rate": 5.076923076923076e-07, | |
| "loss": 1.0972, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 5.456140350877193, | |
| "grad_norm": 25.908830642700195, | |
| "learning_rate": 5.057692307692307e-07, | |
| "loss": 1.0729, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 5.473684210526316, | |
| "grad_norm": 52.866825103759766, | |
| "learning_rate": 5.038461538461538e-07, | |
| "loss": 0.7172, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 5.491228070175438, | |
| "grad_norm": 62.79011917114258, | |
| "learning_rate": 5.019230769230769e-07, | |
| "loss": 0.8971, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 5.508771929824562, | |
| "grad_norm": 61.6699333190918, | |
| "learning_rate": 5e-07, | |
| "loss": 1.1901, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 5.526315789473684, | |
| "grad_norm": 39.02582550048828, | |
| "learning_rate": 4.980769230769231e-07, | |
| "loss": 0.9623, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 5.543859649122807, | |
| "grad_norm": 20.786083221435547, | |
| "learning_rate": 4.961538461538462e-07, | |
| "loss": 0.9772, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 5.56140350877193, | |
| "grad_norm": 29.874135971069336, | |
| "learning_rate": 4.942307692307692e-07, | |
| "loss": 0.5292, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 5.578947368421053, | |
| "grad_norm": 73.50117492675781, | |
| "learning_rate": 4.923076923076923e-07, | |
| "loss": 1.4202, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 5.5964912280701755, | |
| "grad_norm": 28.74661636352539, | |
| "learning_rate": 4.903846153846153e-07, | |
| "loss": 0.6696, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 5.614035087719298, | |
| "grad_norm": 38.96842956542969, | |
| "learning_rate": 4.884615384615384e-07, | |
| "loss": 0.9159, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 5.631578947368421, | |
| "grad_norm": 36.87344741821289, | |
| "learning_rate": 4.865384615384615e-07, | |
| "loss": 0.9728, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 5.649122807017544, | |
| "grad_norm": 38.103668212890625, | |
| "learning_rate": 4.846153846153846e-07, | |
| "loss": 0.9663, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 5.666666666666667, | |
| "grad_norm": 24.839292526245117, | |
| "learning_rate": 4.826923076923077e-07, | |
| "loss": 0.8722, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 5.684210526315789, | |
| "grad_norm": 27.795520782470703, | |
| "learning_rate": 4.807692307692307e-07, | |
| "loss": 0.7313, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 5.701754385964913, | |
| "grad_norm": 48.86296844482422, | |
| "learning_rate": 4.788461538461538e-07, | |
| "loss": 0.7879, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 5.719298245614035, | |
| "grad_norm": 43.693058013916016, | |
| "learning_rate": 4.769230769230769e-07, | |
| "loss": 0.944, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 5.7368421052631575, | |
| "grad_norm": 26.41690444946289, | |
| "learning_rate": 4.7499999999999995e-07, | |
| "loss": 0.6766, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 5.754385964912281, | |
| "grad_norm": 51.20581817626953, | |
| "learning_rate": 4.7307692307692304e-07, | |
| "loss": 0.8956, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 5.771929824561403, | |
| "grad_norm": 42.70225143432617, | |
| "learning_rate": 4.711538461538461e-07, | |
| "loss": 0.9747, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 5.7894736842105265, | |
| "grad_norm": 32.69301986694336, | |
| "learning_rate": 4.692307692307692e-07, | |
| "loss": 0.5307, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 5.807017543859649, | |
| "grad_norm": 47.880863189697266, | |
| "learning_rate": 4.673076923076923e-07, | |
| "loss": 0.7341, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 5.824561403508772, | |
| "grad_norm": 37.469017028808594, | |
| "learning_rate": 4.653846153846154e-07, | |
| "loss": 0.8207, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 5.842105263157895, | |
| "grad_norm": 44.53699493408203, | |
| "learning_rate": 4.634615384615385e-07, | |
| "loss": 1.0366, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 5.859649122807017, | |
| "grad_norm": 34.84064483642578, | |
| "learning_rate": 4.6153846153846156e-07, | |
| "loss": 0.9788, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 5.87719298245614, | |
| "grad_norm": 35.885215759277344, | |
| "learning_rate": 4.596153846153846e-07, | |
| "loss": 0.8453, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 5.894736842105263, | |
| "grad_norm": 33.525081634521484, | |
| "learning_rate": 4.5769230769230763e-07, | |
| "loss": 0.6823, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 5.912280701754386, | |
| "grad_norm": 46.2805290222168, | |
| "learning_rate": 4.557692307692307e-07, | |
| "loss": 0.9438, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 5.9298245614035086, | |
| "grad_norm": 56.342018127441406, | |
| "learning_rate": 4.538461538461538e-07, | |
| "loss": 1.2671, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 5.947368421052632, | |
| "grad_norm": 32.246089935302734, | |
| "learning_rate": 4.519230769230769e-07, | |
| "loss": 0.9635, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 5.964912280701754, | |
| "grad_norm": 39.31554412841797, | |
| "learning_rate": 4.5e-07, | |
| "loss": 0.8637, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 5.982456140350877, | |
| "grad_norm": 32.075775146484375, | |
| "learning_rate": 4.4807692307692307e-07, | |
| "loss": 1.0645, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 33.591068267822266, | |
| "learning_rate": 4.4615384615384615e-07, | |
| "loss": 0.7403, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5708812260536399, | |
| "eval_loss": 1.0752440690994263, | |
| "eval_roc_auc": 0.8091941568824714, | |
| "eval_runtime": 25.1571, | |
| "eval_samples_per_second": 10.375, | |
| "eval_steps_per_second": 0.676, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 6.017543859649122, | |
| "grad_norm": 23.306591033935547, | |
| "learning_rate": 4.442307692307692e-07, | |
| "loss": 0.5686, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 6.035087719298246, | |
| "grad_norm": 44.798919677734375, | |
| "learning_rate": 4.423076923076923e-07, | |
| "loss": 1.203, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 6.052631578947368, | |
| "grad_norm": 23.386505126953125, | |
| "learning_rate": 4.4038461538461536e-07, | |
| "loss": 0.7083, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 6.0701754385964914, | |
| "grad_norm": 49.338531494140625, | |
| "learning_rate": 4.3846153846153845e-07, | |
| "loss": 0.9921, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 6.087719298245614, | |
| "grad_norm": 26.851451873779297, | |
| "learning_rate": 4.3653846153846154e-07, | |
| "loss": 0.7737, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 6.105263157894737, | |
| "grad_norm": 41.42471694946289, | |
| "learning_rate": 4.346153846153846e-07, | |
| "loss": 1.1449, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 6.12280701754386, | |
| "grad_norm": 30.84017562866211, | |
| "learning_rate": 4.326923076923077e-07, | |
| "loss": 1.0377, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 6.140350877192983, | |
| "grad_norm": 21.94278335571289, | |
| "learning_rate": 4.307692307692308e-07, | |
| "loss": 0.7842, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 6.157894736842105, | |
| "grad_norm": 54.80387496948242, | |
| "learning_rate": 4.288461538461538e-07, | |
| "loss": 0.6877, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 6.175438596491228, | |
| "grad_norm": 52.03696060180664, | |
| "learning_rate": 4.2692307692307687e-07, | |
| "loss": 0.819, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 6.192982456140351, | |
| "grad_norm": 47.98991775512695, | |
| "learning_rate": 4.2499999999999995e-07, | |
| "loss": 1.2738, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 6.2105263157894735, | |
| "grad_norm": 31.361557006835938, | |
| "learning_rate": 4.2307692307692304e-07, | |
| "loss": 1.0351, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 6.228070175438597, | |
| "grad_norm": 58.75925827026367, | |
| "learning_rate": 4.2115384615384613e-07, | |
| "loss": 0.9005, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 6.245614035087719, | |
| "grad_norm": 61.34735107421875, | |
| "learning_rate": 4.192307692307692e-07, | |
| "loss": 1.335, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 6.2631578947368425, | |
| "grad_norm": 38.18897247314453, | |
| "learning_rate": 4.173076923076923e-07, | |
| "loss": 0.8518, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 6.280701754385965, | |
| "grad_norm": 47.511199951171875, | |
| "learning_rate": 4.153846153846154e-07, | |
| "loss": 1.0651, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 6.298245614035087, | |
| "grad_norm": 52.5938720703125, | |
| "learning_rate": 4.134615384615384e-07, | |
| "loss": 1.2098, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 6.315789473684211, | |
| "grad_norm": 46.33355712890625, | |
| "learning_rate": 4.115384615384615e-07, | |
| "loss": 0.6994, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.333333333333333, | |
| "grad_norm": 43.15740203857422, | |
| "learning_rate": 4.096153846153846e-07, | |
| "loss": 1.0389, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 6.350877192982456, | |
| "grad_norm": 33.72315216064453, | |
| "learning_rate": 4.076923076923077e-07, | |
| "loss": 0.862, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 6.368421052631579, | |
| "grad_norm": 50.00696563720703, | |
| "learning_rate": 4.0576923076923077e-07, | |
| "loss": 0.9237, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 6.385964912280702, | |
| "grad_norm": 22.04112434387207, | |
| "learning_rate": 4.0384615384615386e-07, | |
| "loss": 0.7235, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 6.4035087719298245, | |
| "grad_norm": 56.314048767089844, | |
| "learning_rate": 4.019230769230769e-07, | |
| "loss": 1.048, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 6.421052631578947, | |
| "grad_norm": 26.58925437927246, | |
| "learning_rate": 4e-07, | |
| "loss": 0.6484, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 6.43859649122807, | |
| "grad_norm": 55.187747955322266, | |
| "learning_rate": 3.98076923076923e-07, | |
| "loss": 0.9495, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 6.456140350877193, | |
| "grad_norm": 45.539772033691406, | |
| "learning_rate": 3.961538461538461e-07, | |
| "loss": 0.6283, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 6.473684210526316, | |
| "grad_norm": 40.78237533569336, | |
| "learning_rate": 3.942307692307692e-07, | |
| "loss": 0.5378, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 6.491228070175438, | |
| "grad_norm": 24.31471824645996, | |
| "learning_rate": 3.923076923076923e-07, | |
| "loss": 0.7294, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 6.508771929824562, | |
| "grad_norm": 30.035335540771484, | |
| "learning_rate": 3.9038461538461536e-07, | |
| "loss": 0.8559, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 6.526315789473684, | |
| "grad_norm": 31.007173538208008, | |
| "learning_rate": 3.8846153846153845e-07, | |
| "loss": 0.802, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 6.543859649122807, | |
| "grad_norm": 43.71822738647461, | |
| "learning_rate": 3.8653846153846154e-07, | |
| "loss": 0.748, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 6.56140350877193, | |
| "grad_norm": 21.99559783935547, | |
| "learning_rate": 3.8461538461538463e-07, | |
| "loss": 0.6553, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 6.578947368421053, | |
| "grad_norm": 44.91862869262695, | |
| "learning_rate": 3.8269230769230766e-07, | |
| "loss": 0.8155, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 6.5964912280701755, | |
| "grad_norm": 28.355440139770508, | |
| "learning_rate": 3.8076923076923075e-07, | |
| "loss": 1.1146, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 6.614035087719298, | |
| "grad_norm": 28.48116111755371, | |
| "learning_rate": 3.7884615384615384e-07, | |
| "loss": 1.1448, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 6.631578947368421, | |
| "grad_norm": 50.27273941040039, | |
| "learning_rate": 3.769230769230769e-07, | |
| "loss": 1.0349, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 6.649122807017544, | |
| "grad_norm": 32.425384521484375, | |
| "learning_rate": 3.75e-07, | |
| "loss": 0.8526, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 73.90445709228516, | |
| "learning_rate": 3.7307692307692304e-07, | |
| "loss": 1.2825, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 6.684210526315789, | |
| "grad_norm": 48.79448699951172, | |
| "learning_rate": 3.7115384615384613e-07, | |
| "loss": 0.6351, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 6.701754385964913, | |
| "grad_norm": 24.40697479248047, | |
| "learning_rate": 3.692307692307692e-07, | |
| "loss": 0.7081, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 6.719298245614035, | |
| "grad_norm": 47.5269660949707, | |
| "learning_rate": 3.673076923076923e-07, | |
| "loss": 0.671, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 6.7368421052631575, | |
| "grad_norm": 37.11399841308594, | |
| "learning_rate": 3.6538461538461534e-07, | |
| "loss": 0.6574, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 6.754385964912281, | |
| "grad_norm": 43.93232345581055, | |
| "learning_rate": 3.6346153846153843e-07, | |
| "loss": 1.0039, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 6.771929824561403, | |
| "grad_norm": 35.62379455566406, | |
| "learning_rate": 3.615384615384615e-07, | |
| "loss": 0.7748, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 6.7894736842105265, | |
| "grad_norm": 68.19344329833984, | |
| "learning_rate": 3.596153846153846e-07, | |
| "loss": 1.033, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 6.807017543859649, | |
| "grad_norm": 39.3653678894043, | |
| "learning_rate": 3.576923076923077e-07, | |
| "loss": 0.5758, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 6.824561403508772, | |
| "grad_norm": 30.772722244262695, | |
| "learning_rate": 3.557692307692308e-07, | |
| "loss": 0.8695, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 6.842105263157895, | |
| "grad_norm": 32.83871078491211, | |
| "learning_rate": 3.5384615384615386e-07, | |
| "loss": 1.0549, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 6.859649122807017, | |
| "grad_norm": 62.99473190307617, | |
| "learning_rate": 3.5192307692307695e-07, | |
| "loss": 0.9173, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 6.87719298245614, | |
| "grad_norm": 42.774044036865234, | |
| "learning_rate": 3.5e-07, | |
| "loss": 1.0925, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 6.894736842105263, | |
| "grad_norm": 45.01066589355469, | |
| "learning_rate": 3.4807692307692307e-07, | |
| "loss": 1.0266, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 6.912280701754386, | |
| "grad_norm": 30.733928680419922, | |
| "learning_rate": 3.461538461538461e-07, | |
| "loss": 0.7034, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 6.9298245614035086, | |
| "grad_norm": 19.22307014465332, | |
| "learning_rate": 3.442307692307692e-07, | |
| "loss": 0.9147, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 6.947368421052632, | |
| "grad_norm": 68.61734008789062, | |
| "learning_rate": 3.423076923076923e-07, | |
| "loss": 1.1881, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 6.964912280701754, | |
| "grad_norm": 33.16205978393555, | |
| "learning_rate": 3.4038461538461537e-07, | |
| "loss": 0.7483, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 6.982456140350877, | |
| "grad_norm": 19.65161895751953, | |
| "learning_rate": 3.3846153846153845e-07, | |
| "loss": 0.6954, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 92.69136810302734, | |
| "learning_rate": 3.3653846153846154e-07, | |
| "loss": 1.1487, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.5632183908045977, | |
| "eval_loss": 1.069606065750122, | |
| "eval_roc_auc": 0.8184030717743787, | |
| "eval_runtime": 24.405, | |
| "eval_samples_per_second": 10.695, | |
| "eval_steps_per_second": 0.697, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 7.017543859649122, | |
| "grad_norm": 87.78544616699219, | |
| "learning_rate": 3.346153846153846e-07, | |
| "loss": 0.976, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 7.035087719298246, | |
| "grad_norm": 27.48891258239746, | |
| "learning_rate": 3.3269230769230766e-07, | |
| "loss": 0.5747, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 7.052631578947368, | |
| "grad_norm": 45.76131057739258, | |
| "learning_rate": 3.3076923076923075e-07, | |
| "loss": 0.9605, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 7.0701754385964914, | |
| "grad_norm": 20.498069763183594, | |
| "learning_rate": 3.2884615384615384e-07, | |
| "loss": 0.5053, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 7.087719298245614, | |
| "grad_norm": 22.58955192565918, | |
| "learning_rate": 3.269230769230769e-07, | |
| "loss": 0.5368, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 7.105263157894737, | |
| "grad_norm": 22.225399017333984, | |
| "learning_rate": 3.25e-07, | |
| "loss": 0.777, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 7.12280701754386, | |
| "grad_norm": 39.7629508972168, | |
| "learning_rate": 3.230769230769231e-07, | |
| "loss": 0.7354, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 7.140350877192983, | |
| "grad_norm": 58.76552200317383, | |
| "learning_rate": 3.211538461538462e-07, | |
| "loss": 1.1802, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 7.157894736842105, | |
| "grad_norm": 22.695573806762695, | |
| "learning_rate": 3.1923076923076917e-07, | |
| "loss": 0.8024, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 7.175438596491228, | |
| "grad_norm": 33.9383430480957, | |
| "learning_rate": 3.1730769230769225e-07, | |
| "loss": 1.0508, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 7.192982456140351, | |
| "grad_norm": 30.876842498779297, | |
| "learning_rate": 3.1538461538461534e-07, | |
| "loss": 0.7214, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 7.2105263157894735, | |
| "grad_norm": 36.76365280151367, | |
| "learning_rate": 3.1346153846153843e-07, | |
| "loss": 0.7907, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 7.228070175438597, | |
| "grad_norm": 65.87890625, | |
| "learning_rate": 3.115384615384615e-07, | |
| "loss": 1.2976, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 7.245614035087719, | |
| "grad_norm": 43.14383316040039, | |
| "learning_rate": 3.096153846153846e-07, | |
| "loss": 1.1367, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 7.2631578947368425, | |
| "grad_norm": 27.98732566833496, | |
| "learning_rate": 3.076923076923077e-07, | |
| "loss": 1.0924, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 7.280701754385965, | |
| "grad_norm": 48.689537048339844, | |
| "learning_rate": 3.057692307692308e-07, | |
| "loss": 1.0298, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 7.298245614035087, | |
| "grad_norm": 27.990142822265625, | |
| "learning_rate": 3.038461538461538e-07, | |
| "loss": 0.9779, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 7.315789473684211, | |
| "grad_norm": 29.33123016357422, | |
| "learning_rate": 3.019230769230769e-07, | |
| "loss": 0.7683, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 7.333333333333333, | |
| "grad_norm": 26.44052505493164, | |
| "learning_rate": 3e-07, | |
| "loss": 0.8771, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 7.350877192982456, | |
| "grad_norm": 28.419343948364258, | |
| "learning_rate": 2.980769230769231e-07, | |
| "loss": 0.9028, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 7.368421052631579, | |
| "grad_norm": 38.75981521606445, | |
| "learning_rate": 2.9615384615384616e-07, | |
| "loss": 0.9058, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 7.385964912280702, | |
| "grad_norm": 27.943565368652344, | |
| "learning_rate": 2.9423076923076925e-07, | |
| "loss": 0.5811, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 7.4035087719298245, | |
| "grad_norm": 38.048736572265625, | |
| "learning_rate": 2.9230769230769234e-07, | |
| "loss": 1.0206, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 7.421052631578947, | |
| "grad_norm": 30.790170669555664, | |
| "learning_rate": 2.9038461538461537e-07, | |
| "loss": 0.8268, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 7.43859649122807, | |
| "grad_norm": 52.75239562988281, | |
| "learning_rate": 2.884615384615384e-07, | |
| "loss": 1.0024, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 7.456140350877193, | |
| "grad_norm": 30.304975509643555, | |
| "learning_rate": 2.865384615384615e-07, | |
| "loss": 0.7764, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 7.473684210526316, | |
| "grad_norm": 44.08205795288086, | |
| "learning_rate": 2.846153846153846e-07, | |
| "loss": 0.6279, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 7.491228070175438, | |
| "grad_norm": 29.00324058532715, | |
| "learning_rate": 2.8269230769230767e-07, | |
| "loss": 0.7732, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 7.508771929824562, | |
| "grad_norm": 44.4395637512207, | |
| "learning_rate": 2.8076923076923075e-07, | |
| "loss": 0.5693, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 7.526315789473684, | |
| "grad_norm": 28.682804107666016, | |
| "learning_rate": 2.7884615384615384e-07, | |
| "loss": 0.8483, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 7.543859649122807, | |
| "grad_norm": 71.88604736328125, | |
| "learning_rate": 2.7692307692307693e-07, | |
| "loss": 1.0121, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 7.56140350877193, | |
| "grad_norm": 37.93716049194336, | |
| "learning_rate": 2.75e-07, | |
| "loss": 0.9846, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 7.578947368421053, | |
| "grad_norm": 68.40493774414062, | |
| "learning_rate": 2.7307692307692305e-07, | |
| "loss": 0.7968, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 7.5964912280701755, | |
| "grad_norm": 34.130680084228516, | |
| "learning_rate": 2.7115384615384614e-07, | |
| "loss": 0.6804, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 7.614035087719298, | |
| "grad_norm": 66.54443359375, | |
| "learning_rate": 2.692307692307692e-07, | |
| "loss": 0.8493, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 7.631578947368421, | |
| "grad_norm": 30.927793502807617, | |
| "learning_rate": 2.673076923076923e-07, | |
| "loss": 0.8261, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 7.649122807017544, | |
| "grad_norm": 31.034412384033203, | |
| "learning_rate": 2.653846153846154e-07, | |
| "loss": 1.0382, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 7.666666666666667, | |
| "grad_norm": 28.199138641357422, | |
| "learning_rate": 2.6346153846153843e-07, | |
| "loss": 0.6613, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 7.684210526315789, | |
| "grad_norm": 34.426605224609375, | |
| "learning_rate": 2.615384615384615e-07, | |
| "loss": 0.8676, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 7.701754385964913, | |
| "grad_norm": 26.49811553955078, | |
| "learning_rate": 2.596153846153846e-07, | |
| "loss": 0.6587, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 7.719298245614035, | |
| "grad_norm": 28.70757293701172, | |
| "learning_rate": 2.5769230769230764e-07, | |
| "loss": 0.7954, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 7.7368421052631575, | |
| "grad_norm": 38.85836410522461, | |
| "learning_rate": 2.5576923076923073e-07, | |
| "loss": 0.8062, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 7.754385964912281, | |
| "grad_norm": 26.44515609741211, | |
| "learning_rate": 2.538461538461538e-07, | |
| "loss": 1.0693, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 7.771929824561403, | |
| "grad_norm": 55.86748504638672, | |
| "learning_rate": 2.519230769230769e-07, | |
| "loss": 0.9166, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 7.7894736842105265, | |
| "grad_norm": 28.77703857421875, | |
| "learning_rate": 2.5e-07, | |
| "loss": 0.8507, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 7.807017543859649, | |
| "grad_norm": 39.333343505859375, | |
| "learning_rate": 2.480769230769231e-07, | |
| "loss": 1.02, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 7.824561403508772, | |
| "grad_norm": 44.16427230834961, | |
| "learning_rate": 2.4615384615384616e-07, | |
| "loss": 1.0713, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 7.842105263157895, | |
| "grad_norm": 22.932966232299805, | |
| "learning_rate": 2.442307692307692e-07, | |
| "loss": 0.4415, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 7.859649122807017, | |
| "grad_norm": 48.378841400146484, | |
| "learning_rate": 2.423076923076923e-07, | |
| "loss": 0.7085, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 7.87719298245614, | |
| "grad_norm": 50.131019592285156, | |
| "learning_rate": 2.4038461538461537e-07, | |
| "loss": 0.9735, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 7.894736842105263, | |
| "grad_norm": 20.94889259338379, | |
| "learning_rate": 2.3846153846153846e-07, | |
| "loss": 0.6935, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 7.912280701754386, | |
| "grad_norm": 30.711423873901367, | |
| "learning_rate": 2.3653846153846152e-07, | |
| "loss": 0.7382, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 7.9298245614035086, | |
| "grad_norm": 37.16843795776367, | |
| "learning_rate": 2.346153846153846e-07, | |
| "loss": 1.0025, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 7.947368421052632, | |
| "grad_norm": 61.31568908691406, | |
| "learning_rate": 2.326923076923077e-07, | |
| "loss": 0.7435, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 7.964912280701754, | |
| "grad_norm": 30.279325485229492, | |
| "learning_rate": 2.3076923076923078e-07, | |
| "loss": 0.8862, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 7.982456140350877, | |
| "grad_norm": 22.893749237060547, | |
| "learning_rate": 2.2884615384615382e-07, | |
| "loss": 0.8411, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 33.30168533325195, | |
| "learning_rate": 2.269230769230769e-07, | |
| "loss": 0.521, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5747126436781609, | |
| "eval_loss": 1.0445302724838257, | |
| "eval_roc_auc": 0.821241054744927, | |
| "eval_runtime": 24.8722, | |
| "eval_samples_per_second": 10.494, | |
| "eval_steps_per_second": 0.683, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 8.017543859649123, | |
| "grad_norm": 32.153465270996094, | |
| "learning_rate": 2.25e-07, | |
| "loss": 0.8686, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 8.035087719298245, | |
| "grad_norm": 26.060014724731445, | |
| "learning_rate": 2.2307692307692308e-07, | |
| "loss": 0.7634, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 8.052631578947368, | |
| "grad_norm": 60.5893440246582, | |
| "learning_rate": 2.2115384615384614e-07, | |
| "loss": 1.3411, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 8.070175438596491, | |
| "grad_norm": 28.305891036987305, | |
| "learning_rate": 2.1923076923076922e-07, | |
| "loss": 0.6241, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 8.087719298245615, | |
| "grad_norm": 20.124664306640625, | |
| "learning_rate": 2.173076923076923e-07, | |
| "loss": 0.591, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 8.105263157894736, | |
| "grad_norm": 87.66210174560547, | |
| "learning_rate": 2.153846153846154e-07, | |
| "loss": 1.2101, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 8.12280701754386, | |
| "grad_norm": 35.75629425048828, | |
| "learning_rate": 2.1346153846153843e-07, | |
| "loss": 0.8357, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 8.140350877192983, | |
| "grad_norm": 58.722328186035156, | |
| "learning_rate": 2.1153846153846152e-07, | |
| "loss": 1.0812, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 8.157894736842104, | |
| "grad_norm": 45.29573059082031, | |
| "learning_rate": 2.096153846153846e-07, | |
| "loss": 0.7323, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 8.175438596491228, | |
| "grad_norm": 29.968141555786133, | |
| "learning_rate": 2.076923076923077e-07, | |
| "loss": 0.6938, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 8.192982456140351, | |
| "grad_norm": 37.96227264404297, | |
| "learning_rate": 2.0576923076923076e-07, | |
| "loss": 0.9054, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 8.210526315789474, | |
| "grad_norm": 66.0616455078125, | |
| "learning_rate": 2.0384615384615384e-07, | |
| "loss": 0.9688, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 8.228070175438596, | |
| "grad_norm": 70.00723266601562, | |
| "learning_rate": 2.0192307692307693e-07, | |
| "loss": 1.1553, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 8.24561403508772, | |
| "grad_norm": 39.04280090332031, | |
| "learning_rate": 2e-07, | |
| "loss": 0.9341, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 8.263157894736842, | |
| "grad_norm": 34.20655059814453, | |
| "learning_rate": 1.9807692307692305e-07, | |
| "loss": 0.9593, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 8.280701754385966, | |
| "grad_norm": 63.928348541259766, | |
| "learning_rate": 1.9615384615384614e-07, | |
| "loss": 0.9055, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 8.298245614035087, | |
| "grad_norm": 71.03302001953125, | |
| "learning_rate": 1.9423076923076923e-07, | |
| "loss": 0.921, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 8.31578947368421, | |
| "grad_norm": 37.35957717895508, | |
| "learning_rate": 1.9230769230769231e-07, | |
| "loss": 0.6336, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 8.333333333333334, | |
| "grad_norm": 17.601655960083008, | |
| "learning_rate": 1.9038461538461537e-07, | |
| "loss": 0.4762, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 8.350877192982455, | |
| "grad_norm": 45.87922286987305, | |
| "learning_rate": 1.8846153846153846e-07, | |
| "loss": 0.7384, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 8.368421052631579, | |
| "grad_norm": 35.179988861083984, | |
| "learning_rate": 1.8653846153846152e-07, | |
| "loss": 0.6582, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 8.385964912280702, | |
| "grad_norm": 29.4314022064209, | |
| "learning_rate": 1.846153846153846e-07, | |
| "loss": 0.6557, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 8.403508771929825, | |
| "grad_norm": 39.96945571899414, | |
| "learning_rate": 1.8269230769230767e-07, | |
| "loss": 0.9508, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 8.421052631578947, | |
| "grad_norm": 41.48187255859375, | |
| "learning_rate": 1.8076923076923076e-07, | |
| "loss": 0.6461, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 8.43859649122807, | |
| "grad_norm": 46.457733154296875, | |
| "learning_rate": 1.7884615384615384e-07, | |
| "loss": 0.7903, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 8.456140350877194, | |
| "grad_norm": 60.275028228759766, | |
| "learning_rate": 1.7692307692307693e-07, | |
| "loss": 0.9536, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 8.473684210526315, | |
| "grad_norm": 32.316471099853516, | |
| "learning_rate": 1.75e-07, | |
| "loss": 0.7564, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 8.491228070175438, | |
| "grad_norm": 30.793420791625977, | |
| "learning_rate": 1.7307692307692305e-07, | |
| "loss": 0.5605, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 8.508771929824562, | |
| "grad_norm": 19.355497360229492, | |
| "learning_rate": 1.7115384615384614e-07, | |
| "loss": 0.5652, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 8.526315789473685, | |
| "grad_norm": 24.5403995513916, | |
| "learning_rate": 1.6923076923076923e-07, | |
| "loss": 0.6776, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 8.543859649122806, | |
| "grad_norm": 35.900184631347656, | |
| "learning_rate": 1.673076923076923e-07, | |
| "loss": 0.7501, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 8.56140350877193, | |
| "grad_norm": 28.108530044555664, | |
| "learning_rate": 1.6538461538461538e-07, | |
| "loss": 0.5605, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 8.578947368421053, | |
| "grad_norm": 39.321144104003906, | |
| "learning_rate": 1.6346153846153846e-07, | |
| "loss": 0.9753, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 8.596491228070175, | |
| "grad_norm": 24.50644874572754, | |
| "learning_rate": 1.6153846153846155e-07, | |
| "loss": 0.9194, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 8.614035087719298, | |
| "grad_norm": 50.84926986694336, | |
| "learning_rate": 1.5961538461538458e-07, | |
| "loss": 0.9425, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 8.631578947368421, | |
| "grad_norm": 38.82880401611328, | |
| "learning_rate": 1.5769230769230767e-07, | |
| "loss": 0.8447, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 8.649122807017545, | |
| "grad_norm": 48.61659240722656, | |
| "learning_rate": 1.5576923076923076e-07, | |
| "loss": 0.9379, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 8.666666666666666, | |
| "grad_norm": 41.58673095703125, | |
| "learning_rate": 1.5384615384615385e-07, | |
| "loss": 0.8007, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 8.68421052631579, | |
| "grad_norm": 49.88103485107422, | |
| "learning_rate": 1.519230769230769e-07, | |
| "loss": 1.2287, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 8.701754385964913, | |
| "grad_norm": 32.42145538330078, | |
| "learning_rate": 1.5e-07, | |
| "loss": 0.655, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 8.719298245614034, | |
| "grad_norm": 31.83061981201172, | |
| "learning_rate": 1.4807692307692308e-07, | |
| "loss": 0.7932, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 8.736842105263158, | |
| "grad_norm": 45.73976516723633, | |
| "learning_rate": 1.4615384615384617e-07, | |
| "loss": 0.7517, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 8.75438596491228, | |
| "grad_norm": 24.301603317260742, | |
| "learning_rate": 1.442307692307692e-07, | |
| "loss": 0.6368, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 8.771929824561404, | |
| "grad_norm": 43.589569091796875, | |
| "learning_rate": 1.423076923076923e-07, | |
| "loss": 0.7572, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.789473684210526, | |
| "grad_norm": 55.28351974487305, | |
| "learning_rate": 1.4038461538461538e-07, | |
| "loss": 0.8073, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 8.807017543859649, | |
| "grad_norm": 29.774131774902344, | |
| "learning_rate": 1.3846153846153846e-07, | |
| "loss": 0.7737, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 8.824561403508772, | |
| "grad_norm": 62.9353141784668, | |
| "learning_rate": 1.3653846153846152e-07, | |
| "loss": 0.7529, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 8.842105263157894, | |
| "grad_norm": 38.0306510925293, | |
| "learning_rate": 1.346153846153846e-07, | |
| "loss": 0.7406, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 8.859649122807017, | |
| "grad_norm": 41.864105224609375, | |
| "learning_rate": 1.326923076923077e-07, | |
| "loss": 0.9119, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 8.87719298245614, | |
| "grad_norm": 67.60685729980469, | |
| "learning_rate": 1.3076923076923076e-07, | |
| "loss": 0.828, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 8.894736842105264, | |
| "grad_norm": 37.95588302612305, | |
| "learning_rate": 1.2884615384615382e-07, | |
| "loss": 0.8381, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 8.912280701754385, | |
| "grad_norm": 40.29434585571289, | |
| "learning_rate": 1.269230769230769e-07, | |
| "loss": 0.8085, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 8.929824561403509, | |
| "grad_norm": 68.67707061767578, | |
| "learning_rate": 1.25e-07, | |
| "loss": 1.0056, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 8.947368421052632, | |
| "grad_norm": 45.547428131103516, | |
| "learning_rate": 1.2307692307692308e-07, | |
| "loss": 0.7981, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 8.964912280701755, | |
| "grad_norm": 39.541439056396484, | |
| "learning_rate": 1.2115384615384614e-07, | |
| "loss": 0.6487, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 8.982456140350877, | |
| "grad_norm": 28.73076820373535, | |
| "learning_rate": 1.1923076923076923e-07, | |
| "loss": 0.5069, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 31.97428321838379, | |
| "learning_rate": 1.173076923076923e-07, | |
| "loss": 0.9922, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5977011494252874, | |
| "eval_loss": 1.0299025774002075, | |
| "eval_roc_auc": 0.8216890808633837, | |
| "eval_runtime": 34.5614, | |
| "eval_samples_per_second": 7.552, | |
| "eval_steps_per_second": 0.492, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 9.017543859649123, | |
| "grad_norm": 48.0136833190918, | |
| "learning_rate": 1.1538461538461539e-07, | |
| "loss": 1.0235, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 9.035087719298245, | |
| "grad_norm": 28.533130645751953, | |
| "learning_rate": 1.1346153846153845e-07, | |
| "loss": 0.5459, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 9.052631578947368, | |
| "grad_norm": 41.17901611328125, | |
| "learning_rate": 1.1153846153846154e-07, | |
| "loss": 0.5279, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 9.070175438596491, | |
| "grad_norm": 32.64225387573242, | |
| "learning_rate": 1.0961538461538461e-07, | |
| "loss": 1.0252, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 9.087719298245615, | |
| "grad_norm": 32.45205307006836, | |
| "learning_rate": 1.076923076923077e-07, | |
| "loss": 0.6554, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 9.105263157894736, | |
| "grad_norm": 35.420379638671875, | |
| "learning_rate": 1.0576923076923076e-07, | |
| "loss": 0.6954, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 9.12280701754386, | |
| "grad_norm": 53.34769058227539, | |
| "learning_rate": 1.0384615384615385e-07, | |
| "loss": 0.7513, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 9.140350877192983, | |
| "grad_norm": 28.763874053955078, | |
| "learning_rate": 1.0192307692307692e-07, | |
| "loss": 0.5913, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 9.157894736842104, | |
| "grad_norm": 24.717885971069336, | |
| "learning_rate": 1e-07, | |
| "loss": 0.6692, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 9.175438596491228, | |
| "grad_norm": 25.547611236572266, | |
| "learning_rate": 9.807692307692307e-08, | |
| "loss": 0.75, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 9.192982456140351, | |
| "grad_norm": 41.97018051147461, | |
| "learning_rate": 9.615384615384616e-08, | |
| "loss": 0.6604, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 9.210526315789474, | |
| "grad_norm": 80.47174835205078, | |
| "learning_rate": 9.423076923076923e-08, | |
| "loss": 1.2781, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 9.228070175438596, | |
| "grad_norm": 27.064115524291992, | |
| "learning_rate": 9.23076923076923e-08, | |
| "loss": 0.7449, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 9.24561403508772, | |
| "grad_norm": 34.857425689697266, | |
| "learning_rate": 9.038461538461538e-08, | |
| "loss": 0.5484, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 9.263157894736842, | |
| "grad_norm": 63.48345184326172, | |
| "learning_rate": 8.846153846153847e-08, | |
| "loss": 0.8263, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 9.280701754385966, | |
| "grad_norm": 36.6185417175293, | |
| "learning_rate": 8.653846153846153e-08, | |
| "loss": 0.8279, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 9.298245614035087, | |
| "grad_norm": 45.01316833496094, | |
| "learning_rate": 8.461538461538461e-08, | |
| "loss": 0.9115, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 9.31578947368421, | |
| "grad_norm": 19.511629104614258, | |
| "learning_rate": 8.269230769230769e-08, | |
| "loss": 0.4552, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 9.333333333333334, | |
| "grad_norm": 42.41179275512695, | |
| "learning_rate": 8.076923076923077e-08, | |
| "loss": 1.0983, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 9.350877192982455, | |
| "grad_norm": 26.47096061706543, | |
| "learning_rate": 7.884615384615384e-08, | |
| "loss": 0.5845, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 9.368421052631579, | |
| "grad_norm": 30.966955184936523, | |
| "learning_rate": 7.692307692307692e-08, | |
| "loss": 0.6616, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 9.385964912280702, | |
| "grad_norm": 40.17842102050781, | |
| "learning_rate": 7.5e-08, | |
| "loss": 0.854, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 9.403508771929825, | |
| "grad_norm": 37.50959396362305, | |
| "learning_rate": 7.307692307692308e-08, | |
| "loss": 0.9193, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 9.421052631578947, | |
| "grad_norm": 32.44236755371094, | |
| "learning_rate": 7.115384615384614e-08, | |
| "loss": 0.855, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 9.43859649122807, | |
| "grad_norm": 37.18986511230469, | |
| "learning_rate": 6.923076923076923e-08, | |
| "loss": 0.9304, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 9.456140350877194, | |
| "grad_norm": 36.91654586791992, | |
| "learning_rate": 6.73076923076923e-08, | |
| "loss": 0.8258, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 9.473684210526315, | |
| "grad_norm": 27.073163986206055, | |
| "learning_rate": 6.538461538461538e-08, | |
| "loss": 0.4538, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 9.491228070175438, | |
| "grad_norm": 33.02398681640625, | |
| "learning_rate": 6.346153846153845e-08, | |
| "loss": 0.8084, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 9.508771929824562, | |
| "grad_norm": 25.321035385131836, | |
| "learning_rate": 6.153846153846154e-08, | |
| "loss": 0.6776, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 9.526315789473685, | |
| "grad_norm": 35.03627395629883, | |
| "learning_rate": 5.961538461538461e-08, | |
| "loss": 0.7051, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 9.543859649122806, | |
| "grad_norm": 32.04972457885742, | |
| "learning_rate": 5.7692307692307695e-08, | |
| "loss": 1.1416, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 9.56140350877193, | |
| "grad_norm": 32.8387451171875, | |
| "learning_rate": 5.576923076923077e-08, | |
| "loss": 0.7075, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 9.578947368421053, | |
| "grad_norm": 53.04689407348633, | |
| "learning_rate": 5.384615384615385e-08, | |
| "loss": 0.8701, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 9.596491228070175, | |
| "grad_norm": 23.254966735839844, | |
| "learning_rate": 5.1923076923076924e-08, | |
| "loss": 0.4444, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 9.614035087719298, | |
| "grad_norm": 42.885440826416016, | |
| "learning_rate": 5e-08, | |
| "loss": 0.8811, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 9.631578947368421, | |
| "grad_norm": 29.11221694946289, | |
| "learning_rate": 4.807692307692308e-08, | |
| "loss": 0.7966, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 9.649122807017545, | |
| "grad_norm": 62.373077392578125, | |
| "learning_rate": 4.615384615384615e-08, | |
| "loss": 0.9317, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 9.666666666666666, | |
| "grad_norm": 43.64496612548828, | |
| "learning_rate": 4.423076923076923e-08, | |
| "loss": 0.7716, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 9.68421052631579, | |
| "grad_norm": 30.535717010498047, | |
| "learning_rate": 4.230769230769231e-08, | |
| "loss": 1.0664, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 9.701754385964913, | |
| "grad_norm": 24.165536880493164, | |
| "learning_rate": 4.038461538461539e-08, | |
| "loss": 0.8169, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 9.719298245614034, | |
| "grad_norm": 47.13362503051758, | |
| "learning_rate": 3.846153846153846e-08, | |
| "loss": 0.7841, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 9.736842105263158, | |
| "grad_norm": 28.94150161743164, | |
| "learning_rate": 3.653846153846154e-08, | |
| "loss": 0.7046, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 9.75438596491228, | |
| "grad_norm": 56.8846549987793, | |
| "learning_rate": 3.4615384615384616e-08, | |
| "loss": 0.8988, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 9.771929824561404, | |
| "grad_norm": 30.389190673828125, | |
| "learning_rate": 3.269230769230769e-08, | |
| "loss": 0.7905, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 9.789473684210526, | |
| "grad_norm": 30.133394241333008, | |
| "learning_rate": 3.076923076923077e-08, | |
| "loss": 0.5083, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 9.807017543859649, | |
| "grad_norm": 46.20474624633789, | |
| "learning_rate": 2.8846153846153848e-08, | |
| "loss": 0.9206, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 9.824561403508772, | |
| "grad_norm": 38.54856872558594, | |
| "learning_rate": 2.6923076923076925e-08, | |
| "loss": 0.9944, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 9.842105263157894, | |
| "grad_norm": 73.813232421875, | |
| "learning_rate": 2.5e-08, | |
| "loss": 0.9167, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 9.859649122807017, | |
| "grad_norm": 42.26284408569336, | |
| "learning_rate": 2.3076923076923076e-08, | |
| "loss": 0.8989, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 9.87719298245614, | |
| "grad_norm": 36.5334587097168, | |
| "learning_rate": 2.1153846153846153e-08, | |
| "loss": 0.6554, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 9.894736842105264, | |
| "grad_norm": 30.85855484008789, | |
| "learning_rate": 1.923076923076923e-08, | |
| "loss": 0.4981, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 9.912280701754385, | |
| "grad_norm": 38.74631881713867, | |
| "learning_rate": 1.7307692307692308e-08, | |
| "loss": 0.8997, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 9.929824561403509, | |
| "grad_norm": 50.60679244995117, | |
| "learning_rate": 1.5384615384615385e-08, | |
| "loss": 0.9265, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 9.947368421052632, | |
| "grad_norm": 36.15625762939453, | |
| "learning_rate": 1.3461538461538462e-08, | |
| "loss": 1.0373, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 9.964912280701755, | |
| "grad_norm": 64.9268798828125, | |
| "learning_rate": 1.1538461538461538e-08, | |
| "loss": 1.0077, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 9.982456140350877, | |
| "grad_norm": 28.213552474975586, | |
| "learning_rate": 9.615384615384615e-09, | |
| "loss": 1.102, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 47.85600280761719, | |
| "learning_rate": 7.692307692307693e-09, | |
| "loss": 0.555, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5977011494252874, | |
| "eval_loss": 1.0249524116516113, | |
| "eval_roc_auc": 0.8248013488853114, | |
| "eval_runtime": 23.6967, | |
| "eval_samples_per_second": 11.014, | |
| "eval_steps_per_second": 0.717, | |
| "step": 570 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 570, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.204020265084518e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |