| { |
| "best_global_step": 1220, |
| "best_metric": 0.9425042335887034, |
| "best_model_checkpoint": "step_model_bert_large/checkpoint-1220", |
| "epoch": 9.922448979591836, |
| "eval_steps": 500, |
| "global_step": 1220, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00816326530612245, |
| "grad_norm": 20.771753311157227, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.1365, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0163265306122449, |
| "grad_norm": 70.12383270263672, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.051, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.024489795918367346, |
| "grad_norm": 31.964258193969727, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.1296, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0326530612244898, |
| "grad_norm": 331.50640869140625, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.8467, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04081632653061224, |
| "grad_norm": 42.2370491027832, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.7934, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04897959183673469, |
| "grad_norm": 47.351070404052734, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.577, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 18.631610870361328, |
| "learning_rate": 1.1200000000000001e-05, |
| "loss": 0.5044, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0653061224489796, |
| "grad_norm": 20.154150009155273, |
| "learning_rate": 1.2800000000000001e-05, |
| "loss": 0.4216, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07346938775510205, |
| "grad_norm": 12.097977638244629, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 0.4446, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08163265306122448, |
| "grad_norm": 9.474892616271973, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.2463, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08979591836734693, |
| "grad_norm": 7.300495624542236, |
| "learning_rate": 1.76e-05, |
| "loss": 0.289, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.09795918367346938, |
| "grad_norm": 7.943906307220459, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.2522, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.10612244897959183, |
| "grad_norm": 2.966740846633911, |
| "learning_rate": 2.08e-05, |
| "loss": 0.1157, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 6.358625888824463, |
| "learning_rate": 2.2400000000000002e-05, |
| "loss": 0.2464, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12244897959183673, |
| "grad_norm": 2.499382972717285, |
| "learning_rate": 2.4e-05, |
| "loss": 0.1134, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1306122448979592, |
| "grad_norm": 3.328634023666382, |
| "learning_rate": 2.5600000000000002e-05, |
| "loss": 0.1879, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13877551020408163, |
| "grad_norm": 13.66771125793457, |
| "learning_rate": 2.7200000000000004e-05, |
| "loss": 0.2889, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1469387755102041, |
| "grad_norm": 11.846375465393066, |
| "learning_rate": 2.8800000000000002e-05, |
| "loss": 0.3625, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.15510204081632653, |
| "grad_norm": 9.650957107543945, |
| "learning_rate": 3.0400000000000004e-05, |
| "loss": 0.1811, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.16326530612244897, |
| "grad_norm": 5.533182621002197, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.1549, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 6.746736526489258, |
| "learning_rate": 3.3600000000000004e-05, |
| "loss": 0.2285, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.17959183673469387, |
| "grad_norm": 3.802558660507202, |
| "learning_rate": 3.52e-05, |
| "loss": 0.1377, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.18775510204081633, |
| "grad_norm": 6.560689926147461, |
| "learning_rate": 3.680000000000001e-05, |
| "loss": 0.1661, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.19591836734693877, |
| "grad_norm": 4.309365272521973, |
| "learning_rate": 3.8400000000000005e-05, |
| "loss": 0.1222, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 2.247054100036621, |
| "learning_rate": 4e-05, |
| "loss": 0.1269, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.21224489795918366, |
| "grad_norm": 8.494240760803223, |
| "learning_rate": 4.16e-05, |
| "loss": 0.2676, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.22040816326530613, |
| "grad_norm": 2.9743239879608154, |
| "learning_rate": 4.3200000000000007e-05, |
| "loss": 0.1479, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 2.945835590362549, |
| "learning_rate": 4.4800000000000005e-05, |
| "loss": 0.1216, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.23673469387755103, |
| "grad_norm": 10.777807235717773, |
| "learning_rate": 4.64e-05, |
| "loss": 0.3272, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.24489795918367346, |
| "grad_norm": 9.199320793151855, |
| "learning_rate": 4.8e-05, |
| "loss": 0.2623, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2530612244897959, |
| "grad_norm": 3.7975552082061768, |
| "learning_rate": 4.9600000000000006e-05, |
| "loss": 0.1542, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2612244897959184, |
| "grad_norm": 4.61702823638916, |
| "learning_rate": 5.1200000000000004e-05, |
| "loss": 0.1828, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.2693877551020408, |
| "grad_norm": 2.2006444931030273, |
| "learning_rate": 5.280000000000001e-05, |
| "loss": 0.1663, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.27755102040816326, |
| "grad_norm": 3.2792046070098877, |
| "learning_rate": 5.440000000000001e-05, |
| "loss": 0.1296, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.931986093521118, |
| "learning_rate": 5.6e-05, |
| "loss": 0.1744, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2938775510204082, |
| "grad_norm": 1.6390964984893799, |
| "learning_rate": 5.7600000000000004e-05, |
| "loss": 0.129, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3020408163265306, |
| "grad_norm": 1.0177584886550903, |
| "learning_rate": 5.92e-05, |
| "loss": 0.0843, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.31020408163265306, |
| "grad_norm": 1.9508525133132935, |
| "learning_rate": 6.080000000000001e-05, |
| "loss": 0.0753, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3183673469387755, |
| "grad_norm": 2.6601903438568115, |
| "learning_rate": 6.240000000000001e-05, |
| "loss": 0.1817, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.32653061224489793, |
| "grad_norm": 3.9204416275024414, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.1773, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3346938775510204, |
| "grad_norm": 2.708138942718506, |
| "learning_rate": 6.56e-05, |
| "loss": 0.1899, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 2.626298666000366, |
| "learning_rate": 6.720000000000001e-05, |
| "loss": 0.1134, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3510204081632653, |
| "grad_norm": 0.8519235253334045, |
| "learning_rate": 6.88e-05, |
| "loss": 0.1421, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.35918367346938773, |
| "grad_norm": 1.024389624595642, |
| "learning_rate": 7.04e-05, |
| "loss": 0.1215, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.3673469387755102, |
| "grad_norm": 4.064274311065674, |
| "learning_rate": 7.2e-05, |
| "loss": 0.1537, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.37551020408163266, |
| "grad_norm": 6.495007514953613, |
| "learning_rate": 7.360000000000001e-05, |
| "loss": 0.2303, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3836734693877551, |
| "grad_norm": 1.5005526542663574, |
| "learning_rate": 7.52e-05, |
| "loss": 0.1672, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.39183673469387753, |
| "grad_norm": 2.671656370162964, |
| "learning_rate": 7.680000000000001e-05, |
| "loss": 0.1365, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.1992692947387695, |
| "learning_rate": 7.840000000000001e-05, |
| "loss": 0.1168, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 0.7369860410690308, |
| "learning_rate": 8e-05, |
| "loss": 0.1121, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4163265306122449, |
| "grad_norm": 3.4734129905700684, |
| "learning_rate": 7.993162393162394e-05, |
| "loss": 0.1931, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.42448979591836733, |
| "grad_norm": 1.9376534223556519, |
| "learning_rate": 7.986324786324788e-05, |
| "loss": 0.2017, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.4326530612244898, |
| "grad_norm": 0.9877994656562805, |
| "learning_rate": 7.97948717948718e-05, |
| "loss": 0.088, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.44081632653061226, |
| "grad_norm": 2.119616746902466, |
| "learning_rate": 7.972649572649573e-05, |
| "loss": 0.1099, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.4489795918367347, |
| "grad_norm": 1.2248163223266602, |
| "learning_rate": 7.965811965811966e-05, |
| "loss": 0.1335, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.7621262669563293, |
| "learning_rate": 7.95897435897436e-05, |
| "loss": 0.071, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.46530612244897956, |
| "grad_norm": 3.021927833557129, |
| "learning_rate": 7.952136752136753e-05, |
| "loss": 0.1307, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.47346938775510206, |
| "grad_norm": 0.6796000599861145, |
| "learning_rate": 7.945299145299147e-05, |
| "loss": 0.0827, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.4816326530612245, |
| "grad_norm": 1.4021790027618408, |
| "learning_rate": 7.938461538461539e-05, |
| "loss": 0.1144, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.4897959183673469, |
| "grad_norm": 1.6122393608093262, |
| "learning_rate": 7.931623931623932e-05, |
| "loss": 0.0864, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.49795918367346936, |
| "grad_norm": 1.5375205278396606, |
| "learning_rate": 7.924786324786326e-05, |
| "loss": 0.0939, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5061224489795918, |
| "grad_norm": 0.8782357573509216, |
| "learning_rate": 7.917948717948719e-05, |
| "loss": 0.0918, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 2.1417829990386963, |
| "learning_rate": 7.911111111111112e-05, |
| "loss": 0.0922, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5224489795918368, |
| "grad_norm": 0.5513861179351807, |
| "learning_rate": 7.904273504273506e-05, |
| "loss": 0.081, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5306122448979592, |
| "grad_norm": 0.6667738556861877, |
| "learning_rate": 7.897435897435898e-05, |
| "loss": 0.0451, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5387755102040817, |
| "grad_norm": 1.489629864692688, |
| "learning_rate": 7.890598290598291e-05, |
| "loss": 0.0761, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5469387755102041, |
| "grad_norm": 2.571826457977295, |
| "learning_rate": 7.883760683760685e-05, |
| "loss": 0.115, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.5551020408163265, |
| "grad_norm": 0.7235465049743652, |
| "learning_rate": 7.876923076923078e-05, |
| "loss": 0.0794, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.563265306122449, |
| "grad_norm": 0.9554158449172974, |
| "learning_rate": 7.87008547008547e-05, |
| "loss": 0.0733, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 5.222618103027344, |
| "learning_rate": 7.863247863247864e-05, |
| "loss": 0.171, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5795918367346938, |
| "grad_norm": 0.9777473211288452, |
| "learning_rate": 7.856410256410257e-05, |
| "loss": 0.0922, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.5877551020408164, |
| "grad_norm": 0.7969712615013123, |
| "learning_rate": 7.849572649572649e-05, |
| "loss": 0.0741, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.5959183673469388, |
| "grad_norm": 1.5794110298156738, |
| "learning_rate": 7.842735042735043e-05, |
| "loss": 0.1096, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6040816326530613, |
| "grad_norm": 0.6841784119606018, |
| "learning_rate": 7.835897435897436e-05, |
| "loss": 0.0809, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 0.4617042541503906, |
| "learning_rate": 7.82905982905983e-05, |
| "loss": 0.0678, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6204081632653061, |
| "grad_norm": 5.837403297424316, |
| "learning_rate": 7.822222222222223e-05, |
| "loss": 0.1176, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 4.160293102264404, |
| "learning_rate": 7.815384615384616e-05, |
| "loss": 0.2872, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.636734693877551, |
| "grad_norm": 2.905837059020996, |
| "learning_rate": 7.808547008547008e-05, |
| "loss": 0.1861, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6448979591836734, |
| "grad_norm": 1.3452632427215576, |
| "learning_rate": 7.801709401709402e-05, |
| "loss": 0.1925, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6530612244897959, |
| "grad_norm": 0.8246756792068481, |
| "learning_rate": 7.794871794871795e-05, |
| "loss": 0.0676, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6612244897959184, |
| "grad_norm": 2.3986732959747314, |
| "learning_rate": 7.788034188034189e-05, |
| "loss": 0.1415, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.6693877551020408, |
| "grad_norm": 2.3531203269958496, |
| "learning_rate": 7.781196581196582e-05, |
| "loss": 0.1225, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.6775510204081633, |
| "grad_norm": 1.0808194875717163, |
| "learning_rate": 7.774358974358976e-05, |
| "loss": 0.1161, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 2.469728708267212, |
| "learning_rate": 7.767521367521368e-05, |
| "loss": 0.2704, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.6938775510204082, |
| "grad_norm": 1.8167622089385986, |
| "learning_rate": 7.760683760683761e-05, |
| "loss": 0.1206, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7020408163265306, |
| "grad_norm": 0.6328538656234741, |
| "learning_rate": 7.753846153846154e-05, |
| "loss": 0.1357, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.710204081632653, |
| "grad_norm": 1.4388840198516846, |
| "learning_rate": 7.747008547008548e-05, |
| "loss": 0.1348, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.7183673469387755, |
| "grad_norm": 0.9719085097312927, |
| "learning_rate": 7.740170940170941e-05, |
| "loss": 0.1777, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.726530612244898, |
| "grad_norm": 0.39901578426361084, |
| "learning_rate": 7.733333333333333e-05, |
| "loss": 0.0822, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.7346938775510204, |
| "grad_norm": 1.6550663709640503, |
| "learning_rate": 7.726495726495727e-05, |
| "loss": 0.1101, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.5123804211616516, |
| "learning_rate": 7.71965811965812e-05, |
| "loss": 0.0794, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.7510204081632653, |
| "grad_norm": 0.5998639464378357, |
| "learning_rate": 7.712820512820514e-05, |
| "loss": 0.1104, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7591836734693878, |
| "grad_norm": 0.5731858611106873, |
| "learning_rate": 7.705982905982907e-05, |
| "loss": 0.148, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.7673469387755102, |
| "grad_norm": 1.3274452686309814, |
| "learning_rate": 7.6991452991453e-05, |
| "loss": 0.1201, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.7755102040816326, |
| "grad_norm": 0.5123000741004944, |
| "learning_rate": 7.692307692307693e-05, |
| "loss": 0.0675, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7836734693877551, |
| "grad_norm": 0.558822751045227, |
| "learning_rate": 7.685470085470086e-05, |
| "loss": 0.0394, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.7918367346938775, |
| "grad_norm": 0.893507719039917, |
| "learning_rate": 7.67863247863248e-05, |
| "loss": 0.1375, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.26979655027389526, |
| "learning_rate": 7.671794871794873e-05, |
| "loss": 0.0371, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8081632653061225, |
| "grad_norm": 1.924552321434021, |
| "learning_rate": 7.664957264957266e-05, |
| "loss": 0.0691, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 1.3021774291992188, |
| "learning_rate": 7.65811965811966e-05, |
| "loss": 0.1223, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8244897959183674, |
| "grad_norm": 1.297431230545044, |
| "learning_rate": 7.651282051282052e-05, |
| "loss": 0.0751, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.8326530612244898, |
| "grad_norm": 0.5344750881195068, |
| "learning_rate": 7.644444444444445e-05, |
| "loss": 0.0973, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.8408163265306122, |
| "grad_norm": 2.1697866916656494, |
| "learning_rate": 7.637606837606839e-05, |
| "loss": 0.1845, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.8489795918367347, |
| "grad_norm": 1.7886005640029907, |
| "learning_rate": 7.630769230769232e-05, |
| "loss": 0.1352, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.9444314241409302, |
| "learning_rate": 7.623931623931624e-05, |
| "loss": 0.1281, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.8653061224489796, |
| "grad_norm": 1.9579631090164185, |
| "learning_rate": 7.617094017094018e-05, |
| "loss": 0.1374, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.8734693877551021, |
| "grad_norm": 0.9300099015235901, |
| "learning_rate": 7.610256410256411e-05, |
| "loss": 0.0889, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.8816326530612245, |
| "grad_norm": 0.27003607153892517, |
| "learning_rate": 7.603418803418803e-05, |
| "loss": 0.0428, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.889795918367347, |
| "grad_norm": 0.3849831819534302, |
| "learning_rate": 7.596581196581196e-05, |
| "loss": 0.063, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.8979591836734694, |
| "grad_norm": 0.7666690945625305, |
| "learning_rate": 7.58974358974359e-05, |
| "loss": 0.0481, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9061224489795918, |
| "grad_norm": 0.4851779639720917, |
| "learning_rate": 7.582905982905983e-05, |
| "loss": 0.0303, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.21981802582740784, |
| "learning_rate": 7.576068376068377e-05, |
| "loss": 0.0263, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.9224489795918367, |
| "grad_norm": 0.8145710229873657, |
| "learning_rate": 7.56923076923077e-05, |
| "loss": 0.0995, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.9306122448979591, |
| "grad_norm": 0.6933770179748535, |
| "learning_rate": 7.562393162393162e-05, |
| "loss": 0.1012, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.9387755102040817, |
| "grad_norm": 1.0062135457992554, |
| "learning_rate": 7.555555555555556e-05, |
| "loss": 0.1136, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.9469387755102041, |
| "grad_norm": 0.7434647083282471, |
| "learning_rate": 7.548717948717949e-05, |
| "loss": 0.0967, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.9551020408163265, |
| "grad_norm": 0.5385677218437195, |
| "learning_rate": 7.541880341880342e-05, |
| "loss": 0.0536, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.963265306122449, |
| "grad_norm": 1.739890694618225, |
| "learning_rate": 7.535042735042736e-05, |
| "loss": 0.2595, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 1.1082196235656738, |
| "learning_rate": 7.52820512820513e-05, |
| "loss": 0.2055, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.9795918367346939, |
| "grad_norm": 0.943108320236206, |
| "learning_rate": 7.521367521367521e-05, |
| "loss": 0.0494, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9877551020408163, |
| "grad_norm": 1.5185785293579102, |
| "learning_rate": 7.514529914529915e-05, |
| "loss": 0.0991, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.9959183673469387, |
| "grad_norm": 0.9091699123382568, |
| "learning_rate": 7.507692307692308e-05, |
| "loss": 0.1214, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1665633916854858, |
| "learning_rate": 7.500854700854702e-05, |
| "loss": 0.0315, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.926479222388936, |
| "eval_f1": 0.926986696070817, |
| "eval_loss": 0.1320565938949585, |
| "eval_precision": 0.9281119315465131, |
| "eval_recall": 0.926479222388936, |
| "eval_runtime": 27.6732, |
| "eval_samples_per_second": 15.755, |
| "eval_steps_per_second": 15.755, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.0081632653061225, |
| "grad_norm": 0.6033539175987244, |
| "learning_rate": 7.494017094017095e-05, |
| "loss": 0.0929, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.0163265306122449, |
| "grad_norm": 6.349571704864502, |
| "learning_rate": 7.487179487179487e-05, |
| "loss": 0.2896, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0244897959183674, |
| "grad_norm": 0.9741121530532837, |
| "learning_rate": 7.48034188034188e-05, |
| "loss": 0.0812, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.0326530612244897, |
| "grad_norm": 0.3600277304649353, |
| "learning_rate": 7.473504273504274e-05, |
| "loss": 0.072, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.0408163265306123, |
| "grad_norm": 0.32495835423469543, |
| "learning_rate": 7.466666666666667e-05, |
| "loss": 0.0385, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.0489795918367346, |
| "grad_norm": 0.8278759121894836, |
| "learning_rate": 7.459829059829061e-05, |
| "loss": 0.143, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.0571428571428572, |
| "grad_norm": 0.7462875247001648, |
| "learning_rate": 7.452991452991454e-05, |
| "loss": 0.1005, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.0653061224489795, |
| "grad_norm": 1.0437004566192627, |
| "learning_rate": 7.446153846153846e-05, |
| "loss": 0.0937, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.073469387755102, |
| "grad_norm": 0.7965916991233826, |
| "learning_rate": 7.43931623931624e-05, |
| "loss": 0.1018, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.0816326530612246, |
| "grad_norm": 0.33361029624938965, |
| "learning_rate": 7.432478632478633e-05, |
| "loss": 0.0326, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.089795918367347, |
| "grad_norm": 1.7304506301879883, |
| "learning_rate": 7.425641025641027e-05, |
| "loss": 0.195, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.0979591836734695, |
| "grad_norm": 0.43233224749565125, |
| "learning_rate": 7.41880341880342e-05, |
| "loss": 0.0492, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.1061224489795918, |
| "grad_norm": 0.6107978820800781, |
| "learning_rate": 7.411965811965814e-05, |
| "loss": 0.0877, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.1142857142857143, |
| "grad_norm": 0.41433191299438477, |
| "learning_rate": 7.405128205128206e-05, |
| "loss": 0.0604, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.1224489795918366, |
| "grad_norm": 0.44921883940696716, |
| "learning_rate": 7.398290598290599e-05, |
| "loss": 0.0549, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.1306122448979592, |
| "grad_norm": 0.5607606172561646, |
| "learning_rate": 7.391452991452992e-05, |
| "loss": 0.0865, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.1387755102040815, |
| "grad_norm": 0.46840882301330566, |
| "learning_rate": 7.384615384615386e-05, |
| "loss": 0.0884, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.146938775510204, |
| "grad_norm": 0.4898800253868103, |
| "learning_rate": 7.377777777777779e-05, |
| "loss": 0.0657, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.1551020408163266, |
| "grad_norm": 0.5272047519683838, |
| "learning_rate": 7.370940170940171e-05, |
| "loss": 0.11, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.163265306122449, |
| "grad_norm": 0.563673198223114, |
| "learning_rate": 7.364102564102565e-05, |
| "loss": 0.0906, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.1714285714285715, |
| "grad_norm": 0.3086874186992645, |
| "learning_rate": 7.357264957264957e-05, |
| "loss": 0.0327, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.1795918367346938, |
| "grad_norm": 0.8381314873695374, |
| "learning_rate": 7.35042735042735e-05, |
| "loss": 0.0474, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.1877551020408164, |
| "grad_norm": 0.47310200333595276, |
| "learning_rate": 7.343589743589744e-05, |
| "loss": 0.0459, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.1959183673469387, |
| "grad_norm": 1.3210771083831787, |
| "learning_rate": 7.336752136752137e-05, |
| "loss": 0.2073, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.2040816326530612, |
| "grad_norm": 2.012478828430176, |
| "learning_rate": 7.32991452991453e-05, |
| "loss": 0.1268, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.2122448979591836, |
| "grad_norm": 0.31665048003196716, |
| "learning_rate": 7.323076923076924e-05, |
| "loss": 0.0362, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.220408163265306, |
| "grad_norm": 0.4273552894592285, |
| "learning_rate": 7.316239316239316e-05, |
| "loss": 0.0429, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 0.6782147884368896, |
| "learning_rate": 7.30940170940171e-05, |
| "loss": 0.0825, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.236734693877551, |
| "grad_norm": 1.7549127340316772, |
| "learning_rate": 7.302564102564103e-05, |
| "loss": 0.0874, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.2448979591836735, |
| "grad_norm": 1.1840424537658691, |
| "learning_rate": 7.295726495726496e-05, |
| "loss": 0.1183, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.2530612244897958, |
| "grad_norm": 0.40625783801078796, |
| "learning_rate": 7.28888888888889e-05, |
| "loss": 0.0583, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.2612244897959184, |
| "grad_norm": 1.0906763076782227, |
| "learning_rate": 7.282051282051283e-05, |
| "loss": 0.1058, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.269387755102041, |
| "grad_norm": 1.2108123302459717, |
| "learning_rate": 7.275213675213675e-05, |
| "loss": 0.0991, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.2775510204081633, |
| "grad_norm": 0.6065159440040588, |
| "learning_rate": 7.268376068376069e-05, |
| "loss": 0.0481, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 1.5350733995437622, |
| "learning_rate": 7.261538461538462e-05, |
| "loss": 0.0569, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.2938775510204081, |
| "grad_norm": 0.49242013692855835, |
| "learning_rate": 7.254700854700855e-05, |
| "loss": 0.0664, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.3020408163265307, |
| "grad_norm": 0.40741172432899475, |
| "learning_rate": 7.247863247863249e-05, |
| "loss": 0.0804, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.310204081632653, |
| "grad_norm": 0.1994098573923111, |
| "learning_rate": 7.241025641025641e-05, |
| "loss": 0.0315, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.3183673469387756, |
| "grad_norm": 0.24300333857536316, |
| "learning_rate": 7.234188034188034e-05, |
| "loss": 0.028, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.3265306122448979, |
| "grad_norm": 0.2567465007305145, |
| "learning_rate": 7.227350427350428e-05, |
| "loss": 0.046, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.3346938775510204, |
| "grad_norm": 1.0523180961608887, |
| "learning_rate": 7.220512820512821e-05, |
| "loss": 0.0833, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.342857142857143, |
| "grad_norm": 1.451838731765747, |
| "learning_rate": 7.213675213675215e-05, |
| "loss": 0.1063, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.3510204081632653, |
| "grad_norm": 0.3077213764190674, |
| "learning_rate": 7.206837606837608e-05, |
| "loss": 0.0137, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.3591836734693876, |
| "grad_norm": 1.869543194770813, |
| "learning_rate": 7.2e-05, |
| "loss": 0.1303, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.3673469387755102, |
| "grad_norm": 0.4209451675415039, |
| "learning_rate": 7.193162393162394e-05, |
| "loss": 0.0765, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.3755102040816327, |
| "grad_norm": 0.5088202953338623, |
| "learning_rate": 7.186324786324787e-05, |
| "loss": 0.0578, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.383673469387755, |
| "grad_norm": 0.37874314188957214, |
| "learning_rate": 7.17948717948718e-05, |
| "loss": 0.0433, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.3918367346938776, |
| "grad_norm": 1.1338469982147217, |
| "learning_rate": 7.172649572649574e-05, |
| "loss": 0.0552, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.8180486559867859, |
| "learning_rate": 7.165811965811967e-05, |
| "loss": 0.0663, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.4081632653061225, |
| "grad_norm": 0.8368282914161682, |
| "learning_rate": 7.15897435897436e-05, |
| "loss": 0.0706, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.416326530612245, |
| "grad_norm": 0.4246748089790344, |
| "learning_rate": 7.152136752136753e-05, |
| "loss": 0.0765, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.4244897959183673, |
| "grad_norm": 1.6097643375396729, |
| "learning_rate": 7.145299145299146e-05, |
| "loss": 0.1153, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.4326530612244899, |
| "grad_norm": 1.8011287450790405, |
| "learning_rate": 7.13846153846154e-05, |
| "loss": 0.1639, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.4408163265306122, |
| "grad_norm": 0.49854835867881775, |
| "learning_rate": 7.131623931623933e-05, |
| "loss": 0.0945, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.4489795918367347, |
| "grad_norm": 0.8556779623031616, |
| "learning_rate": 7.124786324786325e-05, |
| "loss": 0.0688, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 0.3972812294960022, |
| "learning_rate": 7.117948717948719e-05, |
| "loss": 0.0838, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.4653061224489796, |
| "grad_norm": 1.9628223180770874, |
| "learning_rate": 7.11111111111111e-05, |
| "loss": 0.1525, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.473469387755102, |
| "grad_norm": 1.4790903329849243, |
| "learning_rate": 7.104273504273504e-05, |
| "loss": 0.0896, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.4816326530612245, |
| "grad_norm": 1.1479032039642334, |
| "learning_rate": 7.097435897435897e-05, |
| "loss": 0.0908, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.489795918367347, |
| "grad_norm": 1.7905946969985962, |
| "learning_rate": 7.090598290598291e-05, |
| "loss": 0.1608, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.4979591836734694, |
| "grad_norm": 0.5437545776367188, |
| "learning_rate": 7.083760683760684e-05, |
| "loss": 0.0571, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.5061224489795917, |
| "grad_norm": 0.401934415102005, |
| "learning_rate": 7.076923076923078e-05, |
| "loss": 0.0304, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.5142857142857142, |
| "grad_norm": 1.8619917631149292, |
| "learning_rate": 7.07008547008547e-05, |
| "loss": 0.2575, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.5224489795918368, |
| "grad_norm": 1.164413332939148, |
| "learning_rate": 7.063247863247863e-05, |
| "loss": 0.1058, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 0.6225583553314209, |
| "learning_rate": 7.056410256410257e-05, |
| "loss": 0.0939, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.5387755102040817, |
| "grad_norm": 0.49891531467437744, |
| "learning_rate": 7.04957264957265e-05, |
| "loss": 0.1356, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.546938775510204, |
| "grad_norm": 0.4343854784965515, |
| "learning_rate": 7.042735042735044e-05, |
| "loss": 0.0822, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.5551020408163265, |
| "grad_norm": 0.6476932168006897, |
| "learning_rate": 7.035897435897437e-05, |
| "loss": 0.0851, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.563265306122449, |
| "grad_norm": 0.5360276699066162, |
| "learning_rate": 7.029059829059829e-05, |
| "loss": 0.1255, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.3216440975666046, |
| "learning_rate": 7.022222222222222e-05, |
| "loss": 0.0479, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.5795918367346937, |
| "grad_norm": 0.4395454525947571, |
| "learning_rate": 7.015384615384616e-05, |
| "loss": 0.0359, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.5877551020408163, |
| "grad_norm": 0.4362776577472687, |
| "learning_rate": 7.008547008547009e-05, |
| "loss": 0.0926, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.5959183673469388, |
| "grad_norm": 0.2993446886539459, |
| "learning_rate": 7.001709401709403e-05, |
| "loss": 0.06, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.6040816326530614, |
| "grad_norm": 0.4686635136604309, |
| "learning_rate": 6.994871794871795e-05, |
| "loss": 0.1206, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.6122448979591837, |
| "grad_norm": 0.30804672837257385, |
| "learning_rate": 6.988034188034188e-05, |
| "loss": 0.0525, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.620408163265306, |
| "grad_norm": 0.4085846245288849, |
| "learning_rate": 6.981196581196582e-05, |
| "loss": 0.0537, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.6285714285714286, |
| "grad_norm": 0.33213263750076294, |
| "learning_rate": 6.974358974358975e-05, |
| "loss": 0.0392, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.636734693877551, |
| "grad_norm": 0.3664593994617462, |
| "learning_rate": 6.967521367521368e-05, |
| "loss": 0.0743, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.6448979591836734, |
| "grad_norm": 0.9924725294113159, |
| "learning_rate": 6.960683760683762e-05, |
| "loss": 0.0589, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.6530612244897958, |
| "grad_norm": 0.4398384690284729, |
| "learning_rate": 6.953846153846154e-05, |
| "loss": 0.0669, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.6612244897959183, |
| "grad_norm": 0.5044336318969727, |
| "learning_rate": 6.947008547008547e-05, |
| "loss": 0.0679, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.6693877551020408, |
| "grad_norm": 0.9199581146240234, |
| "learning_rate": 6.940170940170941e-05, |
| "loss": 0.1626, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.6775510204081634, |
| "grad_norm": 1.3309195041656494, |
| "learning_rate": 6.933333333333334e-05, |
| "loss": 0.1953, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 0.22333349287509918, |
| "learning_rate": 6.926495726495728e-05, |
| "loss": 0.0414, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.693877551020408, |
| "grad_norm": 0.6131277680397034, |
| "learning_rate": 6.919658119658121e-05, |
| "loss": 0.0808, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.7020408163265306, |
| "grad_norm": 0.7656962871551514, |
| "learning_rate": 6.912820512820513e-05, |
| "loss": 0.0693, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.7102040816326531, |
| "grad_norm": 0.8141290545463562, |
| "learning_rate": 6.905982905982907e-05, |
| "loss": 0.0801, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.7183673469387755, |
| "grad_norm": 0.5475645065307617, |
| "learning_rate": 6.8991452991453e-05, |
| "loss": 0.066, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.726530612244898, |
| "grad_norm": 0.33460551500320435, |
| "learning_rate": 6.892307692307693e-05, |
| "loss": 0.1342, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.7346938775510203, |
| "grad_norm": 0.347888708114624, |
| "learning_rate": 6.885470085470087e-05, |
| "loss": 0.0534, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.7428571428571429, |
| "grad_norm": 0.32002532482147217, |
| "learning_rate": 6.878632478632479e-05, |
| "loss": 0.0563, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.7510204081632654, |
| "grad_norm": 1.1996527910232544, |
| "learning_rate": 6.871794871794872e-05, |
| "loss": 0.1047, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.7591836734693878, |
| "grad_norm": 0.3641211688518524, |
| "learning_rate": 6.864957264957264e-05, |
| "loss": 0.0422, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.76734693877551, |
| "grad_norm": 0.3480508029460907, |
| "learning_rate": 6.858119658119658e-05, |
| "loss": 0.0491, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.7755102040816326, |
| "grad_norm": 0.4025651812553406, |
| "learning_rate": 6.851282051282051e-05, |
| "loss": 0.0523, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.7836734693877552, |
| "grad_norm": 0.5428391098976135, |
| "learning_rate": 6.844444444444445e-05, |
| "loss": 0.0684, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.7918367346938775, |
| "grad_norm": 0.6161150932312012, |
| "learning_rate": 6.837606837606838e-05, |
| "loss": 0.0795, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.4824294149875641, |
| "learning_rate": 6.830769230769232e-05, |
| "loss": 0.0926, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.8081632653061224, |
| "grad_norm": 1.10171639919281, |
| "learning_rate": 6.823931623931624e-05, |
| "loss": 0.1577, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.816326530612245, |
| "grad_norm": 0.4547288417816162, |
| "learning_rate": 6.817094017094017e-05, |
| "loss": 0.0496, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.8244897959183675, |
| "grad_norm": 0.44235700368881226, |
| "learning_rate": 6.81025641025641e-05, |
| "loss": 0.0453, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.8326530612244898, |
| "grad_norm": 0.8762555718421936, |
| "learning_rate": 6.803418803418804e-05, |
| "loss": 0.1378, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.8408163265306121, |
| "grad_norm": 0.8897255063056946, |
| "learning_rate": 6.796581196581197e-05, |
| "loss": 0.0713, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.8489795918367347, |
| "grad_norm": 0.7720091938972473, |
| "learning_rate": 6.789743589743591e-05, |
| "loss": 0.0849, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.5211923718452454, |
| "learning_rate": 6.782905982905983e-05, |
| "loss": 0.1556, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.8653061224489798, |
| "grad_norm": 0.6279407739639282, |
| "learning_rate": 6.776068376068376e-05, |
| "loss": 0.0815, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.873469387755102, |
| "grad_norm": 0.28113502264022827, |
| "learning_rate": 6.76923076923077e-05, |
| "loss": 0.0581, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.8816326530612244, |
| "grad_norm": 0.4654277563095093, |
| "learning_rate": 6.762393162393163e-05, |
| "loss": 0.0372, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.889795918367347, |
| "grad_norm": 0.6376189589500427, |
| "learning_rate": 6.755555555555557e-05, |
| "loss": 0.066, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.8979591836734695, |
| "grad_norm": 0.28285926580429077, |
| "learning_rate": 6.748717948717949e-05, |
| "loss": 0.0605, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.9061224489795918, |
| "grad_norm": 0.6249369382858276, |
| "learning_rate": 6.741880341880342e-05, |
| "loss": 0.0789, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.9142857142857141, |
| "grad_norm": 1.2941783666610718, |
| "learning_rate": 6.735042735042735e-05, |
| "loss": 0.1235, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.9224489795918367, |
| "grad_norm": 0.27006468176841736, |
| "learning_rate": 6.728205128205129e-05, |
| "loss": 0.0458, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.9306122448979592, |
| "grad_norm": 0.595116376876831, |
| "learning_rate": 6.721367521367522e-05, |
| "loss": 0.0895, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.9387755102040818, |
| "grad_norm": 0.24352626502513885, |
| "learning_rate": 6.714529914529916e-05, |
| "loss": 0.0302, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.9469387755102041, |
| "grad_norm": 0.8554300665855408, |
| "learning_rate": 6.707692307692308e-05, |
| "loss": 0.1388, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.9551020408163264, |
| "grad_norm": 0.6483756899833679, |
| "learning_rate": 6.700854700854701e-05, |
| "loss": 0.1062, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.963265306122449, |
| "grad_norm": 0.8733114004135132, |
| "learning_rate": 6.694017094017095e-05, |
| "loss": 0.0837, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.9714285714285715, |
| "grad_norm": 0.35388854146003723, |
| "learning_rate": 6.687179487179488e-05, |
| "loss": 0.0546, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.9795918367346939, |
| "grad_norm": 0.20294742286205292, |
| "learning_rate": 6.680341880341881e-05, |
| "loss": 0.0234, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.9877551020408162, |
| "grad_norm": 0.5089455842971802, |
| "learning_rate": 6.673504273504275e-05, |
| "loss": 0.0426, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.9959183673469387, |
| "grad_norm": 0.3105431795120239, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.0402, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.25790804624557495, |
| "learning_rate": 6.65982905982906e-05, |
| "loss": 0.0216, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9410268119251093, |
| "eval_f1": 0.940523295891238, |
| "eval_loss": 0.1141602024435997, |
| "eval_precision": 0.9408064294905566, |
| "eval_recall": 0.9410268119251093, |
| "eval_runtime": 24.7522, |
| "eval_samples_per_second": 17.615, |
| "eval_steps_per_second": 17.615, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.0081632653061225, |
| "grad_norm": 0.35438716411590576, |
| "learning_rate": 6.652991452991454e-05, |
| "loss": 0.0662, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.016326530612245, |
| "grad_norm": 0.4929123818874359, |
| "learning_rate": 6.646153846153847e-05, |
| "loss": 0.0615, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.024489795918367, |
| "grad_norm": 0.360612690448761, |
| "learning_rate": 6.639316239316241e-05, |
| "loss": 0.0469, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.0326530612244897, |
| "grad_norm": 0.4224632978439331, |
| "learning_rate": 6.632478632478634e-05, |
| "loss": 0.0565, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.32445797324180603, |
| "learning_rate": 6.625641025641026e-05, |
| "loss": 0.0288, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.048979591836735, |
| "grad_norm": 0.8451316356658936, |
| "learning_rate": 6.618803418803418e-05, |
| "loss": 0.1075, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.057142857142857, |
| "grad_norm": 0.6300255656242371, |
| "learning_rate": 6.611965811965812e-05, |
| "loss": 0.0955, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.0653061224489795, |
| "grad_norm": 0.17683130502700806, |
| "learning_rate": 6.605128205128205e-05, |
| "loss": 0.028, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.073469387755102, |
| "grad_norm": 0.2518182098865509, |
| "learning_rate": 6.598290598290599e-05, |
| "loss": 0.018, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.0816326530612246, |
| "grad_norm": 0.29837292432785034, |
| "learning_rate": 6.591452991452992e-05, |
| "loss": 0.0331, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.089795918367347, |
| "grad_norm": 0.44716575741767883, |
| "learning_rate": 6.584615384615385e-05, |
| "loss": 0.0701, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.0979591836734692, |
| "grad_norm": 0.47956037521362305, |
| "learning_rate": 6.577777777777777e-05, |
| "loss": 0.0373, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.1061224489795918, |
| "grad_norm": 0.2430446296930313, |
| "learning_rate": 6.570940170940171e-05, |
| "loss": 0.035, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.1142857142857143, |
| "grad_norm": 0.9001370072364807, |
| "learning_rate": 6.564102564102564e-05, |
| "loss": 0.0927, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.122448979591837, |
| "grad_norm": 0.37475600838661194, |
| "learning_rate": 6.557264957264958e-05, |
| "loss": 0.0655, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.130612244897959, |
| "grad_norm": 0.33517739176750183, |
| "learning_rate": 6.550427350427351e-05, |
| "loss": 0.0472, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.1387755102040815, |
| "grad_norm": 0.22833134233951569, |
| "learning_rate": 6.543589743589745e-05, |
| "loss": 0.0285, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.146938775510204, |
| "grad_norm": 0.34427952766418457, |
| "learning_rate": 6.536752136752137e-05, |
| "loss": 0.0592, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.1551020408163266, |
| "grad_norm": 0.6570462584495544, |
| "learning_rate": 6.52991452991453e-05, |
| "loss": 0.0594, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.163265306122449, |
| "grad_norm": 0.9344841837882996, |
| "learning_rate": 6.523076923076923e-05, |
| "loss": 0.0732, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.1714285714285713, |
| "grad_norm": 0.1269778609275818, |
| "learning_rate": 6.516239316239317e-05, |
| "loss": 0.0192, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.179591836734694, |
| "grad_norm": 0.3492695391178131, |
| "learning_rate": 6.50940170940171e-05, |
| "loss": 0.0285, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.1877551020408164, |
| "grad_norm": 0.34235745668411255, |
| "learning_rate": 6.502564102564104e-05, |
| "loss": 0.0472, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.195918367346939, |
| "grad_norm": 0.7924789786338806, |
| "learning_rate": 6.495726495726496e-05, |
| "loss": 0.0603, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.204081632653061, |
| "grad_norm": 0.8306127190589905, |
| "learning_rate": 6.488888888888889e-05, |
| "loss": 0.0688, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.2122448979591836, |
| "grad_norm": 0.6912404894828796, |
| "learning_rate": 6.482051282051283e-05, |
| "loss": 0.0596, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.220408163265306, |
| "grad_norm": 0.557754635810852, |
| "learning_rate": 6.475213675213676e-05, |
| "loss": 0.0591, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.2285714285714286, |
| "grad_norm": 0.9123916029930115, |
| "learning_rate": 6.46837606837607e-05, |
| "loss": 0.0554, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.236734693877551, |
| "grad_norm": 1.5349823236465454, |
| "learning_rate": 6.461538461538462e-05, |
| "loss": 0.1467, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.2448979591836733, |
| "grad_norm": 0.6170281767845154, |
| "learning_rate": 6.454700854700855e-05, |
| "loss": 0.0721, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.253061224489796, |
| "grad_norm": 0.49400895833969116, |
| "learning_rate": 6.447863247863248e-05, |
| "loss": 0.0576, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.2612244897959184, |
| "grad_norm": 0.2935521602630615, |
| "learning_rate": 6.441025641025642e-05, |
| "loss": 0.0241, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.269387755102041, |
| "grad_norm": 1.0610945224761963, |
| "learning_rate": 6.434188034188035e-05, |
| "loss": 0.1248, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.277551020408163, |
| "grad_norm": 0.6663985252380371, |
| "learning_rate": 6.427350427350429e-05, |
| "loss": 0.0525, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.9457942247390747, |
| "learning_rate": 6.420512820512821e-05, |
| "loss": 0.0665, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.293877551020408, |
| "grad_norm": 0.17783474922180176, |
| "learning_rate": 6.413675213675214e-05, |
| "loss": 0.0203, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.3020408163265307, |
| "grad_norm": 0.6842500567436218, |
| "learning_rate": 6.406837606837608e-05, |
| "loss": 0.065, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.3102040816326532, |
| "grad_norm": 0.39229097962379456, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.0382, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.3183673469387753, |
| "grad_norm": 0.5632251501083374, |
| "learning_rate": 6.393162393162394e-05, |
| "loss": 0.0468, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.326530612244898, |
| "grad_norm": 1.3715561628341675, |
| "learning_rate": 6.386324786324788e-05, |
| "loss": 0.0812, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.3346938775510204, |
| "grad_norm": 0.5640051960945129, |
| "learning_rate": 6.37948717948718e-05, |
| "loss": 0.0513, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.342857142857143, |
| "grad_norm": 0.438909113407135, |
| "learning_rate": 6.372649572649573e-05, |
| "loss": 0.027, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.351020408163265, |
| "grad_norm": 0.8141554594039917, |
| "learning_rate": 6.365811965811965e-05, |
| "loss": 0.1043, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.3591836734693876, |
| "grad_norm": 0.381523072719574, |
| "learning_rate": 6.358974358974359e-05, |
| "loss": 0.0339, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.36734693877551, |
| "grad_norm": 0.5834519863128662, |
| "learning_rate": 6.352136752136752e-05, |
| "loss": 0.0652, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.3755102040816327, |
| "grad_norm": 0.5051060914993286, |
| "learning_rate": 6.345299145299146e-05, |
| "loss": 0.071, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.3836734693877553, |
| "grad_norm": 1.0093952417373657, |
| "learning_rate": 6.338461538461539e-05, |
| "loss": 0.0988, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.3918367346938774, |
| "grad_norm": 0.9834301471710205, |
| "learning_rate": 6.331623931623931e-05, |
| "loss": 0.1045, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.25004687905311584, |
| "learning_rate": 6.324786324786325e-05, |
| "loss": 0.026, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.4081632653061225, |
| "grad_norm": 0.3586501479148865, |
| "learning_rate": 6.317948717948718e-05, |
| "loss": 0.0364, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.416326530612245, |
| "grad_norm": 1.293340802192688, |
| "learning_rate": 6.311111111111112e-05, |
| "loss": 0.1104, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.424489795918367, |
| "grad_norm": 0.8027708530426025, |
| "learning_rate": 6.304273504273505e-05, |
| "loss": 0.0581, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.4326530612244897, |
| "grad_norm": 0.5780189633369446, |
| "learning_rate": 6.297435897435898e-05, |
| "loss": 0.0485, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.440816326530612, |
| "grad_norm": 1.5172946453094482, |
| "learning_rate": 6.29059829059829e-05, |
| "loss": 0.1051, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 0.3469559848308563, |
| "learning_rate": 6.283760683760684e-05, |
| "loss": 0.0499, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.4571428571428573, |
| "grad_norm": 0.4521036744117737, |
| "learning_rate": 6.276923076923077e-05, |
| "loss": 0.0273, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.4653061224489794, |
| "grad_norm": 0.43015456199645996, |
| "learning_rate": 6.270085470085471e-05, |
| "loss": 0.0515, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.473469387755102, |
| "grad_norm": 0.9717444181442261, |
| "learning_rate": 6.263247863247864e-05, |
| "loss": 0.0945, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.4816326530612245, |
| "grad_norm": 1.5522366762161255, |
| "learning_rate": 6.256410256410258e-05, |
| "loss": 0.122, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.489795918367347, |
| "grad_norm": 0.4901497960090637, |
| "learning_rate": 6.24957264957265e-05, |
| "loss": 0.0408, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.497959183673469, |
| "grad_norm": 0.5403264164924622, |
| "learning_rate": 6.242735042735043e-05, |
| "loss": 0.0413, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.5061224489795917, |
| "grad_norm": 0.25704896450042725, |
| "learning_rate": 6.235897435897436e-05, |
| "loss": 0.0437, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.5142857142857142, |
| "grad_norm": 0.43089812994003296, |
| "learning_rate": 6.22905982905983e-05, |
| "loss": 0.0488, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.522448979591837, |
| "grad_norm": 1.8273061513900757, |
| "learning_rate": 6.222222222222223e-05, |
| "loss": 0.1857, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.5306122448979593, |
| "grad_norm": 0.46482834219932556, |
| "learning_rate": 6.215384615384615e-05, |
| "loss": 0.0571, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.538775510204082, |
| "grad_norm": 0.7548282742500305, |
| "learning_rate": 6.208547008547009e-05, |
| "loss": 0.0681, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.546938775510204, |
| "grad_norm": 0.4708963930606842, |
| "learning_rate": 6.201709401709402e-05, |
| "loss": 0.0416, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.5551020408163265, |
| "grad_norm": 0.4314163029193878, |
| "learning_rate": 6.194871794871796e-05, |
| "loss": 0.0344, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.563265306122449, |
| "grad_norm": 0.23221971094608307, |
| "learning_rate": 6.188034188034189e-05, |
| "loss": 0.0315, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.35419610142707825, |
| "learning_rate": 6.181196581196583e-05, |
| "loss": 0.0223, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.5795918367346937, |
| "grad_norm": 0.3372536897659302, |
| "learning_rate": 6.174358974358975e-05, |
| "loss": 0.0491, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.5877551020408163, |
| "grad_norm": 0.262692391872406, |
| "learning_rate": 6.167521367521368e-05, |
| "loss": 0.026, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.595918367346939, |
| "grad_norm": 0.30098071694374084, |
| "learning_rate": 6.160683760683761e-05, |
| "loss": 0.0295, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.6040816326530614, |
| "grad_norm": 0.3119616210460663, |
| "learning_rate": 6.153846153846155e-05, |
| "loss": 0.0364, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.612244897959184, |
| "grad_norm": 0.5946303606033325, |
| "learning_rate": 6.147008547008548e-05, |
| "loss": 0.0525, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.620408163265306, |
| "grad_norm": 0.4923531711101532, |
| "learning_rate": 6.140170940170942e-05, |
| "loss": 0.0352, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.6285714285714286, |
| "grad_norm": 1.0566976070404053, |
| "learning_rate": 6.133333333333334e-05, |
| "loss": 0.1034, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.636734693877551, |
| "grad_norm": 0.44877099990844727, |
| "learning_rate": 6.126495726495727e-05, |
| "loss": 0.0447, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.644897959183673, |
| "grad_norm": 0.749967098236084, |
| "learning_rate": 6.119658119658119e-05, |
| "loss": 0.103, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.6530612244897958, |
| "grad_norm": 0.5334205031394958, |
| "learning_rate": 6.112820512820513e-05, |
| "loss": 0.045, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.6612244897959183, |
| "grad_norm": 0.6284877061843872, |
| "learning_rate": 6.105982905982906e-05, |
| "loss": 0.0766, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.669387755102041, |
| "grad_norm": 0.1609213799238205, |
| "learning_rate": 6.0991452991452996e-05, |
| "loss": 0.0149, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.6775510204081634, |
| "grad_norm": 0.208161860704422, |
| "learning_rate": 6.092307692307692e-05, |
| "loss": 0.0228, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.685714285714286, |
| "grad_norm": 0.5576138496398926, |
| "learning_rate": 6.085470085470086e-05, |
| "loss": 0.0649, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.693877551020408, |
| "grad_norm": 0.17669138312339783, |
| "learning_rate": 6.078632478632479e-05, |
| "loss": 0.0175, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.7020408163265306, |
| "grad_norm": 0.39149168133735657, |
| "learning_rate": 6.071794871794872e-05, |
| "loss": 0.0443, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.710204081632653, |
| "grad_norm": 0.2674602270126343, |
| "learning_rate": 6.064957264957265e-05, |
| "loss": 0.0371, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.7183673469387752, |
| "grad_norm": 0.39889994263648987, |
| "learning_rate": 6.058119658119658e-05, |
| "loss": 0.0775, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.726530612244898, |
| "grad_norm": 0.2623346745967865, |
| "learning_rate": 6.0512820512820515e-05, |
| "loss": 0.0318, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.7346938775510203, |
| "grad_norm": 0.34579864144325256, |
| "learning_rate": 6.044444444444445e-05, |
| "loss": 0.0379, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.742857142857143, |
| "grad_norm": 0.2815621495246887, |
| "learning_rate": 6.037606837606838e-05, |
| "loss": 0.0511, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.7510204081632654, |
| "grad_norm": 0.34444335103034973, |
| "learning_rate": 6.030769230769231e-05, |
| "loss": 0.056, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.759183673469388, |
| "grad_norm": 0.5257909893989563, |
| "learning_rate": 6.0239316239316245e-05, |
| "loss": 0.0736, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.76734693877551, |
| "grad_norm": 0.32044798135757446, |
| "learning_rate": 6.017094017094017e-05, |
| "loss": 0.0376, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.7755102040816326, |
| "grad_norm": 0.37229958176612854, |
| "learning_rate": 6.010256410256411e-05, |
| "loss": 0.0414, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.783673469387755, |
| "grad_norm": 0.4967615306377411, |
| "learning_rate": 6.003418803418804e-05, |
| "loss": 0.0925, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.7918367346938773, |
| "grad_norm": 0.320186048746109, |
| "learning_rate": 5.996581196581197e-05, |
| "loss": 0.0444, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.5603439807891846, |
| "learning_rate": 5.98974358974359e-05, |
| "loss": 0.0534, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.8081632653061224, |
| "grad_norm": 0.33432868123054504, |
| "learning_rate": 5.982905982905984e-05, |
| "loss": 0.0262, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.816326530612245, |
| "grad_norm": 0.4188118278980255, |
| "learning_rate": 5.9760683760683765e-05, |
| "loss": 0.0823, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.8244897959183675, |
| "grad_norm": 0.24907562136650085, |
| "learning_rate": 5.96923076923077e-05, |
| "loss": 0.0134, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.83265306122449, |
| "grad_norm": 0.42357468605041504, |
| "learning_rate": 5.962393162393163e-05, |
| "loss": 0.0386, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.840816326530612, |
| "grad_norm": 0.9156424403190613, |
| "learning_rate": 5.955555555555556e-05, |
| "loss": 0.0803, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.8489795918367347, |
| "grad_norm": 0.45649656653404236, |
| "learning_rate": 5.9487179487179495e-05, |
| "loss": 0.0627, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.3821771442890167, |
| "learning_rate": 5.941880341880343e-05, |
| "loss": 0.0809, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.8653061224489798, |
| "grad_norm": 0.4800649881362915, |
| "learning_rate": 5.9350427350427357e-05, |
| "loss": 0.0367, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.873469387755102, |
| "grad_norm": 0.4957541525363922, |
| "learning_rate": 5.928205128205129e-05, |
| "loss": 0.0577, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.8816326530612244, |
| "grad_norm": 0.5091708898544312, |
| "learning_rate": 5.921367521367522e-05, |
| "loss": 0.086, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.889795918367347, |
| "grad_norm": 0.6676978468894958, |
| "learning_rate": 5.914529914529915e-05, |
| "loss": 0.0234, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.8979591836734695, |
| "grad_norm": 0.6917971968650818, |
| "learning_rate": 5.907692307692309e-05, |
| "loss": 0.0561, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.906122448979592, |
| "grad_norm": 0.5658119916915894, |
| "learning_rate": 5.9008547008547014e-05, |
| "loss": 0.0497, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.914285714285714, |
| "grad_norm": 0.6094546318054199, |
| "learning_rate": 5.894017094017095e-05, |
| "loss": 0.0952, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.9224489795918367, |
| "grad_norm": 0.39741581678390503, |
| "learning_rate": 5.887179487179488e-05, |
| "loss": 0.0618, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.9306122448979592, |
| "grad_norm": 0.6804453730583191, |
| "learning_rate": 5.880341880341881e-05, |
| "loss": 0.0616, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.938775510204082, |
| "grad_norm": 0.9075838923454285, |
| "learning_rate": 5.873504273504274e-05, |
| "loss": 0.0499, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.946938775510204, |
| "grad_norm": 0.33524271845817566, |
| "learning_rate": 5.8666666666666665e-05, |
| "loss": 0.049, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.9551020408163264, |
| "grad_norm": 0.3323378264904022, |
| "learning_rate": 5.85982905982906e-05, |
| "loss": 0.0281, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.963265306122449, |
| "grad_norm": 0.24611520767211914, |
| "learning_rate": 5.8529914529914534e-05, |
| "loss": 0.0708, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.9714285714285715, |
| "grad_norm": 1.779318928718567, |
| "learning_rate": 5.846153846153846e-05, |
| "loss": 0.1223, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.979591836734694, |
| "grad_norm": 0.5884644985198975, |
| "learning_rate": 5.8393162393162395e-05, |
| "loss": 0.0352, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.987755102040816, |
| "grad_norm": 0.47888728976249695, |
| "learning_rate": 5.832478632478633e-05, |
| "loss": 0.0689, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.9959183673469387, |
| "grad_norm": 0.33537718653678894, |
| "learning_rate": 5.825641025641026e-05, |
| "loss": 0.0357, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.3279494047164917, |
| "learning_rate": 5.818803418803419e-05, |
| "loss": 0.0286, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9323504468654185, |
| "eval_f1": 0.9307983009212302, |
| "eval_loss": 0.15686975419521332, |
| "eval_precision": 0.9341369053023137, |
| "eval_recall": 0.9323504468654185, |
| "eval_runtime": 25.0411, |
| "eval_samples_per_second": 17.411, |
| "eval_steps_per_second": 17.411, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.0081632653061225, |
| "grad_norm": 0.8343735933303833, |
| "learning_rate": 5.8119658119658126e-05, |
| "loss": 0.082, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.016326530612245, |
| "grad_norm": 0.6073836088180542, |
| "learning_rate": 5.805128205128205e-05, |
| "loss": 0.0558, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.024489795918367, |
| "grad_norm": 0.42929086089134216, |
| "learning_rate": 5.798290598290599e-05, |
| "loss": 0.029, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.0326530612244897, |
| "grad_norm": 0.3041161298751831, |
| "learning_rate": 5.7914529914529915e-05, |
| "loss": 0.0346, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.0408163265306123, |
| "grad_norm": 0.3552093505859375, |
| "learning_rate": 5.784615384615385e-05, |
| "loss": 0.0231, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.048979591836735, |
| "grad_norm": 0.5431807637214661, |
| "learning_rate": 5.777777777777778e-05, |
| "loss": 0.0455, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.057142857142857, |
| "grad_norm": 0.5495198369026184, |
| "learning_rate": 5.770940170940171e-05, |
| "loss": 0.0103, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.0653061224489795, |
| "grad_norm": 1.091922402381897, |
| "learning_rate": 5.7641025641025645e-05, |
| "loss": 0.0615, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.073469387755102, |
| "grad_norm": 0.39272844791412354, |
| "learning_rate": 5.757264957264958e-05, |
| "loss": 0.0652, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.0816326530612246, |
| "grad_norm": 0.39859914779663086, |
| "learning_rate": 5.750427350427351e-05, |
| "loss": 0.0523, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.089795918367347, |
| "grad_norm": 0.17209431529045105, |
| "learning_rate": 5.743589743589744e-05, |
| "loss": 0.0262, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.0979591836734692, |
| "grad_norm": 0.6512501239776611, |
| "learning_rate": 5.7367521367521375e-05, |
| "loss": 0.0568, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.1061224489795918, |
| "grad_norm": 0.4490756690502167, |
| "learning_rate": 5.72991452991453e-05, |
| "loss": 0.0206, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.1142857142857143, |
| "grad_norm": 0.54147869348526, |
| "learning_rate": 5.723076923076924e-05, |
| "loss": 0.0411, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.122448979591837, |
| "grad_norm": 0.6576200723648071, |
| "learning_rate": 5.716239316239317e-05, |
| "loss": 0.0537, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.130612244897959, |
| "grad_norm": 0.3526168763637543, |
| "learning_rate": 5.70940170940171e-05, |
| "loss": 0.0348, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.1387755102040815, |
| "grad_norm": 0.43255260586738586, |
| "learning_rate": 5.702564102564103e-05, |
| "loss": 0.0279, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.146938775510204, |
| "grad_norm": 0.6807562708854675, |
| "learning_rate": 5.695726495726497e-05, |
| "loss": 0.0805, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.1551020408163266, |
| "grad_norm": 0.21039244532585144, |
| "learning_rate": 5.6888888888888895e-05, |
| "loss": 0.0255, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.163265306122449, |
| "grad_norm": 0.2559818625450134, |
| "learning_rate": 5.682051282051283e-05, |
| "loss": 0.0269, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.1714285714285713, |
| "grad_norm": 0.14890031516551971, |
| "learning_rate": 5.6752136752136756e-05, |
| "loss": 0.0131, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.179591836734694, |
| "grad_norm": 0.3709496855735779, |
| "learning_rate": 5.668376068376069e-05, |
| "loss": 0.0379, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.1877551020408164, |
| "grad_norm": 0.0764717385172844, |
| "learning_rate": 5.6615384615384625e-05, |
| "loss": 0.0042, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.195918367346939, |
| "grad_norm": 0.7881947755813599, |
| "learning_rate": 5.654700854700855e-05, |
| "loss": 0.0576, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.204081632653061, |
| "grad_norm": 0.25396037101745605, |
| "learning_rate": 5.6478632478632487e-05, |
| "loss": 0.0259, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.2122448979591836, |
| "grad_norm": 0.7596886157989502, |
| "learning_rate": 5.641025641025642e-05, |
| "loss": 0.0524, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.220408163265306, |
| "grad_norm": 0.371158242225647, |
| "learning_rate": 5.634188034188035e-05, |
| "loss": 0.0512, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.2285714285714286, |
| "grad_norm": 0.33201202750205994, |
| "learning_rate": 5.627350427350428e-05, |
| "loss": 0.0226, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.236734693877551, |
| "grad_norm": 0.4768315553665161, |
| "learning_rate": 5.62051282051282e-05, |
| "loss": 0.0325, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.2448979591836733, |
| "grad_norm": 0.755546510219574, |
| "learning_rate": 5.613675213675214e-05, |
| "loss": 0.0262, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.253061224489796, |
| "grad_norm": 0.6119134426116943, |
| "learning_rate": 5.606837606837607e-05, |
| "loss": 0.042, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.2612244897959184, |
| "grad_norm": 0.5160887837409973, |
| "learning_rate": 5.6e-05, |
| "loss": 0.0331, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.269387755102041, |
| "grad_norm": 0.47487494349479675, |
| "learning_rate": 5.5931623931623933e-05, |
| "loss": 0.0146, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.277551020408163, |
| "grad_norm": 0.1713923066854477, |
| "learning_rate": 5.586324786324787e-05, |
| "loss": 0.0126, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.2857142857142856, |
| "grad_norm": 0.5118584632873535, |
| "learning_rate": 5.5794871794871795e-05, |
| "loss": 0.0501, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.293877551020408, |
| "grad_norm": 0.21943572163581848, |
| "learning_rate": 5.572649572649573e-05, |
| "loss": 0.021, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.3020408163265307, |
| "grad_norm": 0.447221040725708, |
| "learning_rate": 5.5658119658119664e-05, |
| "loss": 0.034, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.3102040816326532, |
| "grad_norm": 0.10484689474105835, |
| "learning_rate": 5.558974358974359e-05, |
| "loss": 0.0134, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.3183673469387753, |
| "grad_norm": 0.14459459483623505, |
| "learning_rate": 5.5521367521367525e-05, |
| "loss": 0.0222, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.326530612244898, |
| "grad_norm": 0.1565796285867691, |
| "learning_rate": 5.545299145299145e-05, |
| "loss": 0.0185, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.3346938775510204, |
| "grad_norm": 0.17262019217014313, |
| "learning_rate": 5.538461538461539e-05, |
| "loss": 0.0152, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.342857142857143, |
| "grad_norm": 0.154659703373909, |
| "learning_rate": 5.531623931623932e-05, |
| "loss": 0.0211, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.351020408163265, |
| "grad_norm": 0.16751931607723236, |
| "learning_rate": 5.524786324786325e-05, |
| "loss": 0.0255, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.3591836734693876, |
| "grad_norm": 0.15946514904499054, |
| "learning_rate": 5.517948717948718e-05, |
| "loss": 0.0112, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.36734693877551, |
| "grad_norm": 0.4122481048107147, |
| "learning_rate": 5.511111111111112e-05, |
| "loss": 0.0306, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.3755102040816327, |
| "grad_norm": 0.4134220480918884, |
| "learning_rate": 5.5042735042735045e-05, |
| "loss": 0.0384, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.3836734693877553, |
| "grad_norm": 0.4064358174800873, |
| "learning_rate": 5.497435897435898e-05, |
| "loss": 0.0276, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.3918367346938774, |
| "grad_norm": 0.3870207369327545, |
| "learning_rate": 5.490598290598291e-05, |
| "loss": 0.0274, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.20213182270526886, |
| "learning_rate": 5.483760683760684e-05, |
| "loss": 0.0207, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.4081632653061225, |
| "grad_norm": 0.7050586342811584, |
| "learning_rate": 5.4769230769230775e-05, |
| "loss": 0.0686, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.416326530612245, |
| "grad_norm": 0.26291969418525696, |
| "learning_rate": 5.470085470085471e-05, |
| "loss": 0.0178, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.424489795918367, |
| "grad_norm": 0.24425692856311798, |
| "learning_rate": 5.463247863247864e-05, |
| "loss": 0.0239, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.4326530612244897, |
| "grad_norm": 0.17826378345489502, |
| "learning_rate": 5.456410256410257e-05, |
| "loss": 0.0129, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.440816326530612, |
| "grad_norm": 0.88796067237854, |
| "learning_rate": 5.4495726495726505e-05, |
| "loss": 0.0784, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.4489795918367347, |
| "grad_norm": 0.3603014051914215, |
| "learning_rate": 5.442735042735043e-05, |
| "loss": 0.0263, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.4571428571428573, |
| "grad_norm": 0.344101220369339, |
| "learning_rate": 5.435897435897437e-05, |
| "loss": 0.0357, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.4653061224489794, |
| "grad_norm": 1.5145072937011719, |
| "learning_rate": 5.4290598290598294e-05, |
| "loss": 0.0482, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.473469387755102, |
| "grad_norm": 0.4510698616504669, |
| "learning_rate": 5.422222222222223e-05, |
| "loss": 0.0478, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.4816326530612245, |
| "grad_norm": 1.4540811777114868, |
| "learning_rate": 5.415384615384616e-05, |
| "loss": 0.1315, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.489795918367347, |
| "grad_norm": 0.3050004839897156, |
| "learning_rate": 5.408547008547009e-05, |
| "loss": 0.0409, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.497959183673469, |
| "grad_norm": 0.40628471970558167, |
| "learning_rate": 5.4017094017094025e-05, |
| "loss": 0.0405, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.5061224489795917, |
| "grad_norm": 0.28513649106025696, |
| "learning_rate": 5.394871794871796e-05, |
| "loss": 0.0242, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.5142857142857142, |
| "grad_norm": 0.1387534886598587, |
| "learning_rate": 5.3880341880341886e-05, |
| "loss": 0.0138, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.522448979591837, |
| "grad_norm": 0.21195898950099945, |
| "learning_rate": 5.381196581196582e-05, |
| "loss": 0.0116, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.5306122448979593, |
| "grad_norm": 0.5661990642547607, |
| "learning_rate": 5.374358974358974e-05, |
| "loss": 0.0493, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.538775510204082, |
| "grad_norm": 0.1676921248435974, |
| "learning_rate": 5.3675213675213675e-05, |
| "loss": 0.0172, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.546938775510204, |
| "grad_norm": 0.5208879113197327, |
| "learning_rate": 5.360683760683761e-05, |
| "loss": 0.0243, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.5551020408163265, |
| "grad_norm": 0.30240461230278015, |
| "learning_rate": 5.353846153846154e-05, |
| "loss": 0.0251, |
| "step": 437 |
| }, |
| { |
| "epoch": 3.563265306122449, |
| "grad_norm": 0.3899058699607849, |
| "learning_rate": 5.347008547008547e-05, |
| "loss": 0.0289, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 0.6027955412864685, |
| "learning_rate": 5.3401709401709406e-05, |
| "loss": 0.0465, |
| "step": 439 |
| }, |
| { |
| "epoch": 3.5795918367346937, |
| "grad_norm": 0.30090829730033875, |
| "learning_rate": 5.333333333333333e-05, |
| "loss": 0.0165, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.5877551020408163, |
| "grad_norm": 0.6930409669876099, |
| "learning_rate": 5.326495726495727e-05, |
| "loss": 0.0603, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.595918367346939, |
| "grad_norm": 0.375592440366745, |
| "learning_rate": 5.31965811965812e-05, |
| "loss": 0.0287, |
| "step": 442 |
| }, |
| { |
| "epoch": 3.6040816326530614, |
| "grad_norm": 0.8024219870567322, |
| "learning_rate": 5.312820512820513e-05, |
| "loss": 0.0476, |
| "step": 443 |
| }, |
| { |
| "epoch": 3.612244897959184, |
| "grad_norm": 0.4552713632583618, |
| "learning_rate": 5.3059829059829063e-05, |
| "loss": 0.0136, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.620408163265306, |
| "grad_norm": 0.6890722513198853, |
| "learning_rate": 5.299145299145299e-05, |
| "loss": 0.0257, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.6285714285714286, |
| "grad_norm": 0.4332926869392395, |
| "learning_rate": 5.2923076923076925e-05, |
| "loss": 0.0171, |
| "step": 446 |
| }, |
| { |
| "epoch": 3.636734693877551, |
| "grad_norm": 0.21104751527309418, |
| "learning_rate": 5.285470085470086e-05, |
| "loss": 0.0218, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.644897959183673, |
| "grad_norm": 0.2659521698951721, |
| "learning_rate": 5.278632478632479e-05, |
| "loss": 0.0079, |
| "step": 448 |
| }, |
| { |
| "epoch": 3.6530612244897958, |
| "grad_norm": 1.0414408445358276, |
| "learning_rate": 5.271794871794872e-05, |
| "loss": 0.0998, |
| "step": 449 |
| }, |
| { |
| "epoch": 3.6612244897959183, |
| "grad_norm": 0.5479117035865784, |
| "learning_rate": 5.2649572649572655e-05, |
| "loss": 0.0259, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.669387755102041, |
| "grad_norm": 0.5206521153450012, |
| "learning_rate": 5.258119658119658e-05, |
| "loss": 0.0455, |
| "step": 451 |
| }, |
| { |
| "epoch": 3.6775510204081634, |
| "grad_norm": 0.7257238030433655, |
| "learning_rate": 5.251282051282052e-05, |
| "loss": 0.0418, |
| "step": 452 |
| }, |
| { |
| "epoch": 3.685714285714286, |
| "grad_norm": 0.3261127471923828, |
| "learning_rate": 5.244444444444445e-05, |
| "loss": 0.0366, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.693877551020408, |
| "grad_norm": 0.27776315808296204, |
| "learning_rate": 5.237606837606838e-05, |
| "loss": 0.0238, |
| "step": 454 |
| }, |
| { |
| "epoch": 3.7020408163265306, |
| "grad_norm": 0.43985986709594727, |
| "learning_rate": 5.230769230769231e-05, |
| "loss": 0.0304, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.710204081632653, |
| "grad_norm": 0.7832798361778259, |
| "learning_rate": 5.223931623931625e-05, |
| "loss": 0.0394, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.7183673469387752, |
| "grad_norm": 0.610369861125946, |
| "learning_rate": 5.2170940170940175e-05, |
| "loss": 0.0328, |
| "step": 457 |
| }, |
| { |
| "epoch": 3.726530612244898, |
| "grad_norm": 0.7184517979621887, |
| "learning_rate": 5.210256410256411e-05, |
| "loss": 0.0492, |
| "step": 458 |
| }, |
| { |
| "epoch": 3.7346938775510203, |
| "grad_norm": 0.23181170225143433, |
| "learning_rate": 5.203418803418804e-05, |
| "loss": 0.0165, |
| "step": 459 |
| }, |
| { |
| "epoch": 3.742857142857143, |
| "grad_norm": 0.440939337015152, |
| "learning_rate": 5.196581196581197e-05, |
| "loss": 0.0397, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.7510204081632654, |
| "grad_norm": 2.7026727199554443, |
| "learning_rate": 5.1897435897435905e-05, |
| "loss": 0.0126, |
| "step": 461 |
| }, |
| { |
| "epoch": 3.759183673469388, |
| "grad_norm": 0.20904560387134552, |
| "learning_rate": 5.182905982905983e-05, |
| "loss": 0.0238, |
| "step": 462 |
| }, |
| { |
| "epoch": 3.76734693877551, |
| "grad_norm": 0.42715728282928467, |
| "learning_rate": 5.176068376068377e-05, |
| "loss": 0.032, |
| "step": 463 |
| }, |
| { |
| "epoch": 3.7755102040816326, |
| "grad_norm": 0.6544044613838196, |
| "learning_rate": 5.16923076923077e-05, |
| "loss": 0.0357, |
| "step": 464 |
| }, |
| { |
| "epoch": 3.783673469387755, |
| "grad_norm": 0.4051888883113861, |
| "learning_rate": 5.162393162393163e-05, |
| "loss": 0.038, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.7918367346938773, |
| "grad_norm": 0.3358502686023712, |
| "learning_rate": 5.155555555555556e-05, |
| "loss": 0.0278, |
| "step": 466 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.20578329265117645, |
| "learning_rate": 5.14871794871795e-05, |
| "loss": 0.0139, |
| "step": 467 |
| }, |
| { |
| "epoch": 3.8081632653061224, |
| "grad_norm": 0.5573644638061523, |
| "learning_rate": 5.1418803418803424e-05, |
| "loss": 0.0494, |
| "step": 468 |
| }, |
| { |
| "epoch": 3.816326530612245, |
| "grad_norm": 0.3017401695251465, |
| "learning_rate": 5.135042735042736e-05, |
| "loss": 0.0199, |
| "step": 469 |
| }, |
| { |
| "epoch": 3.8244897959183675, |
| "grad_norm": 0.73221755027771, |
| "learning_rate": 5.128205128205129e-05, |
| "loss": 0.0768, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.83265306122449, |
| "grad_norm": 0.3336375951766968, |
| "learning_rate": 5.1213675213675214e-05, |
| "loss": 0.0263, |
| "step": 471 |
| }, |
| { |
| "epoch": 3.840816326530612, |
| "grad_norm": 0.38057103753089905, |
| "learning_rate": 5.114529914529915e-05, |
| "loss": 0.0224, |
| "step": 472 |
| }, |
| { |
| "epoch": 3.8489795918367347, |
| "grad_norm": 1.4463324546813965, |
| "learning_rate": 5.1076923076923075e-05, |
| "loss": 0.0933, |
| "step": 473 |
| }, |
| { |
| "epoch": 3.857142857142857, |
| "grad_norm": 0.20391391217708588, |
| "learning_rate": 5.100854700854701e-05, |
| "loss": 0.0358, |
| "step": 474 |
| }, |
| { |
| "epoch": 3.8653061224489798, |
| "grad_norm": 0.9996618628501892, |
| "learning_rate": 5.0940170940170944e-05, |
| "loss": 0.0687, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.873469387755102, |
| "grad_norm": 0.2686821520328522, |
| "learning_rate": 5.087179487179487e-05, |
| "loss": 0.0226, |
| "step": 476 |
| }, |
| { |
| "epoch": 3.8816326530612244, |
| "grad_norm": 0.49687331914901733, |
| "learning_rate": 5.0803418803418805e-05, |
| "loss": 0.0377, |
| "step": 477 |
| }, |
| { |
| "epoch": 3.889795918367347, |
| "grad_norm": 0.6116603016853333, |
| "learning_rate": 5.073504273504274e-05, |
| "loss": 0.0668, |
| "step": 478 |
| }, |
| { |
| "epoch": 3.8979591836734695, |
| "grad_norm": 0.27489814162254333, |
| "learning_rate": 5.066666666666667e-05, |
| "loss": 0.0201, |
| "step": 479 |
| }, |
| { |
| "epoch": 3.906122448979592, |
| "grad_norm": 0.24494236707687378, |
| "learning_rate": 5.05982905982906e-05, |
| "loss": 0.0212, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.914285714285714, |
| "grad_norm": 0.20727446675300598, |
| "learning_rate": 5.052991452991453e-05, |
| "loss": 0.0252, |
| "step": 481 |
| }, |
| { |
| "epoch": 3.9224489795918367, |
| "grad_norm": 0.2882131040096283, |
| "learning_rate": 5.046153846153846e-05, |
| "loss": 0.0214, |
| "step": 482 |
| }, |
| { |
| "epoch": 3.9306122448979592, |
| "grad_norm": 0.40643471479415894, |
| "learning_rate": 5.03931623931624e-05, |
| "loss": 0.0303, |
| "step": 483 |
| }, |
| { |
| "epoch": 3.938775510204082, |
| "grad_norm": 0.25172147154808044, |
| "learning_rate": 5.0324786324786325e-05, |
| "loss": 0.0173, |
| "step": 484 |
| }, |
| { |
| "epoch": 3.946938775510204, |
| "grad_norm": 0.5423907041549683, |
| "learning_rate": 5.025641025641026e-05, |
| "loss": 0.0612, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.9551020408163264, |
| "grad_norm": 0.6183574795722961, |
| "learning_rate": 5.018803418803419e-05, |
| "loss": 0.0504, |
| "step": 486 |
| }, |
| { |
| "epoch": 3.963265306122449, |
| "grad_norm": 0.30423736572265625, |
| "learning_rate": 5.011965811965812e-05, |
| "loss": 0.0332, |
| "step": 487 |
| }, |
| { |
| "epoch": 3.9714285714285715, |
| "grad_norm": 0.5488356947898865, |
| "learning_rate": 5.0051282051282055e-05, |
| "loss": 0.0318, |
| "step": 488 |
| }, |
| { |
| "epoch": 3.979591836734694, |
| "grad_norm": 0.4721347987651825, |
| "learning_rate": 4.998290598290599e-05, |
| "loss": 0.0385, |
| "step": 489 |
| }, |
| { |
| "epoch": 3.987755102040816, |
| "grad_norm": 0.26058709621429443, |
| "learning_rate": 4.991452991452992e-05, |
| "loss": 0.0306, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.9959183673469387, |
| "grad_norm": 0.6200685501098633, |
| "learning_rate": 4.984615384615385e-05, |
| "loss": 0.0661, |
| "step": 491 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.33986982703208923, |
| "learning_rate": 4.9777777777777785e-05, |
| "loss": 0.0131, |
| "step": 492 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9418096418553069, |
| "eval_f1": 0.9408911746349587, |
| "eval_loss": 0.11904772371053696, |
| "eval_precision": 0.9424741467431446, |
| "eval_recall": 0.9418096418553069, |
| "eval_runtime": 26.5542, |
| "eval_samples_per_second": 16.419, |
| "eval_steps_per_second": 16.419, |
| "step": 492 |
| }, |
| { |
| "epoch": 4.0081632653061225, |
| "grad_norm": 0.37561219930648804, |
| "learning_rate": 4.970940170940171e-05, |
| "loss": 0.0331, |
| "step": 493 |
| }, |
| { |
| "epoch": 4.016326530612245, |
| "grad_norm": 0.3382563292980194, |
| "learning_rate": 4.964102564102565e-05, |
| "loss": 0.0247, |
| "step": 494 |
| }, |
| { |
| "epoch": 4.024489795918368, |
| "grad_norm": 0.1595546305179596, |
| "learning_rate": 4.957264957264958e-05, |
| "loss": 0.01, |
| "step": 495 |
| }, |
| { |
| "epoch": 4.03265306122449, |
| "grad_norm": 0.5388701558113098, |
| "learning_rate": 4.950427350427351e-05, |
| "loss": 0.0391, |
| "step": 496 |
| }, |
| { |
| "epoch": 4.040816326530612, |
| "grad_norm": 0.5022732615470886, |
| "learning_rate": 4.943589743589744e-05, |
| "loss": 0.0113, |
| "step": 497 |
| }, |
| { |
| "epoch": 4.048979591836734, |
| "grad_norm": 0.6687523126602173, |
| "learning_rate": 4.936752136752137e-05, |
| "loss": 0.0489, |
| "step": 498 |
| }, |
| { |
| "epoch": 4.057142857142857, |
| "grad_norm": 0.12865830957889557, |
| "learning_rate": 4.9299145299145305e-05, |
| "loss": 0.0044, |
| "step": 499 |
| }, |
| { |
| "epoch": 4.0653061224489795, |
| "grad_norm": 0.2922806739807129, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 0.0204, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.073469387755102, |
| "grad_norm": 0.2284225970506668, |
| "learning_rate": 4.9162393162393166e-05, |
| "loss": 0.0385, |
| "step": 501 |
| }, |
| { |
| "epoch": 4.081632653061225, |
| "grad_norm": 0.19399873912334442, |
| "learning_rate": 4.90940170940171e-05, |
| "loss": 0.0088, |
| "step": 502 |
| }, |
| { |
| "epoch": 4.089795918367347, |
| "grad_norm": 0.22957640886306763, |
| "learning_rate": 4.9025641025641035e-05, |
| "loss": 0.0238, |
| "step": 503 |
| }, |
| { |
| "epoch": 4.09795918367347, |
| "grad_norm": 0.1837630271911621, |
| "learning_rate": 4.895726495726496e-05, |
| "loss": 0.0181, |
| "step": 504 |
| }, |
| { |
| "epoch": 4.106122448979592, |
| "grad_norm": 0.17346547544002533, |
| "learning_rate": 4.88888888888889e-05, |
| "loss": 0.0134, |
| "step": 505 |
| }, |
| { |
| "epoch": 4.114285714285714, |
| "grad_norm": 0.5421778559684753, |
| "learning_rate": 4.882051282051283e-05, |
| "loss": 0.0256, |
| "step": 506 |
| }, |
| { |
| "epoch": 4.122448979591836, |
| "grad_norm": 0.33288803696632385, |
| "learning_rate": 4.875213675213676e-05, |
| "loss": 0.0134, |
| "step": 507 |
| }, |
| { |
| "epoch": 4.130612244897959, |
| "grad_norm": 0.44486677646636963, |
| "learning_rate": 4.8683760683760686e-05, |
| "loss": 0.0105, |
| "step": 508 |
| }, |
| { |
| "epoch": 4.1387755102040815, |
| "grad_norm": 0.3703673183917999, |
| "learning_rate": 4.861538461538461e-05, |
| "loss": 0.0219, |
| "step": 509 |
| }, |
| { |
| "epoch": 4.146938775510204, |
| "grad_norm": 0.3831328749656677, |
| "learning_rate": 4.854700854700855e-05, |
| "loss": 0.0211, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.155102040816327, |
| "grad_norm": 0.29360687732696533, |
| "learning_rate": 4.847863247863248e-05, |
| "loss": 0.0195, |
| "step": 511 |
| }, |
| { |
| "epoch": 4.163265306122449, |
| "grad_norm": 0.2633644640445709, |
| "learning_rate": 4.841025641025641e-05, |
| "loss": 0.0099, |
| "step": 512 |
| }, |
| { |
| "epoch": 4.171428571428572, |
| "grad_norm": 0.8873499631881714, |
| "learning_rate": 4.8341880341880344e-05, |
| "loss": 0.0423, |
| "step": 513 |
| }, |
| { |
| "epoch": 4.179591836734694, |
| "grad_norm": 0.6571605801582336, |
| "learning_rate": 4.827350427350428e-05, |
| "loss": 0.0374, |
| "step": 514 |
| }, |
| { |
| "epoch": 4.187755102040816, |
| "grad_norm": 0.22063897550106049, |
| "learning_rate": 4.8205128205128205e-05, |
| "loss": 0.0098, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.1959183673469385, |
| "grad_norm": 0.5064321756362915, |
| "learning_rate": 4.813675213675214e-05, |
| "loss": 0.0316, |
| "step": 516 |
| }, |
| { |
| "epoch": 4.204081632653061, |
| "grad_norm": 0.474624365568161, |
| "learning_rate": 4.806837606837607e-05, |
| "loss": 0.0141, |
| "step": 517 |
| }, |
| { |
| "epoch": 4.2122448979591836, |
| "grad_norm": 0.2246454656124115, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0101, |
| "step": 518 |
| }, |
| { |
| "epoch": 4.220408163265306, |
| "grad_norm": 0.13797767460346222, |
| "learning_rate": 4.7931623931623935e-05, |
| "loss": 0.0135, |
| "step": 519 |
| }, |
| { |
| "epoch": 4.228571428571429, |
| "grad_norm": 0.3289293646812439, |
| "learning_rate": 4.786324786324786e-05, |
| "loss": 0.0159, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.236734693877551, |
| "grad_norm": 0.33728495240211487, |
| "learning_rate": 4.77948717948718e-05, |
| "loss": 0.029, |
| "step": 521 |
| }, |
| { |
| "epoch": 4.244897959183674, |
| "grad_norm": 0.3062879741191864, |
| "learning_rate": 4.772649572649573e-05, |
| "loss": 0.0179, |
| "step": 522 |
| }, |
| { |
| "epoch": 4.253061224489796, |
| "grad_norm": 1.0204277038574219, |
| "learning_rate": 4.765811965811966e-05, |
| "loss": 0.0455, |
| "step": 523 |
| }, |
| { |
| "epoch": 4.261224489795918, |
| "grad_norm": 0.4178808927536011, |
| "learning_rate": 4.758974358974359e-05, |
| "loss": 0.0213, |
| "step": 524 |
| }, |
| { |
| "epoch": 4.2693877551020405, |
| "grad_norm": 0.31695181131362915, |
| "learning_rate": 4.752136752136753e-05, |
| "loss": 0.0304, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.277551020408163, |
| "grad_norm": 0.2975795269012451, |
| "learning_rate": 4.7452991452991455e-05, |
| "loss": 0.0175, |
| "step": 526 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.32276976108551025, |
| "learning_rate": 4.738461538461539e-05, |
| "loss": 0.0092, |
| "step": 527 |
| }, |
| { |
| "epoch": 4.293877551020408, |
| "grad_norm": 0.9083264470100403, |
| "learning_rate": 4.731623931623932e-05, |
| "loss": 0.0141, |
| "step": 528 |
| }, |
| { |
| "epoch": 4.302040816326531, |
| "grad_norm": 0.6064096689224243, |
| "learning_rate": 4.724786324786325e-05, |
| "loss": 0.0345, |
| "step": 529 |
| }, |
| { |
| "epoch": 4.310204081632653, |
| "grad_norm": 1.0197865962982178, |
| "learning_rate": 4.7179487179487185e-05, |
| "loss": 0.0766, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.318367346938776, |
| "grad_norm": 0.5525026321411133, |
| "learning_rate": 4.711111111111112e-05, |
| "loss": 0.0309, |
| "step": 531 |
| }, |
| { |
| "epoch": 4.326530612244898, |
| "grad_norm": 0.25368353724479675, |
| "learning_rate": 4.704273504273505e-05, |
| "loss": 0.0111, |
| "step": 532 |
| }, |
| { |
| "epoch": 4.33469387755102, |
| "grad_norm": 0.8449558019638062, |
| "learning_rate": 4.697435897435898e-05, |
| "loss": 0.0422, |
| "step": 533 |
| }, |
| { |
| "epoch": 4.3428571428571425, |
| "grad_norm": 0.33069613575935364, |
| "learning_rate": 4.6905982905982915e-05, |
| "loss": 0.012, |
| "step": 534 |
| }, |
| { |
| "epoch": 4.351020408163265, |
| "grad_norm": 0.20474620163440704, |
| "learning_rate": 4.683760683760684e-05, |
| "loss": 0.0078, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.359183673469388, |
| "grad_norm": 0.7975245118141174, |
| "learning_rate": 4.676923076923078e-05, |
| "loss": 0.0343, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.36734693877551, |
| "grad_norm": 0.1958104521036148, |
| "learning_rate": 4.6700854700854704e-05, |
| "loss": 0.0089, |
| "step": 537 |
| }, |
| { |
| "epoch": 4.375510204081633, |
| "grad_norm": 0.1404639035463333, |
| "learning_rate": 4.663247863247864e-05, |
| "loss": 0.007, |
| "step": 538 |
| }, |
| { |
| "epoch": 4.383673469387755, |
| "grad_norm": 0.4322687089443207, |
| "learning_rate": 4.656410256410257e-05, |
| "loss": 0.0127, |
| "step": 539 |
| }, |
| { |
| "epoch": 4.391836734693878, |
| "grad_norm": 0.3357923924922943, |
| "learning_rate": 4.64957264957265e-05, |
| "loss": 0.0283, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.23733913898468018, |
| "learning_rate": 4.6427350427350435e-05, |
| "loss": 0.0128, |
| "step": 541 |
| }, |
| { |
| "epoch": 4.408163265306122, |
| "grad_norm": 0.24629896879196167, |
| "learning_rate": 4.635897435897437e-05, |
| "loss": 0.0124, |
| "step": 542 |
| }, |
| { |
| "epoch": 4.416326530612245, |
| "grad_norm": 0.38652303814888, |
| "learning_rate": 4.6290598290598296e-05, |
| "loss": 0.0167, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.424489795918367, |
| "grad_norm": 0.268097847700119, |
| "learning_rate": 4.6222222222222224e-05, |
| "loss": 0.0168, |
| "step": 544 |
| }, |
| { |
| "epoch": 4.43265306122449, |
| "grad_norm": 0.6078333854675293, |
| "learning_rate": 4.615384615384615e-05, |
| "loss": 0.0147, |
| "step": 545 |
| }, |
| { |
| "epoch": 4.440816326530612, |
| "grad_norm": 0.12846365571022034, |
| "learning_rate": 4.6085470085470086e-05, |
| "loss": 0.0047, |
| "step": 546 |
| }, |
| { |
| "epoch": 4.448979591836735, |
| "grad_norm": 0.23714855313301086, |
| "learning_rate": 4.601709401709402e-05, |
| "loss": 0.0142, |
| "step": 547 |
| }, |
| { |
| "epoch": 4.457142857142857, |
| "grad_norm": 0.3480728268623352, |
| "learning_rate": 4.594871794871795e-05, |
| "loss": 0.0144, |
| "step": 548 |
| }, |
| { |
| "epoch": 4.46530612244898, |
| "grad_norm": 0.24332262575626373, |
| "learning_rate": 4.588034188034188e-05, |
| "loss": 0.0155, |
| "step": 549 |
| }, |
| { |
| "epoch": 4.473469387755102, |
| "grad_norm": 0.13778908550739288, |
| "learning_rate": 4.5811965811965816e-05, |
| "loss": 0.0051, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.481632653061224, |
| "grad_norm": 5.389389991760254, |
| "learning_rate": 4.574358974358974e-05, |
| "loss": 0.042, |
| "step": 551 |
| }, |
| { |
| "epoch": 4.489795918367347, |
| "grad_norm": 0.2608646750450134, |
| "learning_rate": 4.567521367521368e-05, |
| "loss": 0.0233, |
| "step": 552 |
| }, |
| { |
| "epoch": 4.497959183673469, |
| "grad_norm": 0.42670515179634094, |
| "learning_rate": 4.560683760683761e-05, |
| "loss": 0.0253, |
| "step": 553 |
| }, |
| { |
| "epoch": 4.506122448979592, |
| "grad_norm": 1.0257692337036133, |
| "learning_rate": 4.553846153846154e-05, |
| "loss": 0.0571, |
| "step": 554 |
| }, |
| { |
| "epoch": 4.514285714285714, |
| "grad_norm": 0.7005570530891418, |
| "learning_rate": 4.5470085470085474e-05, |
| "loss": 0.0314, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.522448979591837, |
| "grad_norm": 0.39354655146598816, |
| "learning_rate": 4.54017094017094e-05, |
| "loss": 0.0286, |
| "step": 556 |
| }, |
| { |
| "epoch": 4.530612244897959, |
| "grad_norm": 0.2712419629096985, |
| "learning_rate": 4.5333333333333335e-05, |
| "loss": 0.0186, |
| "step": 557 |
| }, |
| { |
| "epoch": 4.538775510204082, |
| "grad_norm": 0.19760717451572418, |
| "learning_rate": 4.526495726495727e-05, |
| "loss": 0.0165, |
| "step": 558 |
| }, |
| { |
| "epoch": 4.546938775510204, |
| "grad_norm": 0.4761720895767212, |
| "learning_rate": 4.51965811965812e-05, |
| "loss": 0.0201, |
| "step": 559 |
| }, |
| { |
| "epoch": 4.555102040816326, |
| "grad_norm": 0.29089584946632385, |
| "learning_rate": 4.512820512820513e-05, |
| "loss": 0.0094, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.563265306122449, |
| "grad_norm": 0.2839196026325226, |
| "learning_rate": 4.5059829059829065e-05, |
| "loss": 0.0109, |
| "step": 561 |
| }, |
| { |
| "epoch": 4.571428571428571, |
| "grad_norm": 0.786836564540863, |
| "learning_rate": 4.499145299145299e-05, |
| "loss": 0.0452, |
| "step": 562 |
| }, |
| { |
| "epoch": 4.579591836734694, |
| "grad_norm": 0.719528317451477, |
| "learning_rate": 4.492307692307693e-05, |
| "loss": 0.0379, |
| "step": 563 |
| }, |
| { |
| "epoch": 4.587755102040816, |
| "grad_norm": 0.138275608420372, |
| "learning_rate": 4.485470085470086e-05, |
| "loss": 0.0128, |
| "step": 564 |
| }, |
| { |
| "epoch": 4.595918367346939, |
| "grad_norm": 0.5043324828147888, |
| "learning_rate": 4.478632478632479e-05, |
| "loss": 0.045, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.604081632653061, |
| "grad_norm": 0.2945300042629242, |
| "learning_rate": 4.471794871794872e-05, |
| "loss": 0.0258, |
| "step": 566 |
| }, |
| { |
| "epoch": 4.612244897959184, |
| "grad_norm": 0.14936916530132294, |
| "learning_rate": 4.464957264957266e-05, |
| "loss": 0.0069, |
| "step": 567 |
| }, |
| { |
| "epoch": 4.6204081632653065, |
| "grad_norm": 0.23340395092964172, |
| "learning_rate": 4.4581196581196585e-05, |
| "loss": 0.0179, |
| "step": 568 |
| }, |
| { |
| "epoch": 4.628571428571428, |
| "grad_norm": 0.49968650937080383, |
| "learning_rate": 4.451282051282052e-05, |
| "loss": 0.0225, |
| "step": 569 |
| }, |
| { |
| "epoch": 4.636734693877551, |
| "grad_norm": 0.25828638672828674, |
| "learning_rate": 4.444444444444445e-05, |
| "loss": 0.0163, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.644897959183673, |
| "grad_norm": 0.853486955165863, |
| "learning_rate": 4.437606837606838e-05, |
| "loss": 0.0136, |
| "step": 571 |
| }, |
| { |
| "epoch": 4.653061224489796, |
| "grad_norm": 0.9049561619758606, |
| "learning_rate": 4.4307692307692315e-05, |
| "loss": 0.0393, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.661224489795918, |
| "grad_norm": 0.2781153619289398, |
| "learning_rate": 4.423931623931624e-05, |
| "loss": 0.0171, |
| "step": 573 |
| }, |
| { |
| "epoch": 4.669387755102041, |
| "grad_norm": 0.29508426785469055, |
| "learning_rate": 4.417094017094018e-05, |
| "loss": 0.0184, |
| "step": 574 |
| }, |
| { |
| "epoch": 4.677551020408163, |
| "grad_norm": 0.35638830065727234, |
| "learning_rate": 4.410256410256411e-05, |
| "loss": 0.0219, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.685714285714286, |
| "grad_norm": 0.4791906476020813, |
| "learning_rate": 4.403418803418804e-05, |
| "loss": 0.0155, |
| "step": 576 |
| }, |
| { |
| "epoch": 4.6938775510204085, |
| "grad_norm": 0.48481646180152893, |
| "learning_rate": 4.396581196581197e-05, |
| "loss": 0.0333, |
| "step": 577 |
| }, |
| { |
| "epoch": 4.70204081632653, |
| "grad_norm": 0.6990826725959778, |
| "learning_rate": 4.389743589743591e-05, |
| "loss": 0.0199, |
| "step": 578 |
| }, |
| { |
| "epoch": 4.710204081632653, |
| "grad_norm": 0.18636147677898407, |
| "learning_rate": 4.3829059829059834e-05, |
| "loss": 0.0071, |
| "step": 579 |
| }, |
| { |
| "epoch": 4.718367346938775, |
| "grad_norm": 0.3480212986469269, |
| "learning_rate": 4.376068376068377e-05, |
| "loss": 0.026, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.726530612244898, |
| "grad_norm": 0.43473413586616516, |
| "learning_rate": 4.369230769230769e-05, |
| "loss": 0.0078, |
| "step": 581 |
| }, |
| { |
| "epoch": 4.73469387755102, |
| "grad_norm": 0.4350218176841736, |
| "learning_rate": 4.3623931623931624e-05, |
| "loss": 0.0193, |
| "step": 582 |
| }, |
| { |
| "epoch": 4.742857142857143, |
| "grad_norm": 0.11488892883062363, |
| "learning_rate": 4.355555555555556e-05, |
| "loss": 0.0079, |
| "step": 583 |
| }, |
| { |
| "epoch": 4.751020408163265, |
| "grad_norm": 0.3166949152946472, |
| "learning_rate": 4.3487179487179485e-05, |
| "loss": 0.017, |
| "step": 584 |
| }, |
| { |
| "epoch": 4.759183673469388, |
| "grad_norm": 0.6318601369857788, |
| "learning_rate": 4.341880341880342e-05, |
| "loss": 0.0156, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.7673469387755105, |
| "grad_norm": 0.47680795192718506, |
| "learning_rate": 4.3350427350427354e-05, |
| "loss": 0.0325, |
| "step": 586 |
| }, |
| { |
| "epoch": 4.775510204081632, |
| "grad_norm": 0.1389036327600479, |
| "learning_rate": 4.328205128205128e-05, |
| "loss": 0.0151, |
| "step": 587 |
| }, |
| { |
| "epoch": 4.783673469387755, |
| "grad_norm": 0.4114654064178467, |
| "learning_rate": 4.3213675213675216e-05, |
| "loss": 0.0261, |
| "step": 588 |
| }, |
| { |
| "epoch": 4.791836734693877, |
| "grad_norm": 0.3760494291782379, |
| "learning_rate": 4.314529914529915e-05, |
| "loss": 0.0149, |
| "step": 589 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.3259177803993225, |
| "learning_rate": 4.307692307692308e-05, |
| "loss": 0.0105, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.808163265306122, |
| "grad_norm": 0.19598586857318878, |
| "learning_rate": 4.300854700854701e-05, |
| "loss": 0.0192, |
| "step": 591 |
| }, |
| { |
| "epoch": 4.816326530612245, |
| "grad_norm": 0.24014200270175934, |
| "learning_rate": 4.294017094017094e-05, |
| "loss": 0.0238, |
| "step": 592 |
| }, |
| { |
| "epoch": 4.8244897959183675, |
| "grad_norm": 0.25813162326812744, |
| "learning_rate": 4.287179487179487e-05, |
| "loss": 0.0169, |
| "step": 593 |
| }, |
| { |
| "epoch": 4.83265306122449, |
| "grad_norm": 0.15530163049697876, |
| "learning_rate": 4.280341880341881e-05, |
| "loss": 0.0113, |
| "step": 594 |
| }, |
| { |
| "epoch": 4.840816326530613, |
| "grad_norm": 0.9454264044761658, |
| "learning_rate": 4.2735042735042735e-05, |
| "loss": 0.071, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.848979591836734, |
| "grad_norm": 0.4564417004585266, |
| "learning_rate": 4.266666666666667e-05, |
| "loss": 0.028, |
| "step": 596 |
| }, |
| { |
| "epoch": 4.857142857142857, |
| "grad_norm": 0.41204723715782166, |
| "learning_rate": 4.2598290598290604e-05, |
| "loss": 0.0337, |
| "step": 597 |
| }, |
| { |
| "epoch": 4.865306122448979, |
| "grad_norm": 0.2819851338863373, |
| "learning_rate": 4.252991452991453e-05, |
| "loss": 0.0286, |
| "step": 598 |
| }, |
| { |
| "epoch": 4.873469387755102, |
| "grad_norm": 1.5066794157028198, |
| "learning_rate": 4.2461538461538465e-05, |
| "loss": 0.0428, |
| "step": 599 |
| }, |
| { |
| "epoch": 4.881632653061224, |
| "grad_norm": 0.5286157131195068, |
| "learning_rate": 4.23931623931624e-05, |
| "loss": 0.0189, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.889795918367347, |
| "grad_norm": 0.94499272108078, |
| "learning_rate": 4.232478632478633e-05, |
| "loss": 0.0531, |
| "step": 601 |
| }, |
| { |
| "epoch": 4.8979591836734695, |
| "grad_norm": 0.10032657533884048, |
| "learning_rate": 4.225641025641026e-05, |
| "loss": 0.0081, |
| "step": 602 |
| }, |
| { |
| "epoch": 4.906122448979592, |
| "grad_norm": 0.23001764714717865, |
| "learning_rate": 4.2188034188034195e-05, |
| "loss": 0.0209, |
| "step": 603 |
| }, |
| { |
| "epoch": 4.914285714285715, |
| "grad_norm": 0.34312617778778076, |
| "learning_rate": 4.211965811965812e-05, |
| "loss": 0.0286, |
| "step": 604 |
| }, |
| { |
| "epoch": 4.922448979591836, |
| "grad_norm": 0.3228893280029297, |
| "learning_rate": 4.205128205128206e-05, |
| "loss": 0.0197, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.930612244897959, |
| "grad_norm": 0.48081985116004944, |
| "learning_rate": 4.198290598290599e-05, |
| "loss": 0.022, |
| "step": 606 |
| }, |
| { |
| "epoch": 4.938775510204081, |
| "grad_norm": 0.18656276166439056, |
| "learning_rate": 4.191452991452992e-05, |
| "loss": 0.0157, |
| "step": 607 |
| }, |
| { |
| "epoch": 4.946938775510204, |
| "grad_norm": 0.29330140352249146, |
| "learning_rate": 4.184615384615385e-05, |
| "loss": 0.0266, |
| "step": 608 |
| }, |
| { |
| "epoch": 4.955102040816326, |
| "grad_norm": 0.22990168631076813, |
| "learning_rate": 4.177777777777778e-05, |
| "loss": 0.0094, |
| "step": 609 |
| }, |
| { |
| "epoch": 4.963265306122449, |
| "grad_norm": 0.31795695424079895, |
| "learning_rate": 4.1709401709401715e-05, |
| "loss": 0.0248, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.9714285714285715, |
| "grad_norm": 0.5349370241165161, |
| "learning_rate": 4.164102564102565e-05, |
| "loss": 0.0126, |
| "step": 611 |
| }, |
| { |
| "epoch": 4.979591836734694, |
| "grad_norm": 0.08685897290706635, |
| "learning_rate": 4.1572649572649577e-05, |
| "loss": 0.0089, |
| "step": 612 |
| }, |
| { |
| "epoch": 4.987755102040817, |
| "grad_norm": 0.17170557379722595, |
| "learning_rate": 4.150427350427351e-05, |
| "loss": 0.024, |
| "step": 613 |
| }, |
| { |
| "epoch": 4.995918367346938, |
| "grad_norm": 0.19321708381175995, |
| "learning_rate": 4.1435897435897445e-05, |
| "loss": 0.0134, |
| "step": 614 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.31885647773742676, |
| "learning_rate": 4.136752136752137e-05, |
| "loss": 0.0177, |
| "step": 615 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9421358209928893, |
| "eval_f1": 0.94177929748661, |
| "eval_loss": 0.11898548156023026, |
| "eval_precision": 0.9418441520307643, |
| "eval_recall": 0.9421358209928893, |
| "eval_runtime": 24.9117, |
| "eval_samples_per_second": 17.502, |
| "eval_steps_per_second": 17.502, |
| "step": 615 |
| }, |
| { |
| "epoch": 5.0081632653061225, |
| "grad_norm": 0.152436301112175, |
| "learning_rate": 4.129914529914531e-05, |
| "loss": 0.0126, |
| "step": 616 |
| }, |
| { |
| "epoch": 5.016326530612245, |
| "grad_norm": 0.29216599464416504, |
| "learning_rate": 4.123076923076923e-05, |
| "loss": 0.0227, |
| "step": 617 |
| }, |
| { |
| "epoch": 5.024489795918368, |
| "grad_norm": 0.28651708364486694, |
| "learning_rate": 4.116239316239316e-05, |
| "loss": 0.0212, |
| "step": 618 |
| }, |
| { |
| "epoch": 5.03265306122449, |
| "grad_norm": 0.4106895327568054, |
| "learning_rate": 4.1094017094017096e-05, |
| "loss": 0.0133, |
| "step": 619 |
| }, |
| { |
| "epoch": 5.040816326530612, |
| "grad_norm": 0.41109445691108704, |
| "learning_rate": 4.1025641025641023e-05, |
| "loss": 0.0327, |
| "step": 620 |
| }, |
| { |
| "epoch": 5.048979591836734, |
| "grad_norm": 0.16403941810131073, |
| "learning_rate": 4.095726495726496e-05, |
| "loss": 0.0147, |
| "step": 621 |
| }, |
| { |
| "epoch": 5.057142857142857, |
| "grad_norm": 0.06824915111064911, |
| "learning_rate": 4.088888888888889e-05, |
| "loss": 0.0034, |
| "step": 622 |
| }, |
| { |
| "epoch": 5.0653061224489795, |
| "grad_norm": 0.27841898798942566, |
| "learning_rate": 4.082051282051282e-05, |
| "loss": 0.0089, |
| "step": 623 |
| }, |
| { |
| "epoch": 5.073469387755102, |
| "grad_norm": 0.4965389370918274, |
| "learning_rate": 4.0752136752136754e-05, |
| "loss": 0.0341, |
| "step": 624 |
| }, |
| { |
| "epoch": 5.081632653061225, |
| "grad_norm": 0.13556616008281708, |
| "learning_rate": 4.068376068376069e-05, |
| "loss": 0.0066, |
| "step": 625 |
| }, |
| { |
| "epoch": 5.089795918367347, |
| "grad_norm": 0.17582395672798157, |
| "learning_rate": 4.0615384615384615e-05, |
| "loss": 0.0121, |
| "step": 626 |
| }, |
| { |
| "epoch": 5.09795918367347, |
| "grad_norm": 0.1906500905752182, |
| "learning_rate": 4.054700854700855e-05, |
| "loss": 0.0121, |
| "step": 627 |
| }, |
| { |
| "epoch": 5.106122448979592, |
| "grad_norm": 0.10774006694555283, |
| "learning_rate": 4.047863247863248e-05, |
| "loss": 0.003, |
| "step": 628 |
| }, |
| { |
| "epoch": 5.114285714285714, |
| "grad_norm": 0.2364247441291809, |
| "learning_rate": 4.041025641025641e-05, |
| "loss": 0.0204, |
| "step": 629 |
| }, |
| { |
| "epoch": 5.122448979591836, |
| "grad_norm": 0.296677827835083, |
| "learning_rate": 4.0341880341880346e-05, |
| "loss": 0.0257, |
| "step": 630 |
| }, |
| { |
| "epoch": 5.130612244897959, |
| "grad_norm": 1.0964839458465576, |
| "learning_rate": 4.027350427350427e-05, |
| "loss": 0.0143, |
| "step": 631 |
| }, |
| { |
| "epoch": 5.1387755102040815, |
| "grad_norm": 0.14557228982448578, |
| "learning_rate": 4.020512820512821e-05, |
| "loss": 0.0083, |
| "step": 632 |
| }, |
| { |
| "epoch": 5.146938775510204, |
| "grad_norm": 0.10422372817993164, |
| "learning_rate": 4.013675213675214e-05, |
| "loss": 0.007, |
| "step": 633 |
| }, |
| { |
| "epoch": 5.155102040816327, |
| "grad_norm": 0.13321611285209656, |
| "learning_rate": 4.006837606837607e-05, |
| "loss": 0.0025, |
| "step": 634 |
| }, |
| { |
| "epoch": 5.163265306122449, |
| "grad_norm": 0.16438782215118408, |
| "learning_rate": 4e-05, |
| "loss": 0.0069, |
| "step": 635 |
| }, |
| { |
| "epoch": 5.171428571428572, |
| "grad_norm": 0.14023995399475098, |
| "learning_rate": 3.993162393162394e-05, |
| "loss": 0.009, |
| "step": 636 |
| }, |
| { |
| "epoch": 5.179591836734694, |
| "grad_norm": 0.1476777046918869, |
| "learning_rate": 3.9863247863247865e-05, |
| "loss": 0.0071, |
| "step": 637 |
| }, |
| { |
| "epoch": 5.187755102040816, |
| "grad_norm": 0.10355421155691147, |
| "learning_rate": 3.97948717948718e-05, |
| "loss": 0.0035, |
| "step": 638 |
| }, |
| { |
| "epoch": 5.1959183673469385, |
| "grad_norm": 0.20340070128440857, |
| "learning_rate": 3.9726495726495733e-05, |
| "loss": 0.0122, |
| "step": 639 |
| }, |
| { |
| "epoch": 5.204081632653061, |
| "grad_norm": 0.20554479956626892, |
| "learning_rate": 3.965811965811966e-05, |
| "loss": 0.0049, |
| "step": 640 |
| }, |
| { |
| "epoch": 5.2122448979591836, |
| "grad_norm": 0.8425185680389404, |
| "learning_rate": 3.9589743589743595e-05, |
| "loss": 0.051, |
| "step": 641 |
| }, |
| { |
| "epoch": 5.220408163265306, |
| "grad_norm": 0.21183599531650543, |
| "learning_rate": 3.952136752136753e-05, |
| "loss": 0.0112, |
| "step": 642 |
| }, |
| { |
| "epoch": 5.228571428571429, |
| "grad_norm": 0.31147268414497375, |
| "learning_rate": 3.945299145299146e-05, |
| "loss": 0.0253, |
| "step": 643 |
| }, |
| { |
| "epoch": 5.236734693877551, |
| "grad_norm": 0.14557214081287384, |
| "learning_rate": 3.938461538461539e-05, |
| "loss": 0.0122, |
| "step": 644 |
| }, |
| { |
| "epoch": 5.244897959183674, |
| "grad_norm": 0.3843270540237427, |
| "learning_rate": 3.931623931623932e-05, |
| "loss": 0.0137, |
| "step": 645 |
| }, |
| { |
| "epoch": 5.253061224489796, |
| "grad_norm": 0.29683414101600647, |
| "learning_rate": 3.9247863247863246e-05, |
| "loss": 0.0152, |
| "step": 646 |
| }, |
| { |
| "epoch": 5.261224489795918, |
| "grad_norm": 0.16846010088920593, |
| "learning_rate": 3.917948717948718e-05, |
| "loss": 0.0062, |
| "step": 647 |
| }, |
| { |
| "epoch": 5.2693877551020405, |
| "grad_norm": 0.6791422963142395, |
| "learning_rate": 3.9111111111111115e-05, |
| "loss": 0.0321, |
| "step": 648 |
| }, |
| { |
| "epoch": 5.277551020408163, |
| "grad_norm": 0.09047195315361023, |
| "learning_rate": 3.904273504273504e-05, |
| "loss": 0.0015, |
| "step": 649 |
| }, |
| { |
| "epoch": 5.285714285714286, |
| "grad_norm": 0.1297522634267807, |
| "learning_rate": 3.8974358974358976e-05, |
| "loss": 0.004, |
| "step": 650 |
| }, |
| { |
| "epoch": 5.293877551020408, |
| "grad_norm": 0.11985556781291962, |
| "learning_rate": 3.890598290598291e-05, |
| "loss": 0.0037, |
| "step": 651 |
| }, |
| { |
| "epoch": 5.302040816326531, |
| "grad_norm": 0.22316373884677887, |
| "learning_rate": 3.883760683760684e-05, |
| "loss": 0.011, |
| "step": 652 |
| }, |
| { |
| "epoch": 5.310204081632653, |
| "grad_norm": 0.4095396399497986, |
| "learning_rate": 3.876923076923077e-05, |
| "loss": 0.0297, |
| "step": 653 |
| }, |
| { |
| "epoch": 5.318367346938776, |
| "grad_norm": 0.49460023641586304, |
| "learning_rate": 3.8700854700854707e-05, |
| "loss": 0.026, |
| "step": 654 |
| }, |
| { |
| "epoch": 5.326530612244898, |
| "grad_norm": 0.01936427690088749, |
| "learning_rate": 3.8632478632478634e-05, |
| "loss": 0.0005, |
| "step": 655 |
| }, |
| { |
| "epoch": 5.33469387755102, |
| "grad_norm": 0.22673501074314117, |
| "learning_rate": 3.856410256410257e-05, |
| "loss": 0.0096, |
| "step": 656 |
| }, |
| { |
| "epoch": 5.3428571428571425, |
| "grad_norm": 0.24311856925487518, |
| "learning_rate": 3.84957264957265e-05, |
| "loss": 0.0092, |
| "step": 657 |
| }, |
| { |
| "epoch": 5.351020408163265, |
| "grad_norm": 0.15321022272109985, |
| "learning_rate": 3.842735042735043e-05, |
| "loss": 0.0057, |
| "step": 658 |
| }, |
| { |
| "epoch": 5.359183673469388, |
| "grad_norm": 0.9374060034751892, |
| "learning_rate": 3.8358974358974364e-05, |
| "loss": 0.0191, |
| "step": 659 |
| }, |
| { |
| "epoch": 5.36734693877551, |
| "grad_norm": 0.10835573077201843, |
| "learning_rate": 3.82905982905983e-05, |
| "loss": 0.0034, |
| "step": 660 |
| }, |
| { |
| "epoch": 5.375510204081633, |
| "grad_norm": 0.18718655407428741, |
| "learning_rate": 3.8222222222222226e-05, |
| "loss": 0.009, |
| "step": 661 |
| }, |
| { |
| "epoch": 5.383673469387755, |
| "grad_norm": 0.2158612310886383, |
| "learning_rate": 3.815384615384616e-05, |
| "loss": 0.0121, |
| "step": 662 |
| }, |
| { |
| "epoch": 5.391836734693878, |
| "grad_norm": 0.26239001750946045, |
| "learning_rate": 3.808547008547009e-05, |
| "loss": 0.0173, |
| "step": 663 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 0.05576225370168686, |
| "learning_rate": 3.8017094017094015e-05, |
| "loss": 0.0013, |
| "step": 664 |
| }, |
| { |
| "epoch": 5.408163265306122, |
| "grad_norm": 0.5364289879798889, |
| "learning_rate": 3.794871794871795e-05, |
| "loss": 0.028, |
| "step": 665 |
| }, |
| { |
| "epoch": 5.416326530612245, |
| "grad_norm": 0.2207004874944687, |
| "learning_rate": 3.7880341880341884e-05, |
| "loss": 0.0096, |
| "step": 666 |
| }, |
| { |
| "epoch": 5.424489795918367, |
| "grad_norm": 0.22549040615558624, |
| "learning_rate": 3.781196581196581e-05, |
| "loss": 0.0067, |
| "step": 667 |
| }, |
| { |
| "epoch": 5.43265306122449, |
| "grad_norm": 0.1842697411775589, |
| "learning_rate": 3.7743589743589745e-05, |
| "loss": 0.0081, |
| "step": 668 |
| }, |
| { |
| "epoch": 5.440816326530612, |
| "grad_norm": 0.34317877888679504, |
| "learning_rate": 3.767521367521368e-05, |
| "loss": 0.008, |
| "step": 669 |
| }, |
| { |
| "epoch": 5.448979591836735, |
| "grad_norm": 0.0797891840338707, |
| "learning_rate": 3.760683760683761e-05, |
| "loss": 0.0027, |
| "step": 670 |
| }, |
| { |
| "epoch": 5.457142857142857, |
| "grad_norm": 0.09596482664346695, |
| "learning_rate": 3.753846153846154e-05, |
| "loss": 0.004, |
| "step": 671 |
| }, |
| { |
| "epoch": 5.46530612244898, |
| "grad_norm": 0.13847698271274567, |
| "learning_rate": 3.7470085470085476e-05, |
| "loss": 0.008, |
| "step": 672 |
| }, |
| { |
| "epoch": 5.473469387755102, |
| "grad_norm": 0.4299347400665283, |
| "learning_rate": 3.74017094017094e-05, |
| "loss": 0.0129, |
| "step": 673 |
| }, |
| { |
| "epoch": 5.481632653061224, |
| "grad_norm": 0.37290269136428833, |
| "learning_rate": 3.733333333333334e-05, |
| "loss": 0.009, |
| "step": 674 |
| }, |
| { |
| "epoch": 5.489795918367347, |
| "grad_norm": 0.1274859458208084, |
| "learning_rate": 3.726495726495727e-05, |
| "loss": 0.0026, |
| "step": 675 |
| }, |
| { |
| "epoch": 5.497959183673469, |
| "grad_norm": 0.055473342537879944, |
| "learning_rate": 3.71965811965812e-05, |
| "loss": 0.0011, |
| "step": 676 |
| }, |
| { |
| "epoch": 5.506122448979592, |
| "grad_norm": 0.41247642040252686, |
| "learning_rate": 3.712820512820513e-05, |
| "loss": 0.0205, |
| "step": 677 |
| }, |
| { |
| "epoch": 5.514285714285714, |
| "grad_norm": 0.37154918909072876, |
| "learning_rate": 3.705982905982907e-05, |
| "loss": 0.0163, |
| "step": 678 |
| }, |
| { |
| "epoch": 5.522448979591837, |
| "grad_norm": 0.1918550282716751, |
| "learning_rate": 3.6991452991452995e-05, |
| "loss": 0.009, |
| "step": 679 |
| }, |
| { |
| "epoch": 5.530612244897959, |
| "grad_norm": 0.15373460948467255, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 0.0031, |
| "step": 680 |
| }, |
| { |
| "epoch": 5.538775510204082, |
| "grad_norm": 0.20702648162841797, |
| "learning_rate": 3.685470085470086e-05, |
| "loss": 0.0096, |
| "step": 681 |
| }, |
| { |
| "epoch": 5.546938775510204, |
| "grad_norm": 0.1343519240617752, |
| "learning_rate": 3.6786324786324784e-05, |
| "loss": 0.0082, |
| "step": 682 |
| }, |
| { |
| "epoch": 5.555102040816326, |
| "grad_norm": 0.12029368430376053, |
| "learning_rate": 3.671794871794872e-05, |
| "loss": 0.0018, |
| "step": 683 |
| }, |
| { |
| "epoch": 5.563265306122449, |
| "grad_norm": 0.28310906887054443, |
| "learning_rate": 3.664957264957265e-05, |
| "loss": 0.0103, |
| "step": 684 |
| }, |
| { |
| "epoch": 5.571428571428571, |
| "grad_norm": 0.2390466034412384, |
| "learning_rate": 3.658119658119658e-05, |
| "loss": 0.0053, |
| "step": 685 |
| }, |
| { |
| "epoch": 5.579591836734694, |
| "grad_norm": 0.24921439588069916, |
| "learning_rate": 3.6512820512820514e-05, |
| "loss": 0.0089, |
| "step": 686 |
| }, |
| { |
| "epoch": 5.587755102040816, |
| "grad_norm": 0.3118392527103424, |
| "learning_rate": 3.644444444444445e-05, |
| "loss": 0.0121, |
| "step": 687 |
| }, |
| { |
| "epoch": 5.595918367346939, |
| "grad_norm": 0.18694745004177094, |
| "learning_rate": 3.6376068376068376e-05, |
| "loss": 0.0103, |
| "step": 688 |
| }, |
| { |
| "epoch": 5.604081632653061, |
| "grad_norm": 0.19770139455795288, |
| "learning_rate": 3.630769230769231e-05, |
| "loss": 0.0067, |
| "step": 689 |
| }, |
| { |
| "epoch": 5.612244897959184, |
| "grad_norm": 0.7333056926727295, |
| "learning_rate": 3.6239316239316245e-05, |
| "loss": 0.013, |
| "step": 690 |
| }, |
| { |
| "epoch": 5.6204081632653065, |
| "grad_norm": 0.17407962679862976, |
| "learning_rate": 3.617094017094017e-05, |
| "loss": 0.0082, |
| "step": 691 |
| }, |
| { |
| "epoch": 5.628571428571428, |
| "grad_norm": 0.20941323041915894, |
| "learning_rate": 3.6102564102564106e-05, |
| "loss": 0.0085, |
| "step": 692 |
| }, |
| { |
| "epoch": 5.636734693877551, |
| "grad_norm": 0.08693760633468628, |
| "learning_rate": 3.603418803418804e-05, |
| "loss": 0.0058, |
| "step": 693 |
| }, |
| { |
| "epoch": 5.644897959183673, |
| "grad_norm": 0.5758926272392273, |
| "learning_rate": 3.596581196581197e-05, |
| "loss": 0.0086, |
| "step": 694 |
| }, |
| { |
| "epoch": 5.653061224489796, |
| "grad_norm": 0.34267696738243103, |
| "learning_rate": 3.58974358974359e-05, |
| "loss": 0.003, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.661224489795918, |
| "grad_norm": 0.43159055709838867, |
| "learning_rate": 3.5829059829059837e-05, |
| "loss": 0.0182, |
| "step": 696 |
| }, |
| { |
| "epoch": 5.669387755102041, |
| "grad_norm": 0.23170888423919678, |
| "learning_rate": 3.5760683760683764e-05, |
| "loss": 0.0015, |
| "step": 697 |
| }, |
| { |
| "epoch": 5.677551020408163, |
| "grad_norm": 0.23891058564186096, |
| "learning_rate": 3.56923076923077e-05, |
| "loss": 0.009, |
| "step": 698 |
| }, |
| { |
| "epoch": 5.685714285714286, |
| "grad_norm": 0.28715649247169495, |
| "learning_rate": 3.5623931623931626e-05, |
| "loss": 0.0053, |
| "step": 699 |
| }, |
| { |
| "epoch": 5.6938775510204085, |
| "grad_norm": 0.6357800364494324, |
| "learning_rate": 3.555555555555555e-05, |
| "loss": 0.0117, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.70204081632653, |
| "grad_norm": 0.8419680595397949, |
| "learning_rate": 3.548717948717949e-05, |
| "loss": 0.0252, |
| "step": 701 |
| }, |
| { |
| "epoch": 5.710204081632653, |
| "grad_norm": 0.5171332359313965, |
| "learning_rate": 3.541880341880342e-05, |
| "loss": 0.0181, |
| "step": 702 |
| }, |
| { |
| "epoch": 5.718367346938775, |
| "grad_norm": 0.10230698436498642, |
| "learning_rate": 3.535042735042735e-05, |
| "loss": 0.008, |
| "step": 703 |
| }, |
| { |
| "epoch": 5.726530612244898, |
| "grad_norm": 0.2651132643222809, |
| "learning_rate": 3.5282051282051283e-05, |
| "loss": 0.0113, |
| "step": 704 |
| }, |
| { |
| "epoch": 5.73469387755102, |
| "grad_norm": 0.17068640887737274, |
| "learning_rate": 3.521367521367522e-05, |
| "loss": 0.007, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.742857142857143, |
| "grad_norm": 0.3176792562007904, |
| "learning_rate": 3.5145299145299145e-05, |
| "loss": 0.012, |
| "step": 706 |
| }, |
| { |
| "epoch": 5.751020408163265, |
| "grad_norm": 0.5791796445846558, |
| "learning_rate": 3.507692307692308e-05, |
| "loss": 0.0185, |
| "step": 707 |
| }, |
| { |
| "epoch": 5.759183673469388, |
| "grad_norm": 0.193172425031662, |
| "learning_rate": 3.5008547008547014e-05, |
| "loss": 0.0184, |
| "step": 708 |
| }, |
| { |
| "epoch": 5.7673469387755105, |
| "grad_norm": 0.38892728090286255, |
| "learning_rate": 3.494017094017094e-05, |
| "loss": 0.01, |
| "step": 709 |
| }, |
| { |
| "epoch": 5.775510204081632, |
| "grad_norm": 0.17030949890613556, |
| "learning_rate": 3.4871794871794875e-05, |
| "loss": 0.0036, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.783673469387755, |
| "grad_norm": 0.27598991990089417, |
| "learning_rate": 3.480341880341881e-05, |
| "loss": 0.0221, |
| "step": 711 |
| }, |
| { |
| "epoch": 5.791836734693877, |
| "grad_norm": 0.16970708966255188, |
| "learning_rate": 3.473504273504274e-05, |
| "loss": 0.0054, |
| "step": 712 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 0.3334875702857971, |
| "learning_rate": 3.466666666666667e-05, |
| "loss": 0.0121, |
| "step": 713 |
| }, |
| { |
| "epoch": 5.808163265306122, |
| "grad_norm": 0.044451236724853516, |
| "learning_rate": 3.4598290598290606e-05, |
| "loss": 0.0019, |
| "step": 714 |
| }, |
| { |
| "epoch": 5.816326530612245, |
| "grad_norm": 0.1450454741716385, |
| "learning_rate": 3.452991452991453e-05, |
| "loss": 0.009, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.8244897959183675, |
| "grad_norm": 0.19222688674926758, |
| "learning_rate": 3.446153846153847e-05, |
| "loss": 0.0081, |
| "step": 716 |
| }, |
| { |
| "epoch": 5.83265306122449, |
| "grad_norm": 0.09491467475891113, |
| "learning_rate": 3.4393162393162395e-05, |
| "loss": 0.001, |
| "step": 717 |
| }, |
| { |
| "epoch": 5.840816326530613, |
| "grad_norm": 0.1450124979019165, |
| "learning_rate": 3.432478632478632e-05, |
| "loss": 0.0109, |
| "step": 718 |
| }, |
| { |
| "epoch": 5.848979591836734, |
| "grad_norm": 0.2537761330604553, |
| "learning_rate": 3.4256410256410256e-05, |
| "loss": 0.0188, |
| "step": 719 |
| }, |
| { |
| "epoch": 5.857142857142857, |
| "grad_norm": 0.19120068848133087, |
| "learning_rate": 3.418803418803419e-05, |
| "loss": 0.0096, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.865306122448979, |
| "grad_norm": 0.06364692002534866, |
| "learning_rate": 3.411965811965812e-05, |
| "loss": 0.0019, |
| "step": 721 |
| }, |
| { |
| "epoch": 5.873469387755102, |
| "grad_norm": 0.22512775659561157, |
| "learning_rate": 3.405128205128205e-05, |
| "loss": 0.0057, |
| "step": 722 |
| }, |
| { |
| "epoch": 5.881632653061224, |
| "grad_norm": 0.21793381869792938, |
| "learning_rate": 3.398290598290599e-05, |
| "loss": 0.0114, |
| "step": 723 |
| }, |
| { |
| "epoch": 5.889795918367347, |
| "grad_norm": 1.5978583097457886, |
| "learning_rate": 3.3914529914529914e-05, |
| "loss": 0.0324, |
| "step": 724 |
| }, |
| { |
| "epoch": 5.8979591836734695, |
| "grad_norm": 0.22985956072807312, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 0.0112, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.906122448979592, |
| "grad_norm": 0.24227608740329742, |
| "learning_rate": 3.377777777777778e-05, |
| "loss": 0.0128, |
| "step": 726 |
| }, |
| { |
| "epoch": 5.914285714285715, |
| "grad_norm": 0.1724122315645218, |
| "learning_rate": 3.370940170940171e-05, |
| "loss": 0.0086, |
| "step": 727 |
| }, |
| { |
| "epoch": 5.922448979591836, |
| "grad_norm": 0.15982292592525482, |
| "learning_rate": 3.3641025641025644e-05, |
| "loss": 0.0078, |
| "step": 728 |
| }, |
| { |
| "epoch": 5.930612244897959, |
| "grad_norm": 0.12565600872039795, |
| "learning_rate": 3.357264957264958e-05, |
| "loss": 0.0027, |
| "step": 729 |
| }, |
| { |
| "epoch": 5.938775510204081, |
| "grad_norm": 0.2536553144454956, |
| "learning_rate": 3.3504273504273506e-05, |
| "loss": 0.0114, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.946938775510204, |
| "grad_norm": 0.15599173307418823, |
| "learning_rate": 3.343589743589744e-05, |
| "loss": 0.0077, |
| "step": 731 |
| }, |
| { |
| "epoch": 5.955102040816326, |
| "grad_norm": 0.185344398021698, |
| "learning_rate": 3.3367521367521375e-05, |
| "loss": 0.0017, |
| "step": 732 |
| }, |
| { |
| "epoch": 5.963265306122449, |
| "grad_norm": 0.08160512149333954, |
| "learning_rate": 3.32991452991453e-05, |
| "loss": 0.0026, |
| "step": 733 |
| }, |
| { |
| "epoch": 5.9714285714285715, |
| "grad_norm": 0.18903043866157532, |
| "learning_rate": 3.3230769230769236e-05, |
| "loss": 0.0063, |
| "step": 734 |
| }, |
| { |
| "epoch": 5.979591836734694, |
| "grad_norm": 0.1677568107843399, |
| "learning_rate": 3.316239316239317e-05, |
| "loss": 0.0059, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.987755102040817, |
| "grad_norm": 0.1872265338897705, |
| "learning_rate": 3.309401709401709e-05, |
| "loss": 0.0067, |
| "step": 736 |
| }, |
| { |
| "epoch": 5.995918367346938, |
| "grad_norm": 0.1347961127758026, |
| "learning_rate": 3.3025641025641025e-05, |
| "loss": 0.0046, |
| "step": 737 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.05940549820661545, |
| "learning_rate": 3.295726495726496e-05, |
| "loss": 0.0019, |
| "step": 738 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9420053493378564, |
| "eval_f1": 0.9416409769383864, |
| "eval_loss": 0.15586227178573608, |
| "eval_precision": 0.9417140689962101, |
| "eval_recall": 0.9420053493378564, |
| "eval_runtime": 24.6319, |
| "eval_samples_per_second": 17.701, |
| "eval_steps_per_second": 17.701, |
| "step": 738 |
| }, |
| { |
| "epoch": 6.0081632653061225, |
| "grad_norm": 0.07184652984142303, |
| "learning_rate": 3.288888888888889e-05, |
| "loss": 0.0024, |
| "step": 739 |
| }, |
| { |
| "epoch": 6.016326530612245, |
| "grad_norm": 0.3214910328388214, |
| "learning_rate": 3.282051282051282e-05, |
| "loss": 0.0068, |
| "step": 740 |
| }, |
| { |
| "epoch": 6.024489795918368, |
| "grad_norm": 0.22907869517803192, |
| "learning_rate": 3.2752136752136756e-05, |
| "loss": 0.0084, |
| "step": 741 |
| }, |
| { |
| "epoch": 6.03265306122449, |
| "grad_norm": 0.22705793380737305, |
| "learning_rate": 3.268376068376068e-05, |
| "loss": 0.0049, |
| "step": 742 |
| }, |
| { |
| "epoch": 6.040816326530612, |
| "grad_norm": 0.14635081589221954, |
| "learning_rate": 3.261538461538462e-05, |
| "loss": 0.0102, |
| "step": 743 |
| }, |
| { |
| "epoch": 6.048979591836734, |
| "grad_norm": 0.19782468676567078, |
| "learning_rate": 3.254700854700855e-05, |
| "loss": 0.0044, |
| "step": 744 |
| }, |
| { |
| "epoch": 6.057142857142857, |
| "grad_norm": 0.08663511276245117, |
| "learning_rate": 3.247863247863248e-05, |
| "loss": 0.0052, |
| "step": 745 |
| }, |
| { |
| "epoch": 6.0653061224489795, |
| "grad_norm": 0.056835856288671494, |
| "learning_rate": 3.2410256410256413e-05, |
| "loss": 0.0015, |
| "step": 746 |
| }, |
| { |
| "epoch": 6.073469387755102, |
| "grad_norm": 0.08768238127231598, |
| "learning_rate": 3.234188034188035e-05, |
| "loss": 0.0023, |
| "step": 747 |
| }, |
| { |
| "epoch": 6.081632653061225, |
| "grad_norm": 0.23458008468151093, |
| "learning_rate": 3.2273504273504275e-05, |
| "loss": 0.0131, |
| "step": 748 |
| }, |
| { |
| "epoch": 6.089795918367347, |
| "grad_norm": 0.10216531157493591, |
| "learning_rate": 3.220512820512821e-05, |
| "loss": 0.0032, |
| "step": 749 |
| }, |
| { |
| "epoch": 6.09795918367347, |
| "grad_norm": 0.27003157138824463, |
| "learning_rate": 3.2136752136752144e-05, |
| "loss": 0.0167, |
| "step": 750 |
| }, |
| { |
| "epoch": 6.106122448979592, |
| "grad_norm": 0.1322830319404602, |
| "learning_rate": 3.206837606837607e-05, |
| "loss": 0.0074, |
| "step": 751 |
| }, |
| { |
| "epoch": 6.114285714285714, |
| "grad_norm": 0.06595811247825623, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0029, |
| "step": 752 |
| }, |
| { |
| "epoch": 6.122448979591836, |
| "grad_norm": 0.11620360612869263, |
| "learning_rate": 3.193162393162394e-05, |
| "loss": 0.0077, |
| "step": 753 |
| }, |
| { |
| "epoch": 6.130612244897959, |
| "grad_norm": 0.04071362689137459, |
| "learning_rate": 3.186324786324787e-05, |
| "loss": 0.0013, |
| "step": 754 |
| }, |
| { |
| "epoch": 6.1387755102040815, |
| "grad_norm": 0.037312667816877365, |
| "learning_rate": 3.1794871794871795e-05, |
| "loss": 0.0007, |
| "step": 755 |
| }, |
| { |
| "epoch": 6.146938775510204, |
| "grad_norm": 0.11435768008232117, |
| "learning_rate": 3.172649572649573e-05, |
| "loss": 0.0016, |
| "step": 756 |
| }, |
| { |
| "epoch": 6.155102040816327, |
| "grad_norm": 0.1955532729625702, |
| "learning_rate": 3.1658119658119656e-05, |
| "loss": 0.0134, |
| "step": 757 |
| }, |
| { |
| "epoch": 6.163265306122449, |
| "grad_norm": 0.06615955382585526, |
| "learning_rate": 3.158974358974359e-05, |
| "loss": 0.0015, |
| "step": 758 |
| }, |
| { |
| "epoch": 6.171428571428572, |
| "grad_norm": 0.1115867868065834, |
| "learning_rate": 3.1521367521367525e-05, |
| "loss": 0.0023, |
| "step": 759 |
| }, |
| { |
| "epoch": 6.179591836734694, |
| "grad_norm": 0.13121837377548218, |
| "learning_rate": 3.145299145299145e-05, |
| "loss": 0.0035, |
| "step": 760 |
| }, |
| { |
| "epoch": 6.187755102040816, |
| "grad_norm": 0.16537025570869446, |
| "learning_rate": 3.1384615384615386e-05, |
| "loss": 0.0022, |
| "step": 761 |
| }, |
| { |
| "epoch": 6.1959183673469385, |
| "grad_norm": 0.16818967461585999, |
| "learning_rate": 3.131623931623932e-05, |
| "loss": 0.0075, |
| "step": 762 |
| }, |
| { |
| "epoch": 6.204081632653061, |
| "grad_norm": 0.16630838811397552, |
| "learning_rate": 3.124786324786325e-05, |
| "loss": 0.0085, |
| "step": 763 |
| }, |
| { |
| "epoch": 6.2122448979591836, |
| "grad_norm": 0.2355005294084549, |
| "learning_rate": 3.117948717948718e-05, |
| "loss": 0.0053, |
| "step": 764 |
| }, |
| { |
| "epoch": 6.220408163265306, |
| "grad_norm": 0.04024514928460121, |
| "learning_rate": 3.111111111111112e-05, |
| "loss": 0.0009, |
| "step": 765 |
| }, |
| { |
| "epoch": 6.228571428571429, |
| "grad_norm": 0.23014380037784576, |
| "learning_rate": 3.1042735042735044e-05, |
| "loss": 0.0146, |
| "step": 766 |
| }, |
| { |
| "epoch": 6.236734693877551, |
| "grad_norm": 0.006015291437506676, |
| "learning_rate": 3.097435897435898e-05, |
| "loss": 0.0001, |
| "step": 767 |
| }, |
| { |
| "epoch": 6.244897959183674, |
| "grad_norm": 0.08510640263557434, |
| "learning_rate": 3.090598290598291e-05, |
| "loss": 0.002, |
| "step": 768 |
| }, |
| { |
| "epoch": 6.253061224489796, |
| "grad_norm": 0.14379987120628357, |
| "learning_rate": 3.083760683760684e-05, |
| "loss": 0.0091, |
| "step": 769 |
| }, |
| { |
| "epoch": 6.261224489795918, |
| "grad_norm": 0.2117013931274414, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 0.0025, |
| "step": 770 |
| }, |
| { |
| "epoch": 6.2693877551020405, |
| "grad_norm": 0.040728114545345306, |
| "learning_rate": 3.070085470085471e-05, |
| "loss": 0.0007, |
| "step": 771 |
| }, |
| { |
| "epoch": 6.277551020408163, |
| "grad_norm": 0.03688928857445717, |
| "learning_rate": 3.0632478632478636e-05, |
| "loss": 0.0006, |
| "step": 772 |
| }, |
| { |
| "epoch": 6.285714285714286, |
| "grad_norm": 0.13431765139102936, |
| "learning_rate": 3.0564102564102564e-05, |
| "loss": 0.0019, |
| "step": 773 |
| }, |
| { |
| "epoch": 6.293877551020408, |
| "grad_norm": 0.05924392119050026, |
| "learning_rate": 3.0495726495726498e-05, |
| "loss": 0.0022, |
| "step": 774 |
| }, |
| { |
| "epoch": 6.302040816326531, |
| "grad_norm": 0.11792515218257904, |
| "learning_rate": 3.042735042735043e-05, |
| "loss": 0.0021, |
| "step": 775 |
| }, |
| { |
| "epoch": 6.310204081632653, |
| "grad_norm": 0.2088608741760254, |
| "learning_rate": 3.035897435897436e-05, |
| "loss": 0.0073, |
| "step": 776 |
| }, |
| { |
| "epoch": 6.318367346938776, |
| "grad_norm": 0.8228505253791809, |
| "learning_rate": 3.029059829059829e-05, |
| "loss": 0.007, |
| "step": 777 |
| }, |
| { |
| "epoch": 6.326530612244898, |
| "grad_norm": 0.19457820057868958, |
| "learning_rate": 3.0222222222222225e-05, |
| "loss": 0.0055, |
| "step": 778 |
| }, |
| { |
| "epoch": 6.33469387755102, |
| "grad_norm": 0.07778234779834747, |
| "learning_rate": 3.0153846153846155e-05, |
| "loss": 0.0009, |
| "step": 779 |
| }, |
| { |
| "epoch": 6.3428571428571425, |
| "grad_norm": 0.43910640478134155, |
| "learning_rate": 3.0085470085470086e-05, |
| "loss": 0.0351, |
| "step": 780 |
| }, |
| { |
| "epoch": 6.351020408163265, |
| "grad_norm": 0.10128612816333771, |
| "learning_rate": 3.001709401709402e-05, |
| "loss": 0.003, |
| "step": 781 |
| }, |
| { |
| "epoch": 6.359183673469388, |
| "grad_norm": 0.09770739078521729, |
| "learning_rate": 2.994871794871795e-05, |
| "loss": 0.0033, |
| "step": 782 |
| }, |
| { |
| "epoch": 6.36734693877551, |
| "grad_norm": 0.08708677440881729, |
| "learning_rate": 2.9880341880341882e-05, |
| "loss": 0.0077, |
| "step": 783 |
| }, |
| { |
| "epoch": 6.375510204081633, |
| "grad_norm": 0.5605522990226746, |
| "learning_rate": 2.9811965811965817e-05, |
| "loss": 0.0139, |
| "step": 784 |
| }, |
| { |
| "epoch": 6.383673469387755, |
| "grad_norm": 0.11796006560325623, |
| "learning_rate": 2.9743589743589747e-05, |
| "loss": 0.0049, |
| "step": 785 |
| }, |
| { |
| "epoch": 6.391836734693878, |
| "grad_norm": 0.08884254842996597, |
| "learning_rate": 2.9675213675213678e-05, |
| "loss": 0.0039, |
| "step": 786 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.09800074994564056, |
| "learning_rate": 2.960683760683761e-05, |
| "loss": 0.002, |
| "step": 787 |
| }, |
| { |
| "epoch": 6.408163265306122, |
| "grad_norm": 0.07932932674884796, |
| "learning_rate": 2.9538461538461543e-05, |
| "loss": 0.0038, |
| "step": 788 |
| }, |
| { |
| "epoch": 6.416326530612245, |
| "grad_norm": 0.49927836656570435, |
| "learning_rate": 2.9470085470085474e-05, |
| "loss": 0.0156, |
| "step": 789 |
| }, |
| { |
| "epoch": 6.424489795918367, |
| "grad_norm": 0.14197352528572083, |
| "learning_rate": 2.9401709401709405e-05, |
| "loss": 0.0129, |
| "step": 790 |
| }, |
| { |
| "epoch": 6.43265306122449, |
| "grad_norm": 0.47378090023994446, |
| "learning_rate": 2.9333333333333333e-05, |
| "loss": 0.0206, |
| "step": 791 |
| }, |
| { |
| "epoch": 6.440816326530612, |
| "grad_norm": 0.12690985202789307, |
| "learning_rate": 2.9264957264957267e-05, |
| "loss": 0.0035, |
| "step": 792 |
| }, |
| { |
| "epoch": 6.448979591836735, |
| "grad_norm": 0.16043904423713684, |
| "learning_rate": 2.9196581196581198e-05, |
| "loss": 0.0069, |
| "step": 793 |
| }, |
| { |
| "epoch": 6.457142857142857, |
| "grad_norm": 0.37268689274787903, |
| "learning_rate": 2.912820512820513e-05, |
| "loss": 0.0081, |
| "step": 794 |
| }, |
| { |
| "epoch": 6.46530612244898, |
| "grad_norm": 0.08618529886007309, |
| "learning_rate": 2.9059829059829063e-05, |
| "loss": 0.0019, |
| "step": 795 |
| }, |
| { |
| "epoch": 6.473469387755102, |
| "grad_norm": 0.46465665102005005, |
| "learning_rate": 2.8991452991452994e-05, |
| "loss": 0.0041, |
| "step": 796 |
| }, |
| { |
| "epoch": 6.481632653061224, |
| "grad_norm": 0.21116529405117035, |
| "learning_rate": 2.8923076923076925e-05, |
| "loss": 0.0014, |
| "step": 797 |
| }, |
| { |
| "epoch": 6.489795918367347, |
| "grad_norm": 0.03455302491784096, |
| "learning_rate": 2.8854700854700855e-05, |
| "loss": 0.001, |
| "step": 798 |
| }, |
| { |
| "epoch": 6.497959183673469, |
| "grad_norm": 0.07090940326452255, |
| "learning_rate": 2.878632478632479e-05, |
| "loss": 0.0021, |
| "step": 799 |
| }, |
| { |
| "epoch": 6.506122448979592, |
| "grad_norm": 0.04578676074743271, |
| "learning_rate": 2.871794871794872e-05, |
| "loss": 0.0008, |
| "step": 800 |
| }, |
| { |
| "epoch": 6.514285714285714, |
| "grad_norm": 0.14446327090263367, |
| "learning_rate": 2.864957264957265e-05, |
| "loss": 0.0053, |
| "step": 801 |
| }, |
| { |
| "epoch": 6.522448979591837, |
| "grad_norm": 0.1537717580795288, |
| "learning_rate": 2.8581196581196586e-05, |
| "loss": 0.0023, |
| "step": 802 |
| }, |
| { |
| "epoch": 6.530612244897959, |
| "grad_norm": 0.31299567222595215, |
| "learning_rate": 2.8512820512820516e-05, |
| "loss": 0.0068, |
| "step": 803 |
| }, |
| { |
| "epoch": 6.538775510204082, |
| "grad_norm": 0.1606074422597885, |
| "learning_rate": 2.8444444444444447e-05, |
| "loss": 0.004, |
| "step": 804 |
| }, |
| { |
| "epoch": 6.546938775510204, |
| "grad_norm": 0.254300594329834, |
| "learning_rate": 2.8376068376068378e-05, |
| "loss": 0.0063, |
| "step": 805 |
| }, |
| { |
| "epoch": 6.555102040816326, |
| "grad_norm": 0.1450517326593399, |
| "learning_rate": 2.8307692307692312e-05, |
| "loss": 0.0036, |
| "step": 806 |
| }, |
| { |
| "epoch": 6.563265306122449, |
| "grad_norm": 0.11473794281482697, |
| "learning_rate": 2.8239316239316243e-05, |
| "loss": 0.0074, |
| "step": 807 |
| }, |
| { |
| "epoch": 6.571428571428571, |
| "grad_norm": 0.04597209766507149, |
| "learning_rate": 2.8170940170940174e-05, |
| "loss": 0.0008, |
| "step": 808 |
| }, |
| { |
| "epoch": 6.579591836734694, |
| "grad_norm": 0.20627528429031372, |
| "learning_rate": 2.81025641025641e-05, |
| "loss": 0.0099, |
| "step": 809 |
| }, |
| { |
| "epoch": 6.587755102040816, |
| "grad_norm": 0.6563801169395447, |
| "learning_rate": 2.8034188034188036e-05, |
| "loss": 0.0113, |
| "step": 810 |
| }, |
| { |
| "epoch": 6.595918367346939, |
| "grad_norm": 0.12874148786067963, |
| "learning_rate": 2.7965811965811967e-05, |
| "loss": 0.0015, |
| "step": 811 |
| }, |
| { |
| "epoch": 6.604081632653061, |
| "grad_norm": 0.21111907064914703, |
| "learning_rate": 2.7897435897435898e-05, |
| "loss": 0.0095, |
| "step": 812 |
| }, |
| { |
| "epoch": 6.612244897959184, |
| "grad_norm": 0.13607758283615112, |
| "learning_rate": 2.7829059829059832e-05, |
| "loss": 0.0022, |
| "step": 813 |
| }, |
| { |
| "epoch": 6.6204081632653065, |
| "grad_norm": 0.008681375533342361, |
| "learning_rate": 2.7760683760683763e-05, |
| "loss": 0.0002, |
| "step": 814 |
| }, |
| { |
| "epoch": 6.628571428571428, |
| "grad_norm": 0.19657264649868011, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 0.0059, |
| "step": 815 |
| }, |
| { |
| "epoch": 6.636734693877551, |
| "grad_norm": 0.22438636422157288, |
| "learning_rate": 2.7623931623931624e-05, |
| "loss": 0.0101, |
| "step": 816 |
| }, |
| { |
| "epoch": 6.644897959183673, |
| "grad_norm": 0.022456951439380646, |
| "learning_rate": 2.755555555555556e-05, |
| "loss": 0.0005, |
| "step": 817 |
| }, |
| { |
| "epoch": 6.653061224489796, |
| "grad_norm": 0.40233445167541504, |
| "learning_rate": 2.748717948717949e-05, |
| "loss": 0.011, |
| "step": 818 |
| }, |
| { |
| "epoch": 6.661224489795918, |
| "grad_norm": 0.4001105725765228, |
| "learning_rate": 2.741880341880342e-05, |
| "loss": 0.0084, |
| "step": 819 |
| }, |
| { |
| "epoch": 6.669387755102041, |
| "grad_norm": 0.12446096539497375, |
| "learning_rate": 2.7350427350427355e-05, |
| "loss": 0.0021, |
| "step": 820 |
| }, |
| { |
| "epoch": 6.677551020408163, |
| "grad_norm": 0.09965896606445312, |
| "learning_rate": 2.7282051282051285e-05, |
| "loss": 0.0052, |
| "step": 821 |
| }, |
| { |
| "epoch": 6.685714285714286, |
| "grad_norm": 0.11254263669252396, |
| "learning_rate": 2.7213675213675216e-05, |
| "loss": 0.0035, |
| "step": 822 |
| }, |
| { |
| "epoch": 6.6938775510204085, |
| "grad_norm": 0.12855035066604614, |
| "learning_rate": 2.7145299145299147e-05, |
| "loss": 0.0079, |
| "step": 823 |
| }, |
| { |
| "epoch": 6.70204081632653, |
| "grad_norm": 0.13291221857070923, |
| "learning_rate": 2.707692307692308e-05, |
| "loss": 0.0038, |
| "step": 824 |
| }, |
| { |
| "epoch": 6.710204081632653, |
| "grad_norm": 0.08022642135620117, |
| "learning_rate": 2.7008547008547012e-05, |
| "loss": 0.0032, |
| "step": 825 |
| }, |
| { |
| "epoch": 6.718367346938775, |
| "grad_norm": 0.14532768726348877, |
| "learning_rate": 2.6940170940170943e-05, |
| "loss": 0.0019, |
| "step": 826 |
| }, |
| { |
| "epoch": 6.726530612244898, |
| "grad_norm": 0.01848861761391163, |
| "learning_rate": 2.687179487179487e-05, |
| "loss": 0.0003, |
| "step": 827 |
| }, |
| { |
| "epoch": 6.73469387755102, |
| "grad_norm": 0.18730799853801727, |
| "learning_rate": 2.6803418803418805e-05, |
| "loss": 0.0101, |
| "step": 828 |
| }, |
| { |
| "epoch": 6.742857142857143, |
| "grad_norm": 0.2433444857597351, |
| "learning_rate": 2.6735042735042736e-05, |
| "loss": 0.0111, |
| "step": 829 |
| }, |
| { |
| "epoch": 6.751020408163265, |
| "grad_norm": 0.10054635256528854, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.0048, |
| "step": 830 |
| }, |
| { |
| "epoch": 6.759183673469388, |
| "grad_norm": 0.1453963816165924, |
| "learning_rate": 2.65982905982906e-05, |
| "loss": 0.0034, |
| "step": 831 |
| }, |
| { |
| "epoch": 6.7673469387755105, |
| "grad_norm": 0.12410593032836914, |
| "learning_rate": 2.6529914529914532e-05, |
| "loss": 0.0055, |
| "step": 832 |
| }, |
| { |
| "epoch": 6.775510204081632, |
| "grad_norm": 0.02357162907719612, |
| "learning_rate": 2.6461538461538463e-05, |
| "loss": 0.0003, |
| "step": 833 |
| }, |
| { |
| "epoch": 6.783673469387755, |
| "grad_norm": 0.5636110305786133, |
| "learning_rate": 2.6393162393162393e-05, |
| "loss": 0.0138, |
| "step": 834 |
| }, |
| { |
| "epoch": 6.791836734693877, |
| "grad_norm": 0.021261123940348625, |
| "learning_rate": 2.6324786324786328e-05, |
| "loss": 0.0003, |
| "step": 835 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.34472304582595825, |
| "learning_rate": 2.625641025641026e-05, |
| "loss": 0.0105, |
| "step": 836 |
| }, |
| { |
| "epoch": 6.808163265306122, |
| "grad_norm": 0.10296373069286346, |
| "learning_rate": 2.618803418803419e-05, |
| "loss": 0.001, |
| "step": 837 |
| }, |
| { |
| "epoch": 6.816326530612245, |
| "grad_norm": 0.11989521980285645, |
| "learning_rate": 2.6119658119658124e-05, |
| "loss": 0.003, |
| "step": 838 |
| }, |
| { |
| "epoch": 6.8244897959183675, |
| "grad_norm": 0.2446180284023285, |
| "learning_rate": 2.6051282051282054e-05, |
| "loss": 0.0143, |
| "step": 839 |
| }, |
| { |
| "epoch": 6.83265306122449, |
| "grad_norm": 0.14607751369476318, |
| "learning_rate": 2.5982905982905985e-05, |
| "loss": 0.0028, |
| "step": 840 |
| }, |
| { |
| "epoch": 6.840816326530613, |
| "grad_norm": 0.2830953598022461, |
| "learning_rate": 2.5914529914529916e-05, |
| "loss": 0.0136, |
| "step": 841 |
| }, |
| { |
| "epoch": 6.848979591836734, |
| "grad_norm": 0.5133418440818787, |
| "learning_rate": 2.584615384615385e-05, |
| "loss": 0.0147, |
| "step": 842 |
| }, |
| { |
| "epoch": 6.857142857142857, |
| "grad_norm": 0.17149963974952698, |
| "learning_rate": 2.577777777777778e-05, |
| "loss": 0.002, |
| "step": 843 |
| }, |
| { |
| "epoch": 6.865306122448979, |
| "grad_norm": 0.15987303853034973, |
| "learning_rate": 2.5709401709401712e-05, |
| "loss": 0.0011, |
| "step": 844 |
| }, |
| { |
| "epoch": 6.873469387755102, |
| "grad_norm": 0.4356565773487091, |
| "learning_rate": 2.5641025641025646e-05, |
| "loss": 0.0204, |
| "step": 845 |
| }, |
| { |
| "epoch": 6.881632653061224, |
| "grad_norm": 0.04300348833203316, |
| "learning_rate": 2.5572649572649574e-05, |
| "loss": 0.0006, |
| "step": 846 |
| }, |
| { |
| "epoch": 6.889795918367347, |
| "grad_norm": 0.26859360933303833, |
| "learning_rate": 2.5504273504273505e-05, |
| "loss": 0.0105, |
| "step": 847 |
| }, |
| { |
| "epoch": 6.8979591836734695, |
| "grad_norm": 0.12719742953777313, |
| "learning_rate": 2.5435897435897436e-05, |
| "loss": 0.0019, |
| "step": 848 |
| }, |
| { |
| "epoch": 6.906122448979592, |
| "grad_norm": 0.10546525567770004, |
| "learning_rate": 2.536752136752137e-05, |
| "loss": 0.003, |
| "step": 849 |
| }, |
| { |
| "epoch": 6.914285714285715, |
| "grad_norm": 0.15803073346614838, |
| "learning_rate": 2.52991452991453e-05, |
| "loss": 0.0034, |
| "step": 850 |
| }, |
| { |
| "epoch": 6.922448979591836, |
| "grad_norm": 0.08309769630432129, |
| "learning_rate": 2.523076923076923e-05, |
| "loss": 0.0019, |
| "step": 851 |
| }, |
| { |
| "epoch": 6.930612244897959, |
| "grad_norm": 0.011221293359994888, |
| "learning_rate": 2.5162393162393162e-05, |
| "loss": 0.0002, |
| "step": 852 |
| }, |
| { |
| "epoch": 6.938775510204081, |
| "grad_norm": 0.06381987035274506, |
| "learning_rate": 2.5094017094017097e-05, |
| "loss": 0.0025, |
| "step": 853 |
| }, |
| { |
| "epoch": 6.946938775510204, |
| "grad_norm": 0.18709778785705566, |
| "learning_rate": 2.5025641025641028e-05, |
| "loss": 0.003, |
| "step": 854 |
| }, |
| { |
| "epoch": 6.955102040816326, |
| "grad_norm": 0.12326924502849579, |
| "learning_rate": 2.495726495726496e-05, |
| "loss": 0.0031, |
| "step": 855 |
| }, |
| { |
| "epoch": 6.963265306122449, |
| "grad_norm": 0.12623760104179382, |
| "learning_rate": 2.4888888888888893e-05, |
| "loss": 0.0033, |
| "step": 856 |
| }, |
| { |
| "epoch": 6.9714285714285715, |
| "grad_norm": 0.15498894453048706, |
| "learning_rate": 2.4820512820512824e-05, |
| "loss": 0.0028, |
| "step": 857 |
| }, |
| { |
| "epoch": 6.979591836734694, |
| "grad_norm": 0.031409814953804016, |
| "learning_rate": 2.4752136752136754e-05, |
| "loss": 0.0004, |
| "step": 858 |
| }, |
| { |
| "epoch": 6.987755102040817, |
| "grad_norm": 0.18738146126270294, |
| "learning_rate": 2.4683760683760685e-05, |
| "loss": 0.0075, |
| "step": 859 |
| }, |
| { |
| "epoch": 6.995918367346938, |
| "grad_norm": 0.14360497891902924, |
| "learning_rate": 2.461538461538462e-05, |
| "loss": 0.0047, |
| "step": 860 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.3702276945114136, |
| "learning_rate": 2.454700854700855e-05, |
| "loss": 0.0163, |
| "step": 861 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9416139343727575, |
| "eval_f1": 0.9411935049117857, |
| "eval_loss": 0.17100121080875397, |
| "eval_precision": 0.9413414416659728, |
| "eval_recall": 0.9416139343727575, |
| "eval_runtime": 25.0743, |
| "eval_samples_per_second": 17.388, |
| "eval_steps_per_second": 17.388, |
| "step": 861 |
| }, |
| { |
| "epoch": 7.0081632653061225, |
| "grad_norm": 0.005380494520068169, |
| "learning_rate": 2.447863247863248e-05, |
| "loss": 0.0001, |
| "step": 862 |
| }, |
| { |
| "epoch": 7.016326530612245, |
| "grad_norm": 0.07181048393249512, |
| "learning_rate": 2.4410256410256415e-05, |
| "loss": 0.0021, |
| "step": 863 |
| }, |
| { |
| "epoch": 7.024489795918368, |
| "grad_norm": 0.17317695915699005, |
| "learning_rate": 2.4341880341880343e-05, |
| "loss": 0.0014, |
| "step": 864 |
| }, |
| { |
| "epoch": 7.03265306122449, |
| "grad_norm": 0.11304262280464172, |
| "learning_rate": 2.4273504273504274e-05, |
| "loss": 0.007, |
| "step": 865 |
| }, |
| { |
| "epoch": 7.040816326530612, |
| "grad_norm": 0.11228794604539871, |
| "learning_rate": 2.4205128205128205e-05, |
| "loss": 0.0014, |
| "step": 866 |
| }, |
| { |
| "epoch": 7.048979591836734, |
| "grad_norm": 0.0365217849612236, |
| "learning_rate": 2.413675213675214e-05, |
| "loss": 0.0009, |
| "step": 867 |
| }, |
| { |
| "epoch": 7.057142857142857, |
| "grad_norm": 0.2410961538553238, |
| "learning_rate": 2.406837606837607e-05, |
| "loss": 0.0008, |
| "step": 868 |
| }, |
| { |
| "epoch": 7.0653061224489795, |
| "grad_norm": 0.4587385058403015, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0062, |
| "step": 869 |
| }, |
| { |
| "epoch": 7.073469387755102, |
| "grad_norm": 0.1474837362766266, |
| "learning_rate": 2.393162393162393e-05, |
| "loss": 0.0046, |
| "step": 870 |
| }, |
| { |
| "epoch": 7.081632653061225, |
| "grad_norm": 0.12067204713821411, |
| "learning_rate": 2.3863247863247866e-05, |
| "loss": 0.0118, |
| "step": 871 |
| }, |
| { |
| "epoch": 7.089795918367347, |
| "grad_norm": 0.07675416022539139, |
| "learning_rate": 2.3794871794871797e-05, |
| "loss": 0.0017, |
| "step": 872 |
| }, |
| { |
| "epoch": 7.09795918367347, |
| "grad_norm": 0.010980883613228798, |
| "learning_rate": 2.3726495726495727e-05, |
| "loss": 0.0003, |
| "step": 873 |
| }, |
| { |
| "epoch": 7.106122448979592, |
| "grad_norm": 0.10777924954891205, |
| "learning_rate": 2.365811965811966e-05, |
| "loss": 0.0029, |
| "step": 874 |
| }, |
| { |
| "epoch": 7.114285714285714, |
| "grad_norm": 0.04947957023978233, |
| "learning_rate": 2.3589743589743593e-05, |
| "loss": 0.0016, |
| "step": 875 |
| }, |
| { |
| "epoch": 7.122448979591836, |
| "grad_norm": 0.2570093870162964, |
| "learning_rate": 2.3521367521367523e-05, |
| "loss": 0.0298, |
| "step": 876 |
| }, |
| { |
| "epoch": 7.130612244897959, |
| "grad_norm": 0.09827487170696259, |
| "learning_rate": 2.3452991452991458e-05, |
| "loss": 0.0034, |
| "step": 877 |
| }, |
| { |
| "epoch": 7.1387755102040815, |
| "grad_norm": 0.013062435202300549, |
| "learning_rate": 2.338461538461539e-05, |
| "loss": 0.0002, |
| "step": 878 |
| }, |
| { |
| "epoch": 7.146938775510204, |
| "grad_norm": 0.042147841304540634, |
| "learning_rate": 2.331623931623932e-05, |
| "loss": 0.0007, |
| "step": 879 |
| }, |
| { |
| "epoch": 7.155102040816327, |
| "grad_norm": 0.052288565784692764, |
| "learning_rate": 2.324786324786325e-05, |
| "loss": 0.0023, |
| "step": 880 |
| }, |
| { |
| "epoch": 7.163265306122449, |
| "grad_norm": 0.019575530663132668, |
| "learning_rate": 2.3179487179487184e-05, |
| "loss": 0.0004, |
| "step": 881 |
| }, |
| { |
| "epoch": 7.171428571428572, |
| "grad_norm": 0.02954856865108013, |
| "learning_rate": 2.3111111111111112e-05, |
| "loss": 0.0005, |
| "step": 882 |
| }, |
| { |
| "epoch": 7.179591836734694, |
| "grad_norm": 0.09335067123174667, |
| "learning_rate": 2.3042735042735043e-05, |
| "loss": 0.0015, |
| "step": 883 |
| }, |
| { |
| "epoch": 7.187755102040816, |
| "grad_norm": 0.2734461724758148, |
| "learning_rate": 2.2974358974358974e-05, |
| "loss": 0.0014, |
| "step": 884 |
| }, |
| { |
| "epoch": 7.1959183673469385, |
| "grad_norm": 0.49978339672088623, |
| "learning_rate": 2.2905982905982908e-05, |
| "loss": 0.02, |
| "step": 885 |
| }, |
| { |
| "epoch": 7.204081632653061, |
| "grad_norm": 0.04176400974392891, |
| "learning_rate": 2.283760683760684e-05, |
| "loss": 0.0009, |
| "step": 886 |
| }, |
| { |
| "epoch": 7.2122448979591836, |
| "grad_norm": 0.09729107469320297, |
| "learning_rate": 2.276923076923077e-05, |
| "loss": 0.0016, |
| "step": 887 |
| }, |
| { |
| "epoch": 7.220408163265306, |
| "grad_norm": 0.04460914805531502, |
| "learning_rate": 2.27008547008547e-05, |
| "loss": 0.0012, |
| "step": 888 |
| }, |
| { |
| "epoch": 7.228571428571429, |
| "grad_norm": 0.038177452981472015, |
| "learning_rate": 2.2632478632478635e-05, |
| "loss": 0.0003, |
| "step": 889 |
| }, |
| { |
| "epoch": 7.236734693877551, |
| "grad_norm": 0.10953059792518616, |
| "learning_rate": 2.2564102564102566e-05, |
| "loss": 0.0035, |
| "step": 890 |
| }, |
| { |
| "epoch": 7.244897959183674, |
| "grad_norm": 0.311824768781662, |
| "learning_rate": 2.2495726495726496e-05, |
| "loss": 0.0073, |
| "step": 891 |
| }, |
| { |
| "epoch": 7.253061224489796, |
| "grad_norm": 0.046279143542051315, |
| "learning_rate": 2.242735042735043e-05, |
| "loss": 0.0012, |
| "step": 892 |
| }, |
| { |
| "epoch": 7.261224489795918, |
| "grad_norm": 0.016753727570176125, |
| "learning_rate": 2.235897435897436e-05, |
| "loss": 0.0003, |
| "step": 893 |
| }, |
| { |
| "epoch": 7.2693877551020405, |
| "grad_norm": 0.4180339574813843, |
| "learning_rate": 2.2290598290598292e-05, |
| "loss": 0.0082, |
| "step": 894 |
| }, |
| { |
| "epoch": 7.277551020408163, |
| "grad_norm": 0.7917170524597168, |
| "learning_rate": 2.2222222222222227e-05, |
| "loss": 0.0118, |
| "step": 895 |
| }, |
| { |
| "epoch": 7.285714285714286, |
| "grad_norm": 0.023676620796322823, |
| "learning_rate": 2.2153846153846158e-05, |
| "loss": 0.0005, |
| "step": 896 |
| }, |
| { |
| "epoch": 7.293877551020408, |
| "grad_norm": 0.20448362827301025, |
| "learning_rate": 2.208547008547009e-05, |
| "loss": 0.0042, |
| "step": 897 |
| }, |
| { |
| "epoch": 7.302040816326531, |
| "grad_norm": 0.08442284911870956, |
| "learning_rate": 2.201709401709402e-05, |
| "loss": 0.0015, |
| "step": 898 |
| }, |
| { |
| "epoch": 7.310204081632653, |
| "grad_norm": 0.12260103970766068, |
| "learning_rate": 2.1948717948717954e-05, |
| "loss": 0.0031, |
| "step": 899 |
| }, |
| { |
| "epoch": 7.318367346938776, |
| "grad_norm": 0.19080136716365814, |
| "learning_rate": 2.1880341880341884e-05, |
| "loss": 0.0023, |
| "step": 900 |
| }, |
| { |
| "epoch": 7.326530612244898, |
| "grad_norm": 0.15384361147880554, |
| "learning_rate": 2.1811965811965812e-05, |
| "loss": 0.0012, |
| "step": 901 |
| }, |
| { |
| "epoch": 7.33469387755102, |
| "grad_norm": 0.05359187722206116, |
| "learning_rate": 2.1743589743589743e-05, |
| "loss": 0.0006, |
| "step": 902 |
| }, |
| { |
| "epoch": 7.3428571428571425, |
| "grad_norm": 0.2594751715660095, |
| "learning_rate": 2.1675213675213677e-05, |
| "loss": 0.0051, |
| "step": 903 |
| }, |
| { |
| "epoch": 7.351020408163265, |
| "grad_norm": 0.04371648281812668, |
| "learning_rate": 2.1606837606837608e-05, |
| "loss": 0.0006, |
| "step": 904 |
| }, |
| { |
| "epoch": 7.359183673469388, |
| "grad_norm": 0.5175739526748657, |
| "learning_rate": 2.153846153846154e-05, |
| "loss": 0.0058, |
| "step": 905 |
| }, |
| { |
| "epoch": 7.36734693877551, |
| "grad_norm": 0.3708977699279785, |
| "learning_rate": 2.147008547008547e-05, |
| "loss": 0.0035, |
| "step": 906 |
| }, |
| { |
| "epoch": 7.375510204081633, |
| "grad_norm": 0.2661634385585785, |
| "learning_rate": 2.1401709401709404e-05, |
| "loss": 0.0054, |
| "step": 907 |
| }, |
| { |
| "epoch": 7.383673469387755, |
| "grad_norm": 0.11005009710788727, |
| "learning_rate": 2.1333333333333335e-05, |
| "loss": 0.0026, |
| "step": 908 |
| }, |
| { |
| "epoch": 7.391836734693878, |
| "grad_norm": 0.09081326425075531, |
| "learning_rate": 2.1264957264957265e-05, |
| "loss": 0.0009, |
| "step": 909 |
| }, |
| { |
| "epoch": 7.4, |
| "grad_norm": 0.07192150503396988, |
| "learning_rate": 2.11965811965812e-05, |
| "loss": 0.0051, |
| "step": 910 |
| }, |
| { |
| "epoch": 7.408163265306122, |
| "grad_norm": 0.026940980926156044, |
| "learning_rate": 2.112820512820513e-05, |
| "loss": 0.0005, |
| "step": 911 |
| }, |
| { |
| "epoch": 7.416326530612245, |
| "grad_norm": 0.08359820395708084, |
| "learning_rate": 2.105982905982906e-05, |
| "loss": 0.0045, |
| "step": 912 |
| }, |
| { |
| "epoch": 7.424489795918367, |
| "grad_norm": 0.12868310511112213, |
| "learning_rate": 2.0991452991452996e-05, |
| "loss": 0.0056, |
| "step": 913 |
| }, |
| { |
| "epoch": 7.43265306122449, |
| "grad_norm": 0.16965226829051971, |
| "learning_rate": 2.0923076923076927e-05, |
| "loss": 0.0008, |
| "step": 914 |
| }, |
| { |
| "epoch": 7.440816326530612, |
| "grad_norm": 0.4554808437824249, |
| "learning_rate": 2.0854700854700857e-05, |
| "loss": 0.0092, |
| "step": 915 |
| }, |
| { |
| "epoch": 7.448979591836735, |
| "grad_norm": 0.008080328814685345, |
| "learning_rate": 2.0786324786324788e-05, |
| "loss": 0.0002, |
| "step": 916 |
| }, |
| { |
| "epoch": 7.457142857142857, |
| "grad_norm": 0.03749796375632286, |
| "learning_rate": 2.0717948717948723e-05, |
| "loss": 0.0036, |
| "step": 917 |
| }, |
| { |
| "epoch": 7.46530612244898, |
| "grad_norm": 0.01586100459098816, |
| "learning_rate": 2.0649572649572653e-05, |
| "loss": 0.0002, |
| "step": 918 |
| }, |
| { |
| "epoch": 7.473469387755102, |
| "grad_norm": 0.13012056052684784, |
| "learning_rate": 2.058119658119658e-05, |
| "loss": 0.0018, |
| "step": 919 |
| }, |
| { |
| "epoch": 7.481632653061224, |
| "grad_norm": 0.04649100825190544, |
| "learning_rate": 2.0512820512820512e-05, |
| "loss": 0.0006, |
| "step": 920 |
| }, |
| { |
| "epoch": 7.489795918367347, |
| "grad_norm": 0.03851509839296341, |
| "learning_rate": 2.0444444444444446e-05, |
| "loss": 0.0036, |
| "step": 921 |
| }, |
| { |
| "epoch": 7.497959183673469, |
| "grad_norm": 0.1530081182718277, |
| "learning_rate": 2.0376068376068377e-05, |
| "loss": 0.01, |
| "step": 922 |
| }, |
| { |
| "epoch": 7.506122448979592, |
| "grad_norm": 0.028013063594698906, |
| "learning_rate": 2.0307692307692308e-05, |
| "loss": 0.0005, |
| "step": 923 |
| }, |
| { |
| "epoch": 7.514285714285714, |
| "grad_norm": 0.017429566010832787, |
| "learning_rate": 2.023931623931624e-05, |
| "loss": 0.0003, |
| "step": 924 |
| }, |
| { |
| "epoch": 7.522448979591837, |
| "grad_norm": 0.08652772009372711, |
| "learning_rate": 2.0170940170940173e-05, |
| "loss": 0.0041, |
| "step": 925 |
| }, |
| { |
| "epoch": 7.530612244897959, |
| "grad_norm": 0.015552469529211521, |
| "learning_rate": 2.0102564102564104e-05, |
| "loss": 0.0004, |
| "step": 926 |
| }, |
| { |
| "epoch": 7.538775510204082, |
| "grad_norm": 0.1635313630104065, |
| "learning_rate": 2.0034188034188035e-05, |
| "loss": 0.0024, |
| "step": 927 |
| }, |
| { |
| "epoch": 7.546938775510204, |
| "grad_norm": 0.09557072073221207, |
| "learning_rate": 1.996581196581197e-05, |
| "loss": 0.0031, |
| "step": 928 |
| }, |
| { |
| "epoch": 7.555102040816326, |
| "grad_norm": 0.056514523923397064, |
| "learning_rate": 1.98974358974359e-05, |
| "loss": 0.001, |
| "step": 929 |
| }, |
| { |
| "epoch": 7.563265306122449, |
| "grad_norm": 0.11032027006149292, |
| "learning_rate": 1.982905982905983e-05, |
| "loss": 0.0027, |
| "step": 930 |
| }, |
| { |
| "epoch": 7.571428571428571, |
| "grad_norm": 0.1199721097946167, |
| "learning_rate": 1.9760683760683765e-05, |
| "loss": 0.0045, |
| "step": 931 |
| }, |
| { |
| "epoch": 7.579591836734694, |
| "grad_norm": 0.06572246551513672, |
| "learning_rate": 1.9692307692307696e-05, |
| "loss": 0.0005, |
| "step": 932 |
| }, |
| { |
| "epoch": 7.587755102040816, |
| "grad_norm": 0.02812982350587845, |
| "learning_rate": 1.9623931623931623e-05, |
| "loss": 0.0016, |
| "step": 933 |
| }, |
| { |
| "epoch": 7.595918367346939, |
| "grad_norm": 0.36669132113456726, |
| "learning_rate": 1.9555555555555557e-05, |
| "loss": 0.0075, |
| "step": 934 |
| }, |
| { |
| "epoch": 7.604081632653061, |
| "grad_norm": 0.007166026625782251, |
| "learning_rate": 1.9487179487179488e-05, |
| "loss": 0.0002, |
| "step": 935 |
| }, |
| { |
| "epoch": 7.612244897959184, |
| "grad_norm": 0.0843917652964592, |
| "learning_rate": 1.941880341880342e-05, |
| "loss": 0.0014, |
| "step": 936 |
| }, |
| { |
| "epoch": 7.6204081632653065, |
| "grad_norm": 0.03270947188138962, |
| "learning_rate": 1.9350427350427353e-05, |
| "loss": 0.0004, |
| "step": 937 |
| }, |
| { |
| "epoch": 7.628571428571428, |
| "grad_norm": 0.11428512632846832, |
| "learning_rate": 1.9282051282051284e-05, |
| "loss": 0.0037, |
| "step": 938 |
| }, |
| { |
| "epoch": 7.636734693877551, |
| "grad_norm": 0.14075659215450287, |
| "learning_rate": 1.9213675213675215e-05, |
| "loss": 0.0038, |
| "step": 939 |
| }, |
| { |
| "epoch": 7.644897959183673, |
| "grad_norm": 0.039455536752939224, |
| "learning_rate": 1.914529914529915e-05, |
| "loss": 0.0003, |
| "step": 940 |
| }, |
| { |
| "epoch": 7.653061224489796, |
| "grad_norm": 0.08807907998561859, |
| "learning_rate": 1.907692307692308e-05, |
| "loss": 0.0032, |
| "step": 941 |
| }, |
| { |
| "epoch": 7.661224489795918, |
| "grad_norm": 0.016785893589258194, |
| "learning_rate": 1.9008547008547008e-05, |
| "loss": 0.0003, |
| "step": 942 |
| }, |
| { |
| "epoch": 7.669387755102041, |
| "grad_norm": 0.050439443439245224, |
| "learning_rate": 1.8940170940170942e-05, |
| "loss": 0.0006, |
| "step": 943 |
| }, |
| { |
| "epoch": 7.677551020408163, |
| "grad_norm": 0.05136784538626671, |
| "learning_rate": 1.8871794871794873e-05, |
| "loss": 0.0008, |
| "step": 944 |
| }, |
| { |
| "epoch": 7.685714285714286, |
| "grad_norm": 0.032696232199668884, |
| "learning_rate": 1.8803418803418804e-05, |
| "loss": 0.0004, |
| "step": 945 |
| }, |
| { |
| "epoch": 7.6938775510204085, |
| "grad_norm": 0.06387408822774887, |
| "learning_rate": 1.8735042735042738e-05, |
| "loss": 0.0017, |
| "step": 946 |
| }, |
| { |
| "epoch": 7.70204081632653, |
| "grad_norm": 0.3237035870552063, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 0.0077, |
| "step": 947 |
| }, |
| { |
| "epoch": 7.710204081632653, |
| "grad_norm": 0.14317689836025238, |
| "learning_rate": 1.85982905982906e-05, |
| "loss": 0.0036, |
| "step": 948 |
| }, |
| { |
| "epoch": 7.718367346938775, |
| "grad_norm": 0.03586750105023384, |
| "learning_rate": 1.8529914529914534e-05, |
| "loss": 0.0018, |
| "step": 949 |
| }, |
| { |
| "epoch": 7.726530612244898, |
| "grad_norm": 0.005396117921918631, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.0001, |
| "step": 950 |
| }, |
| { |
| "epoch": 7.73469387755102, |
| "grad_norm": 0.010027500800788403, |
| "learning_rate": 1.8393162393162392e-05, |
| "loss": 0.0002, |
| "step": 951 |
| }, |
| { |
| "epoch": 7.742857142857143, |
| "grad_norm": 0.047518227249383926, |
| "learning_rate": 1.8324786324786326e-05, |
| "loss": 0.0016, |
| "step": 952 |
| }, |
| { |
| "epoch": 7.751020408163265, |
| "grad_norm": 0.005562972743064165, |
| "learning_rate": 1.8256410256410257e-05, |
| "loss": 0.0001, |
| "step": 953 |
| }, |
| { |
| "epoch": 7.759183673469388, |
| "grad_norm": 0.007851188071072102, |
| "learning_rate": 1.8188034188034188e-05, |
| "loss": 0.0002, |
| "step": 954 |
| }, |
| { |
| "epoch": 7.7673469387755105, |
| "grad_norm": 0.005186399444937706, |
| "learning_rate": 1.8119658119658122e-05, |
| "loss": 0.0001, |
| "step": 955 |
| }, |
| { |
| "epoch": 7.775510204081632, |
| "grad_norm": 0.020631812512874603, |
| "learning_rate": 1.8051282051282053e-05, |
| "loss": 0.0003, |
| "step": 956 |
| }, |
| { |
| "epoch": 7.783673469387755, |
| "grad_norm": 0.0623784177005291, |
| "learning_rate": 1.7982905982905984e-05, |
| "loss": 0.0007, |
| "step": 957 |
| }, |
| { |
| "epoch": 7.791836734693877, |
| "grad_norm": 0.10035212337970734, |
| "learning_rate": 1.7914529914529918e-05, |
| "loss": 0.0041, |
| "step": 958 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 0.10691452026367188, |
| "learning_rate": 1.784615384615385e-05, |
| "loss": 0.004, |
| "step": 959 |
| }, |
| { |
| "epoch": 7.808163265306122, |
| "grad_norm": 0.2187003642320633, |
| "learning_rate": 1.7777777777777777e-05, |
| "loss": 0.003, |
| "step": 960 |
| }, |
| { |
| "epoch": 7.816326530612245, |
| "grad_norm": 0.12766751646995544, |
| "learning_rate": 1.770940170940171e-05, |
| "loss": 0.003, |
| "step": 961 |
| }, |
| { |
| "epoch": 7.8244897959183675, |
| "grad_norm": 0.10042405128479004, |
| "learning_rate": 1.7641025641025642e-05, |
| "loss": 0.0025, |
| "step": 962 |
| }, |
| { |
| "epoch": 7.83265306122449, |
| "grad_norm": 0.024409618228673935, |
| "learning_rate": 1.7572649572649573e-05, |
| "loss": 0.0003, |
| "step": 963 |
| }, |
| { |
| "epoch": 7.840816326530613, |
| "grad_norm": 0.08938995003700256, |
| "learning_rate": 1.7504273504273507e-05, |
| "loss": 0.0017, |
| "step": 964 |
| }, |
| { |
| "epoch": 7.848979591836734, |
| "grad_norm": 0.006908862851560116, |
| "learning_rate": 1.7435897435897438e-05, |
| "loss": 0.0001, |
| "step": 965 |
| }, |
| { |
| "epoch": 7.857142857142857, |
| "grad_norm": 0.33812665939331055, |
| "learning_rate": 1.736752136752137e-05, |
| "loss": 0.0101, |
| "step": 966 |
| }, |
| { |
| "epoch": 7.865306122448979, |
| "grad_norm": 0.059313975274562836, |
| "learning_rate": 1.7299145299145303e-05, |
| "loss": 0.0023, |
| "step": 967 |
| }, |
| { |
| "epoch": 7.873469387755102, |
| "grad_norm": 0.2146165370941162, |
| "learning_rate": 1.7230769230769234e-05, |
| "loss": 0.0082, |
| "step": 968 |
| }, |
| { |
| "epoch": 7.881632653061224, |
| "grad_norm": 0.07495953142642975, |
| "learning_rate": 1.716239316239316e-05, |
| "loss": 0.0052, |
| "step": 969 |
| }, |
| { |
| "epoch": 7.889795918367347, |
| "grad_norm": 0.17084024846553802, |
| "learning_rate": 1.7094017094017095e-05, |
| "loss": 0.0009, |
| "step": 970 |
| }, |
| { |
| "epoch": 7.8979591836734695, |
| "grad_norm": 0.16996727883815765, |
| "learning_rate": 1.7025641025641026e-05, |
| "loss": 0.0038, |
| "step": 971 |
| }, |
| { |
| "epoch": 7.906122448979592, |
| "grad_norm": 0.06174658238887787, |
| "learning_rate": 1.6957264957264957e-05, |
| "loss": 0.0009, |
| "step": 972 |
| }, |
| { |
| "epoch": 7.914285714285715, |
| "grad_norm": 0.035608597099781036, |
| "learning_rate": 1.688888888888889e-05, |
| "loss": 0.0006, |
| "step": 973 |
| }, |
| { |
| "epoch": 7.922448979591836, |
| "grad_norm": 0.2112169861793518, |
| "learning_rate": 1.6820512820512822e-05, |
| "loss": 0.0035, |
| "step": 974 |
| }, |
| { |
| "epoch": 7.930612244897959, |
| "grad_norm": 0.15173368155956268, |
| "learning_rate": 1.6752136752136753e-05, |
| "loss": 0.0024, |
| "step": 975 |
| }, |
| { |
| "epoch": 7.938775510204081, |
| "grad_norm": 0.00365807325579226, |
| "learning_rate": 1.6683760683760687e-05, |
| "loss": 0.0001, |
| "step": 976 |
| }, |
| { |
| "epoch": 7.946938775510204, |
| "grad_norm": 0.04172469303011894, |
| "learning_rate": 1.6615384615384618e-05, |
| "loss": 0.0003, |
| "step": 977 |
| }, |
| { |
| "epoch": 7.955102040816326, |
| "grad_norm": 0.02554394118487835, |
| "learning_rate": 1.6547008547008546e-05, |
| "loss": 0.0005, |
| "step": 978 |
| }, |
| { |
| "epoch": 7.963265306122449, |
| "grad_norm": 0.14168842136859894, |
| "learning_rate": 1.647863247863248e-05, |
| "loss": 0.0022, |
| "step": 979 |
| }, |
| { |
| "epoch": 7.9714285714285715, |
| "grad_norm": 0.06103862076997757, |
| "learning_rate": 1.641025641025641e-05, |
| "loss": 0.0009, |
| "step": 980 |
| }, |
| { |
| "epoch": 7.979591836734694, |
| "grad_norm": 0.09359610825777054, |
| "learning_rate": 1.634188034188034e-05, |
| "loss": 0.0004, |
| "step": 981 |
| }, |
| { |
| "epoch": 7.987755102040817, |
| "grad_norm": 0.09833401441574097, |
| "learning_rate": 1.6273504273504276e-05, |
| "loss": 0.0015, |
| "step": 982 |
| }, |
| { |
| "epoch": 7.995918367346938, |
| "grad_norm": 0.14906033873558044, |
| "learning_rate": 1.6205128205128207e-05, |
| "loss": 0.0052, |
| "step": 983 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.03419484943151474, |
| "learning_rate": 1.6136752136752138e-05, |
| "loss": 0.0006, |
| "step": 984 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9416139343727575, |
| "eval_f1": 0.9410915355911077, |
| "eval_loss": 0.20951753854751587, |
| "eval_precision": 0.9414299271894795, |
| "eval_recall": 0.9416139343727575, |
| "eval_runtime": 24.82, |
| "eval_samples_per_second": 17.567, |
| "eval_steps_per_second": 17.567, |
| "step": 984 |
| }, |
| { |
| "epoch": 8.008163265306122, |
| "grad_norm": 0.027842367067933083, |
| "learning_rate": 1.6068376068376072e-05, |
| "loss": 0.0005, |
| "step": 985 |
| }, |
| { |
| "epoch": 8.016326530612245, |
| "grad_norm": 0.015295000746846199, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0003, |
| "step": 986 |
| }, |
| { |
| "epoch": 8.024489795918367, |
| "grad_norm": 0.04106583446264267, |
| "learning_rate": 1.5931623931623934e-05, |
| "loss": 0.0029, |
| "step": 987 |
| }, |
| { |
| "epoch": 8.03265306122449, |
| "grad_norm": 0.19340075552463531, |
| "learning_rate": 1.5863247863247864e-05, |
| "loss": 0.0128, |
| "step": 988 |
| }, |
| { |
| "epoch": 8.040816326530612, |
| "grad_norm": 0.018350228667259216, |
| "learning_rate": 1.5794871794871795e-05, |
| "loss": 0.0002, |
| "step": 989 |
| }, |
| { |
| "epoch": 8.048979591836735, |
| "grad_norm": 0.010258130729198456, |
| "learning_rate": 1.5726495726495726e-05, |
| "loss": 0.0002, |
| "step": 990 |
| }, |
| { |
| "epoch": 8.057142857142857, |
| "grad_norm": 0.12881983816623688, |
| "learning_rate": 1.565811965811966e-05, |
| "loss": 0.0094, |
| "step": 991 |
| }, |
| { |
| "epoch": 8.06530612244898, |
| "grad_norm": 0.04051700606942177, |
| "learning_rate": 1.558974358974359e-05, |
| "loss": 0.0005, |
| "step": 992 |
| }, |
| { |
| "epoch": 8.073469387755102, |
| "grad_norm": 0.08483976870775223, |
| "learning_rate": 1.5521367521367522e-05, |
| "loss": 0.0015, |
| "step": 993 |
| }, |
| { |
| "epoch": 8.081632653061224, |
| "grad_norm": 0.0454951710999012, |
| "learning_rate": 1.5452991452991456e-05, |
| "loss": 0.0013, |
| "step": 994 |
| }, |
| { |
| "epoch": 8.089795918367347, |
| "grad_norm": 0.01969115249812603, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.0003, |
| "step": 995 |
| }, |
| { |
| "epoch": 8.097959183673469, |
| "grad_norm": 0.033365558832883835, |
| "learning_rate": 1.5316239316239318e-05, |
| "loss": 0.0004, |
| "step": 996 |
| }, |
| { |
| "epoch": 8.106122448979592, |
| "grad_norm": 0.0014622848248109221, |
| "learning_rate": 1.5247863247863249e-05, |
| "loss": 0.0, |
| "step": 997 |
| }, |
| { |
| "epoch": 8.114285714285714, |
| "grad_norm": 0.007859136909246445, |
| "learning_rate": 1.517948717948718e-05, |
| "loss": 0.0001, |
| "step": 998 |
| }, |
| { |
| "epoch": 8.122448979591837, |
| "grad_norm": 0.03159104660153389, |
| "learning_rate": 1.5111111111111112e-05, |
| "loss": 0.0003, |
| "step": 999 |
| }, |
| { |
| "epoch": 8.130612244897959, |
| "grad_norm": 0.008046046830713749, |
| "learning_rate": 1.5042735042735043e-05, |
| "loss": 0.0002, |
| "step": 1000 |
| }, |
| { |
| "epoch": 8.138775510204082, |
| "grad_norm": 0.0062632174231112, |
| "learning_rate": 1.4974358974358976e-05, |
| "loss": 0.0001, |
| "step": 1001 |
| }, |
| { |
| "epoch": 8.146938775510204, |
| "grad_norm": 0.018382323905825615, |
| "learning_rate": 1.4905982905982908e-05, |
| "loss": 0.0003, |
| "step": 1002 |
| }, |
| { |
| "epoch": 8.155102040816326, |
| "grad_norm": 0.0031079400796443224, |
| "learning_rate": 1.4837606837606839e-05, |
| "loss": 0.0001, |
| "step": 1003 |
| }, |
| { |
| "epoch": 8.16326530612245, |
| "grad_norm": 0.037184253334999084, |
| "learning_rate": 1.4769230769230772e-05, |
| "loss": 0.0009, |
| "step": 1004 |
| }, |
| { |
| "epoch": 8.17142857142857, |
| "grad_norm": 0.04274457320570946, |
| "learning_rate": 1.4700854700854703e-05, |
| "loss": 0.0007, |
| "step": 1005 |
| }, |
| { |
| "epoch": 8.179591836734694, |
| "grad_norm": 0.03666644170880318, |
| "learning_rate": 1.4632478632478633e-05, |
| "loss": 0.0006, |
| "step": 1006 |
| }, |
| { |
| "epoch": 8.187755102040816, |
| "grad_norm": 0.09907951951026917, |
| "learning_rate": 1.4564102564102564e-05, |
| "loss": 0.0022, |
| "step": 1007 |
| }, |
| { |
| "epoch": 8.19591836734694, |
| "grad_norm": 0.057159584015607834, |
| "learning_rate": 1.4495726495726497e-05, |
| "loss": 0.0005, |
| "step": 1008 |
| }, |
| { |
| "epoch": 8.204081632653061, |
| "grad_norm": 0.03271704539656639, |
| "learning_rate": 1.4427350427350428e-05, |
| "loss": 0.0006, |
| "step": 1009 |
| }, |
| { |
| "epoch": 8.212244897959184, |
| "grad_norm": 0.0030933571979403496, |
| "learning_rate": 1.435897435897436e-05, |
| "loss": 0.0, |
| "step": 1010 |
| }, |
| { |
| "epoch": 8.220408163265306, |
| "grad_norm": 0.02095775492489338, |
| "learning_rate": 1.4290598290598293e-05, |
| "loss": 0.0005, |
| "step": 1011 |
| }, |
| { |
| "epoch": 8.228571428571428, |
| "grad_norm": 0.049353066831827164, |
| "learning_rate": 1.4222222222222224e-05, |
| "loss": 0.0007, |
| "step": 1012 |
| }, |
| { |
| "epoch": 8.236734693877551, |
| "grad_norm": 0.023141806945204735, |
| "learning_rate": 1.4153846153846156e-05, |
| "loss": 0.0003, |
| "step": 1013 |
| }, |
| { |
| "epoch": 8.244897959183673, |
| "grad_norm": 0.015122964978218079, |
| "learning_rate": 1.4085470085470087e-05, |
| "loss": 0.0007, |
| "step": 1014 |
| }, |
| { |
| "epoch": 8.253061224489796, |
| "grad_norm": 0.003815308678895235, |
| "learning_rate": 1.4017094017094018e-05, |
| "loss": 0.0001, |
| "step": 1015 |
| }, |
| { |
| "epoch": 8.261224489795918, |
| "grad_norm": 0.006639714352786541, |
| "learning_rate": 1.3948717948717949e-05, |
| "loss": 0.0001, |
| "step": 1016 |
| }, |
| { |
| "epoch": 8.269387755102041, |
| "grad_norm": 0.20341694355010986, |
| "learning_rate": 1.3880341880341881e-05, |
| "loss": 0.0075, |
| "step": 1017 |
| }, |
| { |
| "epoch": 8.277551020408163, |
| "grad_norm": 0.006596466526389122, |
| "learning_rate": 1.3811965811965812e-05, |
| "loss": 0.0, |
| "step": 1018 |
| }, |
| { |
| "epoch": 8.285714285714286, |
| "grad_norm": 0.059214457869529724, |
| "learning_rate": 1.3743589743589745e-05, |
| "loss": 0.0022, |
| "step": 1019 |
| }, |
| { |
| "epoch": 8.293877551020408, |
| "grad_norm": 0.01356984581798315, |
| "learning_rate": 1.3675213675213677e-05, |
| "loss": 0.0002, |
| "step": 1020 |
| }, |
| { |
| "epoch": 8.30204081632653, |
| "grad_norm": 0.052929263561964035, |
| "learning_rate": 1.3606837606837608e-05, |
| "loss": 0.0013, |
| "step": 1021 |
| }, |
| { |
| "epoch": 8.310204081632653, |
| "grad_norm": 0.0020449981093406677, |
| "learning_rate": 1.353846153846154e-05, |
| "loss": 0.0, |
| "step": 1022 |
| }, |
| { |
| "epoch": 8.318367346938775, |
| "grad_norm": 0.3605046272277832, |
| "learning_rate": 1.3470085470085472e-05, |
| "loss": 0.0136, |
| "step": 1023 |
| }, |
| { |
| "epoch": 8.326530612244898, |
| "grad_norm": 0.05114666745066643, |
| "learning_rate": 1.3401709401709402e-05, |
| "loss": 0.0009, |
| "step": 1024 |
| }, |
| { |
| "epoch": 8.33469387755102, |
| "grad_norm": 0.025500185787677765, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0004, |
| "step": 1025 |
| }, |
| { |
| "epoch": 8.342857142857143, |
| "grad_norm": 0.006557499058544636, |
| "learning_rate": 1.3264957264957266e-05, |
| "loss": 0.0001, |
| "step": 1026 |
| }, |
| { |
| "epoch": 8.351020408163265, |
| "grad_norm": 0.02797406166791916, |
| "learning_rate": 1.3196581196581197e-05, |
| "loss": 0.0002, |
| "step": 1027 |
| }, |
| { |
| "epoch": 8.359183673469389, |
| "grad_norm": 0.003020975971594453, |
| "learning_rate": 1.312820512820513e-05, |
| "loss": 0.0001, |
| "step": 1028 |
| }, |
| { |
| "epoch": 8.36734693877551, |
| "grad_norm": 0.0024696961045265198, |
| "learning_rate": 1.3059829059829062e-05, |
| "loss": 0.0, |
| "step": 1029 |
| }, |
| { |
| "epoch": 8.375510204081632, |
| "grad_norm": 0.05511481314897537, |
| "learning_rate": 1.2991452991452993e-05, |
| "loss": 0.001, |
| "step": 1030 |
| }, |
| { |
| "epoch": 8.383673469387755, |
| "grad_norm": 0.03543705493211746, |
| "learning_rate": 1.2923076923076925e-05, |
| "loss": 0.0004, |
| "step": 1031 |
| }, |
| { |
| "epoch": 8.391836734693877, |
| "grad_norm": 1.5244866609573364, |
| "learning_rate": 1.2854700854700856e-05, |
| "loss": 0.0021, |
| "step": 1032 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.030157363042235374, |
| "learning_rate": 1.2786324786324787e-05, |
| "loss": 0.0006, |
| "step": 1033 |
| }, |
| { |
| "epoch": 8.408163265306122, |
| "grad_norm": 0.06905968487262726, |
| "learning_rate": 1.2717948717948718e-05, |
| "loss": 0.0011, |
| "step": 1034 |
| }, |
| { |
| "epoch": 8.416326530612245, |
| "grad_norm": 0.002873434219509363, |
| "learning_rate": 1.264957264957265e-05, |
| "loss": 0.0, |
| "step": 1035 |
| }, |
| { |
| "epoch": 8.424489795918367, |
| "grad_norm": 0.012141639366745949, |
| "learning_rate": 1.2581196581196581e-05, |
| "loss": 0.0002, |
| "step": 1036 |
| }, |
| { |
| "epoch": 8.43265306122449, |
| "grad_norm": 0.016885140910744667, |
| "learning_rate": 1.2512820512820514e-05, |
| "loss": 0.0002, |
| "step": 1037 |
| }, |
| { |
| "epoch": 8.440816326530612, |
| "grad_norm": 0.0075583746656775475, |
| "learning_rate": 1.2444444444444446e-05, |
| "loss": 0.0001, |
| "step": 1038 |
| }, |
| { |
| "epoch": 8.448979591836734, |
| "grad_norm": 0.01263425312936306, |
| "learning_rate": 1.2376068376068377e-05, |
| "loss": 0.0003, |
| "step": 1039 |
| }, |
| { |
| "epoch": 8.457142857142857, |
| "grad_norm": 0.004327788483351469, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.0001, |
| "step": 1040 |
| }, |
| { |
| "epoch": 8.465306122448979, |
| "grad_norm": 0.005272835027426481, |
| "learning_rate": 1.223931623931624e-05, |
| "loss": 0.0001, |
| "step": 1041 |
| }, |
| { |
| "epoch": 8.473469387755102, |
| "grad_norm": 0.00520187197253108, |
| "learning_rate": 1.2170940170940171e-05, |
| "loss": 0.0001, |
| "step": 1042 |
| }, |
| { |
| "epoch": 8.481632653061224, |
| "grad_norm": 0.03641340509057045, |
| "learning_rate": 1.2102564102564102e-05, |
| "loss": 0.0003, |
| "step": 1043 |
| }, |
| { |
| "epoch": 8.489795918367347, |
| "grad_norm": 0.05241888388991356, |
| "learning_rate": 1.2034188034188035e-05, |
| "loss": 0.0017, |
| "step": 1044 |
| }, |
| { |
| "epoch": 8.49795918367347, |
| "grad_norm": 0.009457703679800034, |
| "learning_rate": 1.1965811965811966e-05, |
| "loss": 0.0002, |
| "step": 1045 |
| }, |
| { |
| "epoch": 8.506122448979593, |
| "grad_norm": 0.003092976287007332, |
| "learning_rate": 1.1897435897435898e-05, |
| "loss": 0.0, |
| "step": 1046 |
| }, |
| { |
| "epoch": 8.514285714285714, |
| "grad_norm": 0.001040496164932847, |
| "learning_rate": 1.182905982905983e-05, |
| "loss": 0.0, |
| "step": 1047 |
| }, |
| { |
| "epoch": 8.522448979591836, |
| "grad_norm": 0.01029434148222208, |
| "learning_rate": 1.1760683760683762e-05, |
| "loss": 0.0001, |
| "step": 1048 |
| }, |
| { |
| "epoch": 8.53061224489796, |
| "grad_norm": 0.07402694225311279, |
| "learning_rate": 1.1692307692307694e-05, |
| "loss": 0.003, |
| "step": 1049 |
| }, |
| { |
| "epoch": 8.538775510204081, |
| "grad_norm": 0.0047174179926514626, |
| "learning_rate": 1.1623931623931625e-05, |
| "loss": 0.0001, |
| "step": 1050 |
| }, |
| { |
| "epoch": 8.546938775510204, |
| "grad_norm": 0.029351942241191864, |
| "learning_rate": 1.1555555555555556e-05, |
| "loss": 0.0005, |
| "step": 1051 |
| }, |
| { |
| "epoch": 8.555102040816326, |
| "grad_norm": 0.002197829307988286, |
| "learning_rate": 1.1487179487179487e-05, |
| "loss": 0.0, |
| "step": 1052 |
| }, |
| { |
| "epoch": 8.56326530612245, |
| "grad_norm": 0.0036702328361570835, |
| "learning_rate": 1.141880341880342e-05, |
| "loss": 0.0001, |
| "step": 1053 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.03191132843494415, |
| "learning_rate": 1.135042735042735e-05, |
| "loss": 0.0024, |
| "step": 1054 |
| }, |
| { |
| "epoch": 8.579591836734695, |
| "grad_norm": 0.03412720188498497, |
| "learning_rate": 1.1282051282051283e-05, |
| "loss": 0.0006, |
| "step": 1055 |
| }, |
| { |
| "epoch": 8.587755102040816, |
| "grad_norm": 0.005484213586896658, |
| "learning_rate": 1.1213675213675215e-05, |
| "loss": 0.0001, |
| "step": 1056 |
| }, |
| { |
| "epoch": 8.59591836734694, |
| "grad_norm": 0.0703863799571991, |
| "learning_rate": 1.1145299145299146e-05, |
| "loss": 0.0018, |
| "step": 1057 |
| }, |
| { |
| "epoch": 8.604081632653061, |
| "grad_norm": 0.025943145155906677, |
| "learning_rate": 1.1076923076923079e-05, |
| "loss": 0.0002, |
| "step": 1058 |
| }, |
| { |
| "epoch": 8.612244897959183, |
| "grad_norm": 0.1388336420059204, |
| "learning_rate": 1.100854700854701e-05, |
| "loss": 0.0015, |
| "step": 1059 |
| }, |
| { |
| "epoch": 8.620408163265306, |
| "grad_norm": 0.020861342549324036, |
| "learning_rate": 1.0940170940170942e-05, |
| "loss": 0.0004, |
| "step": 1060 |
| }, |
| { |
| "epoch": 8.628571428571428, |
| "grad_norm": 0.08928504586219788, |
| "learning_rate": 1.0871794871794871e-05, |
| "loss": 0.002, |
| "step": 1061 |
| }, |
| { |
| "epoch": 8.636734693877552, |
| "grad_norm": 0.012267638929188251, |
| "learning_rate": 1.0803418803418804e-05, |
| "loss": 0.0001, |
| "step": 1062 |
| }, |
| { |
| "epoch": 8.644897959183673, |
| "grad_norm": 0.0072326031513512135, |
| "learning_rate": 1.0735042735042735e-05, |
| "loss": 0.0001, |
| "step": 1063 |
| }, |
| { |
| "epoch": 8.653061224489797, |
| "grad_norm": 0.004826648626476526, |
| "learning_rate": 1.0666666666666667e-05, |
| "loss": 0.0001, |
| "step": 1064 |
| }, |
| { |
| "epoch": 8.661224489795918, |
| "grad_norm": 0.10817044228315353, |
| "learning_rate": 1.05982905982906e-05, |
| "loss": 0.0053, |
| "step": 1065 |
| }, |
| { |
| "epoch": 8.66938775510204, |
| "grad_norm": 0.0037550870329141617, |
| "learning_rate": 1.052991452991453e-05, |
| "loss": 0.0, |
| "step": 1066 |
| }, |
| { |
| "epoch": 8.677551020408163, |
| "grad_norm": 0.002859473694115877, |
| "learning_rate": 1.0461538461538463e-05, |
| "loss": 0.0001, |
| "step": 1067 |
| }, |
| { |
| "epoch": 8.685714285714285, |
| "grad_norm": 0.0039060532581061125, |
| "learning_rate": 1.0393162393162394e-05, |
| "loss": 0.0001, |
| "step": 1068 |
| }, |
| { |
| "epoch": 8.693877551020408, |
| "grad_norm": 0.0033676582388579845, |
| "learning_rate": 1.0324786324786327e-05, |
| "loss": 0.0, |
| "step": 1069 |
| }, |
| { |
| "epoch": 8.70204081632653, |
| "grad_norm": 0.0012714867480099201, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.0, |
| "step": 1070 |
| }, |
| { |
| "epoch": 8.710204081632654, |
| "grad_norm": 0.02062312327325344, |
| "learning_rate": 1.0188034188034188e-05, |
| "loss": 0.0002, |
| "step": 1071 |
| }, |
| { |
| "epoch": 8.718367346938775, |
| "grad_norm": 0.05126015469431877, |
| "learning_rate": 1.011965811965812e-05, |
| "loss": 0.0017, |
| "step": 1072 |
| }, |
| { |
| "epoch": 8.726530612244899, |
| "grad_norm": 0.009600671008229256, |
| "learning_rate": 1.0051282051282052e-05, |
| "loss": 0.0001, |
| "step": 1073 |
| }, |
| { |
| "epoch": 8.73469387755102, |
| "grad_norm": 0.02762376330792904, |
| "learning_rate": 9.982905982905984e-06, |
| "loss": 0.0006, |
| "step": 1074 |
| }, |
| { |
| "epoch": 8.742857142857144, |
| "grad_norm": 0.06737780570983887, |
| "learning_rate": 9.914529914529915e-06, |
| "loss": 0.0005, |
| "step": 1075 |
| }, |
| { |
| "epoch": 8.751020408163265, |
| "grad_norm": 0.07408367842435837, |
| "learning_rate": 9.846153846153848e-06, |
| "loss": 0.0032, |
| "step": 1076 |
| }, |
| { |
| "epoch": 8.759183673469387, |
| "grad_norm": 0.0068168556317687035, |
| "learning_rate": 9.777777777777779e-06, |
| "loss": 0.0001, |
| "step": 1077 |
| }, |
| { |
| "epoch": 8.76734693877551, |
| "grad_norm": 0.02478768117725849, |
| "learning_rate": 9.70940170940171e-06, |
| "loss": 0.0004, |
| "step": 1078 |
| }, |
| { |
| "epoch": 8.775510204081632, |
| "grad_norm": 0.03732183575630188, |
| "learning_rate": 9.641025641025642e-06, |
| "loss": 0.0014, |
| "step": 1079 |
| }, |
| { |
| "epoch": 8.783673469387756, |
| "grad_norm": 0.7320724725723267, |
| "learning_rate": 9.572649572649575e-06, |
| "loss": 0.0094, |
| "step": 1080 |
| }, |
| { |
| "epoch": 8.791836734693877, |
| "grad_norm": 0.02578953094780445, |
| "learning_rate": 9.504273504273504e-06, |
| "loss": 0.0003, |
| "step": 1081 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.026644522324204445, |
| "learning_rate": 9.435897435897436e-06, |
| "loss": 0.0005, |
| "step": 1082 |
| }, |
| { |
| "epoch": 8.808163265306122, |
| "grad_norm": 0.04598251357674599, |
| "learning_rate": 9.367521367521369e-06, |
| "loss": 0.0014, |
| "step": 1083 |
| }, |
| { |
| "epoch": 8.816326530612244, |
| "grad_norm": 0.2066701352596283, |
| "learning_rate": 9.2991452991453e-06, |
| "loss": 0.003, |
| "step": 1084 |
| }, |
| { |
| "epoch": 8.824489795918367, |
| "grad_norm": 0.20979352295398712, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.0021, |
| "step": 1085 |
| }, |
| { |
| "epoch": 8.83265306122449, |
| "grad_norm": 0.11119628697633743, |
| "learning_rate": 9.162393162393163e-06, |
| "loss": 0.0034, |
| "step": 1086 |
| }, |
| { |
| "epoch": 8.840816326530613, |
| "grad_norm": 0.04252481833100319, |
| "learning_rate": 9.094017094017094e-06, |
| "loss": 0.0013, |
| "step": 1087 |
| }, |
| { |
| "epoch": 8.848979591836734, |
| "grad_norm": 0.004191332496702671, |
| "learning_rate": 9.025641025641027e-06, |
| "loss": 0.0001, |
| "step": 1088 |
| }, |
| { |
| "epoch": 8.857142857142858, |
| "grad_norm": 0.19183596968650818, |
| "learning_rate": 8.957264957264959e-06, |
| "loss": 0.0055, |
| "step": 1089 |
| }, |
| { |
| "epoch": 8.86530612244898, |
| "grad_norm": 0.012185700237751007, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.0002, |
| "step": 1090 |
| }, |
| { |
| "epoch": 8.873469387755103, |
| "grad_norm": 0.05381055921316147, |
| "learning_rate": 8.820512820512821e-06, |
| "loss": 0.0028, |
| "step": 1091 |
| }, |
| { |
| "epoch": 8.881632653061224, |
| "grad_norm": 0.008519163355231285, |
| "learning_rate": 8.752136752136753e-06, |
| "loss": 0.0001, |
| "step": 1092 |
| }, |
| { |
| "epoch": 8.889795918367348, |
| "grad_norm": 0.03137361258268356, |
| "learning_rate": 8.683760683760684e-06, |
| "loss": 0.0005, |
| "step": 1093 |
| }, |
| { |
| "epoch": 8.89795918367347, |
| "grad_norm": 0.0023849045392125845, |
| "learning_rate": 8.615384615384617e-06, |
| "loss": 0.0, |
| "step": 1094 |
| }, |
| { |
| "epoch": 8.906122448979591, |
| "grad_norm": 0.0032758014276623726, |
| "learning_rate": 8.547008547008548e-06, |
| "loss": 0.0, |
| "step": 1095 |
| }, |
| { |
| "epoch": 8.914285714285715, |
| "grad_norm": 0.004983650054782629, |
| "learning_rate": 8.478632478632479e-06, |
| "loss": 0.0001, |
| "step": 1096 |
| }, |
| { |
| "epoch": 8.922448979591836, |
| "grad_norm": 0.06227012723684311, |
| "learning_rate": 8.410256410256411e-06, |
| "loss": 0.0029, |
| "step": 1097 |
| }, |
| { |
| "epoch": 8.93061224489796, |
| "grad_norm": 0.008136185817420483, |
| "learning_rate": 8.341880341880344e-06, |
| "loss": 0.0001, |
| "step": 1098 |
| }, |
| { |
| "epoch": 8.938775510204081, |
| "grad_norm": 0.01348600722849369, |
| "learning_rate": 8.273504273504273e-06, |
| "loss": 0.0001, |
| "step": 1099 |
| }, |
| { |
| "epoch": 8.946938775510205, |
| "grad_norm": 0.017930278554558754, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 0.0002, |
| "step": 1100 |
| }, |
| { |
| "epoch": 8.955102040816326, |
| "grad_norm": 0.18920879065990448, |
| "learning_rate": 8.136752136752138e-06, |
| "loss": 0.0022, |
| "step": 1101 |
| }, |
| { |
| "epoch": 8.963265306122448, |
| "grad_norm": 0.024649549275636673, |
| "learning_rate": 8.068376068376069e-06, |
| "loss": 0.001, |
| "step": 1102 |
| }, |
| { |
| "epoch": 8.971428571428572, |
| "grad_norm": 0.040687914937734604, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0007, |
| "step": 1103 |
| }, |
| { |
| "epoch": 8.979591836734693, |
| "grad_norm": 0.024966664612293243, |
| "learning_rate": 7.931623931623932e-06, |
| "loss": 0.0003, |
| "step": 1104 |
| }, |
| { |
| "epoch": 8.987755102040817, |
| "grad_norm": 0.016600683331489563, |
| "learning_rate": 7.863247863247863e-06, |
| "loss": 0.0002, |
| "step": 1105 |
| }, |
| { |
| "epoch": 8.995918367346938, |
| "grad_norm": 0.10007146000862122, |
| "learning_rate": 7.794871794871796e-06, |
| "loss": 0.0015, |
| "step": 1106 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.007699214853346348, |
| "learning_rate": 7.726495726495728e-06, |
| "loss": 0.0001, |
| "step": 1107 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.942788179268054, |
| "eval_f1": 0.9422835056672126, |
| "eval_loss": 0.22396019101142883, |
| "eval_precision": 0.9426181079752368, |
| "eval_recall": 0.942788179268054, |
| "eval_runtime": 25.2075, |
| "eval_samples_per_second": 17.296, |
| "eval_steps_per_second": 17.296, |
| "step": 1107 |
| }, |
| { |
| "epoch": 9.008163265306122, |
| "grad_norm": 0.004843783099204302, |
| "learning_rate": 7.658119658119659e-06, |
| "loss": 0.0001, |
| "step": 1108 |
| }, |
| { |
| "epoch": 9.016326530612245, |
| "grad_norm": 0.003891325555741787, |
| "learning_rate": 7.58974358974359e-06, |
| "loss": 0.0, |
| "step": 1109 |
| }, |
| { |
| "epoch": 9.024489795918367, |
| "grad_norm": 0.08905791491270065, |
| "learning_rate": 7.521367521367522e-06, |
| "loss": 0.01, |
| "step": 1110 |
| }, |
| { |
| "epoch": 9.03265306122449, |
| "grad_norm": 0.016216980293393135, |
| "learning_rate": 7.452991452991454e-06, |
| "loss": 0.0003, |
| "step": 1111 |
| }, |
| { |
| "epoch": 9.040816326530612, |
| "grad_norm": 0.020533408969640732, |
| "learning_rate": 7.384615384615386e-06, |
| "loss": 0.0004, |
| "step": 1112 |
| }, |
| { |
| "epoch": 9.048979591836735, |
| "grad_norm": 0.002070846501737833, |
| "learning_rate": 7.316239316239317e-06, |
| "loss": 0.0, |
| "step": 1113 |
| }, |
| { |
| "epoch": 9.057142857142857, |
| "grad_norm": 0.0092104347422719, |
| "learning_rate": 7.247863247863248e-06, |
| "loss": 0.0001, |
| "step": 1114 |
| }, |
| { |
| "epoch": 9.06530612244898, |
| "grad_norm": 0.007390407379716635, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 0.0002, |
| "step": 1115 |
| }, |
| { |
| "epoch": 9.073469387755102, |
| "grad_norm": 0.17624011635780334, |
| "learning_rate": 7.111111111111112e-06, |
| "loss": 0.0009, |
| "step": 1116 |
| }, |
| { |
| "epoch": 9.081632653061224, |
| "grad_norm": 0.01938670314848423, |
| "learning_rate": 7.0427350427350435e-06, |
| "loss": 0.0004, |
| "step": 1117 |
| }, |
| { |
| "epoch": 9.089795918367347, |
| "grad_norm": 0.00450798450037837, |
| "learning_rate": 6.974358974358974e-06, |
| "loss": 0.0001, |
| "step": 1118 |
| }, |
| { |
| "epoch": 9.097959183673469, |
| "grad_norm": 0.0029924893751740456, |
| "learning_rate": 6.905982905982906e-06, |
| "loss": 0.0, |
| "step": 1119 |
| }, |
| { |
| "epoch": 9.106122448979592, |
| "grad_norm": 0.0016975200269371271, |
| "learning_rate": 6.837606837606839e-06, |
| "loss": 0.0, |
| "step": 1120 |
| }, |
| { |
| "epoch": 9.114285714285714, |
| "grad_norm": 0.02085467241704464, |
| "learning_rate": 6.76923076923077e-06, |
| "loss": 0.0004, |
| "step": 1121 |
| }, |
| { |
| "epoch": 9.122448979591837, |
| "grad_norm": 0.008936136029660702, |
| "learning_rate": 6.700854700854701e-06, |
| "loss": 0.0001, |
| "step": 1122 |
| }, |
| { |
| "epoch": 9.130612244897959, |
| "grad_norm": 0.12007738649845123, |
| "learning_rate": 6.632478632478633e-06, |
| "loss": 0.0013, |
| "step": 1123 |
| }, |
| { |
| "epoch": 9.138775510204082, |
| "grad_norm": 0.017182469367980957, |
| "learning_rate": 6.564102564102565e-06, |
| "loss": 0.0003, |
| "step": 1124 |
| }, |
| { |
| "epoch": 9.146938775510204, |
| "grad_norm": 0.042313314974308014, |
| "learning_rate": 6.495726495726496e-06, |
| "loss": 0.0009, |
| "step": 1125 |
| }, |
| { |
| "epoch": 9.155102040816326, |
| "grad_norm": 0.0015840729465708137, |
| "learning_rate": 6.427350427350428e-06, |
| "loss": 0.0, |
| "step": 1126 |
| }, |
| { |
| "epoch": 9.16326530612245, |
| "grad_norm": 0.11032771319150925, |
| "learning_rate": 6.358974358974359e-06, |
| "loss": 0.0052, |
| "step": 1127 |
| }, |
| { |
| "epoch": 9.17142857142857, |
| "grad_norm": 0.030041849240660667, |
| "learning_rate": 6.290598290598291e-06, |
| "loss": 0.0013, |
| "step": 1128 |
| }, |
| { |
| "epoch": 9.179591836734694, |
| "grad_norm": 0.005967669188976288, |
| "learning_rate": 6.222222222222223e-06, |
| "loss": 0.0001, |
| "step": 1129 |
| }, |
| { |
| "epoch": 9.187755102040816, |
| "grad_norm": 0.00804552435874939, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.0001, |
| "step": 1130 |
| }, |
| { |
| "epoch": 9.19591836734694, |
| "grad_norm": 0.004231815226376057, |
| "learning_rate": 6.085470085470086e-06, |
| "loss": 0.0001, |
| "step": 1131 |
| }, |
| { |
| "epoch": 9.204081632653061, |
| "grad_norm": 0.0016901058843359351, |
| "learning_rate": 6.0170940170940174e-06, |
| "loss": 0.0, |
| "step": 1132 |
| }, |
| { |
| "epoch": 9.212244897959184, |
| "grad_norm": 0.004359536338597536, |
| "learning_rate": 5.948717948717949e-06, |
| "loss": 0.0, |
| "step": 1133 |
| }, |
| { |
| "epoch": 9.220408163265306, |
| "grad_norm": 0.006538494490087032, |
| "learning_rate": 5.880341880341881e-06, |
| "loss": 0.0001, |
| "step": 1134 |
| }, |
| { |
| "epoch": 9.228571428571428, |
| "grad_norm": 0.009976035915315151, |
| "learning_rate": 5.8119658119658126e-06, |
| "loss": 0.0002, |
| "step": 1135 |
| }, |
| { |
| "epoch": 9.236734693877551, |
| "grad_norm": 0.005809263791888952, |
| "learning_rate": 5.743589743589743e-06, |
| "loss": 0.0001, |
| "step": 1136 |
| }, |
| { |
| "epoch": 9.244897959183673, |
| "grad_norm": 0.01854880154132843, |
| "learning_rate": 5.675213675213675e-06, |
| "loss": 0.0003, |
| "step": 1137 |
| }, |
| { |
| "epoch": 9.253061224489796, |
| "grad_norm": 0.00991115067154169, |
| "learning_rate": 5.606837606837608e-06, |
| "loss": 0.0002, |
| "step": 1138 |
| }, |
| { |
| "epoch": 9.261224489795918, |
| "grad_norm": 0.02814176119863987, |
| "learning_rate": 5.538461538461539e-06, |
| "loss": 0.0006, |
| "step": 1139 |
| }, |
| { |
| "epoch": 9.269387755102041, |
| "grad_norm": 0.003233405062928796, |
| "learning_rate": 5.470085470085471e-06, |
| "loss": 0.0, |
| "step": 1140 |
| }, |
| { |
| "epoch": 9.277551020408163, |
| "grad_norm": 0.005762243643403053, |
| "learning_rate": 5.401709401709402e-06, |
| "loss": 0.0001, |
| "step": 1141 |
| }, |
| { |
| "epoch": 9.285714285714286, |
| "grad_norm": 0.03154560178518295, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 0.0008, |
| "step": 1142 |
| }, |
| { |
| "epoch": 9.293877551020408, |
| "grad_norm": 0.025290396064519882, |
| "learning_rate": 5.264957264957265e-06, |
| "loss": 0.0004, |
| "step": 1143 |
| }, |
| { |
| "epoch": 9.30204081632653, |
| "grad_norm": 0.004073767457157373, |
| "learning_rate": 5.196581196581197e-06, |
| "loss": 0.0, |
| "step": 1144 |
| }, |
| { |
| "epoch": 9.310204081632653, |
| "grad_norm": 0.006276815664023161, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 0.0001, |
| "step": 1145 |
| }, |
| { |
| "epoch": 9.318367346938775, |
| "grad_norm": 0.013420458883047104, |
| "learning_rate": 5.05982905982906e-06, |
| "loss": 0.0001, |
| "step": 1146 |
| }, |
| { |
| "epoch": 9.326530612244898, |
| "grad_norm": 0.003337176749482751, |
| "learning_rate": 4.991452991452992e-06, |
| "loss": 0.0, |
| "step": 1147 |
| }, |
| { |
| "epoch": 9.33469387755102, |
| "grad_norm": 0.0010663908906280994, |
| "learning_rate": 4.923076923076924e-06, |
| "loss": 0.0, |
| "step": 1148 |
| }, |
| { |
| "epoch": 9.342857142857143, |
| "grad_norm": 0.00427238317206502, |
| "learning_rate": 4.854700854700855e-06, |
| "loss": 0.0001, |
| "step": 1149 |
| }, |
| { |
| "epoch": 9.351020408163265, |
| "grad_norm": 0.0492619089782238, |
| "learning_rate": 4.786324786324787e-06, |
| "loss": 0.0023, |
| "step": 1150 |
| }, |
| { |
| "epoch": 9.359183673469389, |
| "grad_norm": 0.014486223459243774, |
| "learning_rate": 4.717948717948718e-06, |
| "loss": 0.0002, |
| "step": 1151 |
| }, |
| { |
| "epoch": 9.36734693877551, |
| "grad_norm": 0.006705184932798147, |
| "learning_rate": 4.64957264957265e-06, |
| "loss": 0.0001, |
| "step": 1152 |
| }, |
| { |
| "epoch": 9.375510204081632, |
| "grad_norm": 0.12139922380447388, |
| "learning_rate": 4.581196581196582e-06, |
| "loss": 0.0029, |
| "step": 1153 |
| }, |
| { |
| "epoch": 9.383673469387755, |
| "grad_norm": 0.01572628878057003, |
| "learning_rate": 4.512820512820513e-06, |
| "loss": 0.0003, |
| "step": 1154 |
| }, |
| { |
| "epoch": 9.391836734693877, |
| "grad_norm": 0.015249603427946568, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.0003, |
| "step": 1155 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 0.013273622840642929, |
| "learning_rate": 4.376068376068377e-06, |
| "loss": 0.0002, |
| "step": 1156 |
| }, |
| { |
| "epoch": 9.408163265306122, |
| "grad_norm": 0.002208263147622347, |
| "learning_rate": 4.307692307692308e-06, |
| "loss": 0.0, |
| "step": 1157 |
| }, |
| { |
| "epoch": 9.416326530612245, |
| "grad_norm": 0.0044462066143751144, |
| "learning_rate": 4.239316239316239e-06, |
| "loss": 0.0, |
| "step": 1158 |
| }, |
| { |
| "epoch": 9.424489795918367, |
| "grad_norm": 0.04531145468354225, |
| "learning_rate": 4.170940170940172e-06, |
| "loss": 0.0003, |
| "step": 1159 |
| }, |
| { |
| "epoch": 9.43265306122449, |
| "grad_norm": 0.005794985685497522, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 0.0001, |
| "step": 1160 |
| }, |
| { |
| "epoch": 9.440816326530612, |
| "grad_norm": 0.007728431839495897, |
| "learning_rate": 4.034188034188034e-06, |
| "loss": 0.0001, |
| "step": 1161 |
| }, |
| { |
| "epoch": 9.448979591836734, |
| "grad_norm": 0.00340407807379961, |
| "learning_rate": 3.965811965811966e-06, |
| "loss": 0.0001, |
| "step": 1162 |
| }, |
| { |
| "epoch": 9.457142857142857, |
| "grad_norm": 0.01496533490717411, |
| "learning_rate": 3.897435897435898e-06, |
| "loss": 0.0003, |
| "step": 1163 |
| }, |
| { |
| "epoch": 9.465306122448979, |
| "grad_norm": 0.002183685777708888, |
| "learning_rate": 3.8290598290598295e-06, |
| "loss": 0.0, |
| "step": 1164 |
| }, |
| { |
| "epoch": 9.473469387755102, |
| "grad_norm": 0.07050516456365585, |
| "learning_rate": 3.760683760683761e-06, |
| "loss": 0.0001, |
| "step": 1165 |
| }, |
| { |
| "epoch": 9.481632653061224, |
| "grad_norm": 0.002324522938579321, |
| "learning_rate": 3.692307692307693e-06, |
| "loss": 0.0, |
| "step": 1166 |
| }, |
| { |
| "epoch": 9.489795918367347, |
| "grad_norm": 0.006657972000539303, |
| "learning_rate": 3.623931623931624e-06, |
| "loss": 0.0001, |
| "step": 1167 |
| }, |
| { |
| "epoch": 9.49795918367347, |
| "grad_norm": 0.04592962563037872, |
| "learning_rate": 3.555555555555556e-06, |
| "loss": 0.004, |
| "step": 1168 |
| }, |
| { |
| "epoch": 9.506122448979593, |
| "grad_norm": 0.0031562778167426586, |
| "learning_rate": 3.487179487179487e-06, |
| "loss": 0.0, |
| "step": 1169 |
| }, |
| { |
| "epoch": 9.514285714285714, |
| "grad_norm": 0.006828220561146736, |
| "learning_rate": 3.4188034188034193e-06, |
| "loss": 0.0001, |
| "step": 1170 |
| }, |
| { |
| "epoch": 9.522448979591836, |
| "grad_norm": 0.003534778719767928, |
| "learning_rate": 3.3504273504273506e-06, |
| "loss": 0.0001, |
| "step": 1171 |
| }, |
| { |
| "epoch": 9.53061224489796, |
| "grad_norm": 0.00211413879878819, |
| "learning_rate": 3.2820512820512823e-06, |
| "loss": 0.0, |
| "step": 1172 |
| }, |
| { |
| "epoch": 9.538775510204081, |
| "grad_norm": 0.006114844232797623, |
| "learning_rate": 3.213675213675214e-06, |
| "loss": 0.0001, |
| "step": 1173 |
| }, |
| { |
| "epoch": 9.546938775510204, |
| "grad_norm": 0.0042116702534258366, |
| "learning_rate": 3.1452991452991453e-06, |
| "loss": 0.0, |
| "step": 1174 |
| }, |
| { |
| "epoch": 9.555102040816326, |
| "grad_norm": 0.005698191002011299, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 0.0001, |
| "step": 1175 |
| }, |
| { |
| "epoch": 9.56326530612245, |
| "grad_norm": 0.02607082575559616, |
| "learning_rate": 3.0085470085470087e-06, |
| "loss": 0.0003, |
| "step": 1176 |
| }, |
| { |
| "epoch": 9.571428571428571, |
| "grad_norm": 0.024089762941002846, |
| "learning_rate": 2.9401709401709404e-06, |
| "loss": 0.0005, |
| "step": 1177 |
| }, |
| { |
| "epoch": 9.579591836734695, |
| "grad_norm": 0.00590532599017024, |
| "learning_rate": 2.8717948717948717e-06, |
| "loss": 0.0001, |
| "step": 1178 |
| }, |
| { |
| "epoch": 9.587755102040816, |
| "grad_norm": 0.0025553421583026648, |
| "learning_rate": 2.803418803418804e-06, |
| "loss": 0.0, |
| "step": 1179 |
| }, |
| { |
| "epoch": 9.59591836734694, |
| "grad_norm": 0.005867726169526577, |
| "learning_rate": 2.7350427350427355e-06, |
| "loss": 0.0001, |
| "step": 1180 |
| }, |
| { |
| "epoch": 9.604081632653061, |
| "grad_norm": 0.0035446197725832462, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.0001, |
| "step": 1181 |
| }, |
| { |
| "epoch": 9.612244897959183, |
| "grad_norm": 0.0048804013058543205, |
| "learning_rate": 2.5982905982905985e-06, |
| "loss": 0.0, |
| "step": 1182 |
| }, |
| { |
| "epoch": 9.620408163265306, |
| "grad_norm": 0.0234014093875885, |
| "learning_rate": 2.52991452991453e-06, |
| "loss": 0.0, |
| "step": 1183 |
| }, |
| { |
| "epoch": 9.628571428571428, |
| "grad_norm": 0.014328445307910442, |
| "learning_rate": 2.461538461538462e-06, |
| "loss": 0.0002, |
| "step": 1184 |
| }, |
| { |
| "epoch": 9.636734693877552, |
| "grad_norm": 0.03956277295947075, |
| "learning_rate": 2.3931623931623937e-06, |
| "loss": 0.0018, |
| "step": 1185 |
| }, |
| { |
| "epoch": 9.644897959183673, |
| "grad_norm": 0.1734299510717392, |
| "learning_rate": 2.324786324786325e-06, |
| "loss": 0.0005, |
| "step": 1186 |
| }, |
| { |
| "epoch": 9.653061224489797, |
| "grad_norm": 0.0017454794142395258, |
| "learning_rate": 2.2564102564102566e-06, |
| "loss": 0.0, |
| "step": 1187 |
| }, |
| { |
| "epoch": 9.661224489795918, |
| "grad_norm": 0.009274955838918686, |
| "learning_rate": 2.1880341880341884e-06, |
| "loss": 0.0001, |
| "step": 1188 |
| }, |
| { |
| "epoch": 9.66938775510204, |
| "grad_norm": 0.045709025114774704, |
| "learning_rate": 2.1196581196581196e-06, |
| "loss": 0.0006, |
| "step": 1189 |
| }, |
| { |
| "epoch": 9.677551020408163, |
| "grad_norm": 0.044529132544994354, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 0.0019, |
| "step": 1190 |
| }, |
| { |
| "epoch": 9.685714285714285, |
| "grad_norm": 0.0007452021236531436, |
| "learning_rate": 1.982905982905983e-06, |
| "loss": 0.0, |
| "step": 1191 |
| }, |
| { |
| "epoch": 9.693877551020408, |
| "grad_norm": 0.004460840951651335, |
| "learning_rate": 1.9145299145299148e-06, |
| "loss": 0.0001, |
| "step": 1192 |
| }, |
| { |
| "epoch": 9.70204081632653, |
| "grad_norm": 0.004089562688022852, |
| "learning_rate": 1.8461538461538465e-06, |
| "loss": 0.0001, |
| "step": 1193 |
| }, |
| { |
| "epoch": 9.710204081632654, |
| "grad_norm": 0.01671615056693554, |
| "learning_rate": 1.777777777777778e-06, |
| "loss": 0.0001, |
| "step": 1194 |
| }, |
| { |
| "epoch": 9.718367346938775, |
| "grad_norm": 0.0166789498180151, |
| "learning_rate": 1.7094017094017097e-06, |
| "loss": 0.0003, |
| "step": 1195 |
| }, |
| { |
| "epoch": 9.726530612244899, |
| "grad_norm": 0.0030747223645448685, |
| "learning_rate": 1.6410256410256412e-06, |
| "loss": 0.0, |
| "step": 1196 |
| }, |
| { |
| "epoch": 9.73469387755102, |
| "grad_norm": 0.027723681181669235, |
| "learning_rate": 1.5726495726495727e-06, |
| "loss": 0.0005, |
| "step": 1197 |
| }, |
| { |
| "epoch": 9.742857142857144, |
| "grad_norm": 0.006767381448298693, |
| "learning_rate": 1.5042735042735044e-06, |
| "loss": 0.0001, |
| "step": 1198 |
| }, |
| { |
| "epoch": 9.751020408163265, |
| "grad_norm": 0.014646001160144806, |
| "learning_rate": 1.4358974358974359e-06, |
| "loss": 0.0003, |
| "step": 1199 |
| }, |
| { |
| "epoch": 9.759183673469387, |
| "grad_norm": 0.005182509310543537, |
| "learning_rate": 1.3675213675213678e-06, |
| "loss": 0.0001, |
| "step": 1200 |
| }, |
| { |
| "epoch": 9.76734693877551, |
| "grad_norm": 0.002610682277008891, |
| "learning_rate": 1.2991452991452993e-06, |
| "loss": 0.0, |
| "step": 1201 |
| }, |
| { |
| "epoch": 9.775510204081632, |
| "grad_norm": 0.026194339618086815, |
| "learning_rate": 1.230769230769231e-06, |
| "loss": 0.002, |
| "step": 1202 |
| }, |
| { |
| "epoch": 9.783673469387756, |
| "grad_norm": 0.005605866201221943, |
| "learning_rate": 1.1623931623931625e-06, |
| "loss": 0.0, |
| "step": 1203 |
| }, |
| { |
| "epoch": 9.791836734693877, |
| "grad_norm": 0.06640844792127609, |
| "learning_rate": 1.0940170940170942e-06, |
| "loss": 0.0033, |
| "step": 1204 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 0.006540970876812935, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 0.0001, |
| "step": 1205 |
| }, |
| { |
| "epoch": 9.808163265306122, |
| "grad_norm": 0.005154821090400219, |
| "learning_rate": 9.572649572649574e-07, |
| "loss": 0.0001, |
| "step": 1206 |
| }, |
| { |
| "epoch": 9.816326530612244, |
| "grad_norm": 0.0638870820403099, |
| "learning_rate": 8.88888888888889e-07, |
| "loss": 0.0008, |
| "step": 1207 |
| }, |
| { |
| "epoch": 9.824489795918367, |
| "grad_norm": 0.022943247109651566, |
| "learning_rate": 8.205128205128206e-07, |
| "loss": 0.001, |
| "step": 1208 |
| }, |
| { |
| "epoch": 9.83265306122449, |
| "grad_norm": 0.020996147766709328, |
| "learning_rate": 7.521367521367522e-07, |
| "loss": 0.0001, |
| "step": 1209 |
| }, |
| { |
| "epoch": 9.840816326530613, |
| "grad_norm": 0.05984543636441231, |
| "learning_rate": 6.837606837606839e-07, |
| "loss": 0.0045, |
| "step": 1210 |
| }, |
| { |
| "epoch": 9.848979591836734, |
| "grad_norm": 0.008096975274384022, |
| "learning_rate": 6.153846153846155e-07, |
| "loss": 0.0, |
| "step": 1211 |
| }, |
| { |
| "epoch": 9.857142857142858, |
| "grad_norm": 0.0041329097002744675, |
| "learning_rate": 5.470085470085471e-07, |
| "loss": 0.0, |
| "step": 1212 |
| }, |
| { |
| "epoch": 9.86530612244898, |
| "grad_norm": 0.032030075788497925, |
| "learning_rate": 4.786324786324787e-07, |
| "loss": 0.0004, |
| "step": 1213 |
| }, |
| { |
| "epoch": 9.873469387755103, |
| "grad_norm": 0.029202815145254135, |
| "learning_rate": 4.102564102564103e-07, |
| "loss": 0.0012, |
| "step": 1214 |
| }, |
| { |
| "epoch": 9.881632653061224, |
| "grad_norm": 0.2898118495941162, |
| "learning_rate": 3.4188034188034194e-07, |
| "loss": 0.0014, |
| "step": 1215 |
| }, |
| { |
| "epoch": 9.889795918367348, |
| "grad_norm": 0.016399463638663292, |
| "learning_rate": 2.7350427350427354e-07, |
| "loss": 0.0002, |
| "step": 1216 |
| }, |
| { |
| "epoch": 9.89795918367347, |
| "grad_norm": 0.06763066351413727, |
| "learning_rate": 2.0512820512820514e-07, |
| "loss": 0.0015, |
| "step": 1217 |
| }, |
| { |
| "epoch": 9.906122448979591, |
| "grad_norm": 0.008314032107591629, |
| "learning_rate": 1.3675213675213677e-07, |
| "loss": 0.0001, |
| "step": 1218 |
| }, |
| { |
| "epoch": 9.914285714285715, |
| "grad_norm": 0.0004530400619842112, |
| "learning_rate": 6.837606837606839e-08, |
| "loss": 0.0, |
| "step": 1219 |
| }, |
| { |
| "epoch": 9.922448979591836, |
| "grad_norm": 0.017806239426136017, |
| "learning_rate": 0.0, |
| "loss": 0.0002, |
| "step": 1220 |
| }, |
| { |
| "epoch": 9.922448979591836, |
| "eval_accuracy": 0.9429838867506034, |
| "eval_f1": 0.9425042335887034, |
| "eval_loss": 0.2353067696094513, |
| "eval_precision": 0.9427915319091013, |
| "eval_recall": 0.9429838867506034, |
| "eval_runtime": 24.893, |
| "eval_samples_per_second": 17.515, |
| "eval_steps_per_second": 17.515, |
| "step": 1220 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1220, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.581551776666747e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|