| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 972, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003092783505154639, |
| "grad_norm": 0.7220329009380105, |
| "learning_rate": 0.0, |
| "loss": 0.4422, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006185567010309278, |
| "grad_norm": 0.7035020283112076, |
| "learning_rate": 1.0204081632653061e-07, |
| "loss": 0.4832, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009278350515463918, |
| "grad_norm": 0.8737722015358887, |
| "learning_rate": 2.0408163265306121e-07, |
| "loss": 0.4732, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.012371134020618556, |
| "grad_norm": 0.7779243721957012, |
| "learning_rate": 3.0612244897959183e-07, |
| "loss": 0.4737, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.015463917525773196, |
| "grad_norm": 0.7329444808774586, |
| "learning_rate": 4.0816326530612243e-07, |
| "loss": 0.4426, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.018556701030927835, |
| "grad_norm": 0.7158028965170649, |
| "learning_rate": 5.102040816326531e-07, |
| "loss": 0.4941, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.021649484536082474, |
| "grad_norm": 0.6888075254399494, |
| "learning_rate": 6.122448979591837e-07, |
| "loss": 0.4111, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.024742268041237112, |
| "grad_norm": 0.7427508164973907, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.5104, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.027835051546391754, |
| "grad_norm": 0.6277972002358595, |
| "learning_rate": 8.163265306122449e-07, |
| "loss": 0.4448, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.030927835051546393, |
| "grad_norm": 0.6615474642249266, |
| "learning_rate": 9.183673469387756e-07, |
| "loss": 0.4766, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03402061855670103, |
| "grad_norm": 0.6267344916879954, |
| "learning_rate": 1.0204081632653063e-06, |
| "loss": 0.4347, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03711340206185567, |
| "grad_norm": 1.1775850456355763, |
| "learning_rate": 1.122448979591837e-06, |
| "loss": 0.5167, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04020618556701031, |
| "grad_norm": 0.6941660855089555, |
| "learning_rate": 1.2244897959183673e-06, |
| "loss": 0.4782, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04329896907216495, |
| "grad_norm": 0.7691560992409402, |
| "learning_rate": 1.3265306122448982e-06, |
| "loss": 0.4624, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04639175257731959, |
| "grad_norm": 0.6661388733908383, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.4674, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.049484536082474224, |
| "grad_norm": 0.5788791202729546, |
| "learning_rate": 1.5306122448979593e-06, |
| "loss": 0.4704, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05257731958762887, |
| "grad_norm": 0.5025256108670897, |
| "learning_rate": 1.6326530612244897e-06, |
| "loss": 0.4672, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05567010309278351, |
| "grad_norm": 1.2671386866538892, |
| "learning_rate": 1.7346938775510206e-06, |
| "loss": 0.4322, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.058762886597938144, |
| "grad_norm": 0.43553685659977487, |
| "learning_rate": 1.8367346938775512e-06, |
| "loss": 0.4041, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.061855670103092786, |
| "grad_norm": 0.5729928987299671, |
| "learning_rate": 1.938775510204082e-06, |
| "loss": 0.5054, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06494845360824743, |
| "grad_norm": 0.6645449546643061, |
| "learning_rate": 2.0408163265306125e-06, |
| "loss": 0.438, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06804123711340206, |
| "grad_norm": 0.5176445382179179, |
| "learning_rate": 2.1428571428571427e-06, |
| "loss": 0.4382, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0711340206185567, |
| "grad_norm": 0.5262967794748546, |
| "learning_rate": 2.244897959183674e-06, |
| "loss": 0.4205, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07422680412371134, |
| "grad_norm": 0.7267621375796322, |
| "learning_rate": 2.3469387755102044e-06, |
| "loss": 0.5079, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07731958762886598, |
| "grad_norm": 0.4252212798888888, |
| "learning_rate": 2.4489795918367347e-06, |
| "loss": 0.4396, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08041237113402062, |
| "grad_norm": 0.4536131271915926, |
| "learning_rate": 2.5510204081632657e-06, |
| "loss": 0.4384, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08350515463917525, |
| "grad_norm": 0.45116014661712117, |
| "learning_rate": 2.6530612244897964e-06, |
| "loss": 0.4402, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0865979381443299, |
| "grad_norm": 0.4568728052221468, |
| "learning_rate": 2.7551020408163266e-06, |
| "loss": 0.4648, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08969072164948454, |
| "grad_norm": 0.43137876910461337, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.4402, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09278350515463918, |
| "grad_norm": 0.4211788965935997, |
| "learning_rate": 2.959183673469388e-06, |
| "loss": 0.4229, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09587628865979382, |
| "grad_norm": 0.4337448222173674, |
| "learning_rate": 3.0612244897959185e-06, |
| "loss": 0.4128, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09896907216494845, |
| "grad_norm": 0.3729189572852763, |
| "learning_rate": 3.1632653061224496e-06, |
| "loss": 0.4159, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10206185567010309, |
| "grad_norm": 0.4231908442749754, |
| "learning_rate": 3.2653061224489794e-06, |
| "loss": 0.431, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10515463917525773, |
| "grad_norm": 0.409077818435389, |
| "learning_rate": 3.3673469387755105e-06, |
| "loss": 0.4525, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10824742268041238, |
| "grad_norm": 0.34021413683338564, |
| "learning_rate": 3.469387755102041e-06, |
| "loss": 0.4116, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11134020618556702, |
| "grad_norm": 0.3591499421696542, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 0.4299, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11443298969072165, |
| "grad_norm": 0.4659218552375075, |
| "learning_rate": 3.6734693877551024e-06, |
| "loss": 0.4215, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.11752577319587629, |
| "grad_norm": 0.36553772280054864, |
| "learning_rate": 3.7755102040816327e-06, |
| "loss": 0.4367, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12061855670103093, |
| "grad_norm": 0.4224042486029835, |
| "learning_rate": 3.877551020408164e-06, |
| "loss": 0.4438, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.12371134020618557, |
| "grad_norm": 0.3971996014459553, |
| "learning_rate": 3.979591836734694e-06, |
| "loss": 0.4487, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1268041237113402, |
| "grad_norm": 0.35284570413422267, |
| "learning_rate": 4.081632653061225e-06, |
| "loss": 0.4039, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12989690721649486, |
| "grad_norm": 0.4493870470268664, |
| "learning_rate": 4.183673469387755e-06, |
| "loss": 0.472, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.13298969072164948, |
| "grad_norm": 0.5342463347549146, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 0.4162, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1360824742268041, |
| "grad_norm": 0.4174496136137149, |
| "learning_rate": 4.3877551020408165e-06, |
| "loss": 0.4095, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.13917525773195877, |
| "grad_norm": 0.4384141653564562, |
| "learning_rate": 4.489795918367348e-06, |
| "loss": 0.4054, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1422680412371134, |
| "grad_norm": 0.40013936895317725, |
| "learning_rate": 4.591836734693878e-06, |
| "loss": 0.4541, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.14536082474226805, |
| "grad_norm": 0.3480277328089883, |
| "learning_rate": 4.693877551020409e-06, |
| "loss": 0.4374, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.14845360824742268, |
| "grad_norm": 0.42726168307574297, |
| "learning_rate": 4.795918367346939e-06, |
| "loss": 0.4039, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1515463917525773, |
| "grad_norm": 0.4538463173366489, |
| "learning_rate": 4.897959183673469e-06, |
| "loss": 0.4666, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.15463917525773196, |
| "grad_norm": 0.40284353314825155, |
| "learning_rate": 5e-06, |
| "loss": 0.4622, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1577319587628866, |
| "grad_norm": 0.44910131131991515, |
| "learning_rate": 5.1020408163265315e-06, |
| "loss": 0.3969, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.16082474226804125, |
| "grad_norm": 0.44134065441696, |
| "learning_rate": 5.204081632653062e-06, |
| "loss": 0.4969, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.16391752577319588, |
| "grad_norm": 0.35918306588103044, |
| "learning_rate": 5.306122448979593e-06, |
| "loss": 0.424, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1670103092783505, |
| "grad_norm": 0.33710757989510826, |
| "learning_rate": 5.408163265306123e-06, |
| "loss": 0.4266, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.17010309278350516, |
| "grad_norm": 0.434215578187321, |
| "learning_rate": 5.510204081632653e-06, |
| "loss": 0.4376, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1731958762886598, |
| "grad_norm": 0.4260116466909397, |
| "learning_rate": 5.6122448979591834e-06, |
| "loss": 0.4191, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.17628865979381445, |
| "grad_norm": 0.559318489706675, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.447, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.17938144329896907, |
| "grad_norm": 0.3906173126178787, |
| "learning_rate": 5.816326530612246e-06, |
| "loss": 0.4225, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1824742268041237, |
| "grad_norm": 0.4742346991841608, |
| "learning_rate": 5.918367346938776e-06, |
| "loss": 0.4458, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.18556701030927836, |
| "grad_norm": 0.4123045030138927, |
| "learning_rate": 6.020408163265307e-06, |
| "loss": 0.4497, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18865979381443299, |
| "grad_norm": 0.35048792524854255, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 0.4158, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.19175257731958764, |
| "grad_norm": 0.4169124872120747, |
| "learning_rate": 6.224489795918368e-06, |
| "loss": 0.4283, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.19484536082474227, |
| "grad_norm": 0.41029713183541644, |
| "learning_rate": 6.326530612244899e-06, |
| "loss": 0.4246, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1979381443298969, |
| "grad_norm": 0.5337463704313201, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 0.4433, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.20103092783505155, |
| "grad_norm": 0.3933430581995637, |
| "learning_rate": 6.530612244897959e-06, |
| "loss": 0.3966, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.20412371134020618, |
| "grad_norm": 0.4582694378054699, |
| "learning_rate": 6.63265306122449e-06, |
| "loss": 0.4432, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.20721649484536084, |
| "grad_norm": 0.48765468978039683, |
| "learning_rate": 6.734693877551021e-06, |
| "loss": 0.4393, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.21030927835051547, |
| "grad_norm": 0.39676839862520125, |
| "learning_rate": 6.836734693877551e-06, |
| "loss": 0.4623, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2134020618556701, |
| "grad_norm": 0.4482589398534172, |
| "learning_rate": 6.938775510204082e-06, |
| "loss": 0.4101, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.21649484536082475, |
| "grad_norm": 0.3624149691888066, |
| "learning_rate": 7.0408163265306125e-06, |
| "loss": 0.4036, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21958762886597938, |
| "grad_norm": 0.4295048480379611, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.403, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.22268041237113403, |
| "grad_norm": 0.39682111123734054, |
| "learning_rate": 7.244897959183675e-06, |
| "loss": 0.4229, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.22577319587628866, |
| "grad_norm": 0.4154676798215689, |
| "learning_rate": 7.346938775510205e-06, |
| "loss": 0.4131, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2288659793814433, |
| "grad_norm": 0.3765403100349772, |
| "learning_rate": 7.448979591836736e-06, |
| "loss": 0.4517, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.23195876288659795, |
| "grad_norm": 0.4768545909661036, |
| "learning_rate": 7.551020408163265e-06, |
| "loss": 0.417, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.23505154639175257, |
| "grad_norm": 0.37769473717411983, |
| "learning_rate": 7.653061224489796e-06, |
| "loss": 0.4568, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2381443298969072, |
| "grad_norm": 0.38498631713602566, |
| "learning_rate": 7.755102040816327e-06, |
| "loss": 0.4334, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.24123711340206186, |
| "grad_norm": 0.3174598983048248, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 0.3881, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2443298969072165, |
| "grad_norm": 0.36192435295437103, |
| "learning_rate": 7.959183673469388e-06, |
| "loss": 0.3982, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.24742268041237114, |
| "grad_norm": 0.38008377420730255, |
| "learning_rate": 8.06122448979592e-06, |
| "loss": 0.4129, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.25051546391752577, |
| "grad_norm": 0.37093712326106304, |
| "learning_rate": 8.16326530612245e-06, |
| "loss": 0.3955, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2536082474226804, |
| "grad_norm": 0.3245693253207308, |
| "learning_rate": 8.26530612244898e-06, |
| "loss": 0.3779, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.256701030927835, |
| "grad_norm": 0.40484115388567915, |
| "learning_rate": 8.36734693877551e-06, |
| "loss": 0.4201, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2597938144329897, |
| "grad_norm": 0.45349807280259075, |
| "learning_rate": 8.469387755102042e-06, |
| "loss": 0.407, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.26288659793814434, |
| "grad_norm": 0.5134134442864399, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.4503, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.26597938144329897, |
| "grad_norm": 0.600117050646678, |
| "learning_rate": 8.673469387755103e-06, |
| "loss": 0.4274, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2690721649484536, |
| "grad_norm": 0.4164938359408504, |
| "learning_rate": 8.775510204081633e-06, |
| "loss": 0.4473, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2721649484536082, |
| "grad_norm": 0.3648277119395248, |
| "learning_rate": 8.877551020408163e-06, |
| "loss": 0.4435, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2752577319587629, |
| "grad_norm": 0.42584735229761017, |
| "learning_rate": 8.979591836734695e-06, |
| "loss": 0.4637, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.27835051546391754, |
| "grad_norm": 0.4893392978187684, |
| "learning_rate": 9.081632653061225e-06, |
| "loss": 0.4608, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.28144329896907216, |
| "grad_norm": 0.40017674990337454, |
| "learning_rate": 9.183673469387756e-06, |
| "loss": 0.4463, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2845360824742268, |
| "grad_norm": 0.3765261542973935, |
| "learning_rate": 9.285714285714288e-06, |
| "loss": 0.4219, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2876288659793814, |
| "grad_norm": 0.37518752775293795, |
| "learning_rate": 9.387755102040818e-06, |
| "loss": 0.4244, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.2907216494845361, |
| "grad_norm": 0.4473276537814246, |
| "learning_rate": 9.489795918367348e-06, |
| "loss": 0.4572, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.29381443298969073, |
| "grad_norm": 0.5287937607719012, |
| "learning_rate": 9.591836734693878e-06, |
| "loss": 0.4772, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.29690721649484536, |
| "grad_norm": 0.43309575827009483, |
| "learning_rate": 9.693877551020408e-06, |
| "loss": 0.4366, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.3868727014101142, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 0.4408, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3030927835051546, |
| "grad_norm": 0.3840116443447955, |
| "learning_rate": 9.89795918367347e-06, |
| "loss": 0.3959, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3061855670103093, |
| "grad_norm": 0.3332314789253363, |
| "learning_rate": 1e-05, |
| "loss": 0.4098, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.30927835051546393, |
| "grad_norm": 0.36075607939522497, |
| "learning_rate": 9.999967698966278e-06, |
| "loss": 0.4226, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.31237113402061856, |
| "grad_norm": 0.4392150875918316, |
| "learning_rate": 9.999870796282452e-06, |
| "loss": 0.4338, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3154639175257732, |
| "grad_norm": 0.43798219013201406, |
| "learning_rate": 9.999709293200546e-06, |
| "loss": 0.4664, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3185567010309278, |
| "grad_norm": 0.3364109046254725, |
| "learning_rate": 9.999483191807245e-06, |
| "loss": 0.405, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3216494845360825, |
| "grad_norm": 0.4033274426634507, |
| "learning_rate": 9.999192495023873e-06, |
| "loss": 0.4612, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3247422680412371, |
| "grad_norm": 1.264002600498985, |
| "learning_rate": 9.998837206606355e-06, |
| "loss": 0.3965, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.32783505154639175, |
| "grad_norm": 0.36417937925451055, |
| "learning_rate": 9.998417331145161e-06, |
| "loss": 0.4417, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3309278350515464, |
| "grad_norm": 0.4023715037416989, |
| "learning_rate": 9.997932874065259e-06, |
| "loss": 0.4406, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.334020618556701, |
| "grad_norm": 0.42289969931350985, |
| "learning_rate": 9.99738384162603e-06, |
| "loss": 0.4116, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3371134020618557, |
| "grad_norm": 0.35713977786014356, |
| "learning_rate": 9.996770240921205e-06, |
| "loss": 0.4185, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3402061855670103, |
| "grad_norm": 0.3670739072941028, |
| "learning_rate": 9.996092079878757e-06, |
| "loss": 0.4022, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.34329896907216495, |
| "grad_norm": 0.3870582401338966, |
| "learning_rate": 9.995349367260807e-06, |
| "loss": 0.4577, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3463917525773196, |
| "grad_norm": 0.32993992408348866, |
| "learning_rate": 9.994542112663507e-06, |
| "loss": 0.3801, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3494845360824742, |
| "grad_norm": 0.345855562846639, |
| "learning_rate": 9.993670326516924e-06, |
| "loss": 0.461, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3525773195876289, |
| "grad_norm": 0.4104212434960431, |
| "learning_rate": 9.992734020084892e-06, |
| "loss": 0.3999, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3556701030927835, |
| "grad_norm": 0.6634908671468979, |
| "learning_rate": 9.991733205464882e-06, |
| "loss": 0.4251, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.35876288659793815, |
| "grad_norm": 0.2947924939452513, |
| "learning_rate": 9.990667895587827e-06, |
| "loss": 0.3913, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3618556701030928, |
| "grad_norm": 0.3478221537413288, |
| "learning_rate": 9.989538104217975e-06, |
| "loss": 0.3926, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3649484536082474, |
| "grad_norm": 0.3265206725118031, |
| "learning_rate": 9.988343845952697e-06, |
| "loss": 0.3703, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3680412371134021, |
| "grad_norm": 0.3720856553833873, |
| "learning_rate": 9.987085136222302e-06, |
| "loss": 0.3786, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3711340206185567, |
| "grad_norm": 0.43621656093852407, |
| "learning_rate": 9.985761991289841e-06, |
| "loss": 0.4315, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.37422680412371134, |
| "grad_norm": 0.37899203918553465, |
| "learning_rate": 9.984374428250894e-06, |
| "loss": 0.3953, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.37731958762886597, |
| "grad_norm": 0.3609003641884193, |
| "learning_rate": 9.98292246503335e-06, |
| "loss": 0.4319, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3804123711340206, |
| "grad_norm": 0.41971555363580315, |
| "learning_rate": 9.981406120397172e-06, |
| "loss": 0.4421, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3835051546391753, |
| "grad_norm": 0.3977990495669762, |
| "learning_rate": 9.979825413934162e-06, |
| "loss": 0.4188, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3865979381443299, |
| "grad_norm": 0.3987336298164563, |
| "learning_rate": 9.9781803660677e-06, |
| "loss": 0.4371, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.38969072164948454, |
| "grad_norm": 0.35568515500000975, |
| "learning_rate": 9.976470998052484e-06, |
| "loss": 0.41, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.39278350515463917, |
| "grad_norm": 0.468475729583225, |
| "learning_rate": 9.974697331974255e-06, |
| "loss": 0.4802, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3958762886597938, |
| "grad_norm": 0.442269906201004, |
| "learning_rate": 9.972859390749516e-06, |
| "loss": 0.4967, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3989690721649485, |
| "grad_norm": 0.424830726205854, |
| "learning_rate": 9.970957198125224e-06, |
| "loss": 0.4099, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.4020618556701031, |
| "grad_norm": 0.3901734315345603, |
| "learning_rate": 9.968990778678493e-06, |
| "loss": 0.405, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.40515463917525774, |
| "grad_norm": 0.38051479001704774, |
| "learning_rate": 9.966960157816279e-06, |
| "loss": 0.4329, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.40824742268041236, |
| "grad_norm": 0.38980392969505734, |
| "learning_rate": 9.964865361775042e-06, |
| "loss": 0.4273, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.411340206185567, |
| "grad_norm": 0.4082796222777848, |
| "learning_rate": 9.962706417620413e-06, |
| "loss": 0.4248, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4144329896907217, |
| "grad_norm": 0.3455050558847363, |
| "learning_rate": 9.960483353246843e-06, |
| "loss": 0.4163, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4175257731958763, |
| "grad_norm": 0.41108684679376056, |
| "learning_rate": 9.958196197377242e-06, |
| "loss": 0.4468, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.42061855670103093, |
| "grad_norm": 0.3848356328809587, |
| "learning_rate": 9.95584497956261e-06, |
| "loss": 0.392, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.42371134020618556, |
| "grad_norm": 0.3760869314548746, |
| "learning_rate": 9.953429730181653e-06, |
| "loss": 0.4666, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.4268041237113402, |
| "grad_norm": 0.34588003209988005, |
| "learning_rate": 9.950950480440396e-06, |
| "loss": 0.4416, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4298969072164949, |
| "grad_norm": 0.3735210250651018, |
| "learning_rate": 9.948407262371764e-06, |
| "loss": 0.3973, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.4329896907216495, |
| "grad_norm": 0.36920833160292676, |
| "learning_rate": 9.945800108835191e-06, |
| "loss": 0.4235, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.43608247422680413, |
| "grad_norm": 0.4064849366706592, |
| "learning_rate": 9.943129053516176e-06, |
| "loss": 0.4391, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.43917525773195876, |
| "grad_norm": 0.32274388016168765, |
| "learning_rate": 9.940394130925858e-06, |
| "loss": 0.401, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4422680412371134, |
| "grad_norm": 0.3514480510132677, |
| "learning_rate": 9.93759537640057e-06, |
| "loss": 0.4147, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.44536082474226807, |
| "grad_norm": 0.3433080243743223, |
| "learning_rate": 9.934732826101378e-06, |
| "loss": 0.426, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4484536082474227, |
| "grad_norm": 0.43791164185777237, |
| "learning_rate": 9.931806517013612e-06, |
| "loss": 0.4596, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4515463917525773, |
| "grad_norm": 0.370180398918842, |
| "learning_rate": 9.928816486946398e-06, |
| "loss": 0.3997, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.45463917525773195, |
| "grad_norm": 0.3272727921118669, |
| "learning_rate": 9.925762774532162e-06, |
| "loss": 0.4272, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4577319587628866, |
| "grad_norm": 0.4637056549954698, |
| "learning_rate": 9.922645419226128e-06, |
| "loss": 0.4718, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4608247422680412, |
| "grad_norm": 0.7294401083647661, |
| "learning_rate": 9.919464461305817e-06, |
| "loss": 0.416, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4639175257731959, |
| "grad_norm": 0.4103431625946844, |
| "learning_rate": 9.916219941870519e-06, |
| "loss": 0.418, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4670103092783505, |
| "grad_norm": 0.3519149712001119, |
| "learning_rate": 9.912911902840771e-06, |
| "loss": 0.4014, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.47010309278350515, |
| "grad_norm": 0.41537482262548897, |
| "learning_rate": 9.909540386957801e-06, |
| "loss": 0.4462, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4731958762886598, |
| "grad_norm": 0.45138202485027723, |
| "learning_rate": 9.90610543778299e-06, |
| "loss": 0.4613, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4762886597938144, |
| "grad_norm": 0.7949633754886084, |
| "learning_rate": 9.9026070996973e-06, |
| "loss": 0.4492, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4793814432989691, |
| "grad_norm": 0.4084320907683219, |
| "learning_rate": 9.899045417900709e-06, |
| "loss": 0.4524, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4824742268041237, |
| "grad_norm": 0.3097088652783349, |
| "learning_rate": 9.895420438411616e-06, |
| "loss": 0.3947, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.48556701030927835, |
| "grad_norm": 0.500006282123083, |
| "learning_rate": 9.891732208066254e-06, |
| "loss": 0.4033, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.488659793814433, |
| "grad_norm": 0.3682651386422796, |
| "learning_rate": 9.887980774518085e-06, |
| "loss": 0.399, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4917525773195876, |
| "grad_norm": 0.39684906849089263, |
| "learning_rate": 9.884166186237185e-06, |
| "loss": 0.4304, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4948453608247423, |
| "grad_norm": 0.4519243281046066, |
| "learning_rate": 9.880288492509606e-06, |
| "loss": 0.48, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4979381443298969, |
| "grad_norm": 0.3524909210389552, |
| "learning_rate": 9.876347743436758e-06, |
| "loss": 0.3941, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5010309278350515, |
| "grad_norm": 0.4045327844967625, |
| "learning_rate": 9.872343989934747e-06, |
| "loss": 0.4384, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5041237113402062, |
| "grad_norm": 0.3717528508433992, |
| "learning_rate": 9.868277283733725e-06, |
| "loss": 0.4573, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5072164948453608, |
| "grad_norm": 0.4190213137059769, |
| "learning_rate": 9.864147677377218e-06, |
| "loss": 0.4404, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5103092783505154, |
| "grad_norm": 0.4186463041983309, |
| "learning_rate": 9.859955224221446e-06, |
| "loss": 0.4126, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.51340206185567, |
| "grad_norm": 0.38404866212835087, |
| "learning_rate": 9.855699978434639e-06, |
| "loss": 0.449, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5164948453608248, |
| "grad_norm": 0.35653907687833253, |
| "learning_rate": 9.85138199499633e-06, |
| "loss": 0.3943, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5195876288659794, |
| "grad_norm": 0.42289422396642834, |
| "learning_rate": 9.847001329696653e-06, |
| "loss": 0.4413, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.522680412371134, |
| "grad_norm": 0.40627594296304126, |
| "learning_rate": 9.842558039135612e-06, |
| "loss": 0.4244, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5257731958762887, |
| "grad_norm": 0.452599947941535, |
| "learning_rate": 9.838052180722362e-06, |
| "loss": 0.4264, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5288659793814433, |
| "grad_norm": 0.3882521773969115, |
| "learning_rate": 9.833483812674453e-06, |
| "loss": 0.4086, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5319587628865979, |
| "grad_norm": 0.31860129778076346, |
| "learning_rate": 9.828852994017091e-06, |
| "loss": 0.4264, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5350515463917526, |
| "grad_norm": 0.372117789676078, |
| "learning_rate": 9.82415978458237e-06, |
| "loss": 0.4023, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5381443298969072, |
| "grad_norm": 0.5036778726839901, |
| "learning_rate": 9.819404245008492e-06, |
| "loss": 0.3944, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5412371134020618, |
| "grad_norm": 0.3674447495317301, |
| "learning_rate": 9.814586436738998e-06, |
| "loss": 0.3923, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5443298969072164, |
| "grad_norm": 0.3731434238979774, |
| "learning_rate": 9.80970642202196e-06, |
| "loss": 0.4205, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5474226804123712, |
| "grad_norm": 0.41627568220258476, |
| "learning_rate": 9.80476426390919e-06, |
| "loss": 0.3717, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5505154639175258, |
| "grad_norm": 1.0211708857535742, |
| "learning_rate": 9.799760026255412e-06, |
| "loss": 0.418, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5536082474226804, |
| "grad_norm": 0.38193863125239336, |
| "learning_rate": 9.794693773717445e-06, |
| "loss": 0.4458, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5567010309278351, |
| "grad_norm": 0.47696480909517025, |
| "learning_rate": 9.789565571753368e-06, |
| "loss": 0.4631, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5597938144329897, |
| "grad_norm": 0.40610575547480393, |
| "learning_rate": 9.78437548662167e-06, |
| "loss": 0.4083, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5628865979381443, |
| "grad_norm": 0.3304252568122808, |
| "learning_rate": 9.779123585380398e-06, |
| "loss": 0.439, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.565979381443299, |
| "grad_norm": 0.3272419825098461, |
| "learning_rate": 9.773809935886287e-06, |
| "loss": 0.3937, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5690721649484536, |
| "grad_norm": 0.3587831705215171, |
| "learning_rate": 9.768434606793884e-06, |
| "loss": 0.4018, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5721649484536082, |
| "grad_norm": 0.443615507107455, |
| "learning_rate": 9.762997667554666e-06, |
| "loss": 0.4092, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5752577319587628, |
| "grad_norm": 0.37884710599971766, |
| "learning_rate": 9.757499188416135e-06, |
| "loss": 0.3896, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5783505154639176, |
| "grad_norm": 0.3856243077642201, |
| "learning_rate": 9.751939240420916e-06, |
| "loss": 0.4444, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5814432989690722, |
| "grad_norm": 0.3871002878329792, |
| "learning_rate": 9.746317895405835e-06, |
| "loss": 0.4652, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5845360824742268, |
| "grad_norm": 0.40509810646221905, |
| "learning_rate": 9.740635226000994e-06, |
| "loss": 0.4397, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5876288659793815, |
| "grad_norm": 0.4160183357738651, |
| "learning_rate": 9.734891305628831e-06, |
| "loss": 0.4314, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5907216494845361, |
| "grad_norm": 0.43975201212553466, |
| "learning_rate": 9.729086208503174e-06, |
| "loss": 0.4575, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5938144329896907, |
| "grad_norm": 0.484435973107823, |
| "learning_rate": 9.723220009628278e-06, |
| "loss": 0.4178, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5969072164948453, |
| "grad_norm": 0.34862743794307083, |
| "learning_rate": 9.717292784797854e-06, |
| "loss": 0.3981, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.37978393661957066, |
| "learning_rate": 9.711304610594104e-06, |
| "loss": 0.4521, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6030927835051546, |
| "grad_norm": 0.3542607634854072, |
| "learning_rate": 9.70525556438671e-06, |
| "loss": 0.434, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6061855670103092, |
| "grad_norm": 0.3507367379175751, |
| "learning_rate": 9.699145724331851e-06, |
| "loss": 0.4462, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6092783505154639, |
| "grad_norm": 0.38846079113543686, |
| "learning_rate": 9.692975169371189e-06, |
| "loss": 0.4112, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6123711340206186, |
| "grad_norm": 0.3761006338373534, |
| "learning_rate": 9.686743979230844e-06, |
| "loss": 0.4542, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6154639175257732, |
| "grad_norm": 0.37798748643860347, |
| "learning_rate": 9.68045223442037e-06, |
| "loss": 0.3916, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6185567010309279, |
| "grad_norm": 0.3762540406851515, |
| "learning_rate": 9.67410001623171e-06, |
| "loss": 0.4026, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6216494845360825, |
| "grad_norm": 0.4064096939338082, |
| "learning_rate": 9.66768740673815e-06, |
| "loss": 0.4475, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6247422680412371, |
| "grad_norm": 0.40087767969653776, |
| "learning_rate": 9.661214488793257e-06, |
| "loss": 0.4158, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6278350515463917, |
| "grad_norm": 0.3542486717999693, |
| "learning_rate": 9.654681346029809e-06, |
| "loss": 0.4163, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6309278350515464, |
| "grad_norm": 0.41164735599886176, |
| "learning_rate": 9.648088062858707e-06, |
| "loss": 0.4334, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.634020618556701, |
| "grad_norm": 0.3750735185215337, |
| "learning_rate": 9.6414347244679e-06, |
| "loss": 0.3863, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6371134020618556, |
| "grad_norm": 0.4017716434148873, |
| "learning_rate": 9.63472141682127e-06, |
| "loss": 0.4541, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6402061855670103, |
| "grad_norm": 0.3482809621069407, |
| "learning_rate": 9.627948226657527e-06, |
| "loss": 0.4196, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.643298969072165, |
| "grad_norm": 0.41831555099933715, |
| "learning_rate": 9.62111524148909e-06, |
| "loss": 0.4261, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6463917525773196, |
| "grad_norm": 0.3376522288825046, |
| "learning_rate": 9.61422254960095e-06, |
| "loss": 0.4191, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6494845360824743, |
| "grad_norm": 0.4086079442501632, |
| "learning_rate": 9.60727024004954e-06, |
| "loss": 0.4254, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6525773195876289, |
| "grad_norm": 0.3554579587789702, |
| "learning_rate": 9.60025840266157e-06, |
| "loss": 0.4514, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6556701030927835, |
| "grad_norm": 0.5797896520598668, |
| "learning_rate": 9.593187128032882e-06, |
| "loss": 0.4812, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6587628865979381, |
| "grad_norm": 0.3872271636430896, |
| "learning_rate": 9.586056507527266e-06, |
| "loss": 0.419, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6618556701030928, |
| "grad_norm": 0.38821488571945145, |
| "learning_rate": 9.578866633275289e-06, |
| "loss": 0.42, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6649484536082474, |
| "grad_norm": 0.4813854082108463, |
| "learning_rate": 9.571617598173097e-06, |
| "loss": 0.4325, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.668041237113402, |
| "grad_norm": 0.3627936880248002, |
| "learning_rate": 9.564309495881221e-06, |
| "loss": 0.392, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6711340206185566, |
| "grad_norm": 0.37734221944263685, |
| "learning_rate": 9.556942420823368e-06, |
| "loss": 0.4246, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6742268041237114, |
| "grad_norm": 0.4004639252719525, |
| "learning_rate": 9.549516468185191e-06, |
| "loss": 0.412, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.677319587628866, |
| "grad_norm": 0.4526596384689256, |
| "learning_rate": 9.542031733913069e-06, |
| "loss": 0.4792, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6804123711340206, |
| "grad_norm": 0.4390731617883788, |
| "learning_rate": 9.534488314712863e-06, |
| "loss": 0.4421, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6835051546391753, |
| "grad_norm": 0.4007337258521647, |
| "learning_rate": 9.52688630804867e-06, |
| "loss": 0.4404, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6865979381443299, |
| "grad_norm": 0.3915553551665069, |
| "learning_rate": 9.519225812141556e-06, |
| "loss": 0.393, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6896907216494845, |
| "grad_norm": 0.341094849828294, |
| "learning_rate": 9.511506925968302e-06, |
| "loss": 0.4145, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6927835051546392, |
| "grad_norm": 0.413391987710813, |
| "learning_rate": 9.503729749260101e-06, |
| "loss": 0.4449, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6958762886597938, |
| "grad_norm": 0.3876076479558053, |
| "learning_rate": 9.4958943825013e-06, |
| "loss": 0.43, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6989690721649484, |
| "grad_norm": 0.35907008562608367, |
| "learning_rate": 9.488000926928071e-06, |
| "loss": 0.4108, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.702061855670103, |
| "grad_norm": 0.3561364856811235, |
| "learning_rate": 9.480049484527127e-06, |
| "loss": 0.4238, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7051546391752578, |
| "grad_norm": 0.47010210624724486, |
| "learning_rate": 9.472040158034392e-06, |
| "loss": 0.4209, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7082474226804124, |
| "grad_norm": 0.4301123700583335, |
| "learning_rate": 9.463973050933674e-06, |
| "loss": 0.3994, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.711340206185567, |
| "grad_norm": 0.3909682894633138, |
| "learning_rate": 9.455848267455332e-06, |
| "loss": 0.4215, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7144329896907217, |
| "grad_norm": 0.34838625934979284, |
| "learning_rate": 9.44766591257493e-06, |
| "loss": 0.3846, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7175257731958763, |
| "grad_norm": 0.3280238808955969, |
| "learning_rate": 9.439426092011877e-06, |
| "loss": 0.3931, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7206185567010309, |
| "grad_norm": 0.35144130802487117, |
| "learning_rate": 9.43112891222806e-06, |
| "loss": 0.4096, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7237113402061855, |
| "grad_norm": 0.41596258564300653, |
| "learning_rate": 9.422774480426474e-06, |
| "loss": 0.4025, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7268041237113402, |
| "grad_norm": 0.3841630129084166, |
| "learning_rate": 9.414362904549829e-06, |
| "loss": 0.4532, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7298969072164948, |
| "grad_norm": 0.3578469578427432, |
| "learning_rate": 9.405894293279167e-06, |
| "loss": 0.4503, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7329896907216494, |
| "grad_norm": 0.38595777247388136, |
| "learning_rate": 9.397368756032445e-06, |
| "loss": 0.4066, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7360824742268042, |
| "grad_norm": 0.4335838878361591, |
| "learning_rate": 9.388786402963133e-06, |
| "loss": 0.4495, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7391752577319588, |
| "grad_norm": 0.33324163736582774, |
| "learning_rate": 9.380147344958778e-06, |
| "loss": 0.3915, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7422680412371134, |
| "grad_norm": 0.36773272717492805, |
| "learning_rate": 9.371451693639583e-06, |
| "loss": 0.4307, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7453608247422681, |
| "grad_norm": 0.40450476128913343, |
| "learning_rate": 9.362699561356957e-06, |
| "loss": 0.4256, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7484536082474227, |
| "grad_norm": 0.3908440757047333, |
| "learning_rate": 9.35389106119207e-06, |
| "loss": 0.4153, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7515463917525773, |
| "grad_norm": 0.3114817033772392, |
| "learning_rate": 9.345026306954385e-06, |
| "loss": 0.4125, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7546391752577319, |
| "grad_norm": 0.29706798256457034, |
| "learning_rate": 9.336105413180194e-06, |
| "loss": 0.4226, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7577319587628866, |
| "grad_norm": 0.4376146395225796, |
| "learning_rate": 9.32712849513113e-06, |
| "loss": 0.4331, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7608247422680412, |
| "grad_norm": 0.3899623219746875, |
| "learning_rate": 9.31809566879269e-06, |
| "loss": 0.4383, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7639175257731958, |
| "grad_norm": 0.34792340705670277, |
| "learning_rate": 9.309007050872722e-06, |
| "loss": 0.443, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7670103092783506, |
| "grad_norm": 0.40032908798049566, |
| "learning_rate": 9.299862758799929e-06, |
| "loss": 0.3943, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7701030927835052, |
| "grad_norm": 0.3646886635478996, |
| "learning_rate": 9.290662910722346e-06, |
| "loss": 0.4438, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7731958762886598, |
| "grad_norm": 0.48708708631155473, |
| "learning_rate": 9.281407625505813e-06, |
| "loss": 0.3921, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7762886597938145, |
| "grad_norm": 0.35131588651040985, |
| "learning_rate": 9.272097022732444e-06, |
| "loss": 0.3841, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7793814432989691, |
| "grad_norm": 0.3366070564480312, |
| "learning_rate": 9.262731222699073e-06, |
| "loss": 0.4244, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7824742268041237, |
| "grad_norm": 0.4059540639934267, |
| "learning_rate": 9.253310346415714e-06, |
| "loss": 0.3968, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7855670103092783, |
| "grad_norm": 0.3319429374771854, |
| "learning_rate": 9.24383451560398e-06, |
| "loss": 0.4001, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.788659793814433, |
| "grad_norm": 0.3747214017804394, |
| "learning_rate": 9.234303852695526e-06, |
| "loss": 0.4079, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7917525773195876, |
| "grad_norm": 0.6094521128793228, |
| "learning_rate": 9.224718480830454e-06, |
| "loss": 0.4492, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7948453608247422, |
| "grad_norm": 0.40150812678403713, |
| "learning_rate": 9.215078523855736e-06, |
| "loss": 0.3971, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.797938144329897, |
| "grad_norm": 0.33127062639306104, |
| "learning_rate": 9.205384106323602e-06, |
| "loss": 0.391, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8010309278350516, |
| "grad_norm": 0.3804077472064743, |
| "learning_rate": 9.195635353489932e-06, |
| "loss": 0.424, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8041237113402062, |
| "grad_norm": 0.4156186134532376, |
| "learning_rate": 9.185832391312644e-06, |
| "loss": 0.4239, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8072164948453608, |
| "grad_norm": 0.37494860050910456, |
| "learning_rate": 9.175975346450063e-06, |
| "loss": 0.4122, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8103092783505155, |
| "grad_norm": 0.37253235581305144, |
| "learning_rate": 9.166064346259288e-06, |
| "loss": 0.4636, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8134020618556701, |
| "grad_norm": 0.47925682852498785, |
| "learning_rate": 9.156099518794535e-06, |
| "loss": 0.4191, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8164948453608247, |
| "grad_norm": 0.38882262828447034, |
| "learning_rate": 9.146080992805497e-06, |
| "loss": 0.4743, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8195876288659794, |
| "grad_norm": 0.40517243481841286, |
| "learning_rate": 9.136008897735673e-06, |
| "loss": 0.4381, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.822680412371134, |
| "grad_norm": 0.33944094329173813, |
| "learning_rate": 9.125883363720696e-06, |
| "loss": 0.4218, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8257731958762886, |
| "grad_norm": 0.3387360961611941, |
| "learning_rate": 9.11570452158665e-06, |
| "loss": 0.4187, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8288659793814434, |
| "grad_norm": 0.32535654522901664, |
| "learning_rate": 9.105472502848386e-06, |
| "loss": 0.425, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.831958762886598, |
| "grad_norm": 0.4065234297081288, |
| "learning_rate": 9.095187439707817e-06, |
| "loss": 0.4338, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8350515463917526, |
| "grad_norm": 0.40157205206617597, |
| "learning_rate": 9.08484946505221e-06, |
| "loss": 0.4564, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8381443298969072, |
| "grad_norm": 0.3479048593946284, |
| "learning_rate": 9.074458712452476e-06, |
| "loss": 0.4339, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8412371134020619, |
| "grad_norm": 0.46307296193741704, |
| "learning_rate": 9.06401531616143e-06, |
| "loss": 0.4778, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8443298969072165, |
| "grad_norm": 0.35582479153588514, |
| "learning_rate": 9.053519411112075e-06, |
| "loss": 0.3822, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8474226804123711, |
| "grad_norm": 0.4376659890869369, |
| "learning_rate": 9.042971132915841e-06, |
| "loss": 0.3995, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8505154639175257, |
| "grad_norm": 0.3389338487984306, |
| "learning_rate": 9.032370617860844e-06, |
| "loss": 0.4204, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8536082474226804, |
| "grad_norm": 0.33150814648720384, |
| "learning_rate": 9.021718002910124e-06, |
| "loss": 0.396, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.856701030927835, |
| "grad_norm": 0.3834437528783491, |
| "learning_rate": 9.011013425699868e-06, |
| "loss": 0.4362, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8597938144329897, |
| "grad_norm": 0.3942889058329744, |
| "learning_rate": 9.000257024537641e-06, |
| "loss": 0.4506, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8628865979381444, |
| "grad_norm": 0.6702145922259628, |
| "learning_rate": 8.989448938400596e-06, |
| "loss": 0.4833, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.865979381443299, |
| "grad_norm": 0.31283468483751053, |
| "learning_rate": 8.978589306933672e-06, |
| "loss": 0.4072, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8690721649484536, |
| "grad_norm": 0.36632655743824466, |
| "learning_rate": 8.9676782704478e-06, |
| "loss": 0.4511, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.8721649484536083, |
| "grad_norm": 0.3966023481535047, |
| "learning_rate": 8.95671596991808e-06, |
| "loss": 0.4306, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8752577319587629, |
| "grad_norm": 0.397005850506469, |
| "learning_rate": 8.94570254698197e-06, |
| "loss": 0.4489, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8783505154639175, |
| "grad_norm": 0.48150680327390494, |
| "learning_rate": 8.934638143937447e-06, |
| "loss": 0.4036, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8814432989690721, |
| "grad_norm": 0.42034692202567625, |
| "learning_rate": 8.923522903741173e-06, |
| "loss": 0.4246, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8845360824742268, |
| "grad_norm": 0.34587961383464905, |
| "learning_rate": 8.91235697000665e-06, |
| "loss": 0.3852, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8876288659793814, |
| "grad_norm": 0.4895986957921685, |
| "learning_rate": 8.901140487002358e-06, |
| "loss": 0.4635, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8907216494845361, |
| "grad_norm": 0.35487167407743136, |
| "learning_rate": 8.889873599649893e-06, |
| "loss": 0.3973, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8938144329896908, |
| "grad_norm": 0.37736951311343125, |
| "learning_rate": 8.8785564535221e-06, |
| "loss": 0.4081, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8969072164948454, |
| "grad_norm": 0.3846700038419489, |
| "learning_rate": 8.867189194841187e-06, |
| "loss": 0.4881, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.4429103421627471, |
| "learning_rate": 8.855771970476834e-06, |
| "loss": 0.4551, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9030927835051547, |
| "grad_norm": 0.32825849474711527, |
| "learning_rate": 8.844304927944304e-06, |
| "loss": 0.3956, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9061855670103093, |
| "grad_norm": 0.3304508041662356, |
| "learning_rate": 8.832788215402527e-06, |
| "loss": 0.3972, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9092783505154639, |
| "grad_norm": 0.4344446557575567, |
| "learning_rate": 8.821221981652189e-06, |
| "loss": 0.4141, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9123711340206185, |
| "grad_norm": 0.32272845920125925, |
| "learning_rate": 8.809606376133814e-06, |
| "loss": 0.4501, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9154639175257732, |
| "grad_norm": 0.3453561017057089, |
| "learning_rate": 8.79794154892583e-06, |
| "loss": 0.4051, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9185567010309278, |
| "grad_norm": 0.340213017334916, |
| "learning_rate": 8.786227650742624e-06, |
| "loss": 0.4144, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9216494845360824, |
| "grad_norm": 0.32610289859497454, |
| "learning_rate": 8.774464832932609e-06, |
| "loss": 0.4105, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9247422680412372, |
| "grad_norm": 0.3382705441690299, |
| "learning_rate": 8.762653247476249e-06, |
| "loss": 0.4359, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9278350515463918, |
| "grad_norm": 0.3373010065525689, |
| "learning_rate": 8.750793046984118e-06, |
| "loss": 0.3884, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9309278350515464, |
| "grad_norm": 0.4147951347719845, |
| "learning_rate": 8.738884384694905e-06, |
| "loss": 0.4298, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.934020618556701, |
| "grad_norm": 0.3312432357311152, |
| "learning_rate": 8.726927414473457e-06, |
| "loss": 0.4053, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9371134020618557, |
| "grad_norm": 0.4627090495345429, |
| "learning_rate": 8.714922290808766e-06, |
| "loss": 0.4279, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9402061855670103, |
| "grad_norm": 0.3896335087463135, |
| "learning_rate": 8.702869168811999e-06, |
| "loss": 0.423, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9432989690721649, |
| "grad_norm": 0.41180922800326647, |
| "learning_rate": 8.690768204214474e-06, |
| "loss": 0.4363, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9463917525773196, |
| "grad_norm": 0.3307854211509245, |
| "learning_rate": 8.67861955336566e-06, |
| "loss": 0.413, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9494845360824742, |
| "grad_norm": 0.3841553041095563, |
| "learning_rate": 8.666423373231145e-06, |
| "loss": 0.4125, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9525773195876288, |
| "grad_norm": 0.3838781748953959, |
| "learning_rate": 8.65417982139062e-06, |
| "loss": 0.4439, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9556701030927836, |
| "grad_norm": 0.31305227186806495, |
| "learning_rate": 8.641889056035842e-06, |
| "loss": 0.3905, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9587628865979382, |
| "grad_norm": 0.3857088956434675, |
| "learning_rate": 8.629551235968577e-06, |
| "loss": 0.411, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9618556701030928, |
| "grad_norm": 0.399404131470791, |
| "learning_rate": 8.617166520598563e-06, |
| "loss": 0.4339, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9649484536082474, |
| "grad_norm": 0.34041988314797883, |
| "learning_rate": 8.604735069941443e-06, |
| "loss": 0.4393, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9680412371134021, |
| "grad_norm": 0.33528564492422497, |
| "learning_rate": 8.592257044616701e-06, |
| "loss": 0.4026, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9711340206185567, |
| "grad_norm": 0.35692516201601204, |
| "learning_rate": 8.579732605845583e-06, |
| "loss": 0.4384, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9742268041237113, |
| "grad_norm": 0.38353193047164685, |
| "learning_rate": 8.567161915449018e-06, |
| "loss": 0.4539, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.977319587628866, |
| "grad_norm": 0.45921826745698263, |
| "learning_rate": 8.554545135845522e-06, |
| "loss": 0.4103, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9804123711340206, |
| "grad_norm": 0.39231294331678845, |
| "learning_rate": 8.541882430049103e-06, |
| "loss": 0.4193, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9835051546391752, |
| "grad_norm": 0.3158697601066205, |
| "learning_rate": 8.529173961667158e-06, |
| "loss": 0.3764, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.98659793814433, |
| "grad_norm": 0.3617686666769801, |
| "learning_rate": 8.516419894898356e-06, |
| "loss": 0.3925, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9896907216494846, |
| "grad_norm": 0.3494830754623986, |
| "learning_rate": 8.503620394530507e-06, |
| "loss": 0.4126, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9927835051546392, |
| "grad_norm": 0.4042145993787018, |
| "learning_rate": 8.490775625938452e-06, |
| "loss": 0.4458, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9958762886597938, |
| "grad_norm": 0.34415938289188014, |
| "learning_rate": 8.477885755081913e-06, |
| "loss": 0.4324, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9989690721649485, |
| "grad_norm": 0.4559898087472229, |
| "learning_rate": 8.46495094850335e-06, |
| "loss": 0.4039, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4559898087472229, |
| "learning_rate": 8.451971373325813e-06, |
| "loss": 0.4044, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.0030927835051546, |
| "grad_norm": 0.6525724020105182, |
| "learning_rate": 8.43894719725078e-06, |
| "loss": 0.3491, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.0061855670103093, |
| "grad_norm": 0.3922452835125829, |
| "learning_rate": 8.42587858855599e-06, |
| "loss": 0.3665, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.0092783505154639, |
| "grad_norm": 0.3692331200761448, |
| "learning_rate": 8.412765716093273e-06, |
| "loss": 0.4013, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.0123711340206185, |
| "grad_norm": 0.32689915021476607, |
| "learning_rate": 8.39960874928636e-06, |
| "loss": 0.3473, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.0154639175257731, |
| "grad_norm": 0.35641939466022565, |
| "learning_rate": 8.386407858128707e-06, |
| "loss": 0.3484, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.0185567010309278, |
| "grad_norm": 0.4466817808622907, |
| "learning_rate": 8.373163213181283e-06, |
| "loss": 0.3214, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0216494845360824, |
| "grad_norm": 0.3830567903995025, |
| "learning_rate": 8.359874985570378e-06, |
| "loss": 0.3644, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.024742268041237, |
| "grad_norm": 0.47720737189360485, |
| "learning_rate": 8.346543346985388e-06, |
| "loss": 0.3808, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.0278350515463917, |
| "grad_norm": 0.30458724044567703, |
| "learning_rate": 8.333168469676595e-06, |
| "loss": 0.3616, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.0309278350515463, |
| "grad_norm": 0.46053373607867876, |
| "learning_rate": 8.319750526452945e-06, |
| "loss": 0.35, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0340206185567011, |
| "grad_norm": 0.40190768640909413, |
| "learning_rate": 8.306289690679812e-06, |
| "loss": 0.3926, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.0371134020618558, |
| "grad_norm": 0.43240340569051033, |
| "learning_rate": 8.29278613627676e-06, |
| "loss": 0.2903, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.0402061855670104, |
| "grad_norm": 0.31303678537438284, |
| "learning_rate": 8.279240037715297e-06, |
| "loss": 0.3662, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.043298969072165, |
| "grad_norm": 0.36641545734601244, |
| "learning_rate": 8.265651570016618e-06, |
| "loss": 0.3553, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.0463917525773196, |
| "grad_norm": 0.4841123603738825, |
| "learning_rate": 8.252020908749338e-06, |
| "loss": 0.332, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.0494845360824743, |
| "grad_norm": 0.2956997076869852, |
| "learning_rate": 8.238348230027245e-06, |
| "loss": 0.3379, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.052577319587629, |
| "grad_norm": 0.38931134059191863, |
| "learning_rate": 8.224633710506997e-06, |
| "loss": 0.3648, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.0556701030927835, |
| "grad_norm": 0.3611149951062155, |
| "learning_rate": 8.210877527385859e-06, |
| "loss": 0.328, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.0587628865979382, |
| "grad_norm": 0.4546040068388239, |
| "learning_rate": 8.197079858399403e-06, |
| "loss": 0.3335, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.0618556701030928, |
| "grad_norm": 0.3993100472987632, |
| "learning_rate": 8.18324088181922e-06, |
| "loss": 0.3621, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.0649484536082474, |
| "grad_norm": 0.3853715287732158, |
| "learning_rate": 8.169360776450606e-06, |
| "loss": 0.3497, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.068041237113402, |
| "grad_norm": 0.4538572958536918, |
| "learning_rate": 8.155439721630265e-06, |
| "loss": 0.4041, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.0711340206185567, |
| "grad_norm": 0.3819616971274627, |
| "learning_rate": 8.14147789722398e-06, |
| "loss": 0.3125, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.0742268041237113, |
| "grad_norm": 0.404047744842174, |
| "learning_rate": 8.127475483624296e-06, |
| "loss": 0.376, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.077319587628866, |
| "grad_norm": 0.6412664221693233, |
| "learning_rate": 8.113432661748187e-06, |
| "loss": 0.3368, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.0804123711340206, |
| "grad_norm": 0.4079355731584008, |
| "learning_rate": 8.099349613034715e-06, |
| "loss": 0.3299, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0835051546391752, |
| "grad_norm": 0.34948984869697675, |
| "learning_rate": 8.085226519442697e-06, |
| "loss": 0.3166, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.0865979381443298, |
| "grad_norm": 0.3702256660226774, |
| "learning_rate": 8.071063563448341e-06, |
| "loss": 0.3456, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0896907216494844, |
| "grad_norm": 0.3591573870798163, |
| "learning_rate": 8.056860928042892e-06, |
| "loss": 0.3067, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.0927835051546393, |
| "grad_norm": 0.4211722962881093, |
| "learning_rate": 8.042618796730272e-06, |
| "loss": 0.357, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.095876288659794, |
| "grad_norm": 0.2979530511506807, |
| "learning_rate": 8.028337353524712e-06, |
| "loss": 0.3434, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0989690721649485, |
| "grad_norm": 0.7222053664711489, |
| "learning_rate": 8.014016782948358e-06, |
| "loss": 0.3513, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.1020618556701032, |
| "grad_norm": 0.4726448804328554, |
| "learning_rate": 7.999657270028904e-06, |
| "loss": 0.3424, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.1051546391752578, |
| "grad_norm": 0.384744888765029, |
| "learning_rate": 7.985259000297196e-06, |
| "loss": 0.347, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.1082474226804124, |
| "grad_norm": 0.4214120836808585, |
| "learning_rate": 7.970822159784832e-06, |
| "loss": 0.3527, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.111340206185567, |
| "grad_norm": 0.4313237210833637, |
| "learning_rate": 7.956346935021762e-06, |
| "loss": 0.3338, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1144329896907217, |
| "grad_norm": 0.41043298999057304, |
| "learning_rate": 7.941833513033873e-06, |
| "loss": 0.3257, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.1175257731958763, |
| "grad_norm": 0.45398400909999115, |
| "learning_rate": 7.92728208134058e-06, |
| "loss": 0.3533, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.120618556701031, |
| "grad_norm": 0.4121091535911782, |
| "learning_rate": 7.912692827952395e-06, |
| "loss": 0.3478, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.1237113402061856, |
| "grad_norm": 0.3799154594744056, |
| "learning_rate": 7.898065941368507e-06, |
| "loss": 0.3679, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.1268041237113402, |
| "grad_norm": 0.3231398982291365, |
| "learning_rate": 7.883401610574338e-06, |
| "loss": 0.3267, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1298969072164948, |
| "grad_norm": 0.37731112008573586, |
| "learning_rate": 7.868700025039102e-06, |
| "loss": 0.3444, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.1329896907216495, |
| "grad_norm": 0.2979459870840287, |
| "learning_rate": 7.853961374713367e-06, |
| "loss": 0.3876, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.136082474226804, |
| "grad_norm": 0.360771137481857, |
| "learning_rate": 7.839185850026592e-06, |
| "loss": 0.3663, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.1391752577319587, |
| "grad_norm": 0.3907315619368398, |
| "learning_rate": 7.82437364188466e-06, |
| "loss": 0.3472, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.1422680412371133, |
| "grad_norm": 0.3514296826261096, |
| "learning_rate": 7.809524941667426e-06, |
| "loss": 0.3583, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.145360824742268, |
| "grad_norm": 0.4471849446328275, |
| "learning_rate": 7.794639941226238e-06, |
| "loss": 0.335, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.1484536082474226, |
| "grad_norm": 0.3432232339200618, |
| "learning_rate": 7.779718832881456e-06, |
| "loss": 0.3139, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.1515463917525772, |
| "grad_norm": 0.34717136955485867, |
| "learning_rate": 7.764761809419969e-06, |
| "loss": 0.348, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.1546391752577319, |
| "grad_norm": 0.5078372437404328, |
| "learning_rate": 7.749769064092706e-06, |
| "loss": 0.3642, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.1577319587628865, |
| "grad_norm": 0.4613570762769725, |
| "learning_rate": 7.734740790612137e-06, |
| "loss": 0.3346, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.1608247422680413, |
| "grad_norm": 0.36959768373709084, |
| "learning_rate": 7.719677183149764e-06, |
| "loss": 0.3575, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.163917525773196, |
| "grad_norm": 0.4779248378014372, |
| "learning_rate": 7.70457843633363e-06, |
| "loss": 0.3423, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.1670103092783506, |
| "grad_norm": 0.32643441854444594, |
| "learning_rate": 7.689444745245782e-06, |
| "loss": 0.378, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.1701030927835052, |
| "grad_norm": 0.3089704414768058, |
| "learning_rate": 7.67427630541977e-06, |
| "loss": 0.331, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.1731958762886598, |
| "grad_norm": 0.3749819889676696, |
| "learning_rate": 7.65907331283811e-06, |
| "loss": 0.3184, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1762886597938145, |
| "grad_norm": 0.4677147598049641, |
| "learning_rate": 7.643835963929747e-06, |
| "loss": 0.3818, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.179381443298969, |
| "grad_norm": 0.4828388717102419, |
| "learning_rate": 7.6285644555675345e-06, |
| "loss": 0.3484, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.1824742268041237, |
| "grad_norm": 0.26942212683700595, |
| "learning_rate": 7.613258985065672e-06, |
| "loss": 0.3623, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.1855670103092784, |
| "grad_norm": 0.4157868728729967, |
| "learning_rate": 7.597919750177168e-06, |
| "loss": 0.3401, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.188659793814433, |
| "grad_norm": 0.42605063182184566, |
| "learning_rate": 7.58254694909128e-06, |
| "loss": 0.3159, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.1917525773195876, |
| "grad_norm": 0.34314819154922166, |
| "learning_rate": 7.567140780430956e-06, |
| "loss": 0.3419, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.1948453608247422, |
| "grad_norm": 0.3289614756235703, |
| "learning_rate": 7.551701443250263e-06, |
| "loss": 0.3211, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.1979381443298969, |
| "grad_norm": 0.4057519424982471, |
| "learning_rate": 7.536229137031822e-06, |
| "loss": 0.3368, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.2010309278350515, |
| "grad_norm": 0.3744695844688546, |
| "learning_rate": 7.520724061684227e-06, |
| "loss": 0.3625, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.2041237113402061, |
| "grad_norm": 0.3504025419656684, |
| "learning_rate": 7.505186417539465e-06, |
| "loss": 0.3437, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2072164948453608, |
| "grad_norm": 0.4279051952223975, |
| "learning_rate": 7.489616405350319e-06, |
| "loss": 0.3286, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.2103092783505154, |
| "grad_norm": 0.43577738791346377, |
| "learning_rate": 7.474014226287786e-06, |
| "loss": 0.3519, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.21340206185567, |
| "grad_norm": 0.29125474177173316, |
| "learning_rate": 7.45838008193847e-06, |
| "loss": 0.3096, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.2164948453608249, |
| "grad_norm": 0.4472479854562308, |
| "learning_rate": 7.442714174301984e-06, |
| "loss": 0.3563, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.2195876288659795, |
| "grad_norm": 0.3249198760178035, |
| "learning_rate": 7.4270167057883295e-06, |
| "loss": 0.3825, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2226804123711341, |
| "grad_norm": 0.41662397510911336, |
| "learning_rate": 7.411287879215291e-06, |
| "loss": 0.3413, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.2257731958762887, |
| "grad_norm": 0.3850817892627363, |
| "learning_rate": 7.395527897805812e-06, |
| "loss": 0.3426, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.2288659793814434, |
| "grad_norm": 0.41909976516465813, |
| "learning_rate": 7.379736965185369e-06, |
| "loss": 0.2918, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.231958762886598, |
| "grad_norm": 0.35938630643481323, |
| "learning_rate": 7.36391528537934e-06, |
| "loss": 0.3251, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.2350515463917526, |
| "grad_norm": 0.351044135365865, |
| "learning_rate": 7.348063062810369e-06, |
| "loss": 0.3227, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2381443298969073, |
| "grad_norm": 0.43751484222875453, |
| "learning_rate": 7.332180502295729e-06, |
| "loss": 0.3734, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.2412371134020619, |
| "grad_norm": 0.36948439372703645, |
| "learning_rate": 7.316267809044667e-06, |
| "loss": 0.3195, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.2443298969072165, |
| "grad_norm": 0.29471956115359044, |
| "learning_rate": 7.300325188655762e-06, |
| "loss": 0.3466, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.2474226804123711, |
| "grad_norm": 0.5091978979592966, |
| "learning_rate": 7.284352847114259e-06, |
| "loss": 0.3743, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.2505154639175258, |
| "grad_norm": 0.3628727698515663, |
| "learning_rate": 7.268350990789415e-06, |
| "loss": 0.3523, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.2536082474226804, |
| "grad_norm": 0.3878916069312507, |
| "learning_rate": 7.252319826431833e-06, |
| "loss": 0.3589, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.256701030927835, |
| "grad_norm": 0.36280618889349325, |
| "learning_rate": 7.236259561170783e-06, |
| "loss": 0.3658, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.2597938144329897, |
| "grad_norm": 0.6373467770204526, |
| "learning_rate": 7.220170402511534e-06, |
| "loss": 0.3547, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.2628865979381443, |
| "grad_norm": 0.33274294232763424, |
| "learning_rate": 7.204052558332668e-06, |
| "loss": 0.3103, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.265979381443299, |
| "grad_norm": 0.3735084312326953, |
| "learning_rate": 7.187906236883395e-06, |
| "loss": 0.3776, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2690721649484535, |
| "grad_norm": 0.346815947501004, |
| "learning_rate": 7.171731646780867e-06, |
| "loss": 0.3539, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.2721649484536082, |
| "grad_norm": 0.35962803496131096, |
| "learning_rate": 7.155528997007476e-06, |
| "loss": 0.3321, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.2752577319587628, |
| "grad_norm": 0.310966882010396, |
| "learning_rate": 7.139298496908155e-06, |
| "loss": 0.3241, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.2783505154639174, |
| "grad_norm": 0.39105942243010555, |
| "learning_rate": 7.123040356187676e-06, |
| "loss": 0.3472, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.281443298969072, |
| "grad_norm": 0.33753426953195, |
| "learning_rate": 7.106754784907942e-06, |
| "loss": 0.2907, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.2845360824742267, |
| "grad_norm": 0.40088355762514305, |
| "learning_rate": 7.090441993485268e-06, |
| "loss": 0.3508, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.2876288659793813, |
| "grad_norm": 0.40064565306290706, |
| "learning_rate": 7.07410219268766e-06, |
| "loss": 0.3348, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.2907216494845362, |
| "grad_norm": 0.5469772825212642, |
| "learning_rate": 7.057735593632106e-06, |
| "loss": 0.3911, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.2938144329896908, |
| "grad_norm": 0.3938454332181955, |
| "learning_rate": 7.04134240778183e-06, |
| "loss": 0.3275, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.2969072164948454, |
| "grad_norm": 0.3841074843900627, |
| "learning_rate": 7.024922846943573e-06, |
| "loss": 0.3324, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.3589284518865989, |
| "learning_rate": 7.008477123264849e-06, |
| "loss": 0.3225, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.3030927835051547, |
| "grad_norm": 0.3059033829909143, |
| "learning_rate": 6.9920054492312086e-06, |
| "loss": 0.359, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.3061855670103093, |
| "grad_norm": 0.3271771652734719, |
| "learning_rate": 6.97550803766349e-06, |
| "loss": 0.3395, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.309278350515464, |
| "grad_norm": 0.40806409758051815, |
| "learning_rate": 6.958985101715077e-06, |
| "loss": 0.3615, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.3123711340206186, |
| "grad_norm": 0.4066403373834957, |
| "learning_rate": 6.942436854869129e-06, |
| "loss": 0.3689, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.3154639175257732, |
| "grad_norm": 0.370416386524105, |
| "learning_rate": 6.925863510935839e-06, |
| "loss": 0.3433, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.3185567010309278, |
| "grad_norm": 0.4418974179010107, |
| "learning_rate": 6.909265284049664e-06, |
| "loss": 0.3827, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.3216494845360824, |
| "grad_norm": 0.4227883285206932, |
| "learning_rate": 6.89264238866656e-06, |
| "loss": 0.3627, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.324742268041237, |
| "grad_norm": 0.31167092762445603, |
| "learning_rate": 6.875995039561206e-06, |
| "loss": 0.359, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.3278350515463917, |
| "grad_norm": 0.3480875177965604, |
| "learning_rate": 6.859323451824238e-06, |
| "loss": 0.3444, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3309278350515463, |
| "grad_norm": 0.33854262777758315, |
| "learning_rate": 6.842627840859461e-06, |
| "loss": 0.3664, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.334020618556701, |
| "grad_norm": 0.34767527727955205, |
| "learning_rate": 6.825908422381074e-06, |
| "loss": 0.3516, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.3371134020618558, |
| "grad_norm": 0.5865810671246062, |
| "learning_rate": 6.8091654124108765e-06, |
| "loss": 0.4047, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.3402061855670104, |
| "grad_norm": 0.45609449114653355, |
| "learning_rate": 6.792399027275482e-06, |
| "loss": 0.3319, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.343298969072165, |
| "grad_norm": 0.4400731902548814, |
| "learning_rate": 6.775609483603516e-06, |
| "loss": 0.3652, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.3463917525773197, |
| "grad_norm": 0.38007555373261076, |
| "learning_rate": 6.758796998322825e-06, |
| "loss": 0.3685, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.3494845360824743, |
| "grad_norm": 0.35178301360682257, |
| "learning_rate": 6.7419617886576735e-06, |
| "loss": 0.3555, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.352577319587629, |
| "grad_norm": 0.43610235707265677, |
| "learning_rate": 6.725104072125931e-06, |
| "loss": 0.3481, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.3556701030927836, |
| "grad_norm": 0.34661801350291904, |
| "learning_rate": 6.708224066536263e-06, |
| "loss": 0.3837, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.3587628865979382, |
| "grad_norm": 0.3989161601450613, |
| "learning_rate": 6.6913219899853245e-06, |
| "loss": 0.329, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3618556701030928, |
| "grad_norm": 0.3744534100982985, |
| "learning_rate": 6.674398060854931e-06, |
| "loss": 0.3634, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.3649484536082475, |
| "grad_norm": 0.33702451565402913, |
| "learning_rate": 6.657452497809247e-06, |
| "loss": 0.349, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.368041237113402, |
| "grad_norm": 0.5591464114695802, |
| "learning_rate": 6.640485519791953e-06, |
| "loss": 0.3789, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.3711340206185567, |
| "grad_norm": 0.3252398001484424, |
| "learning_rate": 6.6234973460234184e-06, |
| "loss": 0.3184, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.3742268041237113, |
| "grad_norm": 0.5256539082109882, |
| "learning_rate": 6.606488195997876e-06, |
| "loss": 0.3707, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.377319587628866, |
| "grad_norm": 0.41502723892825255, |
| "learning_rate": 6.589458289480575e-06, |
| "loss": 0.3424, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.3804123711340206, |
| "grad_norm": 0.34496860905236404, |
| "learning_rate": 6.57240784650495e-06, |
| "loss": 0.3553, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.3835051546391752, |
| "grad_norm": 0.3677644556343446, |
| "learning_rate": 6.555337087369775e-06, |
| "loss": 0.3133, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.3865979381443299, |
| "grad_norm": 0.43771172700586186, |
| "learning_rate": 6.538246232636316e-06, |
| "loss": 0.3844, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.3896907216494845, |
| "grad_norm": 0.3438481741906832, |
| "learning_rate": 6.521135503125483e-06, |
| "loss": 0.3283, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3927835051546391, |
| "grad_norm": 0.625983954705621, |
| "learning_rate": 6.5040051199149755e-06, |
| "loss": 0.397, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.3958762886597937, |
| "grad_norm": 0.40915327598292334, |
| "learning_rate": 6.48685530433643e-06, |
| "loss": 0.3807, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.3989690721649484, |
| "grad_norm": 0.30288182205857417, |
| "learning_rate": 6.469686277972556e-06, |
| "loss": 0.3476, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.402061855670103, |
| "grad_norm": 0.4292888222844284, |
| "learning_rate": 6.452498262654267e-06, |
| "loss": 0.3573, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.4051546391752576, |
| "grad_norm": 0.3468873094824028, |
| "learning_rate": 6.4352914804578345e-06, |
| "loss": 0.3475, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.4082474226804123, |
| "grad_norm": 0.3611550096422234, |
| "learning_rate": 6.418066153701997e-06, |
| "loss": 0.3448, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.4113402061855669, |
| "grad_norm": 0.38685613343922054, |
| "learning_rate": 6.4008225049450974e-06, |
| "loss": 0.3516, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.4144329896907217, |
| "grad_norm": 0.3151727833355366, |
| "learning_rate": 6.38356075698221e-06, |
| "loss": 0.3493, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.4175257731958764, |
| "grad_norm": 0.3408245800320407, |
| "learning_rate": 6.366281132842256e-06, |
| "loss": 0.3642, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.420618556701031, |
| "grad_norm": 0.35947440187809293, |
| "learning_rate": 6.348983855785122e-06, |
| "loss": 0.3491, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.4237113402061856, |
| "grad_norm": 0.3697513099146176, |
| "learning_rate": 6.331669149298781e-06, |
| "loss": 0.3462, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.4268041237113402, |
| "grad_norm": 0.3440627883456723, |
| "learning_rate": 6.314337237096401e-06, |
| "loss": 0.3294, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.4298969072164949, |
| "grad_norm": 0.3424656075586527, |
| "learning_rate": 6.296988343113453e-06, |
| "loss": 0.3441, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.4329896907216495, |
| "grad_norm": 0.3410954960548553, |
| "learning_rate": 6.279622691504821e-06, |
| "loss": 0.3367, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.4360824742268041, |
| "grad_norm": 0.42875886013339126, |
| "learning_rate": 6.2622405066419046e-06, |
| "loss": 0.3659, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.4391752577319588, |
| "grad_norm": 0.34138282265214576, |
| "learning_rate": 6.24484201310972e-06, |
| "loss": 0.3187, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.4422680412371134, |
| "grad_norm": 0.3498674361424939, |
| "learning_rate": 6.227427435703997e-06, |
| "loss": 0.3249, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.445360824742268, |
| "grad_norm": 0.40405356495212513, |
| "learning_rate": 6.2099969994282764e-06, |
| "loss": 0.3288, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.4484536082474226, |
| "grad_norm": 0.3596781700650695, |
| "learning_rate": 6.192550929491002e-06, |
| "loss": 0.3578, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.4515463917525773, |
| "grad_norm": 0.3510031934171229, |
| "learning_rate": 6.175089451302614e-06, |
| "loss": 0.3546, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.454639175257732, |
| "grad_norm": 0.4142516280132979, |
| "learning_rate": 6.157612790472626e-06, |
| "loss": 0.356, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.4577319587628865, |
| "grad_norm": 0.42780646762862284, |
| "learning_rate": 6.140121172806725e-06, |
| "loss": 0.3447, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.4608247422680412, |
| "grad_norm": 0.370184077167348, |
| "learning_rate": 6.122614824303845e-06, |
| "loss": 0.3162, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.463917525773196, |
| "grad_norm": 0.3817693239463498, |
| "learning_rate": 6.105093971153246e-06, |
| "loss": 0.3536, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.4670103092783506, |
| "grad_norm": 0.3917768728174191, |
| "learning_rate": 6.087558839731594e-06, |
| "loss": 0.3341, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.4701030927835053, |
| "grad_norm": 0.3428239655666794, |
| "learning_rate": 6.070009656600039e-06, |
| "loss": 0.3422, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.47319587628866, |
| "grad_norm": 0.3973161497874271, |
| "learning_rate": 6.052446648501283e-06, |
| "loss": 0.3429, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.4762886597938145, |
| "grad_norm": 0.4333165450189888, |
| "learning_rate": 6.034870042356653e-06, |
| "loss": 0.3703, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.4793814432989691, |
| "grad_norm": 0.323605469691123, |
| "learning_rate": 6.0172800652631706e-06, |
| "loss": 0.3426, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.4824742268041238, |
| "grad_norm": 0.3929125656201943, |
| "learning_rate": 5.999676944490609e-06, |
| "loss": 0.3324, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4855670103092784, |
| "grad_norm": 0.3587189281991536, |
| "learning_rate": 5.982060907478568e-06, |
| "loss": 0.3608, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.488659793814433, |
| "grad_norm": 0.4074306888968019, |
| "learning_rate": 5.964432181833532e-06, |
| "loss": 0.3386, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.4917525773195877, |
| "grad_norm": 0.4119418936472174, |
| "learning_rate": 5.946790995325924e-06, |
| "loss": 0.3351, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.4948453608247423, |
| "grad_norm": 0.5463610190099886, |
| "learning_rate": 5.929137575887167e-06, |
| "loss": 0.3486, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.497938144329897, |
| "grad_norm": 0.39394480450005404, |
| "learning_rate": 5.911472151606743e-06, |
| "loss": 0.3647, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.5010309278350515, |
| "grad_norm": 0.32096578094355016, |
| "learning_rate": 5.893794950729237e-06, |
| "loss": 0.3321, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.5041237113402062, |
| "grad_norm": 0.4262076144835789, |
| "learning_rate": 5.876106201651392e-06, |
| "loss": 0.3445, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.5072164948453608, |
| "grad_norm": 0.40275399201166046, |
| "learning_rate": 5.858406132919162e-06, |
| "loss": 0.328, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.5103092783505154, |
| "grad_norm": 0.31623557083038034, |
| "learning_rate": 5.840694973224752e-06, |
| "loss": 0.3488, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.51340206185567, |
| "grad_norm": 0.41350708263181063, |
| "learning_rate": 5.82297295140367e-06, |
| "loss": 0.3198, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.5164948453608247, |
| "grad_norm": 0.32875741521436547, |
| "learning_rate": 5.805240296431765e-06, |
| "loss": 0.3453, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.5195876288659793, |
| "grad_norm": 0.4161024892078206, |
| "learning_rate": 5.787497237422272e-06, |
| "loss": 0.3537, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.522680412371134, |
| "grad_norm": 0.3350236926596831, |
| "learning_rate": 5.769744003622852e-06, |
| "loss": 0.3524, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.5257731958762886, |
| "grad_norm": 0.3718584309646912, |
| "learning_rate": 5.751980824412622e-06, |
| "loss": 0.3415, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.5288659793814432, |
| "grad_norm": 0.33405973084354273, |
| "learning_rate": 5.734207929299206e-06, |
| "loss": 0.3785, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.5319587628865978, |
| "grad_norm": 0.3491907461943334, |
| "learning_rate": 5.716425547915756e-06, |
| "loss": 0.3566, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.5350515463917525, |
| "grad_norm": 0.34785897166443974, |
| "learning_rate": 5.698633910017993e-06, |
| "loss": 0.3465, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.538144329896907, |
| "grad_norm": 0.40927118637399906, |
| "learning_rate": 5.680833245481234e-06, |
| "loss": 0.3254, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.5412371134020617, |
| "grad_norm": 0.364191447666914, |
| "learning_rate": 5.663023784297426e-06, |
| "loss": 0.3581, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.5443298969072163, |
| "grad_norm": 0.49907269025129153, |
| "learning_rate": 5.6452057565721715e-06, |
| "loss": 0.3772, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5474226804123712, |
| "grad_norm": 0.35782849389183596, |
| "learning_rate": 5.627379392521758e-06, |
| "loss": 0.3206, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.5505154639175258, |
| "grad_norm": 0.38887433093763457, |
| "learning_rate": 5.609544922470178e-06, |
| "loss": 0.2835, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.5536082474226804, |
| "grad_norm": 0.35326584373882364, |
| "learning_rate": 5.59170257684616e-06, |
| "loss": 0.3389, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.556701030927835, |
| "grad_norm": 0.40349932678306516, |
| "learning_rate": 5.573852586180185e-06, |
| "loss": 0.3889, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.5597938144329897, |
| "grad_norm": 0.3765765021817677, |
| "learning_rate": 5.555995181101517e-06, |
| "loss": 0.3528, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.5628865979381443, |
| "grad_norm": 0.3463176384496649, |
| "learning_rate": 5.53813059233521e-06, |
| "loss": 0.3459, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.565979381443299, |
| "grad_norm": 0.394296667709075, |
| "learning_rate": 5.520259050699138e-06, |
| "loss": 0.3811, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.5690721649484536, |
| "grad_norm": 0.4384971668510495, |
| "learning_rate": 5.50238078710101e-06, |
| "loss": 0.2956, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.5721649484536082, |
| "grad_norm": 0.2759163025819777, |
| "learning_rate": 5.484496032535385e-06, |
| "loss": 0.3542, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.5752577319587628, |
| "grad_norm": 0.39946717876701127, |
| "learning_rate": 5.466605018080684e-06, |
| "loss": 0.3553, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5783505154639177, |
| "grad_norm": 0.3168571537880368, |
| "learning_rate": 5.448707974896214e-06, |
| "loss": 0.3816, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.5814432989690723, |
| "grad_norm": 0.35776094699034044, |
| "learning_rate": 5.430805134219171e-06, |
| "loss": 0.3374, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.584536082474227, |
| "grad_norm": 0.3372746052323508, |
| "learning_rate": 5.412896727361663e-06, |
| "loss": 0.3289, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.5876288659793816, |
| "grad_norm": 0.3509930509793478, |
| "learning_rate": 5.3949829857077075e-06, |
| "loss": 0.3443, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.5907216494845362, |
| "grad_norm": 0.4449590962316539, |
| "learning_rate": 5.3770641407102554e-06, |
| "loss": 0.3418, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.5938144329896908, |
| "grad_norm": 0.3218980327532609, |
| "learning_rate": 5.3591404238881935e-06, |
| "loss": 0.3143, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.5969072164948455, |
| "grad_norm": 0.43752839784545067, |
| "learning_rate": 5.341212066823356e-06, |
| "loss": 0.3479, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.43005166973733555, |
| "learning_rate": 5.323279301157526e-06, |
| "loss": 0.3679, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.6030927835051547, |
| "grad_norm": 0.3840246824337281, |
| "learning_rate": 5.305342358589452e-06, |
| "loss": 0.3277, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.6061855670103093, |
| "grad_norm": 0.6948499227181512, |
| "learning_rate": 5.287401470871851e-06, |
| "loss": 0.4366, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.609278350515464, |
| "grad_norm": 0.4752253370301213, |
| "learning_rate": 5.2694568698084085e-06, |
| "loss": 0.3607, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.6123711340206186, |
| "grad_norm": 0.34613731373360773, |
| "learning_rate": 5.25150878725079e-06, |
| "loss": 0.3557, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.6154639175257732, |
| "grad_norm": 0.29453002565946584, |
| "learning_rate": 5.233557455095645e-06, |
| "loss": 0.3663, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.6185567010309279, |
| "grad_norm": 0.5096116989863569, |
| "learning_rate": 5.215603105281606e-06, |
| "loss": 0.4037, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.6216494845360825, |
| "grad_norm": 0.4228973578656383, |
| "learning_rate": 5.197645969786297e-06, |
| "loss": 0.3569, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.6247422680412371, |
| "grad_norm": 0.40898122279760163, |
| "learning_rate": 5.179686280623334e-06, |
| "loss": 0.3361, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.6278350515463917, |
| "grad_norm": 0.3239619283576348, |
| "learning_rate": 5.1617242698393265e-06, |
| "loss": 0.3408, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.6309278350515464, |
| "grad_norm": 0.32898745469773205, |
| "learning_rate": 5.143760169510882e-06, |
| "loss": 0.3341, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.634020618556701, |
| "grad_norm": 0.3635754593359022, |
| "learning_rate": 5.125794211741602e-06, |
| "loss": 0.3524, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.6371134020618556, |
| "grad_norm": 0.3818846922647241, |
| "learning_rate": 5.107826628659095e-06, |
| "loss": 0.3659, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6402061855670103, |
| "grad_norm": 0.4996575536438249, |
| "learning_rate": 5.089857652411961e-06, |
| "loss": 0.3966, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.6432989690721649, |
| "grad_norm": 0.3917831825064119, |
| "learning_rate": 5.0718875151668005e-06, |
| "loss": 0.3868, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.6463917525773195, |
| "grad_norm": 0.4033639088959359, |
| "learning_rate": 5.053916449105219e-06, |
| "loss": 0.3408, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.6494845360824741, |
| "grad_norm": 0.5601375825737572, |
| "learning_rate": 5.035944686420823e-06, |
| "loss": 0.366, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.6525773195876288, |
| "grad_norm": 0.3915181598012574, |
| "learning_rate": 5.0179724593162146e-06, |
| "loss": 0.3312, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.6556701030927834, |
| "grad_norm": 0.4643735140339054, |
| "learning_rate": 5e-06, |
| "loss": 0.3672, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.658762886597938, |
| "grad_norm": 0.4120202123265614, |
| "learning_rate": 4.982027540683785e-06, |
| "loss": 0.3972, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.6618556701030927, |
| "grad_norm": 0.3417335396795078, |
| "learning_rate": 4.964055313579179e-06, |
| "loss": 0.3024, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.6649484536082473, |
| "grad_norm": 0.35012876477382987, |
| "learning_rate": 4.946083550894782e-06, |
| "loss": 0.333, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.668041237113402, |
| "grad_norm": 0.3636966873267465, |
| "learning_rate": 4.928112484833201e-06, |
| "loss": 0.3441, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.6711340206185565, |
| "grad_norm": 0.3111643102986036, |
| "learning_rate": 4.910142347588041e-06, |
| "loss": 0.3188, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.6742268041237114, |
| "grad_norm": 0.40428582352299114, |
| "learning_rate": 4.892173371340907e-06, |
| "loss": 0.3484, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.677319587628866, |
| "grad_norm": 0.3672336238706189, |
| "learning_rate": 4.874205788258397e-06, |
| "loss": 0.3441, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.6804123711340206, |
| "grad_norm": 0.40811813907806466, |
| "learning_rate": 4.856239830489121e-06, |
| "loss": 0.3425, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.6835051546391753, |
| "grad_norm": 0.3656893919324694, |
| "learning_rate": 4.838275730160675e-06, |
| "loss": 0.3323, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.68659793814433, |
| "grad_norm": 0.37306700029813783, |
| "learning_rate": 4.8203137193766685e-06, |
| "loss": 0.3366, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.6896907216494845, |
| "grad_norm": 0.3487607040775841, |
| "learning_rate": 4.802354030213704e-06, |
| "loss": 0.3481, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.6927835051546392, |
| "grad_norm": 0.43124099792571774, |
| "learning_rate": 4.784396894718397e-06, |
| "loss": 0.3563, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.6958762886597938, |
| "grad_norm": 0.4317586697798039, |
| "learning_rate": 4.766442544904357e-06, |
| "loss": 0.3116, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.6989690721649484, |
| "grad_norm": 0.3554072453278775, |
| "learning_rate": 4.748491212749212e-06, |
| "loss": 0.3717, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.702061855670103, |
| "grad_norm": 0.38642313014520263, |
| "learning_rate": 4.730543130191594e-06, |
| "loss": 0.3133, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.705154639175258, |
| "grad_norm": 0.35718698367747603, |
| "learning_rate": 4.71259852912815e-06, |
| "loss": 0.3353, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.7082474226804125, |
| "grad_norm": 0.3768471478704191, |
| "learning_rate": 4.6946576414105485e-06, |
| "loss": 0.3762, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.7113402061855671, |
| "grad_norm": 0.3857887148028528, |
| "learning_rate": 4.676720698842474e-06, |
| "loss": 0.3529, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.7144329896907218, |
| "grad_norm": 0.4266039767609616, |
| "learning_rate": 4.6587879331766465e-06, |
| "loss": 0.3594, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.7175257731958764, |
| "grad_norm": 0.3948922880525116, |
| "learning_rate": 4.640859576111806e-06, |
| "loss": 0.3095, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.720618556701031, |
| "grad_norm": 0.30686472583946167, |
| "learning_rate": 4.622935859289745e-06, |
| "loss": 0.3217, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.7237113402061857, |
| "grad_norm": 0.33402814027027605, |
| "learning_rate": 4.605017014292294e-06, |
| "loss": 0.3162, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.7268041237113403, |
| "grad_norm": 0.39075139358824645, |
| "learning_rate": 4.587103272638339e-06, |
| "loss": 0.3532, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.729896907216495, |
| "grad_norm": 0.32617228983096963, |
| "learning_rate": 4.56919486578083e-06, |
| "loss": 0.3321, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7329896907216495, |
| "grad_norm": 0.4767942295459208, |
| "learning_rate": 4.551292025103789e-06, |
| "loss": 0.3727, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.7360824742268042, |
| "grad_norm": 0.3987103445762424, |
| "learning_rate": 4.533394981919318e-06, |
| "loss": 0.3387, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.7391752577319588, |
| "grad_norm": 0.38195474771000787, |
| "learning_rate": 4.515503967464619e-06, |
| "loss": 0.3518, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.7422680412371134, |
| "grad_norm": 0.35405334182231835, |
| "learning_rate": 4.4976192128989905e-06, |
| "loss": 0.3058, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.745360824742268, |
| "grad_norm": 0.3992288275189986, |
| "learning_rate": 4.479740949300864e-06, |
| "loss": 0.3615, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.7484536082474227, |
| "grad_norm": 0.3332582817837684, |
| "learning_rate": 4.461869407664791e-06, |
| "loss": 0.3642, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.7515463917525773, |
| "grad_norm": 0.42033238544083, |
| "learning_rate": 4.444004818898484e-06, |
| "loss": 0.3787, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.754639175257732, |
| "grad_norm": 0.4880167336142244, |
| "learning_rate": 4.426147413819816e-06, |
| "loss": 0.4005, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.7577319587628866, |
| "grad_norm": 0.3281528013955215, |
| "learning_rate": 4.408297423153841e-06, |
| "loss": 0.3268, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.7608247422680412, |
| "grad_norm": 0.3574637034531168, |
| "learning_rate": 4.3904550775298235e-06, |
| "loss": 0.3773, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.7639175257731958, |
| "grad_norm": 0.42421289930673906, |
| "learning_rate": 4.372620607478242e-06, |
| "loss": 0.3483, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.7670103092783505, |
| "grad_norm": 0.3679170394870709, |
| "learning_rate": 4.354794243427829e-06, |
| "loss": 0.3731, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.770103092783505, |
| "grad_norm": 0.38955579533123275, |
| "learning_rate": 4.336976215702574e-06, |
| "loss": 0.373, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.7731958762886597, |
| "grad_norm": 0.45173466197382955, |
| "learning_rate": 4.319166754518768e-06, |
| "loss": 0.3379, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.7762886597938143, |
| "grad_norm": 0.39227759702665027, |
| "learning_rate": 4.301366089982009e-06, |
| "loss": 0.3869, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.779381443298969, |
| "grad_norm": 0.3729461642890205, |
| "learning_rate": 4.283574452084246e-06, |
| "loss": 0.3542, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.7824742268041236, |
| "grad_norm": 0.4436097525383527, |
| "learning_rate": 4.265792070700796e-06, |
| "loss": 0.3567, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.7855670103092782, |
| "grad_norm": 0.40747362880576216, |
| "learning_rate": 4.24801917558738e-06, |
| "loss": 0.3232, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.7886597938144329, |
| "grad_norm": 0.3828534325794096, |
| "learning_rate": 4.23025599637715e-06, |
| "loss": 0.3209, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.7917525773195875, |
| "grad_norm": 0.28993427750985645, |
| "learning_rate": 4.212502762577729e-06, |
| "loss": 0.3421, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.794845360824742, |
| "grad_norm": 0.39602892726754746, |
| "learning_rate": 4.1947597035682355e-06, |
| "loss": 0.3635, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.797938144329897, |
| "grad_norm": 0.3789277210001876, |
| "learning_rate": 4.17702704859633e-06, |
| "loss": 0.3234, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.8010309278350516, |
| "grad_norm": 0.39826069405908116, |
| "learning_rate": 4.159305026775249e-06, |
| "loss": 0.32, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.8041237113402062, |
| "grad_norm": 0.3932667433563304, |
| "learning_rate": 4.14159386708084e-06, |
| "loss": 0.3634, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.8072164948453608, |
| "grad_norm": 0.3506483772059777, |
| "learning_rate": 4.1238937983486085e-06, |
| "loss": 0.3733, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.8103092783505155, |
| "grad_norm": 0.4272908390532751, |
| "learning_rate": 4.106205049270764e-06, |
| "loss": 0.384, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.81340206185567, |
| "grad_norm": 0.36438615249297024, |
| "learning_rate": 4.088527848393258e-06, |
| "loss": 0.3289, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.8164948453608247, |
| "grad_norm": 0.3438225140274201, |
| "learning_rate": 4.070862424112833e-06, |
| "loss": 0.3511, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.8195876288659794, |
| "grad_norm": 0.3114587225460769, |
| "learning_rate": 4.053209004674079e-06, |
| "loss": 0.3222, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.822680412371134, |
| "grad_norm": 0.29869536683197423, |
| "learning_rate": 4.035567818166469e-06, |
| "loss": 0.3006, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.8257731958762886, |
| "grad_norm": 0.3609354722469145, |
| "learning_rate": 4.017939092521434e-06, |
| "loss": 0.3662, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.8288659793814435, |
| "grad_norm": 0.47969264614152507, |
| "learning_rate": 4.000323055509393e-06, |
| "loss": 0.3669, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.831958762886598, |
| "grad_norm": 0.3050755424330547, |
| "learning_rate": 3.982719934736832e-06, |
| "loss": 0.3482, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.8350515463917527, |
| "grad_norm": 0.4386065475892597, |
| "learning_rate": 3.9651299576433475e-06, |
| "loss": 0.3382, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.8381443298969073, |
| "grad_norm": 0.3563022429334855, |
| "learning_rate": 3.947553351498719e-06, |
| "loss": 0.3758, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.841237113402062, |
| "grad_norm": 0.3462789139485534, |
| "learning_rate": 3.929990343399963e-06, |
| "loss": 0.3771, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.8443298969072166, |
| "grad_norm": 0.6529657263861594, |
| "learning_rate": 3.912441160268407e-06, |
| "loss": 0.3721, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.8474226804123712, |
| "grad_norm": 0.42113391108788284, |
| "learning_rate": 3.894906028846757e-06, |
| "loss": 0.3296, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.8505154639175259, |
| "grad_norm": 0.3248573190461725, |
| "learning_rate": 3.877385175696156e-06, |
| "loss": 0.328, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.8536082474226805, |
| "grad_norm": 0.38525111572361326, |
| "learning_rate": 3.859878827193276e-06, |
| "loss": 0.3457, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8567010309278351, |
| "grad_norm": 0.45934475152425075, |
| "learning_rate": 3.842387209527374e-06, |
| "loss": 0.3482, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.8597938144329897, |
| "grad_norm": 0.3268894340260456, |
| "learning_rate": 3.824910548697388e-06, |
| "loss": 0.319, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.8628865979381444, |
| "grad_norm": 0.3645984463478698, |
| "learning_rate": 3.8074490705089983e-06, |
| "loss": 0.3271, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.865979381443299, |
| "grad_norm": 0.29361036400417095, |
| "learning_rate": 3.790003000571726e-06, |
| "loss": 0.3453, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.8690721649484536, |
| "grad_norm": 0.42982680335675016, |
| "learning_rate": 3.7725725642960047e-06, |
| "loss": 0.3677, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.8721649484536083, |
| "grad_norm": 0.3720418664564901, |
| "learning_rate": 3.7551579868902828e-06, |
| "loss": 0.354, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.8752577319587629, |
| "grad_norm": 0.812231457718325, |
| "learning_rate": 3.7377594933580967e-06, |
| "loss": 0.3607, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.8783505154639175, |
| "grad_norm": 0.347727286666641, |
| "learning_rate": 3.7203773084951816e-06, |
| "loss": 0.3527, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.8814432989690721, |
| "grad_norm": 0.4814060129518193, |
| "learning_rate": 3.7030116568865486e-06, |
| "loss": 0.3684, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.8845360824742268, |
| "grad_norm": 0.36169836379494236, |
| "learning_rate": 3.685662762903601e-06, |
| "loss": 0.3466, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.8876288659793814, |
| "grad_norm": 0.34318821998983295, |
| "learning_rate": 3.6683308507012196e-06, |
| "loss": 0.3299, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.890721649484536, |
| "grad_norm": 0.2995004736405127, |
| "learning_rate": 3.6510161442148783e-06, |
| "loss": 0.308, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.8938144329896907, |
| "grad_norm": 0.515389833064657, |
| "learning_rate": 3.6337188671577463e-06, |
| "loss": 0.3536, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.8969072164948453, |
| "grad_norm": 0.439637019996902, |
| "learning_rate": 3.6164392430177898e-06, |
| "loss": 0.325, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.3374756184294089, |
| "learning_rate": 3.599177495054903e-06, |
| "loss": 0.3683, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.9030927835051545, |
| "grad_norm": 0.3948612480996173, |
| "learning_rate": 3.5819338462980037e-06, |
| "loss": 0.3625, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.9061855670103092, |
| "grad_norm": 0.33357356595751453, |
| "learning_rate": 3.5647085195421668e-06, |
| "loss": 0.316, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.9092783505154638, |
| "grad_norm": 0.45806876778610933, |
| "learning_rate": 3.5475017373457328e-06, |
| "loss": 0.356, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.9123711340206184, |
| "grad_norm": 0.5131941090672334, |
| "learning_rate": 3.5303137220274467e-06, |
| "loss": 0.367, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.915463917525773, |
| "grad_norm": 0.4186380446116192, |
| "learning_rate": 3.5131446956635706e-06, |
| "loss": 0.3809, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.9185567010309277, |
| "grad_norm": 0.43813716192128194, |
| "learning_rate": 3.4959948800850253e-06, |
| "loss": 0.334, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.9216494845360823, |
| "grad_norm": 0.3278335871752149, |
| "learning_rate": 3.478864496874519e-06, |
| "loss": 0.3531, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.9247422680412372, |
| "grad_norm": 0.43980216398961003, |
| "learning_rate": 3.461753767363687e-06, |
| "loss": 0.3638, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.9278350515463918, |
| "grad_norm": 0.39408155093980723, |
| "learning_rate": 3.4446629126302268e-06, |
| "loss": 0.3387, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.9309278350515464, |
| "grad_norm": 0.3733877925676547, |
| "learning_rate": 3.427592153495053e-06, |
| "loss": 0.3462, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.934020618556701, |
| "grad_norm": 0.4832864361818532, |
| "learning_rate": 3.410541710519427e-06, |
| "loss": 0.3567, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.9371134020618557, |
| "grad_norm": 0.3762228487632087, |
| "learning_rate": 3.3935118040021255e-06, |
| "loss": 0.3031, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.9402061855670103, |
| "grad_norm": 0.3598117820415253, |
| "learning_rate": 3.3765026539765832e-06, |
| "loss": 0.3347, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.943298969072165, |
| "grad_norm": 0.3579045196689011, |
| "learning_rate": 3.3595144802080493e-06, |
| "loss": 0.3662, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.9463917525773196, |
| "grad_norm": 0.38779135939714676, |
| "learning_rate": 3.342547502190754e-06, |
| "loss": 0.309, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.9494845360824742, |
| "grad_norm": 0.3153073267233112, |
| "learning_rate": 3.3256019391450696e-06, |
| "loss": 0.3268, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.9525773195876288, |
| "grad_norm": 0.39382790442601595, |
| "learning_rate": 3.3086780100146776e-06, |
| "loss": 0.3551, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.9556701030927837, |
| "grad_norm": 0.3173007725064202, |
| "learning_rate": 3.2917759334637376e-06, |
| "loss": 0.3162, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.9587628865979383, |
| "grad_norm": 0.3731996954778807, |
| "learning_rate": 3.2748959278740714e-06, |
| "loss": 0.3844, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.961855670103093, |
| "grad_norm": 0.38549990466885287, |
| "learning_rate": 3.258038211342327e-06, |
| "loss": 0.3603, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.9649484536082475, |
| "grad_norm": 0.2821598992722239, |
| "learning_rate": 3.2412030016771768e-06, |
| "loss": 0.3093, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.9680412371134022, |
| "grad_norm": 0.37085760577720334, |
| "learning_rate": 3.2243905163964863e-06, |
| "loss": 0.3517, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.9711340206185568, |
| "grad_norm": 0.369370716299227, |
| "learning_rate": 3.2076009727245204e-06, |
| "loss": 0.3113, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.9742268041237114, |
| "grad_norm": 0.44492141505571886, |
| "learning_rate": 3.1908345875891243e-06, |
| "loss": 0.3806, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.977319587628866, |
| "grad_norm": 0.4299515138037621, |
| "learning_rate": 3.1740915776189275e-06, |
| "loss": 0.3484, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.9804123711340207, |
| "grad_norm": 0.3458870621986509, |
| "learning_rate": 3.1573721591405405e-06, |
| "loss": 0.3463, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.9835051546391753, |
| "grad_norm": 0.3522458946378208, |
| "learning_rate": 3.140676548175763e-06, |
| "loss": 0.3339, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.98659793814433, |
| "grad_norm": 0.3755282519867609, |
| "learning_rate": 3.1240049604387955e-06, |
| "loss": 0.3647, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.9896907216494846, |
| "grad_norm": 0.3836690091047248, |
| "learning_rate": 3.10735761133344e-06, |
| "loss": 0.3316, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.9927835051546392, |
| "grad_norm": 0.460009964702149, |
| "learning_rate": 3.0907347159503364e-06, |
| "loss": 0.3478, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.9958762886597938, |
| "grad_norm": 0.38159065451537555, |
| "learning_rate": 3.074136489064161e-06, |
| "loss": 0.3474, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.9989690721649485, |
| "grad_norm": 0.38052585704250785, |
| "learning_rate": 3.057563145130873e-06, |
| "loss": 0.3167, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.38052585704250785, |
| "learning_rate": 3.0410148982849248e-06, |
| "loss": 0.346, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.0030927835051546, |
| "grad_norm": 0.7864373970070601, |
| "learning_rate": 3.024491962336511e-06, |
| "loss": 0.3058, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.0061855670103093, |
| "grad_norm": 0.4155546032809773, |
| "learning_rate": 3.007994550768793e-06, |
| "loss": 0.2993, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.009278350515464, |
| "grad_norm": 0.517838019795173, |
| "learning_rate": 2.991522876735154e-06, |
| "loss": 0.2926, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.0123711340206185, |
| "grad_norm": 0.34043841402604463, |
| "learning_rate": 2.9750771530564295e-06, |
| "loss": 0.2738, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.015463917525773, |
| "grad_norm": 0.3120194955678573, |
| "learning_rate": 2.9586575922181724e-06, |
| "loss": 0.2674, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.0185567010309278, |
| "grad_norm": 0.7794499266529689, |
| "learning_rate": 2.9422644063678952e-06, |
| "loss": 0.2644, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.0216494845360824, |
| "grad_norm": 0.38566659693898253, |
| "learning_rate": 2.9258978073123413e-06, |
| "loss": 0.2684, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.024742268041237, |
| "grad_norm": 0.49676337527578907, |
| "learning_rate": 2.909558006514735e-06, |
| "loss": 0.257, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.0278350515463917, |
| "grad_norm": 0.35809281358226486, |
| "learning_rate": 2.8932452150920576e-06, |
| "loss": 0.2881, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.0309278350515463, |
| "grad_norm": 0.343520690507132, |
| "learning_rate": 2.876959643812325e-06, |
| "loss": 0.2949, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.034020618556701, |
| "grad_norm": 0.43398985865900497, |
| "learning_rate": 2.860701503091845e-06, |
| "loss": 0.3143, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.0371134020618555, |
| "grad_norm": 0.5249462442966696, |
| "learning_rate": 2.844471002992526e-06, |
| "loss": 0.2811, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.04020618556701, |
| "grad_norm": 0.38045577210647147, |
| "learning_rate": 2.8282683532191333e-06, |
| "loss": 0.2832, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.043298969072165, |
| "grad_norm": 0.41396895046937116, |
| "learning_rate": 2.8120937631166056e-06, |
| "loss": 0.283, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.0463917525773194, |
| "grad_norm": 0.3911427806566122, |
| "learning_rate": 2.795947441667334e-06, |
| "loss": 0.2797, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.049484536082474, |
| "grad_norm": 0.3515798025425524, |
| "learning_rate": 2.7798295974884675e-06, |
| "loss": 0.2801, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.0525773195876287, |
| "grad_norm": 0.3343140501364322, |
| "learning_rate": 2.7637404388292184e-06, |
| "loss": 0.3043, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.0556701030927833, |
| "grad_norm": 0.4382243594147191, |
| "learning_rate": 2.747680173568168e-06, |
| "loss": 0.2569, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.058762886597938, |
| "grad_norm": 0.5015312640014641, |
| "learning_rate": 2.7316490092105856e-06, |
| "loss": 0.2709, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.0618556701030926, |
| "grad_norm": 0.4976105522523717, |
| "learning_rate": 2.715647152885743e-06, |
| "loss": 0.3071, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.0649484536082476, |
| "grad_norm": 0.40739613826250587, |
| "learning_rate": 2.6996748113442397e-06, |
| "loss": 0.2654, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.0680412371134023, |
| "grad_norm": 0.37038708287005284, |
| "learning_rate": 2.6837321909553336e-06, |
| "loss": 0.2736, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.071134020618557, |
| "grad_norm": 0.48730500899488516, |
| "learning_rate": 2.6678194977042727e-06, |
| "loss": 0.2616, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.0742268041237115, |
| "grad_norm": 0.4328911289033136, |
| "learning_rate": 2.651936937189632e-06, |
| "loss": 0.3325, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.077319587628866, |
| "grad_norm": 0.5426180704517115, |
| "learning_rate": 2.6360847146206624e-06, |
| "loss": 0.2887, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.0804123711340208, |
| "grad_norm": 0.6127385789467704, |
| "learning_rate": 2.6202630348146323e-06, |
| "loss": 0.27, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.0835051546391754, |
| "grad_norm": 0.3666477703023413, |
| "learning_rate": 2.6044721021941887e-06, |
| "loss": 0.2891, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.08659793814433, |
| "grad_norm": 0.37996463656101304, |
| "learning_rate": 2.5887121207847093e-06, |
| "loss": 0.3025, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.0896907216494847, |
| "grad_norm": 0.35839291187334554, |
| "learning_rate": 2.5729832942116705e-06, |
| "loss": 0.2903, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.0927835051546393, |
| "grad_norm": 0.4004079234535629, |
| "learning_rate": 2.5572858256980163e-06, |
| "loss": 0.2635, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.095876288659794, |
| "grad_norm": 0.3487304623438044, |
| "learning_rate": 2.5416199180615297e-06, |
| "loss": 0.2711, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.0989690721649485, |
| "grad_norm": 0.4353724025324779, |
| "learning_rate": 2.525985773712216e-06, |
| "loss": 0.2755, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.102061855670103, |
| "grad_norm": 0.4276980963577041, |
| "learning_rate": 2.5103835946496846e-06, |
| "loss": 0.2917, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.105154639175258, |
| "grad_norm": 0.42988999785831816, |
| "learning_rate": 2.4948135824605366e-06, |
| "loss": 0.24, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.1082474226804124, |
| "grad_norm": 0.4082557837039154, |
| "learning_rate": 2.479275938315775e-06, |
| "loss": 0.2634, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.111340206185567, |
| "grad_norm": 0.3143558127025377, |
| "learning_rate": 2.4637708629681786e-06, |
| "loss": 0.2673, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.1144329896907217, |
| "grad_norm": 0.357931063053651, |
| "learning_rate": 2.4482985567497395e-06, |
| "loss": 0.278, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.1175257731958763, |
| "grad_norm": 0.3981027115089952, |
| "learning_rate": 2.4328592195690444e-06, |
| "loss": 0.2677, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.120618556701031, |
| "grad_norm": 0.40716571063614465, |
| "learning_rate": 2.4174530509087193e-06, |
| "loss": 0.273, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.1237113402061856, |
| "grad_norm": 0.36013914620812215, |
| "learning_rate": 2.4020802498228333e-06, |
| "loss": 0.2601, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.12680412371134, |
| "grad_norm": 0.3767013802752783, |
| "learning_rate": 2.3867410149343284e-06, |
| "loss": 0.282, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.129896907216495, |
| "grad_norm": 0.49573734401612746, |
| "learning_rate": 2.3714355444324675e-06, |
| "loss": 0.2557, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.1329896907216495, |
| "grad_norm": 0.3697812393978906, |
| "learning_rate": 2.3561640360702525e-06, |
| "loss": 0.2651, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.136082474226804, |
| "grad_norm": 0.3928302981423731, |
| "learning_rate": 2.340926687161893e-06, |
| "loss": 0.2942, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.1391752577319587, |
| "grad_norm": 0.44134848334336135, |
| "learning_rate": 2.3257236945802292e-06, |
| "loss": 0.2776, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.1422680412371133, |
| "grad_norm": 0.5528462494959189, |
| "learning_rate": 2.31055525475422e-06, |
| "loss": 0.2531, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.145360824742268, |
| "grad_norm": 0.3843449058361117, |
| "learning_rate": 2.295421563666372e-06, |
| "loss": 0.2888, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.1484536082474226, |
| "grad_norm": 0.3541017264885976, |
| "learning_rate": 2.2803228168502383e-06, |
| "loss": 0.2728, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.1515463917525772, |
| "grad_norm": 0.365972017913051, |
| "learning_rate": 2.265259209387867e-06, |
| "loss": 0.2565, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.154639175257732, |
| "grad_norm": 0.3863462492512773, |
| "learning_rate": 2.2502309359072953e-06, |
| "loss": 0.2682, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.1577319587628865, |
| "grad_norm": 0.35442097659929384, |
| "learning_rate": 2.2352381905800325e-06, |
| "loss": 0.2536, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.160824742268041, |
| "grad_norm": 0.32697612991708297, |
| "learning_rate": 2.2202811671185458e-06, |
| "loss": 0.2975, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.1639175257731957, |
| "grad_norm": 0.36614596481715983, |
| "learning_rate": 2.205360058773764e-06, |
| "loss": 0.2735, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.1670103092783504, |
| "grad_norm": 0.43091349727443096, |
| "learning_rate": 2.190475058332574e-06, |
| "loss": 0.2549, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.170103092783505, |
| "grad_norm": 0.361129805505901, |
| "learning_rate": 2.1756263581153427e-06, |
| "loss": 0.3013, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.1731958762886596, |
| "grad_norm": 0.4947360631523454, |
| "learning_rate": 2.16081414997341e-06, |
| "loss": 0.2583, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.1762886597938143, |
| "grad_norm": 0.38239489577444014, |
| "learning_rate": 2.1460386252866327e-06, |
| "loss": 0.2851, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.179381443298969, |
| "grad_norm": 0.3919201012163893, |
| "learning_rate": 2.1312999749608987e-06, |
| "loss": 0.2598, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.1824742268041235, |
| "grad_norm": 0.45850653108362616, |
| "learning_rate": 2.1165983894256647e-06, |
| "loss": 0.2748, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.1855670103092786, |
| "grad_norm": 0.414377399643986, |
| "learning_rate": 2.101934058631495e-06, |
| "loss": 0.3246, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.188659793814433, |
| "grad_norm": 0.36659216289733115, |
| "learning_rate": 2.0873071720476067e-06, |
| "loss": 0.2595, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.191752577319588, |
| "grad_norm": 0.3417432754143865, |
| "learning_rate": 2.0727179186594224e-06, |
| "loss": 0.2881, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.1948453608247425, |
| "grad_norm": 0.33748255922934906, |
| "learning_rate": 2.058166486966128e-06, |
| "loss": 0.2418, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.197938144329897, |
| "grad_norm": 0.3995433111495412, |
| "learning_rate": 2.043653064978239e-06, |
| "loss": 0.2679, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.2010309278350517, |
| "grad_norm": 0.4163499781278848, |
| "learning_rate": 2.0291778402151685e-06, |
| "loss": 0.2564, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.2041237113402063, |
| "grad_norm": 0.32369566851848647, |
| "learning_rate": 2.0147409997028045e-06, |
| "loss": 0.2842, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.207216494845361, |
| "grad_norm": 0.4445352641934898, |
| "learning_rate": 2.0003427299710966e-06, |
| "loss": 0.3008, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.2103092783505156, |
| "grad_norm": 0.37416288101445394, |
| "learning_rate": 1.9859832170516437e-06, |
| "loss": 0.2833, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.2134020618556702, |
| "grad_norm": 0.44437495554517636, |
| "learning_rate": 1.9716626464752896e-06, |
| "loss": 0.2731, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.216494845360825, |
| "grad_norm": 0.43872445768670315, |
| "learning_rate": 1.9573812032697277e-06, |
| "loss": 0.2923, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.2195876288659795, |
| "grad_norm": 0.3242000894012009, |
| "learning_rate": 1.9431390719571096e-06, |
| "loss": 0.2392, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.222680412371134, |
| "grad_norm": 0.47261881424241464, |
| "learning_rate": 1.928936436551661e-06, |
| "loss": 0.2833, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.2257731958762887, |
| "grad_norm": 0.6426216936806711, |
| "learning_rate": 1.914773480557304e-06, |
| "loss": 0.2585, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.2288659793814434, |
| "grad_norm": 0.48708615764527685, |
| "learning_rate": 1.9006503869652854e-06, |
| "loss": 0.2923, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.231958762886598, |
| "grad_norm": 0.4066200708502729, |
| "learning_rate": 1.8865673382518146e-06, |
| "loss": 0.2868, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.2350515463917526, |
| "grad_norm": 0.4193733496397231, |
| "learning_rate": 1.872524516375705e-06, |
| "loss": 0.2813, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.2381443298969073, |
| "grad_norm": 0.37564083363888556, |
| "learning_rate": 1.8585221027760209e-06, |
| "loss": 0.2832, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.241237113402062, |
| "grad_norm": 0.3203728508806716, |
| "learning_rate": 1.8445602783697375e-06, |
| "loss": 0.2657, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.2443298969072165, |
| "grad_norm": 0.39734444741295993, |
| "learning_rate": 1.8306392235493946e-06, |
| "loss": 0.2582, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.247422680412371, |
| "grad_norm": 0.5190391324637584, |
| "learning_rate": 1.8167591181807836e-06, |
| "loss": 0.3178, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.2505154639175258, |
| "grad_norm": 0.3638149149971288, |
| "learning_rate": 1.8029201416005976e-06, |
| "loss": 0.2378, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.2536082474226804, |
| "grad_norm": 0.3864306165776402, |
| "learning_rate": 1.789122472614143e-06, |
| "loss": 0.2643, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.256701030927835, |
| "grad_norm": 0.40392109683979727, |
| "learning_rate": 1.775366289493003e-06, |
| "loss": 0.2998, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.2597938144329897, |
| "grad_norm": 0.47275997790914914, |
| "learning_rate": 1.7616517699727554e-06, |
| "loss": 0.2748, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.2628865979381443, |
| "grad_norm": 0.408857288094096, |
| "learning_rate": 1.7479790912506628e-06, |
| "loss": 0.2554, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.265979381443299, |
| "grad_norm": 0.41080443668702876, |
| "learning_rate": 1.734348429983384e-06, |
| "loss": 0.2522, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.2690721649484535, |
| "grad_norm": 0.38970842456935784, |
| "learning_rate": 1.7207599622847042e-06, |
| "loss": 0.3015, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.272164948453608, |
| "grad_norm": 0.35769582798595084, |
| "learning_rate": 1.7072138637232394e-06, |
| "loss": 0.2763, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.275257731958763, |
| "grad_norm": 0.4250873527600758, |
| "learning_rate": 1.6937103093201895e-06, |
| "loss": 0.3007, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.2783505154639174, |
| "grad_norm": 0.361704767065038, |
| "learning_rate": 1.6802494735470548e-06, |
| "loss": 0.2558, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.281443298969072, |
| "grad_norm": 0.3686554134526815, |
| "learning_rate": 1.6668315303234068e-06, |
| "loss": 0.2619, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.2845360824742267, |
| "grad_norm": 0.44057581127752826, |
| "learning_rate": 1.6534566530146123e-06, |
| "loss": 0.2878, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.2876288659793813, |
| "grad_norm": 0.3306113026712118, |
| "learning_rate": 1.6401250144296239e-06, |
| "loss": 0.2848, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.290721649484536, |
| "grad_norm": 0.30579912019686456, |
| "learning_rate": 1.626836786818719e-06, |
| "loss": 0.3013, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.2938144329896906, |
| "grad_norm": 0.4848429339929378, |
| "learning_rate": 1.6135921418712959e-06, |
| "loss": 0.2721, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.296907216494845, |
| "grad_norm": 0.3619522576835984, |
| "learning_rate": 1.6003912507136422e-06, |
| "loss": 0.2901, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.3117306212597808, |
| "learning_rate": 1.5872342839067305e-06, |
| "loss": 0.2809, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.3030927835051545, |
| "grad_norm": 0.3018814157736744, |
| "learning_rate": 1.574121411444013e-06, |
| "loss": 0.2563, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.306185567010309, |
| "grad_norm": 0.39204350000474086, |
| "learning_rate": 1.561052802749221e-06, |
| "loss": 0.2949, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.3092783505154637, |
| "grad_norm": 0.469936745505809, |
| "learning_rate": 1.548028626674189e-06, |
| "loss": 0.2635, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.3123711340206183, |
| "grad_norm": 0.33508014362099003, |
| "learning_rate": 1.5350490514966509e-06, |
| "loss": 0.2604, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.315463917525773, |
| "grad_norm": 0.4125367262529412, |
| "learning_rate": 1.5221142449180882e-06, |
| "loss": 0.2784, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.3185567010309276, |
| "grad_norm": 0.40880353780401285, |
| "learning_rate": 1.5092243740615486e-06, |
| "loss": 0.2649, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.3216494845360827, |
| "grad_norm": 0.3511252824079501, |
| "learning_rate": 1.496379605469494e-06, |
| "loss": 0.2589, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.3247422680412373, |
| "grad_norm": 0.44823739152907094, |
| "learning_rate": 1.4835801051016463e-06, |
| "loss": 0.3085, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.327835051546392, |
| "grad_norm": 0.4619415605914144, |
| "learning_rate": 1.4708260383328422e-06, |
| "loss": 0.3109, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.3309278350515465, |
| "grad_norm": 0.37302697291531406, |
| "learning_rate": 1.4581175699508982e-06, |
| "loss": 0.2895, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.334020618556701, |
| "grad_norm": 0.4600197812984674, |
| "learning_rate": 1.4454548641544803e-06, |
| "loss": 0.2506, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.337113402061856, |
| "grad_norm": 0.3379307558333953, |
| "learning_rate": 1.4328380845509837e-06, |
| "loss": 0.2783, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.3402061855670104, |
| "grad_norm": 0.4403369320114859, |
| "learning_rate": 1.4202673941544176e-06, |
| "loss": 0.2945, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.343298969072165, |
| "grad_norm": 0.41855994301163396, |
| "learning_rate": 1.4077429553832995e-06, |
| "loss": 0.2787, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.3463917525773197, |
| "grad_norm": 0.4242311482486076, |
| "learning_rate": 1.3952649300585574e-06, |
| "loss": 0.2753, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.3494845360824743, |
| "grad_norm": 0.4912168070549898, |
| "learning_rate": 1.382833479401438e-06, |
| "loss": 0.2874, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.352577319587629, |
| "grad_norm": 0.47260058954453504, |
| "learning_rate": 1.3704487640314257e-06, |
| "loss": 0.2781, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.3556701030927836, |
| "grad_norm": 0.3356648638014468, |
| "learning_rate": 1.3581109439641587e-06, |
| "loss": 0.2429, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.358762886597938, |
| "grad_norm": 0.3406439635161593, |
| "learning_rate": 1.3458201786093795e-06, |
| "loss": 0.3005, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.361855670103093, |
| "grad_norm": 0.9027133680522813, |
| "learning_rate": 1.3335766267688566e-06, |
| "loss": 0.2512, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.3649484536082475, |
| "grad_norm": 0.4170248415384265, |
| "learning_rate": 1.321380446634342e-06, |
| "loss": 0.2716, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.368041237113402, |
| "grad_norm": 0.34736226887673133, |
| "learning_rate": 1.309231795785526e-06, |
| "loss": 0.279, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.3711340206185567, |
| "grad_norm": 0.38374462474597076, |
| "learning_rate": 1.2971308311880015e-06, |
| "loss": 0.2659, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.3742268041237113, |
| "grad_norm": 0.4087701862728044, |
| "learning_rate": 1.2850777091912364e-06, |
| "loss": 0.2615, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.377319587628866, |
| "grad_norm": 0.4354684165110471, |
| "learning_rate": 1.2730725855265452e-06, |
| "loss": 0.2617, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.3804123711340206, |
| "grad_norm": 0.4232874812730349, |
| "learning_rate": 1.2611156153050963e-06, |
| "loss": 0.2992, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.3835051546391752, |
| "grad_norm": 0.4866904483506314, |
| "learning_rate": 1.2492069530158829e-06, |
| "loss": 0.2852, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.38659793814433, |
| "grad_norm": 0.522108829131126, |
| "learning_rate": 1.237346752523752e-06, |
| "loss": 0.2949, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.3896907216494845, |
| "grad_norm": 0.36945752306550494, |
| "learning_rate": 1.225535167067392e-06, |
| "loss": 0.299, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.392783505154639, |
| "grad_norm": 0.40065674081859776, |
| "learning_rate": 1.2137723492573766e-06, |
| "loss": 0.2727, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.3958762886597937, |
| "grad_norm": 0.43968669047979536, |
| "learning_rate": 1.2020584510741707e-06, |
| "loss": 0.2764, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.3989690721649484, |
| "grad_norm": 0.3823590728970254, |
| "learning_rate": 1.1903936238661868e-06, |
| "loss": 0.2512, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.402061855670103, |
| "grad_norm": 0.3865543216955176, |
| "learning_rate": 1.1787780183478126e-06, |
| "loss": 0.2597, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.4051546391752576, |
| "grad_norm": 0.4474565050735651, |
| "learning_rate": 1.167211784597474e-06, |
| "loss": 0.2475, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.4082474226804123, |
| "grad_norm": 1.5862590518924486, |
| "learning_rate": 1.1556950720556976e-06, |
| "loss": 0.2697, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.411340206185567, |
| "grad_norm": 0.46216684820643666, |
| "learning_rate": 1.1442280295231656e-06, |
| "loss": 0.2752, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.4144329896907215, |
| "grad_norm": 0.40916275158083065, |
| "learning_rate": 1.1328108051588154e-06, |
| "loss": 0.2357, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.417525773195876, |
| "grad_norm": 0.48248035654922017, |
| "learning_rate": 1.1214435464779006e-06, |
| "loss": 0.2558, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.4206185567010308, |
| "grad_norm": 0.36558870979576563, |
| "learning_rate": 1.1101264003501088e-06, |
| "loss": 0.2771, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.4237113402061854, |
| "grad_norm": 0.37906519013254625, |
| "learning_rate": 1.0988595129976444e-06, |
| "loss": 0.2524, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.42680412371134, |
| "grad_norm": 0.3733058866504784, |
| "learning_rate": 1.0876430299933516e-06, |
| "loss": 0.258, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.429896907216495, |
| "grad_norm": 0.3403712357526472, |
| "learning_rate": 1.0764770962588278e-06, |
| "loss": 0.2502, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.4329896907216497, |
| "grad_norm": 0.38993238552151593, |
| "learning_rate": 1.0653618560625556e-06, |
| "loss": 0.2777, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.4360824742268044, |
| "grad_norm": 0.4792747181734268, |
| "learning_rate": 1.0542974530180327e-06, |
| "loss": 0.297, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.439175257731959, |
| "grad_norm": 0.47061224367787174, |
| "learning_rate": 1.0432840300819224e-06, |
| "loss": 0.2518, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.4422680412371136, |
| "grad_norm": 0.35277694812297855, |
| "learning_rate": 1.0323217295522026e-06, |
| "loss": 0.245, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.4453608247422682, |
| "grad_norm": 0.360935495801492, |
| "learning_rate": 1.0214106930663293e-06, |
| "loss": 0.2731, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.448453608247423, |
| "grad_norm": 0.4162882205392147, |
| "learning_rate": 1.0105510615994051e-06, |
| "loss": 0.2776, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.4515463917525775, |
| "grad_norm": 0.41547933460758235, |
| "learning_rate": 9.99742975462359e-07, |
| "loss": 0.2611, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.454639175257732, |
| "grad_norm": 0.37728046401810167, |
| "learning_rate": 9.889865743001332e-07, |
| "loss": 0.2664, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.4577319587628867, |
| "grad_norm": 0.43165759943984655, |
| "learning_rate": 9.782819970898776e-07, |
| "loss": 0.2605, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.4608247422680414, |
| "grad_norm": 0.3654529820399213, |
| "learning_rate": 9.676293821391568e-07, |
| "loss": 0.2988, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.463917525773196, |
| "grad_norm": 0.43771001110749813, |
| "learning_rate": 9.570288670841609e-07, |
| "loss": 0.2555, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.4670103092783506, |
| "grad_norm": 0.338337995644797, |
| "learning_rate": 9.464805888879264e-07, |
| "loss": 0.2696, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.4701030927835053, |
| "grad_norm": 0.3598910989343362, |
| "learning_rate": 9.359846838385706e-07, |
| "loss": 0.2768, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.47319587628866, |
| "grad_norm": 0.47013544090875536, |
| "learning_rate": 9.255412875475256e-07, |
| "loss": 0.2867, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.4762886597938145, |
| "grad_norm": 0.42217158967218676, |
| "learning_rate": 9.151505349477901e-07, |
| "loss": 0.2746, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.479381443298969, |
| "grad_norm": 0.35208495582452803, |
| "learning_rate": 9.048125602921843e-07, |
| "loss": 0.2563, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.4824742268041238, |
| "grad_norm": 0.3505110513475081, |
| "learning_rate": 8.945274971516155e-07, |
| "loss": 0.2536, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.4855670103092784, |
| "grad_norm": 0.4087816084686581, |
| "learning_rate": 8.842954784133517e-07, |
| "loss": 0.3051, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.488659793814433, |
| "grad_norm": 0.3960789356267894, |
| "learning_rate": 8.741166362793057e-07, |
| "loss": 0.2537, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.4917525773195877, |
| "grad_norm": 0.39069133380864984, |
| "learning_rate": 8.639911022643288e-07, |
| "loss": 0.264, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.4948453608247423, |
| "grad_norm": 0.3863554138938276, |
| "learning_rate": 8.539190071945036e-07, |
| "loss": 0.272, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.497938144329897, |
| "grad_norm": 0.40862601106103924, |
| "learning_rate": 8.439004812054658e-07, |
| "loss": 0.2636, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.5010309278350515, |
| "grad_norm": 0.4222027241318616, |
| "learning_rate": 8.339356537407129e-07, |
| "loss": 0.2596, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.504123711340206, |
| "grad_norm": 0.36178986472682245, |
| "learning_rate": 8.240246535499369e-07, |
| "loss": 0.2847, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.507216494845361, |
| "grad_norm": 0.38472636391035925, |
| "learning_rate": 8.141676086873574e-07, |
| "loss": 0.2774, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.5103092783505154, |
| "grad_norm": 0.4039931071916291, |
| "learning_rate": 8.043646465100696e-07, |
| "loss": 0.2434, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.51340206185567, |
| "grad_norm": 0.32765615054736147, |
| "learning_rate": 7.946158936764003e-07, |
| "loss": 0.2747, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.5164948453608247, |
| "grad_norm": 0.3772369754517315, |
| "learning_rate": 7.849214761442637e-07, |
| "loss": 0.2729, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.5195876288659793, |
| "grad_norm": 0.3986761836353652, |
| "learning_rate": 7.752815191695462e-07, |
| "loss": 0.2977, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.522680412371134, |
| "grad_norm": 0.44269655672764346, |
| "learning_rate": 7.656961473044744e-07, |
| "loss": 0.2605, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.5257731958762886, |
| "grad_norm": 0.43722142368011174, |
| "learning_rate": 7.561654843960208e-07, |
| "loss": 0.2602, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.528865979381443, |
| "grad_norm": 0.3764444430973381, |
| "learning_rate": 7.466896535842865e-07, |
| "loss": 0.262, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.531958762886598, |
| "grad_norm": 0.43708092583266656, |
| "learning_rate": 7.372687773009273e-07, |
| "loss": 0.2876, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.5350515463917525, |
| "grad_norm": 0.375053913647473, |
| "learning_rate": 7.279029772675572e-07, |
| "loss": 0.2637, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.538144329896907, |
| "grad_norm": 5.106978224597531, |
| "learning_rate": 7.185923744941881e-07, |
| "loss": 0.248, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.5412371134020617, |
| "grad_norm": 0.40781135132686575, |
| "learning_rate": 7.093370892776558e-07, |
| "loss": 0.2786, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.5443298969072163, |
| "grad_norm": 0.3881270194714463, |
| "learning_rate": 7.001372412000718e-07, |
| "loss": 0.281, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.547422680412371, |
| "grad_norm": 0.31196018561276995, |
| "learning_rate": 6.909929491272799e-07, |
| "loss": 0.2535, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.5505154639175256, |
| "grad_norm": 0.40077079764947493, |
| "learning_rate": 6.819043312073109e-07, |
| "loss": 0.2762, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.55360824742268, |
| "grad_norm": 0.5121874348267299, |
| "learning_rate": 6.728715048688711e-07, |
| "loss": 0.2696, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.556701030927835, |
| "grad_norm": 0.49662806556832984, |
| "learning_rate": 6.638945868198071e-07, |
| "loss": 0.2629, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.5597938144329895, |
| "grad_norm": 0.4261532245947411, |
| "learning_rate": 6.549736930456163e-07, |
| "loss": 0.2577, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.562886597938144, |
| "grad_norm": 0.3591850207269949, |
| "learning_rate": 6.461089388079316e-07, |
| "loss": 0.2779, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.5659793814432987, |
| "grad_norm": 0.4543277400791709, |
| "learning_rate": 6.373004386430442e-07, |
| "loss": 0.2866, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.5690721649484534, |
| "grad_norm": 0.39010995724066533, |
| "learning_rate": 6.285483063604187e-07, |
| "loss": 0.2647, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.572164948453608, |
| "grad_norm": 0.4890947193837219, |
| "learning_rate": 6.198526550412232e-07, |
| "loss": 0.2426, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.5752577319587626, |
| "grad_norm": 0.39146288102200943, |
| "learning_rate": 6.112135970368682e-07, |
| "loss": 0.2516, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.5783505154639177, |
| "grad_norm": 0.35663592908885416, |
| "learning_rate": 6.026312439675553e-07, |
| "loss": 0.2481, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.5814432989690723, |
| "grad_norm": 0.3665307794329737, |
| "learning_rate": 5.941057067208345e-07, |
| "loss": 0.2902, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.584536082474227, |
| "grad_norm": 0.3711205197724142, |
| "learning_rate": 5.856370954501722e-07, |
| "loss": 0.2383, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.5876288659793816, |
| "grad_norm": 0.34065816638638197, |
| "learning_rate": 5.772255195735287e-07, |
| "loss": 0.2502, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.590721649484536, |
| "grad_norm": 0.6025679888532969, |
| "learning_rate": 5.688710877719417e-07, |
| "loss": 0.2592, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.593814432989691, |
| "grad_norm": 0.3777446364919783, |
| "learning_rate": 5.60573907988124e-07, |
| "loss": 0.2496, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.5969072164948455, |
| "grad_norm": 0.3927351584397551, |
| "learning_rate": 5.523340874250704e-07, |
| "loss": 0.2932, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.4611904495937945, |
| "learning_rate": 5.441517325446688e-07, |
| "loss": 0.2724, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.6030927835051547, |
| "grad_norm": 0.4838396928689296, |
| "learning_rate": 5.360269490663278e-07, |
| "loss": 0.2788, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.6061855670103093, |
| "grad_norm": 0.28849031956241356, |
| "learning_rate": 5.279598419656096e-07, |
| "loss": 0.2754, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.609278350515464, |
| "grad_norm": 0.4277305524273044, |
| "learning_rate": 5.199505154728729e-07, |
| "loss": 0.2812, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.6123711340206186, |
| "grad_norm": 0.431380090314389, |
| "learning_rate": 5.119990730719287e-07, |
| "loss": 0.248, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.6154639175257732, |
| "grad_norm": 0.457727370255861, |
| "learning_rate": 5.041056174987008e-07, |
| "loss": 0.3045, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.618556701030928, |
| "grad_norm": 0.337047837273392, |
| "learning_rate": 4.962702507398981e-07, |
| "loss": 0.2562, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.6216494845360825, |
| "grad_norm": 0.444447361152871, |
| "learning_rate": 4.88493074031699e-07, |
| "loss": 0.2774, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.624742268041237, |
| "grad_norm": 0.47058562033160894, |
| "learning_rate": 4.807741878584444e-07, |
| "loss": 0.2833, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.6278350515463917, |
| "grad_norm": 0.5103792800918486, |
| "learning_rate": 4.7311369195133127e-07, |
| "loss": 0.2742, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.6309278350515464, |
| "grad_norm": 0.3871907852225993, |
| "learning_rate": 4.6551168528713884e-07, |
| "loss": 0.2707, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.634020618556701, |
| "grad_norm": 0.3990501931627433, |
| "learning_rate": 4.5796826608693277e-07, |
| "loss": 0.2832, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.6371134020618556, |
| "grad_norm": 0.3926262596809124, |
| "learning_rate": 4.5048353181481043e-07, |
| "loss": 0.2603, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.6402061855670103, |
| "grad_norm": 0.42016597491134544, |
| "learning_rate": 4.4305757917663284e-07, |
| "loss": 0.2423, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.643298969072165, |
| "grad_norm": 0.33762976623227237, |
| "learning_rate": 4.3569050411877867e-07, |
| "loss": 0.3094, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.6463917525773195, |
| "grad_norm": 0.4255252595029353, |
| "learning_rate": 4.283824018269045e-07, |
| "loss": 0.2543, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.649484536082474, |
| "grad_norm": 0.38422820883982844, |
| "learning_rate": 4.211333667247125e-07, |
| "loss": 0.2908, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.6525773195876288, |
| "grad_norm": 0.41409709865845773, |
| "learning_rate": 4.139434924727359e-07, |
| "loss": 0.2527, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.6556701030927834, |
| "grad_norm": 0.4891114365667326, |
| "learning_rate": 4.0681287196711883e-07, |
| "loss": 0.2578, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.658762886597938, |
| "grad_norm": 0.382607275622272, |
| "learning_rate": 3.997415973384311e-07, |
| "loss": 0.2739, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.6618556701030927, |
| "grad_norm": 0.4775250066484936, |
| "learning_rate": 3.9272975995046146e-07, |
| "loss": 0.2569, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.6649484536082473, |
| "grad_norm": 0.3349894883367055, |
| "learning_rate": 3.857774503990513e-07, |
| "loss": 0.3055, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.668041237113402, |
| "grad_norm": 0.361989359292344, |
| "learning_rate": 3.7888475851091123e-07, |
| "loss": 0.2593, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.6711340206185565, |
| "grad_norm": 1.0723316740464135, |
| "learning_rate": 3.7205177334247445e-07, |
| "loss": 0.287, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.6742268041237116, |
| "grad_norm": 0.4674936800468732, |
| "learning_rate": 3.6527858317873146e-07, |
| "loss": 0.2634, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.6773195876288662, |
| "grad_norm": 0.3896450406311716, |
| "learning_rate": 3.585652755321012e-07, |
| "loss": 0.306, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.680412371134021, |
| "grad_norm": 0.36435008009318615, |
| "learning_rate": 3.519119371412938e-07, |
| "loss": 0.2608, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.6835051546391755, |
| "grad_norm": 0.3340294872611109, |
| "learning_rate": 3.453186539701925e-07, |
| "loss": 0.2463, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.68659793814433, |
| "grad_norm": 0.387560286599799, |
| "learning_rate": 3.3878551120674343e-07, |
| "loss": 0.2553, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.6896907216494848, |
| "grad_norm": 0.402851383790354, |
| "learning_rate": 3.3231259326184983e-07, |
| "loss": 0.2429, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.6927835051546394, |
| "grad_norm": 0.4149186840791913, |
| "learning_rate": 3.2589998376829135e-07, |
| "loss": 0.2619, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.695876288659794, |
| "grad_norm": 0.33280941208455317, |
| "learning_rate": 3.1954776557963086e-07, |
| "loss": 0.2696, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.6989690721649486, |
| "grad_norm": 0.342998191453652, |
| "learning_rate": 3.1325602076915706e-07, |
| "loss": 0.246, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.7020618556701033, |
| "grad_norm": 0.3793080311309386, |
| "learning_rate": 3.0702483062881206e-07, |
| "loss": 0.2769, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.705154639175258, |
| "grad_norm": 0.3772309885276913, |
| "learning_rate": 3.0085427566814985e-07, |
| "loss": 0.281, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.7082474226804125, |
| "grad_norm": 0.44008596469496586, |
| "learning_rate": 2.947444356132917e-07, |
| "loss": 0.3324, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.711340206185567, |
| "grad_norm": 0.5243569107693952, |
| "learning_rate": 2.88695389405898e-07, |
| "loss": 0.3094, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.7144329896907218, |
| "grad_norm": 0.3169183516642598, |
| "learning_rate": 2.827072152021465e-07, |
| "loss": 0.2737, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.7175257731958764, |
| "grad_norm": 0.49768488943684985, |
| "learning_rate": 2.767799903717244e-07, |
| "loss": 0.2457, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.720618556701031, |
| "grad_norm": 0.41352156394619477, |
| "learning_rate": 2.7091379149682683e-07, |
| "loss": 0.2771, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.7237113402061857, |
| "grad_norm": 0.35439240015429474, |
| "learning_rate": 2.6510869437116946e-07, |
| "loss": 0.2667, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.7268041237113403, |
| "grad_norm": 0.37933465501415164, |
| "learning_rate": 2.593647739990068e-07, |
| "loss": 0.2894, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.729896907216495, |
| "grad_norm": 0.39623780129771313, |
| "learning_rate": 2.5368210459416565e-07, |
| "loss": 0.2505, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.7329896907216495, |
| "grad_norm": 0.3981902202273425, |
| "learning_rate": 2.480607595790846e-07, |
| "loss": 0.267, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.736082474226804, |
| "grad_norm": 0.47542948996594553, |
| "learning_rate": 2.425008115838651e-07, |
| "loss": 0.279, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.739175257731959, |
| "grad_norm": 0.6127141337516678, |
| "learning_rate": 2.3700233244533412e-07, |
| "loss": 0.2449, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.7422680412371134, |
| "grad_norm": 0.38973267154824953, |
| "learning_rate": 2.3156539320611627e-07, |
| "loss": 0.2935, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.745360824742268, |
| "grad_norm": 0.3155145685377891, |
| "learning_rate": 2.2619006411371437e-07, |
| "loss": 0.3411, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.7484536082474227, |
| "grad_norm": 0.4456296619500586, |
| "learning_rate": 2.2087641461960295e-07, |
| "loss": 0.2672, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.7515463917525773, |
| "grad_norm": 0.48391156603601054, |
| "learning_rate": 2.156245133783308e-07, |
| "loss": 0.2872, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.754639175257732, |
| "grad_norm": 0.38451851250891883, |
| "learning_rate": 2.1043442824663308e-07, |
| "loss": 0.2703, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.7577319587628866, |
| "grad_norm": 0.4464145304550885, |
| "learning_rate": 2.0530622628255613e-07, |
| "loss": 0.287, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.760824742268041, |
| "grad_norm": 0.3885382227965622, |
| "learning_rate": 2.0023997374458927e-07, |
| "loss": 0.2586, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.763917525773196, |
| "grad_norm": 0.3558472125223351, |
| "learning_rate": 1.9523573609081137e-07, |
| "loss": 0.2846, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.7670103092783505, |
| "grad_norm": 0.35075461056120477, |
| "learning_rate": 1.9029357797804017e-07, |
| "loss": 0.2949, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.770103092783505, |
| "grad_norm": 0.5126676208214347, |
| "learning_rate": 1.8541356326100436e-07, |
| "loss": 0.2796, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.7731958762886597, |
| "grad_norm": 0.43519449725676185, |
| "learning_rate": 1.8059575499150883e-07, |
| "loss": 0.2585, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.7762886597938143, |
| "grad_norm": 0.34708575228506444, |
| "learning_rate": 1.758402154176314e-07, |
| "loss": 0.294, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.779381443298969, |
| "grad_norm": 0.3774280912494685, |
| "learning_rate": 1.71147005982909e-07, |
| "loss": 0.2599, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.7824742268041236, |
| "grad_norm": 0.3108267423948177, |
| "learning_rate": 1.6651618732554774e-07, |
| "loss": 0.2571, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.7855670103092782, |
| "grad_norm": 0.433653354138328, |
| "learning_rate": 1.6194781927763913e-07, |
| "loss": 0.279, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.788659793814433, |
| "grad_norm": 0.37052857742824075, |
| "learning_rate": 1.5744196086438789e-07, |
| "loss": 0.2591, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.7917525773195875, |
| "grad_norm": 0.46199204480002765, |
| "learning_rate": 1.5299867030334815e-07, |
| "loss": 0.255, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.794845360824742, |
| "grad_norm": 0.580124929875133, |
| "learning_rate": 1.4861800500367007e-07, |
| "loss": 0.2875, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.7979381443298967, |
| "grad_norm": 0.5217340942830481, |
| "learning_rate": 1.4430002156536226e-07, |
| "loss": 0.2861, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.8010309278350514, |
| "grad_norm": 0.3904931691443872, |
| "learning_rate": 1.4004477577855392e-07, |
| "loss": 0.2675, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.804123711340206, |
| "grad_norm": 0.36630360257428185, |
| "learning_rate": 1.3585232262278258e-07, |
| "loss": 0.2712, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.8072164948453606, |
| "grad_norm": 0.353945605569572, |
| "learning_rate": 1.3172271626627486e-07, |
| "loss": 0.3017, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.8103092783505152, |
| "grad_norm": 0.4188118630836017, |
| "learning_rate": 1.276560100652535e-07, |
| "loss": 0.2353, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.81340206185567, |
| "grad_norm": 0.34093312115837016, |
| "learning_rate": 1.2365225656324308e-07, |
| "loss": 0.2573, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.8164948453608245, |
| "grad_norm": 0.3665586170666988, |
| "learning_rate": 1.197115074903954e-07, |
| "loss": 0.2662, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.819587628865979, |
| "grad_norm": 0.2807017898415233, |
| "learning_rate": 1.1583381376281733e-07, |
| "loss": 0.2606, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.8226804123711338, |
| "grad_norm": 0.4234165577007828, |
| "learning_rate": 1.1201922548191468e-07, |
| "loss": 0.274, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.8257731958762884, |
| "grad_norm": 0.3720057394231554, |
| "learning_rate": 1.0826779193374715e-07, |
| "loss": 0.2876, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.8288659793814435, |
| "grad_norm": 0.3204436821534787, |
| "learning_rate": 1.0457956158838545e-07, |
| "loss": 0.28, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.831958762886598, |
| "grad_norm": 0.565914708121846, |
| "learning_rate": 1.0095458209929243e-07, |
| "loss": 0.2697, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.8350515463917527, |
| "grad_norm": 0.34518258805906293, |
| "learning_rate": 9.739290030269965e-08, |
| "loss": 0.2942, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.8381443298969073, |
| "grad_norm": 0.3636538558427229, |
| "learning_rate": 9.389456221701121e-08, |
| "loss": 0.2733, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.841237113402062, |
| "grad_norm": 0.40022954321305015, |
| "learning_rate": 9.045961304219974e-08, |
| "loss": 0.2621, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.8443298969072166, |
| "grad_norm": 0.4344415769634195, |
| "learning_rate": 8.708809715922973e-08, |
| "loss": 0.2534, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.8474226804123712, |
| "grad_norm": 0.3824356118052112, |
| "learning_rate": 8.378005812948064e-08, |
| "loss": 0.2793, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.850515463917526, |
| "grad_norm": 0.35498451884083126, |
| "learning_rate": 8.053553869418418e-08, |
| "loss": 0.2764, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.8536082474226805, |
| "grad_norm": 0.4429208680189465, |
| "learning_rate": 7.735458077387292e-08, |
| "loss": 0.2563, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.856701030927835, |
| "grad_norm": 0.3742405675124718, |
| "learning_rate": 7.423722546783918e-08, |
| "loss": 0.28, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.8597938144329897, |
| "grad_norm": 0.42632928679275617, |
| "learning_rate": 7.118351305360205e-08, |
| "loss": 0.3038, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.8628865979381444, |
| "grad_norm": 0.3897225778477736, |
| "learning_rate": 6.819348298638839e-08, |
| "loss": 0.2552, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.865979381443299, |
| "grad_norm": 0.2883333432226637, |
| "learning_rate": 6.526717389862325e-08, |
| "loss": 0.3023, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.8690721649484536, |
| "grad_norm": 0.5777472796705979, |
| "learning_rate": 6.240462359942967e-08, |
| "loss": 0.2608, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.8721649484536083, |
| "grad_norm": 0.40631841925621104, |
| "learning_rate": 5.960586907414189e-08, |
| "loss": 0.2779, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.875257731958763, |
| "grad_norm": 0.46092516469259476, |
| "learning_rate": 5.687094648382518e-08, |
| "loss": 0.2641, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.8783505154639175, |
| "grad_norm": 0.34915642648853407, |
| "learning_rate": 5.419989116481061e-08, |
| "loss": 0.3025, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.881443298969072, |
| "grad_norm": 0.47411839457507193, |
| "learning_rate": 5.159273762823658e-08, |
| "loss": 0.3305, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.8845360824742268, |
| "grad_norm": 0.36833349217647166, |
| "learning_rate": 4.90495195596058e-08, |
| "loss": 0.2965, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.8876288659793814, |
| "grad_norm": 0.4500567088618251, |
| "learning_rate": 4.657026981834623e-08, |
| "loss": 0.2479, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.890721649484536, |
| "grad_norm": 0.4208605131949342, |
| "learning_rate": 4.415502043739084e-08, |
| "loss": 0.2799, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.8938144329896907, |
| "grad_norm": 0.31553449780177045, |
| "learning_rate": 4.180380262275907e-08, |
| "loss": 0.2921, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.8969072164948453, |
| "grad_norm": 0.3409240679158436, |
| "learning_rate": 3.9516646753158247e-08, |
| "loss": 0.2697, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.4208461142015241, |
| "learning_rate": 3.72935823795878e-08, |
| "loss": 0.2855, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.9030927835051545, |
| "grad_norm": 0.4435280021877381, |
| "learning_rate": 3.513463822495844e-08, |
| "loss": 0.2819, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.906185567010309, |
| "grad_norm": 0.41517671074392126, |
| "learning_rate": 3.303984218372136e-08, |
| "loss": 0.2588, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.909278350515464, |
| "grad_norm": 0.3540642341715653, |
| "learning_rate": 3.100922132150741e-08, |
| "loss": 0.2296, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.9123711340206184, |
| "grad_norm": 0.3690759806820083, |
| "learning_rate": 2.9042801874777925e-08, |
| "loss": 0.2871, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.915463917525773, |
| "grad_norm": 0.4546986119024043, |
| "learning_rate": 2.714060925048556e-08, |
| "loss": 0.2569, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.9185567010309277, |
| "grad_norm": 0.32836248625240094, |
| "learning_rate": 2.53026680257451e-08, |
| "loss": 0.2511, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.9216494845360823, |
| "grad_norm": 0.4058215648872401, |
| "learning_rate": 2.352900194751706e-08, |
| "loss": 0.3142, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.9247422680412374, |
| "grad_norm": 0.40012516007646937, |
| "learning_rate": 2.1819633932301797e-08, |
| "loss": 0.2792, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.927835051546392, |
| "grad_norm": 0.4125955989567251, |
| "learning_rate": 2.0174586065838664e-08, |
| "loss": 0.2907, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.9309278350515466, |
| "grad_norm": 0.3524425854408069, |
| "learning_rate": 1.8593879602828434e-08, |
| "loss": 0.2886, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.9340206185567013, |
| "grad_norm": 0.3170471980249348, |
| "learning_rate": 1.7077534966650767e-08, |
| "loss": 0.2784, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.937113402061856, |
| "grad_norm": 0.4411619208014409, |
| "learning_rate": 1.562557174910606e-08, |
| "loss": 0.3009, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.9402061855670105, |
| "grad_norm": 0.46198859439075746, |
| "learning_rate": 1.4238008710159567e-08, |
| "loss": 0.2446, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.943298969072165, |
| "grad_norm": 0.36122796091522313, |
| "learning_rate": 1.2914863777698794e-08, |
| "loss": 0.2738, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.94639175257732, |
| "grad_norm": 0.35861966022079894, |
| "learning_rate": 1.1656154047303691e-08, |
| "loss": 0.2798, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.9494845360824744, |
| "grad_norm": 0.3875255977808121, |
| "learning_rate": 1.0461895782025166e-08, |
| "loss": 0.2711, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.952577319587629, |
| "grad_norm": 0.35530566543196673, |
| "learning_rate": 9.332104412173027e-09, |
| "loss": 0.3048, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.9556701030927837, |
| "grad_norm": 0.361452363482159, |
| "learning_rate": 8.266794535118915e-09, |
| "loss": 0.2831, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.9587628865979383, |
| "grad_norm": 0.4420803980691798, |
| "learning_rate": 7.265979915107024e-09, |
| "loss": 0.2521, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.961855670103093, |
| "grad_norm": 0.46036523480311, |
| "learning_rate": 6.329673483076448e-09, |
| "loss": 0.2704, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.9649484536082475, |
| "grad_norm": 0.6117555345915687, |
| "learning_rate": 5.4578873364929955e-09, |
| "loss": 0.2917, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.968041237113402, |
| "grad_norm": 0.42432677847100164, |
| "learning_rate": 4.650632739194305e-09, |
| "loss": 0.2692, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.971134020618557, |
| "grad_norm": 0.46221110515624103, |
| "learning_rate": 3.907920121243858e-09, |
| "loss": 0.2854, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.9742268041237114, |
| "grad_norm": 0.4255820151298039, |
| "learning_rate": 3.2297590787955248e-09, |
| "loss": 0.2589, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.977319587628866, |
| "grad_norm": 0.4146109541033293, |
| "learning_rate": 2.6161583739703344e-09, |
| "loss": 0.2617, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.9804123711340207, |
| "grad_norm": 0.3106992898270277, |
| "learning_rate": 2.067125934742675e-09, |
| "loss": 0.2612, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.9835051546391753, |
| "grad_norm": 0.308479948088998, |
| "learning_rate": 1.5826688548398194e-09, |
| "loss": 0.2896, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.98659793814433, |
| "grad_norm": 0.4301763954612885, |
| "learning_rate": 1.1627933936464442e-09, |
| "loss": 0.2707, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.9896907216494846, |
| "grad_norm": 0.4411906019363483, |
| "learning_rate": 8.075049761274711e-10, |
| "loss": 0.2932, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.992783505154639, |
| "grad_norm": 0.40563705971599223, |
| "learning_rate": 5.168081927564572e-10, |
| "loss": 0.2849, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.995876288659794, |
| "grad_norm": 0.3315762234528888, |
| "learning_rate": 2.907067994556423e-10, |
| "loss": 0.2589, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.9989690721649485, |
| "grad_norm": 0.39679025047238414, |
| "learning_rate": 1.2920371754931994e-10, |
| "loss": 0.2737, |
| "step": 971 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.2638973106902003, |
| "learning_rate": 3.230103372275917e-11, |
| "loss": 0.2755, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 972, |
| "total_flos": 328086624501760.0, |
| "train_loss": 0.3500921587179963, |
| "train_runtime": 40196.0341, |
| "train_samples_per_second": 1.158, |
| "train_steps_per_second": 0.024 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 972, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 328086624501760.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |