| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996219996219996, |
| "eval_steps": 500, |
| "global_step": 1763, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000567000567000567, |
| "grad_norm": 4.704992771148682, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.9183, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001134001134001134, |
| "grad_norm": 4.873214244842529, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.9567, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001701001701001701, |
| "grad_norm": 4.890101432800293, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.8994, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002268002268002268, |
| "grad_norm": 4.563302516937256, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.8513, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.002835002835002835, |
| "grad_norm": 4.943462371826172, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.8998, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003402003402003402, |
| "grad_norm": 5.069730281829834, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.9748, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003969003969003969, |
| "grad_norm": 5.16794490814209, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 2.0669, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004536004536004536, |
| "grad_norm": 4.572751998901367, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.9074, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005103005103005103, |
| "grad_norm": 4.676807403564453, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.9424, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00567000567000567, |
| "grad_norm": 4.998410701751709, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.9398, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006237006237006237, |
| "grad_norm": 4.833102703094482, |
| "learning_rate": 5.5e-07, |
| "loss": 1.9526, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006804006804006804, |
| "grad_norm": 4.7410078048706055, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.9341, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007371007371007371, |
| "grad_norm": 4.841571807861328, |
| "learning_rate": 6.5e-07, |
| "loss": 1.8506, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007938007938007937, |
| "grad_norm": 4.75044584274292, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.9124, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.008505008505008505, |
| "grad_norm": 5.093398094177246, |
| "learning_rate": 7.5e-07, |
| "loss": 1.8485, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.009072009072009071, |
| "grad_norm": 4.743251800537109, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.8827, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.009639009639009639, |
| "grad_norm": 4.599445343017578, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.9064, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.010206010206010205, |
| "grad_norm": 4.685406684875488, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.8437, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.010773010773010773, |
| "grad_norm": 5.116965293884277, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.8478, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01134001134001134, |
| "grad_norm": 4.974440574645996, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.9602, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011907011907011907, |
| "grad_norm": 4.430954933166504, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.814, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.012474012474012475, |
| "grad_norm": 4.7586164474487305, |
| "learning_rate": 1.1e-06, |
| "loss": 1.8719, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01304101304101304, |
| "grad_norm": 4.204355716705322, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.8459, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.013608013608013609, |
| "grad_norm": 4.102180004119873, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.9011, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.014175014175014175, |
| "grad_norm": 3.9540836811065674, |
| "learning_rate": 1.25e-06, |
| "loss": 1.7151, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.014742014742014743, |
| "grad_norm": 3.724958658218384, |
| "learning_rate": 1.3e-06, |
| "loss": 1.8811, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.015309015309015309, |
| "grad_norm": 3.6260979175567627, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.7493, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.015876015876015875, |
| "grad_norm": 3.549999713897705, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.8808, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.016443016443016444, |
| "grad_norm": 3.366023063659668, |
| "learning_rate": 1.45e-06, |
| "loss": 1.6787, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01701001701001701, |
| "grad_norm": 3.0748300552368164, |
| "learning_rate": 1.5e-06, |
| "loss": 1.6742, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.017577017577017576, |
| "grad_norm": 3.056105852127075, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.6747, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.018144018144018143, |
| "grad_norm": 3.277554512023926, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.6949, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.018711018711018712, |
| "grad_norm": 3.0011744499206543, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.681, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.019278019278019278, |
| "grad_norm": 3.01111102104187, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.7185, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.019845019845019844, |
| "grad_norm": 2.8633668422698975, |
| "learning_rate": 1.75e-06, |
| "loss": 1.6156, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02041202041202041, |
| "grad_norm": 2.5703542232513428, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.6805, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02097902097902098, |
| "grad_norm": 2.5187172889709473, |
| "learning_rate": 1.85e-06, |
| "loss": 1.6083, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.021546021546021546, |
| "grad_norm": 2.7661757469177246, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.6907, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.022113022113022112, |
| "grad_norm": 3.207343101501465, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.5878, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02268002268002268, |
| "grad_norm": 2.7971088886260986, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.5075, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.023247023247023248, |
| "grad_norm": 2.50616192817688, |
| "learning_rate": 2.05e-06, |
| "loss": 1.6045, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.023814023814023814, |
| "grad_norm": 2.8198935985565186, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.6205, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02438102438102438, |
| "grad_norm": 2.591521739959717, |
| "learning_rate": 2.15e-06, |
| "loss": 1.5531, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02494802494802495, |
| "grad_norm": 2.4420039653778076, |
| "learning_rate": 2.2e-06, |
| "loss": 1.5229, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.025515025515025515, |
| "grad_norm": 2.6265766620635986, |
| "learning_rate": 2.25e-06, |
| "loss": 1.4936, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02608202608202608, |
| "grad_norm": 2.300294876098633, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.5293, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.026649026649026648, |
| "grad_norm": 2.3751509189605713, |
| "learning_rate": 2.35e-06, |
| "loss": 1.5413, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.027216027216027217, |
| "grad_norm": 2.4751758575439453, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.5172, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.027783027783027783, |
| "grad_norm": 2.3078811168670654, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.5058, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02835002835002835, |
| "grad_norm": 2.3470571041107178, |
| "learning_rate": 2.5e-06, |
| "loss": 1.5087, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.028917028917028915, |
| "grad_norm": 2.2181596755981445, |
| "learning_rate": 2.55e-06, |
| "loss": 1.5913, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.029484029484029485, |
| "grad_norm": 2.228245735168457, |
| "learning_rate": 2.6e-06, |
| "loss": 1.491, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03005103005103005, |
| "grad_norm": 2.0523054599761963, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.3369, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.030618030618030617, |
| "grad_norm": 2.1131646633148193, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.4824, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.031185031185031187, |
| "grad_norm": 2.1240482330322266, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.4588, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03175203175203175, |
| "grad_norm": 2.329206705093384, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.4934, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03231903231903232, |
| "grad_norm": 2.2496628761291504, |
| "learning_rate": 2.85e-06, |
| "loss": 1.4863, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03288603288603289, |
| "grad_norm": 2.1327602863311768, |
| "learning_rate": 2.9e-06, |
| "loss": 1.493, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03345303345303345, |
| "grad_norm": 2.0308682918548584, |
| "learning_rate": 2.95e-06, |
| "loss": 1.4056, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03402003402003402, |
| "grad_norm": 2.0320749282836914, |
| "learning_rate": 3e-06, |
| "loss": 1.4364, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03458703458703459, |
| "grad_norm": 2.419875144958496, |
| "learning_rate": 3.05e-06, |
| "loss": 1.4899, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03515403515403515, |
| "grad_norm": 2.623107433319092, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3701, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03572103572103572, |
| "grad_norm": 2.1976675987243652, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.3846, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.036288036288036285, |
| "grad_norm": 2.262049436569214, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.4136, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.036855036855036855, |
| "grad_norm": 2.027294397354126, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.4009, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.037422037422037424, |
| "grad_norm": 2.174931526184082, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.4644, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03798903798903799, |
| "grad_norm": 2.3164167404174805, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.3916, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.038556038556038556, |
| "grad_norm": 2.1951541900634766, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.3923, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.039123039123039126, |
| "grad_norm": 2.751126766204834, |
| "learning_rate": 3.45e-06, |
| "loss": 1.445, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03969003969003969, |
| "grad_norm": 2.052192449569702, |
| "learning_rate": 3.5e-06, |
| "loss": 1.3873, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04025704025704026, |
| "grad_norm": 1.9943838119506836, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.4527, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04082404082404082, |
| "grad_norm": 2.073538303375244, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.4175, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04139104139104139, |
| "grad_norm": 2.014153480529785, |
| "learning_rate": 3.65e-06, |
| "loss": 1.4308, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.04195804195804196, |
| "grad_norm": 2.140015125274658, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3748, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04252504252504252, |
| "grad_norm": 2.0118002891540527, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3709, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04309204309204309, |
| "grad_norm": 2.0641977787017822, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.3158, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04365904365904366, |
| "grad_norm": 2.119843006134033, |
| "learning_rate": 3.85e-06, |
| "loss": 1.4224, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.044226044226044224, |
| "grad_norm": 2.0117244720458984, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.2549, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.044793044793044794, |
| "grad_norm": 2.001262664794922, |
| "learning_rate": 3.95e-06, |
| "loss": 1.4035, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04536004536004536, |
| "grad_norm": 2.128577470779419, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3866, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.045927045927045926, |
| "grad_norm": 2.002300977706909, |
| "learning_rate": 4.05e-06, |
| "loss": 1.3133, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.046494046494046495, |
| "grad_norm": 2.065413475036621, |
| "learning_rate": 4.1e-06, |
| "loss": 1.37, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04706104706104706, |
| "grad_norm": 2.11797833442688, |
| "learning_rate": 4.15e-06, |
| "loss": 1.4127, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04762804762804763, |
| "grad_norm": 2.0376827716827393, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.3478, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0481950481950482, |
| "grad_norm": 2.141932964324951, |
| "learning_rate": 4.25e-06, |
| "loss": 1.4469, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04876204876204876, |
| "grad_norm": 2.0376832485198975, |
| "learning_rate": 4.3e-06, |
| "loss": 1.3035, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04932904932904933, |
| "grad_norm": 1.9415974617004395, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.3057, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0498960498960499, |
| "grad_norm": 1.972311019897461, |
| "learning_rate": 4.4e-06, |
| "loss": 1.3494, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05046305046305046, |
| "grad_norm": 2.0214779376983643, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.348, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05103005103005103, |
| "grad_norm": 1.9641141891479492, |
| "learning_rate": 4.5e-06, |
| "loss": 1.3264, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.051597051597051594, |
| "grad_norm": 2.1060950756073, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.3606, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05216405216405216, |
| "grad_norm": 2.0103988647460938, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.3464, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05273105273105273, |
| "grad_norm": 1.9835673570632935, |
| "learning_rate": 4.65e-06, |
| "loss": 1.353, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.053298053298053295, |
| "grad_norm": 2.0680184364318848, |
| "learning_rate": 4.7e-06, |
| "loss": 1.4022, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.053865053865053865, |
| "grad_norm": 2.5879733562469482, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3186, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.054432054432054434, |
| "grad_norm": 2.620063066482544, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.3101, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.054999054999055, |
| "grad_norm": 2.196293592453003, |
| "learning_rate": 4.85e-06, |
| "loss": 1.2989, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05556605556605557, |
| "grad_norm": 2.123204231262207, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3334, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.056133056133056136, |
| "grad_norm": 2.1718428134918213, |
| "learning_rate": 4.95e-06, |
| "loss": 1.3703, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0567000567000567, |
| "grad_norm": 2.10593843460083, |
| "learning_rate": 5e-06, |
| "loss": 1.3411, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05726705726705727, |
| "grad_norm": 1.9898821115493774, |
| "learning_rate": 4.999999887629331e-06, |
| "loss": 1.3076, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05783405783405783, |
| "grad_norm": 2.1761505603790283, |
| "learning_rate": 4.999999550517334e-06, |
| "loss": 1.3909, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0584010584010584, |
| "grad_norm": 1.9373730421066284, |
| "learning_rate": 4.999998988664039e-06, |
| "loss": 1.2573, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05896805896805897, |
| "grad_norm": 2.208836078643799, |
| "learning_rate": 4.999998202069496e-06, |
| "loss": 1.3415, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05953505953505953, |
| "grad_norm": 2.1279194355010986, |
| "learning_rate": 4.999997190733778e-06, |
| "loss": 1.3049, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0601020601020601, |
| "grad_norm": 2.0629360675811768, |
| "learning_rate": 4.999995954656972e-06, |
| "loss": 1.289, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06066906066906067, |
| "grad_norm": 2.208876132965088, |
| "learning_rate": 4.9999944938391935e-06, |
| "loss": 1.3609, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.061236061236061234, |
| "grad_norm": 2.086843252182007, |
| "learning_rate": 4.99999280828057e-06, |
| "loss": 1.3733, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.061803061803061804, |
| "grad_norm": 2.007978677749634, |
| "learning_rate": 4.999990897981256e-06, |
| "loss": 1.2892, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.062370062370062374, |
| "grad_norm": 2.0387015342712402, |
| "learning_rate": 4.999988762941422e-06, |
| "loss": 1.3037, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06293706293706294, |
| "grad_norm": 2.8154208660125732, |
| "learning_rate": 4.99998640316126e-06, |
| "loss": 1.2534, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0635040635040635, |
| "grad_norm": 1.996565818786621, |
| "learning_rate": 4.999983818640981e-06, |
| "loss": 1.2697, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06407106407106407, |
| "grad_norm": 1.850510835647583, |
| "learning_rate": 4.99998100938082e-06, |
| "loss": 1.295, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06463806463806464, |
| "grad_norm": 1.994425654411316, |
| "learning_rate": 4.999977975381027e-06, |
| "loss": 1.3082, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06520506520506521, |
| "grad_norm": 2.0517311096191406, |
| "learning_rate": 4.999974716641875e-06, |
| "loss": 1.2921, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06577206577206578, |
| "grad_norm": 2.0769684314727783, |
| "learning_rate": 4.999971233163658e-06, |
| "loss": 1.3186, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.06633906633906633, |
| "grad_norm": 2.0265793800354004, |
| "learning_rate": 4.99996752494669e-06, |
| "loss": 1.3795, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.0669060669060669, |
| "grad_norm": 1.9943898916244507, |
| "learning_rate": 4.999963591991302e-06, |
| "loss": 1.3286, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06747306747306747, |
| "grad_norm": 2.2208609580993652, |
| "learning_rate": 4.999959434297849e-06, |
| "loss": 1.2839, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06804006804006804, |
| "grad_norm": 2.0283749103546143, |
| "learning_rate": 4.9999550518667045e-06, |
| "loss": 1.3112, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06860706860706861, |
| "grad_norm": 2.0714364051818848, |
| "learning_rate": 4.999950444698262e-06, |
| "loss": 1.3426, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06917406917406918, |
| "grad_norm": 1.9883396625518799, |
| "learning_rate": 4.999945612792937e-06, |
| "loss": 1.321, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06974106974106974, |
| "grad_norm": 2.229637622833252, |
| "learning_rate": 4.999940556151163e-06, |
| "loss": 1.3168, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0703080703080703, |
| "grad_norm": 2.144243001937866, |
| "learning_rate": 4.999935274773394e-06, |
| "loss": 1.2621, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07087507087507088, |
| "grad_norm": 2.2905640602111816, |
| "learning_rate": 4.999929768660105e-06, |
| "loss": 1.3178, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.07144207144207144, |
| "grad_norm": 2.149752140045166, |
| "learning_rate": 4.999924037811792e-06, |
| "loss": 1.3038, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07200907200907201, |
| "grad_norm": 2.0315706729888916, |
| "learning_rate": 4.999918082228969e-06, |
| "loss": 1.2576, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07257607257607257, |
| "grad_norm": 2.168241262435913, |
| "learning_rate": 4.999911901912172e-06, |
| "loss": 1.2738, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07314307314307314, |
| "grad_norm": 1.9652851819992065, |
| "learning_rate": 4.999905496861957e-06, |
| "loss": 1.2494, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.07371007371007371, |
| "grad_norm": 2.2343976497650146, |
| "learning_rate": 4.999898867078898e-06, |
| "loss": 1.336, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07427707427707428, |
| "grad_norm": 2.411104202270508, |
| "learning_rate": 4.999892012563593e-06, |
| "loss": 1.2526, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07484407484407485, |
| "grad_norm": 2.051623582839966, |
| "learning_rate": 4.999884933316658e-06, |
| "loss": 1.3189, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.07541107541107542, |
| "grad_norm": 2.2318692207336426, |
| "learning_rate": 4.999877629338729e-06, |
| "loss": 1.3392, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07597807597807597, |
| "grad_norm": 2.2030036449432373, |
| "learning_rate": 4.999870100630462e-06, |
| "loss": 1.2661, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07654507654507654, |
| "grad_norm": 2.2581799030303955, |
| "learning_rate": 4.999862347192533e-06, |
| "loss": 1.3374, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.07711207711207711, |
| "grad_norm": 2.343352794647217, |
| "learning_rate": 4.9998543690256415e-06, |
| "loss": 1.2771, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.07767907767907768, |
| "grad_norm": 2.143404483795166, |
| "learning_rate": 4.999846166130503e-06, |
| "loss": 1.2934, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07824607824607825, |
| "grad_norm": 2.125469923019409, |
| "learning_rate": 4.999837738507856e-06, |
| "loss": 1.2593, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.07881307881307881, |
| "grad_norm": 1.9987525939941406, |
| "learning_rate": 4.999829086158458e-06, |
| "loss": 1.2719, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07938007938007938, |
| "grad_norm": 2.0173356533050537, |
| "learning_rate": 4.999820209083085e-06, |
| "loss": 1.2888, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07994707994707995, |
| "grad_norm": 2.903157949447632, |
| "learning_rate": 4.999811107282537e-06, |
| "loss": 1.2915, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.08051408051408052, |
| "grad_norm": 2.426927328109741, |
| "learning_rate": 4.999801780757631e-06, |
| "loss": 1.3207, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08108108108108109, |
| "grad_norm": 2.19057035446167, |
| "learning_rate": 4.999792229509207e-06, |
| "loss": 1.2671, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.08164808164808164, |
| "grad_norm": 1.889320731163025, |
| "learning_rate": 4.9997824535381215e-06, |
| "loss": 1.2258, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08221508221508221, |
| "grad_norm": 2.0674948692321777, |
| "learning_rate": 4.9997724528452554e-06, |
| "loss": 1.2624, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.08278208278208278, |
| "grad_norm": 2.3926374912261963, |
| "learning_rate": 4.999762227431506e-06, |
| "loss": 1.2322, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08334908334908335, |
| "grad_norm": 2.16817045211792, |
| "learning_rate": 4.999751777297794e-06, |
| "loss": 1.2871, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08391608391608392, |
| "grad_norm": 2.0507423877716064, |
| "learning_rate": 4.999741102445057e-06, |
| "loss": 1.2344, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.08448308448308449, |
| "grad_norm": 2.096215009689331, |
| "learning_rate": 4.999730202874256e-06, |
| "loss": 1.2724, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.08505008505008504, |
| "grad_norm": 2.091418504714966, |
| "learning_rate": 4.999719078586372e-06, |
| "loss": 1.2381, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08561708561708561, |
| "grad_norm": 2.1694529056549072, |
| "learning_rate": 4.999707729582402e-06, |
| "loss": 1.298, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.08618408618408618, |
| "grad_norm": 2.03376841545105, |
| "learning_rate": 4.999696155863369e-06, |
| "loss": 1.2768, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08675108675108675, |
| "grad_norm": 2.1496047973632812, |
| "learning_rate": 4.999684357430312e-06, |
| "loss": 1.2704, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08731808731808732, |
| "grad_norm": 2.192505359649658, |
| "learning_rate": 4.999672334284292e-06, |
| "loss": 1.2537, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08788508788508788, |
| "grad_norm": 2.290090560913086, |
| "learning_rate": 4.999660086426389e-06, |
| "loss": 1.258, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08845208845208845, |
| "grad_norm": 2.2584447860717773, |
| "learning_rate": 4.999647613857706e-06, |
| "loss": 1.2762, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.08901908901908902, |
| "grad_norm": 2.1247639656066895, |
| "learning_rate": 4.999634916579362e-06, |
| "loss": 1.2899, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08958608958608959, |
| "grad_norm": 2.1827831268310547, |
| "learning_rate": 4.9996219945925e-06, |
| "loss": 1.2723, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09015309015309016, |
| "grad_norm": 2.2694759368896484, |
| "learning_rate": 4.999608847898281e-06, |
| "loss": 1.3093, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.09072009072009073, |
| "grad_norm": 2.494340658187866, |
| "learning_rate": 4.9995954764978865e-06, |
| "loss": 1.3717, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09128709128709128, |
| "grad_norm": 2.2255382537841797, |
| "learning_rate": 4.99958188039252e-06, |
| "loss": 1.2386, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.09185409185409185, |
| "grad_norm": 2.237102746963501, |
| "learning_rate": 4.999568059583401e-06, |
| "loss": 1.285, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09242109242109242, |
| "grad_norm": 2.3099398612976074, |
| "learning_rate": 4.999554014071776e-06, |
| "loss": 1.2403, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09298809298809299, |
| "grad_norm": 2.0923874378204346, |
| "learning_rate": 4.999539743858904e-06, |
| "loss": 1.3747, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.09355509355509356, |
| "grad_norm": 2.0679986476898193, |
| "learning_rate": 4.99952524894607e-06, |
| "loss": 1.1877, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.09412209412209412, |
| "grad_norm": 2.03501296043396, |
| "learning_rate": 4.999510529334575e-06, |
| "loss": 1.2692, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.09468909468909469, |
| "grad_norm": 2.1350958347320557, |
| "learning_rate": 4.9994955850257444e-06, |
| "loss": 1.292, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.09525609525609526, |
| "grad_norm": 2.3618247509002686, |
| "learning_rate": 4.999480416020921e-06, |
| "loss": 1.255, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.09582309582309582, |
| "grad_norm": 2.1679601669311523, |
| "learning_rate": 4.9994650223214665e-06, |
| "loss": 1.2706, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0963900963900964, |
| "grad_norm": 2.4074959754943848, |
| "learning_rate": 4.999449403928768e-06, |
| "loss": 1.2783, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09695709695709696, |
| "grad_norm": 2.200929880142212, |
| "learning_rate": 4.999433560844228e-06, |
| "loss": 1.2846, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09752409752409752, |
| "grad_norm": 2.061795949935913, |
| "learning_rate": 4.999417493069269e-06, |
| "loss": 1.2984, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09809109809109809, |
| "grad_norm": 2.0811519622802734, |
| "learning_rate": 4.99940120060534e-06, |
| "loss": 1.2096, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09865809865809866, |
| "grad_norm": 2.2621266841888428, |
| "learning_rate": 4.999384683453901e-06, |
| "loss": 1.304, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.09922509922509923, |
| "grad_norm": 2.1586225032806396, |
| "learning_rate": 4.999367941616438e-06, |
| "loss": 1.2258, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0997920997920998, |
| "grad_norm": 2.058638334274292, |
| "learning_rate": 4.9993509750944565e-06, |
| "loss": 1.2461, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10035910035910035, |
| "grad_norm": 2.0724360942840576, |
| "learning_rate": 4.999333783889483e-06, |
| "loss": 1.2287, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.10092610092610092, |
| "grad_norm": 1.9717096090316772, |
| "learning_rate": 4.999316368003062e-06, |
| "loss": 1.2764, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.10149310149310149, |
| "grad_norm": 2.199272871017456, |
| "learning_rate": 4.999298727436758e-06, |
| "loss": 1.245, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.10206010206010206, |
| "grad_norm": 2.1129369735717773, |
| "learning_rate": 4.999280862192158e-06, |
| "loss": 1.2592, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10262710262710263, |
| "grad_norm": 2.0408732891082764, |
| "learning_rate": 4.999262772270867e-06, |
| "loss": 1.2407, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.10319410319410319, |
| "grad_norm": 2.1376776695251465, |
| "learning_rate": 4.999244457674514e-06, |
| "loss": 1.2328, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.10376110376110376, |
| "grad_norm": 2.1702775955200195, |
| "learning_rate": 4.999225918404741e-06, |
| "loss": 1.2439, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.10432810432810433, |
| "grad_norm": 2.6905198097229004, |
| "learning_rate": 4.9992071544632184e-06, |
| "loss": 1.2983, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1048951048951049, |
| "grad_norm": 2.080111265182495, |
| "learning_rate": 4.999188165851632e-06, |
| "loss": 1.2545, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.10546210546210547, |
| "grad_norm": 2.1639678478240967, |
| "learning_rate": 4.999168952571687e-06, |
| "loss": 1.2709, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.10602910602910603, |
| "grad_norm": 2.16153883934021, |
| "learning_rate": 4.999149514625113e-06, |
| "loss": 1.2861, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.10659610659610659, |
| "grad_norm": 2.1005361080169678, |
| "learning_rate": 4.999129852013656e-06, |
| "loss": 1.2461, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.10716310716310716, |
| "grad_norm": 2.059573173522949, |
| "learning_rate": 4.999109964739085e-06, |
| "loss": 1.2245, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.10773010773010773, |
| "grad_norm": 2.1651785373687744, |
| "learning_rate": 4.999089852803186e-06, |
| "loss": 1.1909, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1082971082971083, |
| "grad_norm": 2.1926372051239014, |
| "learning_rate": 4.999069516207767e-06, |
| "loss": 1.2346, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.10886410886410887, |
| "grad_norm": 1.9541335105895996, |
| "learning_rate": 4.999048954954658e-06, |
| "loss": 1.1564, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.10943110943110942, |
| "grad_norm": 2.194918394088745, |
| "learning_rate": 4.9990281690457075e-06, |
| "loss": 1.2866, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.10999810999811, |
| "grad_norm": 2.0585858821868896, |
| "learning_rate": 4.9990071584827815e-06, |
| "loss": 1.2152, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.11056511056511056, |
| "grad_norm": 2.174222469329834, |
| "learning_rate": 4.998985923267771e-06, |
| "loss": 1.2421, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.11113211113211113, |
| "grad_norm": 2.400076150894165, |
| "learning_rate": 4.998964463402583e-06, |
| "loss": 1.2473, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.1116991116991117, |
| "grad_norm": 2.1600160598754883, |
| "learning_rate": 4.99894277888915e-06, |
| "loss": 1.2247, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.11226611226611227, |
| "grad_norm": 2.108492851257324, |
| "learning_rate": 4.998920869729418e-06, |
| "loss": 1.2678, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.11283311283311283, |
| "grad_norm": 2.3057072162628174, |
| "learning_rate": 4.998898735925357e-06, |
| "loss": 1.2647, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1134001134001134, |
| "grad_norm": 2.171736240386963, |
| "learning_rate": 4.998876377478959e-06, |
| "loss": 1.2397, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11396711396711397, |
| "grad_norm": 2.1163182258605957, |
| "learning_rate": 4.998853794392233e-06, |
| "loss": 1.2026, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.11453411453411454, |
| "grad_norm": 2.1151411533355713, |
| "learning_rate": 4.998830986667207e-06, |
| "loss": 1.2562, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1151011151011151, |
| "grad_norm": 2.1629276275634766, |
| "learning_rate": 4.998807954305933e-06, |
| "loss": 1.242, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.11566811566811566, |
| "grad_norm": 2.0635461807250977, |
| "learning_rate": 4.998784697310483e-06, |
| "loss": 1.2825, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.11623511623511623, |
| "grad_norm": 2.018306255340576, |
| "learning_rate": 4.998761215682945e-06, |
| "loss": 1.2942, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.1168021168021168, |
| "grad_norm": 2.0687899589538574, |
| "learning_rate": 4.998737509425432e-06, |
| "loss": 1.241, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.11736911736911737, |
| "grad_norm": 2.129807233810425, |
| "learning_rate": 4.998713578540074e-06, |
| "loss": 1.2185, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.11793611793611794, |
| "grad_norm": 2.1497106552124023, |
| "learning_rate": 4.998689423029022e-06, |
| "loss": 1.2279, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.11850311850311851, |
| "grad_norm": 2.1885874271392822, |
| "learning_rate": 4.998665042894449e-06, |
| "loss": 1.2404, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.11907011907011907, |
| "grad_norm": 2.271296739578247, |
| "learning_rate": 4.998640438138545e-06, |
| "loss": 1.242, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.11963711963711963, |
| "grad_norm": 2.147714138031006, |
| "learning_rate": 4.998615608763524e-06, |
| "loss": 1.1742, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1202041202041202, |
| "grad_norm": 2.109957695007324, |
| "learning_rate": 4.998590554771615e-06, |
| "loss": 1.2519, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.12077112077112077, |
| "grad_norm": 2.291804313659668, |
| "learning_rate": 4.998565276165073e-06, |
| "loss": 1.2788, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.12133812133812134, |
| "grad_norm": 2.127683401107788, |
| "learning_rate": 4.998539772946169e-06, |
| "loss": 1.2399, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1219051219051219, |
| "grad_norm": 2.188720226287842, |
| "learning_rate": 4.998514045117197e-06, |
| "loss": 1.2295, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.12247212247212247, |
| "grad_norm": 2.2163619995117188, |
| "learning_rate": 4.998488092680468e-06, |
| "loss": 1.2594, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.12303912303912304, |
| "grad_norm": 2.2182250022888184, |
| "learning_rate": 4.998461915638316e-06, |
| "loss": 1.2382, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.12360612360612361, |
| "grad_norm": 2.0882575511932373, |
| "learning_rate": 4.998435513993095e-06, |
| "loss": 1.2426, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.12417312417312418, |
| "grad_norm": 2.2090935707092285, |
| "learning_rate": 4.998408887747177e-06, |
| "loss": 1.2609, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.12474012474012475, |
| "grad_norm": 2.1636769771575928, |
| "learning_rate": 4.9983820369029565e-06, |
| "loss": 1.2703, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12530712530712532, |
| "grad_norm": 2.2115790843963623, |
| "learning_rate": 4.998354961462847e-06, |
| "loss": 1.242, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1258741258741259, |
| "grad_norm": 2.766953945159912, |
| "learning_rate": 4.998327661429282e-06, |
| "loss": 1.2074, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.12644112644112643, |
| "grad_norm": 2.1348719596862793, |
| "learning_rate": 4.998300136804717e-06, |
| "loss": 1.2299, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.127008127008127, |
| "grad_norm": 2.1691837310791016, |
| "learning_rate": 4.998272387591625e-06, |
| "loss": 1.272, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.12757512757512757, |
| "grad_norm": 2.123806953430176, |
| "learning_rate": 4.998244413792501e-06, |
| "loss": 1.2037, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.12814212814212814, |
| "grad_norm": 2.1003634929656982, |
| "learning_rate": 4.9982162154098605e-06, |
| "loss": 1.2979, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1287091287091287, |
| "grad_norm": 2.2071104049682617, |
| "learning_rate": 4.998187792446238e-06, |
| "loss": 1.2586, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.12927612927612928, |
| "grad_norm": 1.992179274559021, |
| "learning_rate": 4.998159144904188e-06, |
| "loss": 1.2152, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.12984312984312985, |
| "grad_norm": 2.196314573287964, |
| "learning_rate": 4.998130272786286e-06, |
| "loss": 1.2631, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.13041013041013041, |
| "grad_norm": 2.072913885116577, |
| "learning_rate": 4.998101176095128e-06, |
| "loss": 1.2451, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.13097713097713098, |
| "grad_norm": 2.045172691345215, |
| "learning_rate": 4.99807185483333e-06, |
| "loss": 1.1982, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.13154413154413155, |
| "grad_norm": 2.30246639251709, |
| "learning_rate": 4.998042309003526e-06, |
| "loss": 1.3061, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.13211113211113212, |
| "grad_norm": 1.9828840494155884, |
| "learning_rate": 4.9980125386083744e-06, |
| "loss": 1.2102, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.13267813267813267, |
| "grad_norm": 2.0770061016082764, |
| "learning_rate": 4.99798254365055e-06, |
| "loss": 1.268, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.13324513324513323, |
| "grad_norm": 2.1250219345092773, |
| "learning_rate": 4.99795232413275e-06, |
| "loss": 1.2187, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.1338121338121338, |
| "grad_norm": 2.24729323387146, |
| "learning_rate": 4.99792188005769e-06, |
| "loss": 1.1909, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.13437913437913437, |
| "grad_norm": 2.048240900039673, |
| "learning_rate": 4.997891211428109e-06, |
| "loss": 1.216, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.13494613494613494, |
| "grad_norm": 2.0455849170684814, |
| "learning_rate": 4.997860318246761e-06, |
| "loss": 1.2172, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1355131355131355, |
| "grad_norm": 2.306419849395752, |
| "learning_rate": 4.997829200516426e-06, |
| "loss": 1.3109, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.13608013608013608, |
| "grad_norm": 2.0704190731048584, |
| "learning_rate": 4.997797858239899e-06, |
| "loss": 1.1954, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13664713664713665, |
| "grad_norm": 2.1370325088500977, |
| "learning_rate": 4.997766291419999e-06, |
| "loss": 1.3341, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.13721413721413722, |
| "grad_norm": 2.311892509460449, |
| "learning_rate": 4.997734500059564e-06, |
| "loss": 1.2123, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1377811377811378, |
| "grad_norm": 2.125614643096924, |
| "learning_rate": 4.997702484161451e-06, |
| "loss": 1.2384, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.13834813834813836, |
| "grad_norm": 2.17887806892395, |
| "learning_rate": 4.997670243728538e-06, |
| "loss": 1.2155, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1389151389151389, |
| "grad_norm": 2.132019519805908, |
| "learning_rate": 4.9976377787637246e-06, |
| "loss": 1.1651, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.13948213948213947, |
| "grad_norm": 2.275861978530884, |
| "learning_rate": 4.997605089269928e-06, |
| "loss": 1.2107, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.14004914004914004, |
| "grad_norm": 2.1661062240600586, |
| "learning_rate": 4.997572175250087e-06, |
| "loss": 1.2028, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1406161406161406, |
| "grad_norm": 2.1466269493103027, |
| "learning_rate": 4.997539036707162e-06, |
| "loss": 1.2264, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.14118314118314118, |
| "grad_norm": 2.0136516094207764, |
| "learning_rate": 4.997505673644129e-06, |
| "loss": 1.224, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.14175014175014175, |
| "grad_norm": 2.0584123134613037, |
| "learning_rate": 4.9974720860639906e-06, |
| "loss": 1.2282, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.14231714231714232, |
| "grad_norm": 2.3332486152648926, |
| "learning_rate": 4.997438273969764e-06, |
| "loss": 1.2475, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1428841428841429, |
| "grad_norm": 2.032762289047241, |
| "learning_rate": 4.997404237364489e-06, |
| "loss": 1.2194, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.14345114345114346, |
| "grad_norm": 2.1560542583465576, |
| "learning_rate": 4.997369976251228e-06, |
| "loss": 1.21, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.14401814401814403, |
| "grad_norm": 2.2596874237060547, |
| "learning_rate": 4.9973354906330565e-06, |
| "loss": 1.2647, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1445851445851446, |
| "grad_norm": 2.0843327045440674, |
| "learning_rate": 4.997300780513078e-06, |
| "loss": 1.1982, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.14515214515214514, |
| "grad_norm": 2.177116870880127, |
| "learning_rate": 4.997265845894411e-06, |
| "loss": 1.2727, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.1457191457191457, |
| "grad_norm": 2.1334030628204346, |
| "learning_rate": 4.997230686780197e-06, |
| "loss": 1.227, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.14628614628614628, |
| "grad_norm": 2.182185649871826, |
| "learning_rate": 4.997195303173597e-06, |
| "loss": 1.2546, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.14685314685314685, |
| "grad_norm": 2.2583160400390625, |
| "learning_rate": 4.99715969507779e-06, |
| "loss": 1.2269, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.14742014742014742, |
| "grad_norm": 2.1098732948303223, |
| "learning_rate": 4.99712386249598e-06, |
| "loss": 1.2527, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.147987147987148, |
| "grad_norm": 2.074742317199707, |
| "learning_rate": 4.997087805431385e-06, |
| "loss": 1.1974, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.14855414855414856, |
| "grad_norm": 2.2374022006988525, |
| "learning_rate": 4.997051523887249e-06, |
| "loss": 1.2668, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.14912114912114913, |
| "grad_norm": 2.2284483909606934, |
| "learning_rate": 4.997015017866832e-06, |
| "loss": 1.1923, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1496881496881497, |
| "grad_norm": 2.0552480220794678, |
| "learning_rate": 4.9969782873734165e-06, |
| "loss": 1.2624, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.15025515025515027, |
| "grad_norm": 1.9496517181396484, |
| "learning_rate": 4.996941332410304e-06, |
| "loss": 1.2363, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.15082215082215084, |
| "grad_norm": 2.10217547416687, |
| "learning_rate": 4.996904152980817e-06, |
| "loss": 1.2402, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.15138915138915138, |
| "grad_norm": 2.3567399978637695, |
| "learning_rate": 4.996866749088298e-06, |
| "loss": 1.185, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.15195615195615195, |
| "grad_norm": 2.2061548233032227, |
| "learning_rate": 4.996829120736109e-06, |
| "loss": 1.199, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.15252315252315252, |
| "grad_norm": 2.2688632011413574, |
| "learning_rate": 4.996791267927632e-06, |
| "loss": 1.1538, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.15309015309015309, |
| "grad_norm": 2.0387306213378906, |
| "learning_rate": 4.996753190666272e-06, |
| "loss": 1.2068, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15365715365715366, |
| "grad_norm": 1.9924688339233398, |
| "learning_rate": 4.9967148889554495e-06, |
| "loss": 1.1666, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.15422415422415423, |
| "grad_norm": 2.164651393890381, |
| "learning_rate": 4.99667636279861e-06, |
| "loss": 1.2135, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1547911547911548, |
| "grad_norm": 2.251842737197876, |
| "learning_rate": 4.996637612199215e-06, |
| "loss": 1.2516, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.15535815535815536, |
| "grad_norm": 2.039698600769043, |
| "learning_rate": 4.996598637160749e-06, |
| "loss": 1.1277, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.15592515592515593, |
| "grad_norm": 2.076981544494629, |
| "learning_rate": 4.996559437686716e-06, |
| "loss": 1.2322, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1564921564921565, |
| "grad_norm": 2.067885398864746, |
| "learning_rate": 4.996520013780638e-06, |
| "loss": 1.236, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.15705915705915707, |
| "grad_norm": 1.9662197828292847, |
| "learning_rate": 4.996480365446061e-06, |
| "loss": 1.1706, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.15762615762615761, |
| "grad_norm": 1.9575865268707275, |
| "learning_rate": 4.99644049268655e-06, |
| "loss": 1.1679, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.15819315819315818, |
| "grad_norm": 1.9785221815109253, |
| "learning_rate": 4.996400395505686e-06, |
| "loss": 1.2064, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.15876015876015875, |
| "grad_norm": 1.8600579500198364, |
| "learning_rate": 4.996360073907077e-06, |
| "loss": 1.1616, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15932715932715932, |
| "grad_norm": 1.9530023336410522, |
| "learning_rate": 4.996319527894347e-06, |
| "loss": 1.1516, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1598941598941599, |
| "grad_norm": 1.9305200576782227, |
| "learning_rate": 4.996278757471139e-06, |
| "loss": 1.1937, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.16046116046116046, |
| "grad_norm": 2.1341655254364014, |
| "learning_rate": 4.996237762641121e-06, |
| "loss": 1.2599, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.16102816102816103, |
| "grad_norm": 2.364447593688965, |
| "learning_rate": 4.996196543407976e-06, |
| "loss": 1.2051, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1615951615951616, |
| "grad_norm": 2.126579523086548, |
| "learning_rate": 4.996155099775411e-06, |
| "loss": 1.2119, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.16216216216216217, |
| "grad_norm": 2.0040011405944824, |
| "learning_rate": 4.99611343174715e-06, |
| "loss": 1.2536, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.16272916272916274, |
| "grad_norm": 2.0829451084136963, |
| "learning_rate": 4.99607153932694e-06, |
| "loss": 1.1956, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.16329616329616328, |
| "grad_norm": 2.0177149772644043, |
| "learning_rate": 4.996029422518547e-06, |
| "loss": 1.1834, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.16386316386316385, |
| "grad_norm": 2.0412003993988037, |
| "learning_rate": 4.995987081325757e-06, |
| "loss": 1.2371, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.16443016443016442, |
| "grad_norm": 2.0813381671905518, |
| "learning_rate": 4.995944515752377e-06, |
| "loss": 1.2217, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.164997164997165, |
| "grad_norm": 2.190868377685547, |
| "learning_rate": 4.995901725802231e-06, |
| "loss": 1.2081, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.16556416556416556, |
| "grad_norm": 1.9888038635253906, |
| "learning_rate": 4.995858711479169e-06, |
| "loss": 1.1693, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.16613116613116613, |
| "grad_norm": 2.158754348754883, |
| "learning_rate": 4.995815472787055e-06, |
| "loss": 1.2367, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1666981666981667, |
| "grad_norm": 2.0454671382904053, |
| "learning_rate": 4.995772009729778e-06, |
| "loss": 1.1847, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.16726516726516727, |
| "grad_norm": 2.2318146228790283, |
| "learning_rate": 4.995728322311244e-06, |
| "loss": 1.1736, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.16783216783216784, |
| "grad_norm": 2.050093412399292, |
| "learning_rate": 4.995684410535382e-06, |
| "loss": 1.2201, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1683991683991684, |
| "grad_norm": 2.1249499320983887, |
| "learning_rate": 4.995640274406137e-06, |
| "loss": 1.2297, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.16896616896616898, |
| "grad_norm": 2.013911008834839, |
| "learning_rate": 4.995595913927478e-06, |
| "loss": 1.2292, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.16953316953316952, |
| "grad_norm": 3.098249673843384, |
| "learning_rate": 4.995551329103393e-06, |
| "loss": 1.1848, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1701001701001701, |
| "grad_norm": 2.1460604667663574, |
| "learning_rate": 4.99550651993789e-06, |
| "loss": 1.2376, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.17066717066717066, |
| "grad_norm": 2.1938812732696533, |
| "learning_rate": 4.995461486434997e-06, |
| "loss": 1.1589, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.17123417123417123, |
| "grad_norm": 2.1145992279052734, |
| "learning_rate": 4.995416228598763e-06, |
| "loss": 1.2085, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1718011718011718, |
| "grad_norm": 2.0934178829193115, |
| "learning_rate": 4.995370746433256e-06, |
| "loss": 1.2386, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.17236817236817237, |
| "grad_norm": 2.2182319164276123, |
| "learning_rate": 4.995325039942563e-06, |
| "loss": 1.1851, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.17293517293517294, |
| "grad_norm": 2.167012929916382, |
| "learning_rate": 4.995279109130796e-06, |
| "loss": 1.1872, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1735021735021735, |
| "grad_norm": 2.0496084690093994, |
| "learning_rate": 4.995232954002082e-06, |
| "loss": 1.1956, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.17406917406917408, |
| "grad_norm": 2.009467124938965, |
| "learning_rate": 4.9951865745605705e-06, |
| "loss": 1.1736, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.17463617463617465, |
| "grad_norm": 2.2250547409057617, |
| "learning_rate": 4.995139970810431e-06, |
| "loss": 1.2494, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.17520317520317522, |
| "grad_norm": 2.7782530784606934, |
| "learning_rate": 4.995093142755854e-06, |
| "loss": 1.1837, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.17577017577017576, |
| "grad_norm": 2.1650445461273193, |
| "learning_rate": 4.995046090401047e-06, |
| "loss": 1.1792, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17633717633717633, |
| "grad_norm": 2.4029552936553955, |
| "learning_rate": 4.994998813750241e-06, |
| "loss": 1.2021, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1769041769041769, |
| "grad_norm": 2.227540969848633, |
| "learning_rate": 4.994951312807687e-06, |
| "loss": 1.1454, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.17747117747117747, |
| "grad_norm": 2.060182571411133, |
| "learning_rate": 4.994903587577653e-06, |
| "loss": 1.2367, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.17803817803817804, |
| "grad_norm": 2.069013833999634, |
| "learning_rate": 4.994855638064432e-06, |
| "loss": 1.2495, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1786051786051786, |
| "grad_norm": 2.2063608169555664, |
| "learning_rate": 4.994807464272332e-06, |
| "loss": 1.1819, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.17917217917217917, |
| "grad_norm": 1.9642980098724365, |
| "learning_rate": 4.994759066205685e-06, |
| "loss": 1.1656, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.17973917973917974, |
| "grad_norm": 2.192549228668213, |
| "learning_rate": 4.994710443868842e-06, |
| "loss": 1.2461, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.1803061803061803, |
| "grad_norm": 1.8663341999053955, |
| "learning_rate": 4.9946615972661735e-06, |
| "loss": 1.1618, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.18087318087318088, |
| "grad_norm": 1.9651904106140137, |
| "learning_rate": 4.994612526402071e-06, |
| "loss": 1.2028, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.18144018144018145, |
| "grad_norm": 2.250514030456543, |
| "learning_rate": 4.9945632312809444e-06, |
| "loss": 1.2224, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.182007182007182, |
| "grad_norm": 2.1259069442749023, |
| "learning_rate": 4.994513711907227e-06, |
| "loss": 1.2486, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.18257418257418256, |
| "grad_norm": 2.530109405517578, |
| "learning_rate": 4.994463968285369e-06, |
| "loss": 1.2559, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.18314118314118313, |
| "grad_norm": 2.106372833251953, |
| "learning_rate": 4.994414000419844e-06, |
| "loss": 1.1923, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.1837081837081837, |
| "grad_norm": 2.2210237979888916, |
| "learning_rate": 4.994363808315141e-06, |
| "loss": 1.2749, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.18427518427518427, |
| "grad_norm": 2.1548731327056885, |
| "learning_rate": 4.994313391975775e-06, |
| "loss": 1.2003, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.18484218484218484, |
| "grad_norm": 2.310274124145508, |
| "learning_rate": 4.994262751406277e-06, |
| "loss": 1.2314, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1854091854091854, |
| "grad_norm": 2.3611135482788086, |
| "learning_rate": 4.9942118866112e-06, |
| "loss": 1.1846, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.18597618597618598, |
| "grad_norm": 2.2071614265441895, |
| "learning_rate": 4.994160797595115e-06, |
| "loss": 1.1793, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.18654318654318655, |
| "grad_norm": 1.9791762828826904, |
| "learning_rate": 4.994109484362617e-06, |
| "loss": 1.2402, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.18711018711018712, |
| "grad_norm": 2.1133017539978027, |
| "learning_rate": 4.9940579469183174e-06, |
| "loss": 1.1781, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1876771876771877, |
| "grad_norm": 2.1351208686828613, |
| "learning_rate": 4.994006185266848e-06, |
| "loss": 1.1897, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.18824418824418823, |
| "grad_norm": 1.9921027421951294, |
| "learning_rate": 4.9939541994128646e-06, |
| "loss": 1.1257, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1888111888111888, |
| "grad_norm": 2.026926279067993, |
| "learning_rate": 4.99390198936104e-06, |
| "loss": 1.1219, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.18937818937818937, |
| "grad_norm": 2.0869030952453613, |
| "learning_rate": 4.993849555116067e-06, |
| "loss": 1.1899, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.18994518994518994, |
| "grad_norm": 2.1541025638580322, |
| "learning_rate": 4.9937968966826595e-06, |
| "loss": 1.231, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.1905121905121905, |
| "grad_norm": 1.927504062652588, |
| "learning_rate": 4.993744014065551e-06, |
| "loss": 1.1633, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.19107919107919108, |
| "grad_norm": 2.2648391723632812, |
| "learning_rate": 4.993690907269496e-06, |
| "loss": 1.1915, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.19164619164619165, |
| "grad_norm": 2.340109348297119, |
| "learning_rate": 4.993637576299268e-06, |
| "loss": 1.1964, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.19221319221319222, |
| "grad_norm": 2.209946393966675, |
| "learning_rate": 4.993584021159662e-06, |
| "loss": 1.2111, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1927801927801928, |
| "grad_norm": 2.405576705932617, |
| "learning_rate": 4.993530241855491e-06, |
| "loss": 1.1626, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.19334719334719336, |
| "grad_norm": 2.1429643630981445, |
| "learning_rate": 4.993476238391591e-06, |
| "loss": 1.193, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.19391419391419393, |
| "grad_norm": 2.2143402099609375, |
| "learning_rate": 4.993422010772817e-06, |
| "loss": 1.2702, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.19448119448119447, |
| "grad_norm": 2.0782816410064697, |
| "learning_rate": 4.993367559004043e-06, |
| "loss": 1.1514, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.19504819504819504, |
| "grad_norm": 2.1788971424102783, |
| "learning_rate": 4.993312883090164e-06, |
| "loss": 1.1957, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1956151956151956, |
| "grad_norm": 2.1041131019592285, |
| "learning_rate": 4.993257983036095e-06, |
| "loss": 1.2639, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.19618219618219618, |
| "grad_norm": 2.1448371410369873, |
| "learning_rate": 4.993202858846773e-06, |
| "loss": 1.2499, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.19674919674919675, |
| "grad_norm": 2.086442470550537, |
| "learning_rate": 4.993147510527151e-06, |
| "loss": 1.2106, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.19731619731619732, |
| "grad_norm": 1.9370099306106567, |
| "learning_rate": 4.993091938082206e-06, |
| "loss": 1.145, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.1978831978831979, |
| "grad_norm": 2.2078094482421875, |
| "learning_rate": 4.993036141516934e-06, |
| "loss": 1.2206, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.19845019845019846, |
| "grad_norm": 2.2671496868133545, |
| "learning_rate": 4.99298012083635e-06, |
| "loss": 1.1099, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19901719901719903, |
| "grad_norm": 2.2037370204925537, |
| "learning_rate": 4.9929238760454915e-06, |
| "loss": 1.2188, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1995841995841996, |
| "grad_norm": 2.1167116165161133, |
| "learning_rate": 4.9928674071494125e-06, |
| "loss": 1.2037, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.20015120015120016, |
| "grad_norm": 2.066723585128784, |
| "learning_rate": 4.992810714153191e-06, |
| "loss": 1.1682, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.2007182007182007, |
| "grad_norm": 2.2286489009857178, |
| "learning_rate": 4.992753797061924e-06, |
| "loss": 1.1563, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.20128520128520128, |
| "grad_norm": 2.165644645690918, |
| "learning_rate": 4.992696655880727e-06, |
| "loss": 1.1885, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.20185220185220185, |
| "grad_norm": 2.1326098442077637, |
| "learning_rate": 4.992639290614736e-06, |
| "loss": 1.1434, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.20241920241920242, |
| "grad_norm": 2.0205516815185547, |
| "learning_rate": 4.99258170126911e-06, |
| "loss": 1.1146, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.20298620298620298, |
| "grad_norm": 2.1004693508148193, |
| "learning_rate": 4.992523887849025e-06, |
| "loss": 1.1704, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.20355320355320355, |
| "grad_norm": 2.3303492069244385, |
| "learning_rate": 4.992465850359679e-06, |
| "loss": 1.2289, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.20412020412020412, |
| "grad_norm": 2.013455867767334, |
| "learning_rate": 4.992407588806287e-06, |
| "loss": 1.1745, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2046872046872047, |
| "grad_norm": 2.2908389568328857, |
| "learning_rate": 4.9923491031940895e-06, |
| "loss": 1.2258, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.20525420525420526, |
| "grad_norm": 2.0814812183380127, |
| "learning_rate": 4.9922903935283425e-06, |
| "loss": 1.1626, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.20582120582120583, |
| "grad_norm": 2.045369863510132, |
| "learning_rate": 4.992231459814324e-06, |
| "loss": 1.1758, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.20638820638820637, |
| "grad_norm": 1.986330270767212, |
| "learning_rate": 4.992172302057332e-06, |
| "loss": 1.1153, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.20695520695520694, |
| "grad_norm": 3.600193738937378, |
| "learning_rate": 4.9921129202626856e-06, |
| "loss": 1.171, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2075222075222075, |
| "grad_norm": 2.119173288345337, |
| "learning_rate": 4.992053314435722e-06, |
| "loss": 1.152, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.20808920808920808, |
| "grad_norm": 2.0884904861450195, |
| "learning_rate": 4.9919934845817984e-06, |
| "loss": 1.1959, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.20865620865620865, |
| "grad_norm": 2.014221668243408, |
| "learning_rate": 4.991933430706296e-06, |
| "loss": 1.1839, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.20922320922320922, |
| "grad_norm": 1.9567116498947144, |
| "learning_rate": 4.9918731528146115e-06, |
| "loss": 1.2124, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2097902097902098, |
| "grad_norm": 2.1171345710754395, |
| "learning_rate": 4.991812650912163e-06, |
| "loss": 1.1805, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.21035721035721036, |
| "grad_norm": 2.2752904891967773, |
| "learning_rate": 4.991751925004392e-06, |
| "loss": 1.2005, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.21092421092421093, |
| "grad_norm": 2.025243043899536, |
| "learning_rate": 4.991690975096756e-06, |
| "loss": 1.1823, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.2114912114912115, |
| "grad_norm": 2.2367069721221924, |
| "learning_rate": 4.991629801194734e-06, |
| "loss": 1.1298, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.21205821205821207, |
| "grad_norm": 2.109471082687378, |
| "learning_rate": 4.991568403303825e-06, |
| "loss": 1.2322, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.2126252126252126, |
| "grad_norm": 2.2278378009796143, |
| "learning_rate": 4.99150678142955e-06, |
| "loss": 1.1766, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.21319221319221318, |
| "grad_norm": 2.0208706855773926, |
| "learning_rate": 4.991444935577447e-06, |
| "loss": 1.1748, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.21375921375921375, |
| "grad_norm": 2.2481842041015625, |
| "learning_rate": 4.991382865753077e-06, |
| "loss": 1.2832, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.21432621432621432, |
| "grad_norm": 2.2334558963775635, |
| "learning_rate": 4.9913205719620195e-06, |
| "loss": 1.2306, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2148932148932149, |
| "grad_norm": 2.373222589492798, |
| "learning_rate": 4.991258054209873e-06, |
| "loss": 1.1926, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.21546021546021546, |
| "grad_norm": 2.2954397201538086, |
| "learning_rate": 4.9911953125022606e-06, |
| "loss": 1.25, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21602721602721603, |
| "grad_norm": 2.0847930908203125, |
| "learning_rate": 4.991132346844819e-06, |
| "loss": 1.1645, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2165942165942166, |
| "grad_norm": 2.0304839611053467, |
| "learning_rate": 4.991069157243212e-06, |
| "loss": 1.1687, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.21716121716121717, |
| "grad_norm": 2.000683546066284, |
| "learning_rate": 4.991005743703118e-06, |
| "loss": 1.1637, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.21772821772821774, |
| "grad_norm": 2.1974005699157715, |
| "learning_rate": 4.990942106230238e-06, |
| "loss": 1.168, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2182952182952183, |
| "grad_norm": 2.0585193634033203, |
| "learning_rate": 4.990878244830294e-06, |
| "loss": 1.213, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.21886221886221885, |
| "grad_norm": 2.027149200439453, |
| "learning_rate": 4.990814159509025e-06, |
| "loss": 1.1494, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.21942921942921942, |
| "grad_norm": 2.1952474117279053, |
| "learning_rate": 4.990749850272193e-06, |
| "loss": 1.1986, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.21999621999622, |
| "grad_norm": 1.9279240369796753, |
| "learning_rate": 4.990685317125579e-06, |
| "loss": 1.2058, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.22056322056322056, |
| "grad_norm": 1.9203171730041504, |
| "learning_rate": 4.9906205600749855e-06, |
| "loss": 1.143, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.22113022113022113, |
| "grad_norm": 2.044790506362915, |
| "learning_rate": 4.990555579126232e-06, |
| "loss": 1.2609, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2216972216972217, |
| "grad_norm": 2.142638921737671, |
| "learning_rate": 4.99049037428516e-06, |
| "loss": 1.2095, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.22226422226422227, |
| "grad_norm": 2.184535026550293, |
| "learning_rate": 4.990424945557635e-06, |
| "loss": 1.153, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.22283122283122284, |
| "grad_norm": 2.1759684085845947, |
| "learning_rate": 4.990359292949534e-06, |
| "loss": 1.1759, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2233982233982234, |
| "grad_norm": 2.133268356323242, |
| "learning_rate": 4.990293416466761e-06, |
| "loss": 1.2163, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.22396522396522398, |
| "grad_norm": 2.0592682361602783, |
| "learning_rate": 4.9902273161152385e-06, |
| "loss": 1.1854, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.22453222453222454, |
| "grad_norm": 1.9705684185028076, |
| "learning_rate": 4.990160991900907e-06, |
| "loss": 1.2054, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2250992250992251, |
| "grad_norm": 1.9913262128829956, |
| "learning_rate": 4.990094443829732e-06, |
| "loss": 1.1733, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.22566622566622566, |
| "grad_norm": 2.1340818405151367, |
| "learning_rate": 4.990027671907692e-06, |
| "loss": 1.147, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.22623322623322623, |
| "grad_norm": 2.1737279891967773, |
| "learning_rate": 4.989960676140793e-06, |
| "loss": 1.2293, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2268002268002268, |
| "grad_norm": 2.1729650497436523, |
| "learning_rate": 4.989893456535056e-06, |
| "loss": 1.2801, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22736722736722736, |
| "grad_norm": 2.1917948722839355, |
| "learning_rate": 4.989826013096522e-06, |
| "loss": 1.1536, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.22793422793422793, |
| "grad_norm": 2.1080565452575684, |
| "learning_rate": 4.989758345831258e-06, |
| "loss": 1.2141, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2285012285012285, |
| "grad_norm": 2.110219717025757, |
| "learning_rate": 4.989690454745345e-06, |
| "loss": 1.2062, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.22906822906822907, |
| "grad_norm": 2.035661220550537, |
| "learning_rate": 4.989622339844886e-06, |
| "loss": 1.1714, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.22963522963522964, |
| "grad_norm": 2.1241447925567627, |
| "learning_rate": 4.989554001136003e-06, |
| "loss": 1.1623, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2302022302022302, |
| "grad_norm": 2.0898942947387695, |
| "learning_rate": 4.989485438624843e-06, |
| "loss": 1.1604, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 2.0134730339050293, |
| "learning_rate": 4.989416652317566e-06, |
| "loss": 1.225, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.23133623133623132, |
| "grad_norm": 2.0604565143585205, |
| "learning_rate": 4.989347642220357e-06, |
| "loss": 1.1959, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2319032319032319, |
| "grad_norm": 2.229936361312866, |
| "learning_rate": 4.98927840833942e-06, |
| "loss": 1.1592, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.23247023247023246, |
| "grad_norm": 2.2757463455200195, |
| "learning_rate": 4.989208950680979e-06, |
| "loss": 1.148, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.23303723303723303, |
| "grad_norm": 2.0375144481658936, |
| "learning_rate": 4.989139269251278e-06, |
| "loss": 1.2944, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.2336042336042336, |
| "grad_norm": 2.230483293533325, |
| "learning_rate": 4.98906936405658e-06, |
| "loss": 1.1945, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.23417123417123417, |
| "grad_norm": 1.9437626600265503, |
| "learning_rate": 4.988999235103171e-06, |
| "loss": 1.1952, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.23473823473823474, |
| "grad_norm": 2.0950779914855957, |
| "learning_rate": 4.9889288823973535e-06, |
| "loss": 1.2084, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2353052353052353, |
| "grad_norm": 2.0104267597198486, |
| "learning_rate": 4.9888583059454536e-06, |
| "loss": 1.1729, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.23587223587223588, |
| "grad_norm": 2.0139896869659424, |
| "learning_rate": 4.988787505753815e-06, |
| "loss": 1.1628, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.23643923643923645, |
| "grad_norm": 2.155890703201294, |
| "learning_rate": 4.9887164818288016e-06, |
| "loss": 1.1806, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.23700623700623702, |
| "grad_norm": 2.0404961109161377, |
| "learning_rate": 4.9886452341768e-06, |
| "loss": 1.1794, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.23757323757323756, |
| "grad_norm": 2.07344126701355, |
| "learning_rate": 4.988573762804214e-06, |
| "loss": 1.1908, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.23814023814023813, |
| "grad_norm": 2.1261799335479736, |
| "learning_rate": 4.988502067717469e-06, |
| "loss": 1.2493, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2387072387072387, |
| "grad_norm": 2.111435651779175, |
| "learning_rate": 4.98843014892301e-06, |
| "loss": 1.0969, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.23927423927423927, |
| "grad_norm": 2.3221940994262695, |
| "learning_rate": 4.988358006427303e-06, |
| "loss": 1.2238, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.23984123984123984, |
| "grad_norm": 2.3007023334503174, |
| "learning_rate": 4.988285640236832e-06, |
| "loss": 1.2167, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2404082404082404, |
| "grad_norm": 2.247527837753296, |
| "learning_rate": 4.988213050358103e-06, |
| "loss": 1.1468, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.24097524097524098, |
| "grad_norm": 2.294705867767334, |
| "learning_rate": 4.988140236797642e-06, |
| "loss": 1.1917, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.24154224154224155, |
| "grad_norm": 2.0505738258361816, |
| "learning_rate": 4.9880671995619935e-06, |
| "loss": 1.1772, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.24210924210924212, |
| "grad_norm": 2.2414138317108154, |
| "learning_rate": 4.987993938657725e-06, |
| "loss": 1.1632, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2426762426762427, |
| "grad_norm": 2.2510695457458496, |
| "learning_rate": 4.987920454091422e-06, |
| "loss": 1.235, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.24324324324324326, |
| "grad_norm": 1.9748800992965698, |
| "learning_rate": 4.987846745869689e-06, |
| "loss": 1.1171, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2438102438102438, |
| "grad_norm": 2.399214506149292, |
| "learning_rate": 4.987772813999154e-06, |
| "loss": 1.2321, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.24437724437724437, |
| "grad_norm": 2.1555111408233643, |
| "learning_rate": 4.987698658486462e-06, |
| "loss": 1.147, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.24494424494424494, |
| "grad_norm": 2.1852099895477295, |
| "learning_rate": 4.9876242793382795e-06, |
| "loss": 1.2108, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2455112455112455, |
| "grad_norm": 2.2297446727752686, |
| "learning_rate": 4.9875496765612935e-06, |
| "loss": 1.2424, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.24607824607824608, |
| "grad_norm": 1.976022481918335, |
| "learning_rate": 4.98747485016221e-06, |
| "loss": 1.1248, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.24664524664524665, |
| "grad_norm": 2.116549015045166, |
| "learning_rate": 4.9873998001477564e-06, |
| "loss": 1.1704, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.24721224721224722, |
| "grad_norm": 2.195775270462036, |
| "learning_rate": 4.987324526524678e-06, |
| "loss": 1.206, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.24777924777924779, |
| "grad_norm": 2.012995719909668, |
| "learning_rate": 4.987249029299743e-06, |
| "loss": 1.1893, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.24834624834624835, |
| "grad_norm": 1.9351956844329834, |
| "learning_rate": 4.987173308479738e-06, |
| "loss": 1.2489, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.24891324891324892, |
| "grad_norm": 2.075611114501953, |
| "learning_rate": 4.98709736407147e-06, |
| "loss": 1.2025, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2494802494802495, |
| "grad_norm": 2.0834317207336426, |
| "learning_rate": 4.987021196081766e-06, |
| "loss": 1.1608, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.25004725004725004, |
| "grad_norm": 1.9521535634994507, |
| "learning_rate": 4.986944804517473e-06, |
| "loss": 1.0972, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.25061425061425063, |
| "grad_norm": 2.0758798122406006, |
| "learning_rate": 4.986868189385459e-06, |
| "loss": 1.1865, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2511812511812512, |
| "grad_norm": 2.025371789932251, |
| "learning_rate": 4.98679135069261e-06, |
| "loss": 1.1592, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2517482517482518, |
| "grad_norm": 2.3723526000976562, |
| "learning_rate": 4.986714288445835e-06, |
| "loss": 1.2082, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2523152523152523, |
| "grad_norm": 2.081716299057007, |
| "learning_rate": 4.986637002652061e-06, |
| "loss": 1.1611, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.25288225288225286, |
| "grad_norm": 2.23604679107666, |
| "learning_rate": 4.986559493318237e-06, |
| "loss": 1.1433, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.25344925344925345, |
| "grad_norm": 2.1360273361206055, |
| "learning_rate": 4.986481760451329e-06, |
| "loss": 1.1006, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.254016254016254, |
| "grad_norm": 2.1812918186187744, |
| "learning_rate": 4.986403804058326e-06, |
| "loss": 1.153, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2545832545832546, |
| "grad_norm": 2.2628848552703857, |
| "learning_rate": 4.986325624146236e-06, |
| "loss": 1.2299, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.25515025515025513, |
| "grad_norm": 2.1486809253692627, |
| "learning_rate": 4.986247220722085e-06, |
| "loss": 1.1961, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.25571725571725573, |
| "grad_norm": 2.16284441947937, |
| "learning_rate": 4.986168593792924e-06, |
| "loss": 1.2001, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2562842562842563, |
| "grad_norm": 2.333041191101074, |
| "learning_rate": 4.986089743365821e-06, |
| "loss": 1.1224, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.25685125685125687, |
| "grad_norm": 2.0724565982818604, |
| "learning_rate": 4.986010669447863e-06, |
| "loss": 1.1522, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2574182574182574, |
| "grad_norm": 2.077805519104004, |
| "learning_rate": 4.985931372046159e-06, |
| "loss": 1.1658, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.257985257985258, |
| "grad_norm": 2.3617515563964844, |
| "learning_rate": 4.985851851167838e-06, |
| "loss": 1.2318, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.25855225855225855, |
| "grad_norm": 2.1975150108337402, |
| "learning_rate": 4.985772106820048e-06, |
| "loss": 1.155, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2591192591192591, |
| "grad_norm": 2.175889015197754, |
| "learning_rate": 4.985692139009958e-06, |
| "loss": 1.2338, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2596862596862597, |
| "grad_norm": 2.2394015789031982, |
| "learning_rate": 4.9856119477447575e-06, |
| "loss": 1.1954, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.26025326025326023, |
| "grad_norm": 2.231133460998535, |
| "learning_rate": 4.985531533031654e-06, |
| "loss": 1.152, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.26082026082026083, |
| "grad_norm": 2.161984920501709, |
| "learning_rate": 4.9854508948778776e-06, |
| "loss": 1.1859, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.26138726138726137, |
| "grad_norm": 2.168325901031494, |
| "learning_rate": 4.985370033290678e-06, |
| "loss": 1.153, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.26195426195426197, |
| "grad_norm": 2.126570701599121, |
| "learning_rate": 4.985288948277322e-06, |
| "loss": 1.1973, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2625212625212625, |
| "grad_norm": 2.019768476486206, |
| "learning_rate": 4.985207639845101e-06, |
| "loss": 1.2032, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2630882630882631, |
| "grad_norm": 2.0683631896972656, |
| "learning_rate": 4.985126108001323e-06, |
| "loss": 1.1933, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.26365526365526365, |
| "grad_norm": 2.112457036972046, |
| "learning_rate": 4.9850443527533186e-06, |
| "loss": 1.1886, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.26422226422226425, |
| "grad_norm": 2.1040475368499756, |
| "learning_rate": 4.984962374108438e-06, |
| "loss": 1.1435, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2647892647892648, |
| "grad_norm": 2.0582830905914307, |
| "learning_rate": 4.9848801720740484e-06, |
| "loss": 1.1349, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.26535626535626533, |
| "grad_norm": 2.101658582687378, |
| "learning_rate": 4.98479774665754e-06, |
| "loss": 1.2083, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.26592326592326593, |
| "grad_norm": 1.9494727849960327, |
| "learning_rate": 4.984715097866325e-06, |
| "loss": 1.1068, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.26649026649026647, |
| "grad_norm": 2.1678626537323, |
| "learning_rate": 4.984632225707831e-06, |
| "loss": 1.2217, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.26705726705726707, |
| "grad_norm": 2.1645007133483887, |
| "learning_rate": 4.984549130189508e-06, |
| "loss": 1.2297, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.2676242676242676, |
| "grad_norm": 1.9701532125473022, |
| "learning_rate": 4.984465811318826e-06, |
| "loss": 1.2184, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2681912681912682, |
| "grad_norm": 2.028223752975464, |
| "learning_rate": 4.984382269103276e-06, |
| "loss": 1.2268, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.26875826875826875, |
| "grad_norm": 2.1951472759246826, |
| "learning_rate": 4.984298503550367e-06, |
| "loss": 1.0856, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.26932526932526935, |
| "grad_norm": 2.046638250350952, |
| "learning_rate": 4.984214514667631e-06, |
| "loss": 1.1667, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2698922698922699, |
| "grad_norm": 2.0294766426086426, |
| "learning_rate": 4.984130302462617e-06, |
| "loss": 1.1449, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2704592704592705, |
| "grad_norm": 1.9961752891540527, |
| "learning_rate": 4.984045866942895e-06, |
| "loss": 1.1493, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.271026271026271, |
| "grad_norm": 2.1608307361602783, |
| "learning_rate": 4.983961208116057e-06, |
| "loss": 1.1564, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.27159327159327157, |
| "grad_norm": 2.082221031188965, |
| "learning_rate": 4.983876325989712e-06, |
| "loss": 1.1251, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.27216027216027217, |
| "grad_norm": 2.020977258682251, |
| "learning_rate": 4.983791220571491e-06, |
| "loss": 1.1933, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 2.1868746280670166, |
| "learning_rate": 4.983705891869045e-06, |
| "loss": 1.0961, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2732942732942733, |
| "grad_norm": 2.204965829849243, |
| "learning_rate": 4.983620339890045e-06, |
| "loss": 1.1872, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.27386127386127385, |
| "grad_norm": 2.166038751602173, |
| "learning_rate": 4.983534564642181e-06, |
| "loss": 1.1613, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.27442827442827444, |
| "grad_norm": 2.204023599624634, |
| "learning_rate": 4.9834485661331635e-06, |
| "loss": 1.1728, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.274995274995275, |
| "grad_norm": 2.2817134857177734, |
| "learning_rate": 4.983362344370725e-06, |
| "loss": 1.1308, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2755622755622756, |
| "grad_norm": 2.1722960472106934, |
| "learning_rate": 4.983275899362617e-06, |
| "loss": 1.1608, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2761292761292761, |
| "grad_norm": 1.996817946434021, |
| "learning_rate": 4.983189231116609e-06, |
| "loss": 1.209, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2766962766962767, |
| "grad_norm": 2.1152753829956055, |
| "learning_rate": 4.9831023396404915e-06, |
| "loss": 1.2043, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.27726327726327726, |
| "grad_norm": 2.458534002304077, |
| "learning_rate": 4.983015224942077e-06, |
| "loss": 1.1945, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2778302778302778, |
| "grad_norm": 2.107175350189209, |
| "learning_rate": 4.9829278870291975e-06, |
| "loss": 1.1164, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2783972783972784, |
| "grad_norm": 2.216923236846924, |
| "learning_rate": 4.982840325909704e-06, |
| "loss": 1.1624, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.27896427896427894, |
| "grad_norm": 1.9773650169372559, |
| "learning_rate": 4.982752541591467e-06, |
| "loss": 1.077, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.27953127953127954, |
| "grad_norm": 1.9695847034454346, |
| "learning_rate": 4.982664534082377e-06, |
| "loss": 1.1566, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.2800982800982801, |
| "grad_norm": 2.343064546585083, |
| "learning_rate": 4.98257630339035e-06, |
| "loss": 1.2313, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2806652806652807, |
| "grad_norm": 1.9904532432556152, |
| "learning_rate": 4.982487849523312e-06, |
| "loss": 1.1701, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2812322812322812, |
| "grad_norm": 1.9797039031982422, |
| "learning_rate": 4.982399172489219e-06, |
| "loss": 1.1929, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2817992817992818, |
| "grad_norm": 1.9941498041152954, |
| "learning_rate": 4.98231027229604e-06, |
| "loss": 1.1203, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.28236628236628236, |
| "grad_norm": 2.0945804119110107, |
| "learning_rate": 4.982221148951769e-06, |
| "loss": 1.081, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.28293328293328296, |
| "grad_norm": 2.344377040863037, |
| "learning_rate": 4.982131802464417e-06, |
| "loss": 1.1263, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2835002835002835, |
| "grad_norm": 2.075709342956543, |
| "learning_rate": 4.982042232842015e-06, |
| "loss": 1.1351, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.28406728406728404, |
| "grad_norm": 2.089801073074341, |
| "learning_rate": 4.9819524400926165e-06, |
| "loss": 1.1428, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.28463428463428464, |
| "grad_norm": 2.149322032928467, |
| "learning_rate": 4.981862424224292e-06, |
| "loss": 1.2166, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2852012852012852, |
| "grad_norm": 2.2097578048706055, |
| "learning_rate": 4.981772185245135e-06, |
| "loss": 1.1438, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.2857682857682858, |
| "grad_norm": 2.0914175510406494, |
| "learning_rate": 4.981681723163257e-06, |
| "loss": 1.1878, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2863352863352863, |
| "grad_norm": 2.1141879558563232, |
| "learning_rate": 4.981591037986791e-06, |
| "loss": 1.1921, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2869022869022869, |
| "grad_norm": 2.122882127761841, |
| "learning_rate": 4.981500129723888e-06, |
| "loss": 1.1832, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.28746928746928746, |
| "grad_norm": 2.0918145179748535, |
| "learning_rate": 4.981408998382722e-06, |
| "loss": 1.1446, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.28803628803628806, |
| "grad_norm": 2.1555593013763428, |
| "learning_rate": 4.981317643971483e-06, |
| "loss": 1.1637, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.2886032886032886, |
| "grad_norm": 2.057591438293457, |
| "learning_rate": 4.981226066498386e-06, |
| "loss": 1.1216, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2891702891702892, |
| "grad_norm": 2.0887913703918457, |
| "learning_rate": 4.981134265971661e-06, |
| "loss": 1.1765, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.28973728973728974, |
| "grad_norm": 2.009054660797119, |
| "learning_rate": 4.981042242399563e-06, |
| "loss": 1.1756, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2903042903042903, |
| "grad_norm": 2.092836380004883, |
| "learning_rate": 4.980949995790363e-06, |
| "loss": 1.1748, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2908712908712909, |
| "grad_norm": 2.105720281600952, |
| "learning_rate": 4.980857526152354e-06, |
| "loss": 1.1859, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2914382914382914, |
| "grad_norm": 2.0198593139648438, |
| "learning_rate": 4.9807648334938495e-06, |
| "loss": 1.1511, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.292005292005292, |
| "grad_norm": 2.1741116046905518, |
| "learning_rate": 4.9806719178231815e-06, |
| "loss": 1.146, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.29257229257229256, |
| "grad_norm": 2.0599679946899414, |
| "learning_rate": 4.980578779148702e-06, |
| "loss": 1.166, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.29313929313929316, |
| "grad_norm": 2.0803048610687256, |
| "learning_rate": 4.980485417478785e-06, |
| "loss": 1.2055, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2937062937062937, |
| "grad_norm": 2.0175561904907227, |
| "learning_rate": 4.980391832821823e-06, |
| "loss": 1.1096, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2942732942732943, |
| "grad_norm": 2.224367380142212, |
| "learning_rate": 4.98029802518623e-06, |
| "loss": 1.1226, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.29484029484029484, |
| "grad_norm": 2.0235671997070312, |
| "learning_rate": 4.980203994580438e-06, |
| "loss": 1.1662, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.29540729540729543, |
| "grad_norm": 2.0228888988494873, |
| "learning_rate": 4.980109741012899e-06, |
| "loss": 1.2074, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.295974295974296, |
| "grad_norm": 2.286905288696289, |
| "learning_rate": 4.980015264492087e-06, |
| "loss": 1.1795, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2965412965412965, |
| "grad_norm": 2.0491106510162354, |
| "learning_rate": 4.979920565026496e-06, |
| "loss": 1.1727, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2971082971082971, |
| "grad_norm": 1.9510176181793213, |
| "learning_rate": 4.979825642624639e-06, |
| "loss": 1.1782, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.29767529767529766, |
| "grad_norm": 2.1948649883270264, |
| "learning_rate": 4.979730497295048e-06, |
| "loss": 1.1906, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.29824229824229825, |
| "grad_norm": 2.01057505607605, |
| "learning_rate": 4.979635129046276e-06, |
| "loss": 1.2282, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2988092988092988, |
| "grad_norm": 2.133222818374634, |
| "learning_rate": 4.979539537886899e-06, |
| "loss": 1.2072, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2993762993762994, |
| "grad_norm": 1.993911623954773, |
| "learning_rate": 4.979443723825506e-06, |
| "loss": 1.1421, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.29994329994329993, |
| "grad_norm": 2.000917673110962, |
| "learning_rate": 4.979347686870714e-06, |
| "loss": 1.1226, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.30051030051030053, |
| "grad_norm": 2.2361230850219727, |
| "learning_rate": 4.9792514270311556e-06, |
| "loss": 1.2491, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3010773010773011, |
| "grad_norm": 2.1424977779388428, |
| "learning_rate": 4.979154944315483e-06, |
| "loss": 1.1685, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.30164430164430167, |
| "grad_norm": 2.0004067420959473, |
| "learning_rate": 4.979058238732371e-06, |
| "loss": 1.2122, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3022113022113022, |
| "grad_norm": 2.1690309047698975, |
| "learning_rate": 4.978961310290512e-06, |
| "loss": 1.155, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.30277830277830275, |
| "grad_norm": 2.143450975418091, |
| "learning_rate": 4.97886415899862e-06, |
| "loss": 1.1719, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.30334530334530335, |
| "grad_norm": 2.197096824645996, |
| "learning_rate": 4.978766784865429e-06, |
| "loss": 1.1263, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3039123039123039, |
| "grad_norm": 2.075712203979492, |
| "learning_rate": 4.9786691878996926e-06, |
| "loss": 1.1412, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3044793044793045, |
| "grad_norm": 2.1067452430725098, |
| "learning_rate": 4.978571368110183e-06, |
| "loss": 1.231, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.30504630504630503, |
| "grad_norm": 2.2791428565979004, |
| "learning_rate": 4.978473325505696e-06, |
| "loss": 1.1304, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.30561330561330563, |
| "grad_norm": 1.856226921081543, |
| "learning_rate": 4.978375060095044e-06, |
| "loss": 1.1559, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.30618030618030617, |
| "grad_norm": 2.2657668590545654, |
| "learning_rate": 4.97827657188706e-06, |
| "loss": 1.1616, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.30674730674730677, |
| "grad_norm": 2.008127212524414, |
| "learning_rate": 4.9781778608906e-06, |
| "loss": 1.1456, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.3073143073143073, |
| "grad_norm": 2.2255024909973145, |
| "learning_rate": 4.978078927114536e-06, |
| "loss": 1.162, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3078813078813079, |
| "grad_norm": 2.2034647464752197, |
| "learning_rate": 4.977979770567762e-06, |
| "loss": 1.1093, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.30844830844830845, |
| "grad_norm": 2.1638619899749756, |
| "learning_rate": 4.977880391259192e-06, |
| "loss": 1.1953, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.309015309015309, |
| "grad_norm": 2.1344549655914307, |
| "learning_rate": 4.977780789197761e-06, |
| "loss": 1.1511, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.3095823095823096, |
| "grad_norm": 2.2296082973480225, |
| "learning_rate": 4.97768096439242e-06, |
| "loss": 1.1982, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.31014931014931013, |
| "grad_norm": 2.3037984371185303, |
| "learning_rate": 4.977580916852146e-06, |
| "loss": 1.1815, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.31071631071631073, |
| "grad_norm": 2.4563586711883545, |
| "learning_rate": 4.977480646585931e-06, |
| "loss": 1.1606, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.31128331128331127, |
| "grad_norm": 1.9984495639801025, |
| "learning_rate": 4.97738015360279e-06, |
| "loss": 1.144, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.31185031185031187, |
| "grad_norm": 2.0485143661499023, |
| "learning_rate": 4.977279437911756e-06, |
| "loss": 1.1536, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3124173124173124, |
| "grad_norm": 2.2146592140197754, |
| "learning_rate": 4.9771784995218845e-06, |
| "loss": 1.1156, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.312984312984313, |
| "grad_norm": 1.880562424659729, |
| "learning_rate": 4.9770773384422485e-06, |
| "loss": 1.1479, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.31355131355131355, |
| "grad_norm": 2.0846965312957764, |
| "learning_rate": 4.976975954681942e-06, |
| "loss": 1.1266, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.31411831411831415, |
| "grad_norm": 1.9795643091201782, |
| "learning_rate": 4.976874348250078e-06, |
| "loss": 1.1197, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3146853146853147, |
| "grad_norm": 2.1454899311065674, |
| "learning_rate": 4.976772519155793e-06, |
| "loss": 1.1333, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.31525231525231523, |
| "grad_norm": 2.2674753665924072, |
| "learning_rate": 4.97667046740824e-06, |
| "loss": 1.1433, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3158193158193158, |
| "grad_norm": 2.0304932594299316, |
| "learning_rate": 4.976568193016592e-06, |
| "loss": 1.1531, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.31638631638631637, |
| "grad_norm": 2.2592039108276367, |
| "learning_rate": 4.976465695990045e-06, |
| "loss": 1.21, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.31695331695331697, |
| "grad_norm": 2.1316540241241455, |
| "learning_rate": 4.976362976337811e-06, |
| "loss": 1.1602, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3175203175203175, |
| "grad_norm": 2.0556883811950684, |
| "learning_rate": 4.976260034069126e-06, |
| "loss": 1.1739, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3180873180873181, |
| "grad_norm": 2.580925226211548, |
| "learning_rate": 4.976156869193243e-06, |
| "loss": 1.1982, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.31865431865431865, |
| "grad_norm": 2.2084896564483643, |
| "learning_rate": 4.976053481719437e-06, |
| "loss": 1.191, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.31922131922131924, |
| "grad_norm": 2.0614700317382812, |
| "learning_rate": 4.975949871657001e-06, |
| "loss": 1.1364, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.3197883197883198, |
| "grad_norm": 2.1390912532806396, |
| "learning_rate": 4.975846039015251e-06, |
| "loss": 1.1374, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3203553203553203, |
| "grad_norm": 2.044769763946533, |
| "learning_rate": 4.97574198380352e-06, |
| "loss": 1.1242, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3209223209223209, |
| "grad_norm": 1.8909872770309448, |
| "learning_rate": 4.975637706031162e-06, |
| "loss": 1.1693, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.32148932148932147, |
| "grad_norm": 2.2310988903045654, |
| "learning_rate": 4.975533205707552e-06, |
| "loss": 1.1736, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.32205632205632206, |
| "grad_norm": 2.057612180709839, |
| "learning_rate": 4.975428482842083e-06, |
| "loss": 1.2525, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3226233226233226, |
| "grad_norm": 2.104064464569092, |
| "learning_rate": 4.975323537444171e-06, |
| "loss": 1.108, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3231903231903232, |
| "grad_norm": 2.0129916667938232, |
| "learning_rate": 4.975218369523249e-06, |
| "loss": 1.1598, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.32375732375732375, |
| "grad_norm": 2.080134153366089, |
| "learning_rate": 4.9751129790887705e-06, |
| "loss": 1.1726, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.32432432432432434, |
| "grad_norm": 2.1668407917022705, |
| "learning_rate": 4.975007366150212e-06, |
| "loss": 1.1063, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3248913248913249, |
| "grad_norm": 2.0228352546691895, |
| "learning_rate": 4.974901530717066e-06, |
| "loss": 1.1907, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3254583254583255, |
| "grad_norm": 2.016080379486084, |
| "learning_rate": 4.974795472798847e-06, |
| "loss": 1.1973, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.326025326025326, |
| "grad_norm": 2.2395012378692627, |
| "learning_rate": 4.97468919240509e-06, |
| "loss": 1.2248, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.32659232659232657, |
| "grad_norm": 2.1966094970703125, |
| "learning_rate": 4.974582689545348e-06, |
| "loss": 1.1356, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.32715932715932716, |
| "grad_norm": 2.1334877014160156, |
| "learning_rate": 4.974475964229196e-06, |
| "loss": 1.1458, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3277263277263277, |
| "grad_norm": 2.388422727584839, |
| "learning_rate": 4.9743690164662286e-06, |
| "loss": 1.1866, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3282933282933283, |
| "grad_norm": 2.077364683151245, |
| "learning_rate": 4.974261846266059e-06, |
| "loss": 1.163, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.32886032886032884, |
| "grad_norm": 1.9961100816726685, |
| "learning_rate": 4.974154453638323e-06, |
| "loss": 1.1281, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.32942732942732944, |
| "grad_norm": 2.213353395462036, |
| "learning_rate": 4.974046838592672e-06, |
| "loss": 1.2425, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.32999432999433, |
| "grad_norm": 2.022712230682373, |
| "learning_rate": 4.973939001138783e-06, |
| "loss": 1.2039, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3305613305613306, |
| "grad_norm": 2.2076306343078613, |
| "learning_rate": 4.97383094128635e-06, |
| "loss": 1.1468, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3311283311283311, |
| "grad_norm": 1.9615507125854492, |
| "learning_rate": 4.9737226590450855e-06, |
| "loss": 1.1382, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3316953316953317, |
| "grad_norm": 2.1458539962768555, |
| "learning_rate": 4.973614154424725e-06, |
| "loss": 1.1042, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.33226233226233226, |
| "grad_norm": 2.045104742050171, |
| "learning_rate": 4.973505427435023e-06, |
| "loss": 1.1347, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3328293328293328, |
| "grad_norm": 2.0573666095733643, |
| "learning_rate": 4.973396478085752e-06, |
| "loss": 1.1718, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3333963333963334, |
| "grad_norm": 2.056199550628662, |
| "learning_rate": 4.973287306386707e-06, |
| "loss": 1.1704, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.33396333396333394, |
| "grad_norm": 2.456008195877075, |
| "learning_rate": 4.973177912347703e-06, |
| "loss": 1.086, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.33453033453033454, |
| "grad_norm": 2.0783896446228027, |
| "learning_rate": 4.9730682959785735e-06, |
| "loss": 1.1944, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3350973350973351, |
| "grad_norm": 2.096651077270508, |
| "learning_rate": 4.972958457289173e-06, |
| "loss": 1.1146, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3356643356643357, |
| "grad_norm": 2.1893470287323, |
| "learning_rate": 4.972848396289375e-06, |
| "loss": 1.1147, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3362313362313362, |
| "grad_norm": 2.1144778728485107, |
| "learning_rate": 4.972738112989073e-06, |
| "loss": 1.2118, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3367983367983368, |
| "grad_norm": 2.035900354385376, |
| "learning_rate": 4.972627607398183e-06, |
| "loss": 1.155, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.33736533736533736, |
| "grad_norm": 2.0177159309387207, |
| "learning_rate": 4.972516879526638e-06, |
| "loss": 1.1114, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.33793233793233796, |
| "grad_norm": 1.9777759313583374, |
| "learning_rate": 4.972405929384391e-06, |
| "loss": 1.0969, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3384993384993385, |
| "grad_norm": 1.9437038898468018, |
| "learning_rate": 4.972294756981419e-06, |
| "loss": 1.0776, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.33906633906633904, |
| "grad_norm": 2.2023017406463623, |
| "learning_rate": 4.972183362327712e-06, |
| "loss": 1.085, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.33963333963333964, |
| "grad_norm": 2.137089490890503, |
| "learning_rate": 4.972071745433287e-06, |
| "loss": 1.1618, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3402003402003402, |
| "grad_norm": 2.292973518371582, |
| "learning_rate": 4.971959906308177e-06, |
| "loss": 1.2049, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3407673407673408, |
| "grad_norm": 2.0405850410461426, |
| "learning_rate": 4.971847844962436e-06, |
| "loss": 1.1778, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3413343413343413, |
| "grad_norm": 2.1747894287109375, |
| "learning_rate": 4.971735561406138e-06, |
| "loss": 1.1973, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3419013419013419, |
| "grad_norm": 2.003805637359619, |
| "learning_rate": 4.971623055649377e-06, |
| "loss": 1.1549, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.34246834246834246, |
| "grad_norm": 2.138134002685547, |
| "learning_rate": 4.971510327702267e-06, |
| "loss": 1.1143, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.34303534303534305, |
| "grad_norm": 2.085298538208008, |
| "learning_rate": 4.971397377574941e-06, |
| "loss": 1.1557, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3436023436023436, |
| "grad_norm": 2.045970916748047, |
| "learning_rate": 4.9712842052775536e-06, |
| "loss": 1.1313, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3441693441693442, |
| "grad_norm": 2.070230007171631, |
| "learning_rate": 4.971170810820279e-06, |
| "loss": 1.1676, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.34473634473634474, |
| "grad_norm": 1.9645379781723022, |
| "learning_rate": 4.97105719421331e-06, |
| "loss": 1.1211, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3453033453033453, |
| "grad_norm": 2.0739731788635254, |
| "learning_rate": 4.970943355466861e-06, |
| "loss": 1.2498, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3458703458703459, |
| "grad_norm": 2.0373897552490234, |
| "learning_rate": 4.970829294591164e-06, |
| "loss": 1.1665, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3464373464373464, |
| "grad_norm": 2.105302333831787, |
| "learning_rate": 4.9707150115964756e-06, |
| "loss": 1.2105, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.347004347004347, |
| "grad_norm": 1.993300437927246, |
| "learning_rate": 4.9706005064930674e-06, |
| "loss": 1.1395, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.34757134757134756, |
| "grad_norm": 2.1406335830688477, |
| "learning_rate": 4.970485779291234e-06, |
| "loss": 1.1353, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.34813834813834815, |
| "grad_norm": 2.1167380809783936, |
| "learning_rate": 4.970370830001288e-06, |
| "loss": 1.1699, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3487053487053487, |
| "grad_norm": 2.1319780349731445, |
| "learning_rate": 4.970255658633564e-06, |
| "loss": 1.1761, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3492723492723493, |
| "grad_norm": 2.0811727046966553, |
| "learning_rate": 4.970140265198414e-06, |
| "loss": 1.1692, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.34983934983934983, |
| "grad_norm": 2.139535903930664, |
| "learning_rate": 4.9700246497062135e-06, |
| "loss": 1.1949, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.35040635040635043, |
| "grad_norm": 2.1217803955078125, |
| "learning_rate": 4.969908812167354e-06, |
| "loss": 1.1141, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.350973350973351, |
| "grad_norm": 2.010310173034668, |
| "learning_rate": 4.969792752592251e-06, |
| "loss": 1.1575, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3515403515403515, |
| "grad_norm": 2.0576040744781494, |
| "learning_rate": 4.969676470991336e-06, |
| "loss": 1.1145, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3521073521073521, |
| "grad_norm": 2.233651638031006, |
| "learning_rate": 4.969559967375063e-06, |
| "loss": 1.1472, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.35267435267435265, |
| "grad_norm": 1.9711737632751465, |
| "learning_rate": 4.969443241753905e-06, |
| "loss": 1.1164, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.35324135324135325, |
| "grad_norm": 2.076789379119873, |
| "learning_rate": 4.969326294138355e-06, |
| "loss": 1.1369, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.3538083538083538, |
| "grad_norm": 2.148998260498047, |
| "learning_rate": 4.9692091245389275e-06, |
| "loss": 1.1001, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3543753543753544, |
| "grad_norm": 1.9037641286849976, |
| "learning_rate": 4.969091732966155e-06, |
| "loss": 1.201, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.35494235494235493, |
| "grad_norm": 1.9544503688812256, |
| "learning_rate": 4.968974119430589e-06, |
| "loss": 1.1571, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.35550935550935553, |
| "grad_norm": 2.1833956241607666, |
| "learning_rate": 4.968856283942805e-06, |
| "loss": 1.1889, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.35607635607635607, |
| "grad_norm": 2.0116825103759766, |
| "learning_rate": 4.968738226513395e-06, |
| "loss": 1.1488, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.35664335664335667, |
| "grad_norm": 2.1879611015319824, |
| "learning_rate": 4.968619947152971e-06, |
| "loss": 1.1717, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3572103572103572, |
| "grad_norm": 2.06209135055542, |
| "learning_rate": 4.968501445872168e-06, |
| "loss": 1.1698, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.35777735777735775, |
| "grad_norm": 2.126161813735962, |
| "learning_rate": 4.968382722681637e-06, |
| "loss": 1.1888, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.35834435834435835, |
| "grad_norm": 2.016767978668213, |
| "learning_rate": 4.968263777592052e-06, |
| "loss": 1.1583, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3589113589113589, |
| "grad_norm": 2.19541335105896, |
| "learning_rate": 4.968144610614104e-06, |
| "loss": 1.2259, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3594783594783595, |
| "grad_norm": 2.040583610534668, |
| "learning_rate": 4.968025221758508e-06, |
| "loss": 1.1836, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.36004536004536003, |
| "grad_norm": 1.9836755990982056, |
| "learning_rate": 4.967905611035994e-06, |
| "loss": 1.0771, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3606123606123606, |
| "grad_norm": 2.1664631366729736, |
| "learning_rate": 4.967785778457318e-06, |
| "loss": 1.1864, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.36117936117936117, |
| "grad_norm": 1.9839632511138916, |
| "learning_rate": 4.967665724033249e-06, |
| "loss": 1.1075, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.36174636174636177, |
| "grad_norm": 2.1936347484588623, |
| "learning_rate": 4.9675454477745825e-06, |
| "loss": 1.2054, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3623133623133623, |
| "grad_norm": 1.9246869087219238, |
| "learning_rate": 4.967424949692129e-06, |
| "loss": 1.1856, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3628803628803629, |
| "grad_norm": 2.0133697986602783, |
| "learning_rate": 4.967304229796722e-06, |
| "loss": 1.162, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.36344736344736345, |
| "grad_norm": 2.135317802429199, |
| "learning_rate": 4.967183288099212e-06, |
| "loss": 1.1268, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.364014364014364, |
| "grad_norm": 2.1062726974487305, |
| "learning_rate": 4.967062124610473e-06, |
| "loss": 1.1205, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3645813645813646, |
| "grad_norm": 2.252697229385376, |
| "learning_rate": 4.966940739341397e-06, |
| "loss": 1.1184, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.36514836514836513, |
| "grad_norm": 2.0622639656066895, |
| "learning_rate": 4.9668191323028956e-06, |
| "loss": 1.1073, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3657153657153657, |
| "grad_norm": 2.094453811645508, |
| "learning_rate": 4.966697303505901e-06, |
| "loss": 1.1755, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.36628236628236627, |
| "grad_norm": 2.2338266372680664, |
| "learning_rate": 4.966575252961365e-06, |
| "loss": 1.1128, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.36684936684936686, |
| "grad_norm": 2.259481430053711, |
| "learning_rate": 4.9664529806802605e-06, |
| "loss": 1.1412, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3674163674163674, |
| "grad_norm": 2.1204957962036133, |
| "learning_rate": 4.966330486673578e-06, |
| "loss": 1.1276, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.367983367983368, |
| "grad_norm": 2.659971237182617, |
| "learning_rate": 4.966207770952329e-06, |
| "loss": 1.1212, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.36855036855036855, |
| "grad_norm": 2.29526948928833, |
| "learning_rate": 4.966084833527547e-06, |
| "loss": 1.1902, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.36911736911736914, |
| "grad_norm": 2.237398147583008, |
| "learning_rate": 4.9659616744102825e-06, |
| "loss": 1.1895, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3696843696843697, |
| "grad_norm": 2.0594875812530518, |
| "learning_rate": 4.965838293611608e-06, |
| "loss": 1.1617, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3702513702513702, |
| "grad_norm": 2.1710054874420166, |
| "learning_rate": 4.965714691142614e-06, |
| "loss": 1.168, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3708183708183708, |
| "grad_norm": 2.2017593383789062, |
| "learning_rate": 4.965590867014411e-06, |
| "loss": 1.1226, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.37138537138537137, |
| "grad_norm": 2.2235498428344727, |
| "learning_rate": 4.965466821238133e-06, |
| "loss": 1.1587, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.37195237195237196, |
| "grad_norm": 2.168541431427002, |
| "learning_rate": 4.965342553824929e-06, |
| "loss": 1.1707, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3725193725193725, |
| "grad_norm": 1.9276596307754517, |
| "learning_rate": 4.965218064785972e-06, |
| "loss": 1.1589, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3730863730863731, |
| "grad_norm": 2.0985217094421387, |
| "learning_rate": 4.965093354132451e-06, |
| "loss": 1.1817, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.37365337365337364, |
| "grad_norm": 2.0856056213378906, |
| "learning_rate": 4.964968421875579e-06, |
| "loss": 1.1571, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.37422037422037424, |
| "grad_norm": 2.0469911098480225, |
| "learning_rate": 4.964843268026586e-06, |
| "loss": 1.1054, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3747873747873748, |
| "grad_norm": 2.1270503997802734, |
| "learning_rate": 4.964717892596723e-06, |
| "loss": 1.1567, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3753543753543754, |
| "grad_norm": 2.1887195110321045, |
| "learning_rate": 4.964592295597261e-06, |
| "loss": 1.1441, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3759213759213759, |
| "grad_norm": 2.317736864089966, |
| "learning_rate": 4.964466477039492e-06, |
| "loss": 1.1534, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.37648837648837646, |
| "grad_norm": 2.509260892868042, |
| "learning_rate": 4.964340436934724e-06, |
| "loss": 1.1391, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.37705537705537706, |
| "grad_norm": 2.0452466011047363, |
| "learning_rate": 4.96421417529429e-06, |
| "loss": 1.1786, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3776223776223776, |
| "grad_norm": 2.159749746322632, |
| "learning_rate": 4.964087692129538e-06, |
| "loss": 1.2412, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3781893781893782, |
| "grad_norm": 1.99373197555542, |
| "learning_rate": 4.963960987451841e-06, |
| "loss": 1.1374, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.37875637875637874, |
| "grad_norm": 2.1809184551239014, |
| "learning_rate": 4.9638340612725875e-06, |
| "loss": 1.1689, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.37932337932337934, |
| "grad_norm": 2.27892804145813, |
| "learning_rate": 4.963706913603188e-06, |
| "loss": 1.1155, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3798903798903799, |
| "grad_norm": 2.08236026763916, |
| "learning_rate": 4.963579544455074e-06, |
| "loss": 1.1535, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3804573804573805, |
| "grad_norm": 2.14473557472229, |
| "learning_rate": 4.963451953839694e-06, |
| "loss": 1.1698, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.381024381024381, |
| "grad_norm": 2.0856521129608154, |
| "learning_rate": 4.963324141768519e-06, |
| "loss": 1.1349, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3815913815913816, |
| "grad_norm": 2.1324524879455566, |
| "learning_rate": 4.963196108253037e-06, |
| "loss": 1.1598, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.38215838215838216, |
| "grad_norm": 2.0533447265625, |
| "learning_rate": 4.96306785330476e-06, |
| "loss": 1.1546, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3827253827253827, |
| "grad_norm": 1.913636326789856, |
| "learning_rate": 4.962939376935216e-06, |
| "loss": 1.1914, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3832923832923833, |
| "grad_norm": 2.0817277431488037, |
| "learning_rate": 4.962810679155957e-06, |
| "loss": 1.1146, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.38385938385938384, |
| "grad_norm": 2.0026915073394775, |
| "learning_rate": 4.96268175997855e-06, |
| "loss": 1.1487, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.38442638442638444, |
| "grad_norm": 2.05865478515625, |
| "learning_rate": 4.962552619414584e-06, |
| "loss": 1.148, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.384993384993385, |
| "grad_norm": 2.0297701358795166, |
| "learning_rate": 4.962423257475672e-06, |
| "loss": 1.1555, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3855603855603856, |
| "grad_norm": 2.2152462005615234, |
| "learning_rate": 4.962293674173438e-06, |
| "loss": 1.1278, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3861273861273861, |
| "grad_norm": 1.9059257507324219, |
| "learning_rate": 4.962163869519536e-06, |
| "loss": 1.1193, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3866943866943867, |
| "grad_norm": 2.1127209663391113, |
| "learning_rate": 4.962033843525632e-06, |
| "loss": 1.0973, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.38726138726138726, |
| "grad_norm": 2.3520565032958984, |
| "learning_rate": 4.961903596203416e-06, |
| "loss": 1.2412, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.38782838782838785, |
| "grad_norm": 2.072892904281616, |
| "learning_rate": 4.961773127564596e-06, |
| "loss": 1.1184, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3883953883953884, |
| "grad_norm": 2.1626739501953125, |
| "learning_rate": 4.961642437620901e-06, |
| "loss": 1.1772, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.38896238896238894, |
| "grad_norm": 2.09814453125, |
| "learning_rate": 4.96151152638408e-06, |
| "loss": 1.09, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.38952938952938954, |
| "grad_norm": 1.9176437854766846, |
| "learning_rate": 4.9613803938659014e-06, |
| "loss": 1.0821, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3900963900963901, |
| "grad_norm": 2.1363837718963623, |
| "learning_rate": 4.961249040078153e-06, |
| "loss": 1.1564, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3906633906633907, |
| "grad_norm": 2.1223223209381104, |
| "learning_rate": 4.961117465032643e-06, |
| "loss": 1.1089, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.3912303912303912, |
| "grad_norm": 2.165515184402466, |
| "learning_rate": 4.960985668741201e-06, |
| "loss": 1.1745, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3917973917973918, |
| "grad_norm": 2.15566349029541, |
| "learning_rate": 4.960853651215673e-06, |
| "loss": 1.1345, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.39236439236439236, |
| "grad_norm": 2.015986919403076, |
| "learning_rate": 4.960721412467929e-06, |
| "loss": 1.1247, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.39293139293139295, |
| "grad_norm": 2.1274542808532715, |
| "learning_rate": 4.960588952509855e-06, |
| "loss": 1.1688, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.3934983934983935, |
| "grad_norm": 2.058623790740967, |
| "learning_rate": 4.960456271353359e-06, |
| "loss": 1.1785, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3940653940653941, |
| "grad_norm": 2.4499433040618896, |
| "learning_rate": 4.9603233690103695e-06, |
| "loss": 1.1351, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.39463239463239463, |
| "grad_norm": 2.1276276111602783, |
| "learning_rate": 4.960190245492833e-06, |
| "loss": 1.1368, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3951993951993952, |
| "grad_norm": 2.095386028289795, |
| "learning_rate": 4.960056900812717e-06, |
| "loss": 1.1141, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3957663957663958, |
| "grad_norm": 2.1107561588287354, |
| "learning_rate": 4.95992333498201e-06, |
| "loss": 1.1269, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3963333963333963, |
| "grad_norm": 2.3567731380462646, |
| "learning_rate": 4.9597895480127175e-06, |
| "loss": 1.1367, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3969003969003969, |
| "grad_norm": 2.1174514293670654, |
| "learning_rate": 4.959655539916868e-06, |
| "loss": 1.1891, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.39746739746739745, |
| "grad_norm": 2.0991039276123047, |
| "learning_rate": 4.959521310706506e-06, |
| "loss": 1.1735, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.39803439803439805, |
| "grad_norm": 1.934212327003479, |
| "learning_rate": 4.9593868603937e-06, |
| "loss": 1.0731, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3986013986013986, |
| "grad_norm": 2.1108388900756836, |
| "learning_rate": 4.959252188990536e-06, |
| "loss": 1.1735, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3991683991683992, |
| "grad_norm": 2.010054349899292, |
| "learning_rate": 4.9591172965091224e-06, |
| "loss": 1.1337, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.39973539973539973, |
| "grad_norm": 2.167483329772949, |
| "learning_rate": 4.9589821829615825e-06, |
| "loss": 1.1609, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.40030240030240033, |
| "grad_norm": 2.048516273498535, |
| "learning_rate": 4.958846848360065e-06, |
| "loss": 1.098, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.40086940086940087, |
| "grad_norm": 2.2454535961151123, |
| "learning_rate": 4.958711292716733e-06, |
| "loss": 1.1277, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.4014364014364014, |
| "grad_norm": 2.0777549743652344, |
| "learning_rate": 4.958575516043776e-06, |
| "loss": 1.1743, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.402003402003402, |
| "grad_norm": 2.114323377609253, |
| "learning_rate": 4.958439518353399e-06, |
| "loss": 1.1638, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.40257040257040255, |
| "grad_norm": 2.010104179382324, |
| "learning_rate": 4.958303299657826e-06, |
| "loss": 1.1594, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.40313740313740315, |
| "grad_norm": 2.0232269763946533, |
| "learning_rate": 4.958166859969304e-06, |
| "loss": 1.1402, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4037044037044037, |
| "grad_norm": 1.9577562808990479, |
| "learning_rate": 4.9580301993000984e-06, |
| "loss": 1.2293, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4042714042714043, |
| "grad_norm": 1.968520164489746, |
| "learning_rate": 4.957893317662494e-06, |
| "loss": 1.1197, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.40483840483840483, |
| "grad_norm": 2.126594305038452, |
| "learning_rate": 4.9577562150687955e-06, |
| "loss": 1.16, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.40540540540540543, |
| "grad_norm": 2.0474393367767334, |
| "learning_rate": 4.957618891531329e-06, |
| "loss": 1.126, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.40597240597240597, |
| "grad_norm": 1.9974445104599, |
| "learning_rate": 4.95748134706244e-06, |
| "loss": 1.0981, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.40653940653940657, |
| "grad_norm": 2.0076394081115723, |
| "learning_rate": 4.957343581674492e-06, |
| "loss": 1.1171, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4071064071064071, |
| "grad_norm": 2.1541495323181152, |
| "learning_rate": 4.9572055953798695e-06, |
| "loss": 1.1941, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.40767340767340765, |
| "grad_norm": 2.1172938346862793, |
| "learning_rate": 4.957067388190977e-06, |
| "loss": 1.1361, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.40824040824040825, |
| "grad_norm": 2.162334442138672, |
| "learning_rate": 4.9569289601202405e-06, |
| "loss": 1.0521, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4088074088074088, |
| "grad_norm": 2.1444907188415527, |
| "learning_rate": 4.956790311180102e-06, |
| "loss": 1.1954, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4093744093744094, |
| "grad_norm": 1.9972236156463623, |
| "learning_rate": 4.956651441383027e-06, |
| "loss": 1.0979, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.40994140994140993, |
| "grad_norm": 2.1866066455841064, |
| "learning_rate": 4.9565123507414994e-06, |
| "loss": 1.1854, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.4105084105084105, |
| "grad_norm": 2.0343639850616455, |
| "learning_rate": 4.956373039268022e-06, |
| "loss": 1.1422, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.41107541107541107, |
| "grad_norm": 2.0357658863067627, |
| "learning_rate": 4.956233506975119e-06, |
| "loss": 1.1096, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.41164241164241167, |
| "grad_norm": 2.22104549407959, |
| "learning_rate": 4.956093753875334e-06, |
| "loss": 1.1119, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4122094122094122, |
| "grad_norm": 2.0534398555755615, |
| "learning_rate": 4.95595377998123e-06, |
| "loss": 1.2207, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.41277641277641275, |
| "grad_norm": 1.9503989219665527, |
| "learning_rate": 4.95581358530539e-06, |
| "loss": 1.1033, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.41334341334341335, |
| "grad_norm": 2.2704718112945557, |
| "learning_rate": 4.955673169860418e-06, |
| "loss": 1.1745, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4139104139104139, |
| "grad_norm": 2.0502419471740723, |
| "learning_rate": 4.955532533658936e-06, |
| "loss": 1.1606, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4144774144774145, |
| "grad_norm": 1.971962809562683, |
| "learning_rate": 4.955391676713587e-06, |
| "loss": 1.1077, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.415044415044415, |
| "grad_norm": 2.115936040878296, |
| "learning_rate": 4.955250599037034e-06, |
| "loss": 1.123, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.4156114156114156, |
| "grad_norm": 1.9907145500183105, |
| "learning_rate": 4.9551093006419574e-06, |
| "loss": 1.1332, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.41617841617841617, |
| "grad_norm": 2.122241497039795, |
| "learning_rate": 4.954967781541062e-06, |
| "loss": 1.0907, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.41674541674541676, |
| "grad_norm": 2.1141576766967773, |
| "learning_rate": 4.954826041747068e-06, |
| "loss": 1.0933, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4173124173124173, |
| "grad_norm": 2.121520519256592, |
| "learning_rate": 4.954684081272719e-06, |
| "loss": 1.1604, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.4178794178794179, |
| "grad_norm": 2.0803537368774414, |
| "learning_rate": 4.954541900130775e-06, |
| "loss": 1.1552, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.41844641844641844, |
| "grad_norm": 2.2571604251861572, |
| "learning_rate": 4.954399498334019e-06, |
| "loss": 1.0974, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.419013419013419, |
| "grad_norm": 2.1840076446533203, |
| "learning_rate": 4.954256875895252e-06, |
| "loss": 1.1438, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4195804195804196, |
| "grad_norm": 2.1919736862182617, |
| "learning_rate": 4.954114032827294e-06, |
| "loss": 1.1731, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4201474201474201, |
| "grad_norm": 2.0454063415527344, |
| "learning_rate": 4.953970969142989e-06, |
| "loss": 1.1379, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4207144207144207, |
| "grad_norm": 2.4228060245513916, |
| "learning_rate": 4.953827684855195e-06, |
| "loss": 1.1868, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.42128142128142126, |
| "grad_norm": 2.1020874977111816, |
| "learning_rate": 4.953684179976794e-06, |
| "loss": 1.1355, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.42184842184842186, |
| "grad_norm": 2.1466715335845947, |
| "learning_rate": 4.953540454520687e-06, |
| "loss": 1.1692, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4224154224154224, |
| "grad_norm": 2.0675599575042725, |
| "learning_rate": 4.953396508499794e-06, |
| "loss": 1.1205, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.422982422982423, |
| "grad_norm": 2.4063730239868164, |
| "learning_rate": 4.953252341927054e-06, |
| "loss": 1.1227, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.42354942354942354, |
| "grad_norm": 2.484339714050293, |
| "learning_rate": 4.953107954815429e-06, |
| "loss": 1.1665, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.42411642411642414, |
| "grad_norm": 2.1248579025268555, |
| "learning_rate": 4.952963347177898e-06, |
| "loss": 1.1755, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.4246834246834247, |
| "grad_norm": 2.005690574645996, |
| "learning_rate": 4.952818519027461e-06, |
| "loss": 1.1516, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.4252504252504252, |
| "grad_norm": 3.1751925945281982, |
| "learning_rate": 4.952673470377137e-06, |
| "loss": 1.1643, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4258174258174258, |
| "grad_norm": 1.9519903659820557, |
| "learning_rate": 4.952528201239967e-06, |
| "loss": 1.1672, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.42638442638442636, |
| "grad_norm": 1.906704306602478, |
| "learning_rate": 4.952382711629008e-06, |
| "loss": 1.0814, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.42695142695142696, |
| "grad_norm": 2.2957396507263184, |
| "learning_rate": 4.9522370015573405e-06, |
| "loss": 1.2129, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4275184275184275, |
| "grad_norm": 2.1339492797851562, |
| "learning_rate": 4.952091071038062e-06, |
| "loss": 1.1826, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4280854280854281, |
| "grad_norm": 2.051973819732666, |
| "learning_rate": 4.951944920084293e-06, |
| "loss": 1.1258, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.42865242865242864, |
| "grad_norm": 1.99086332321167, |
| "learning_rate": 4.95179854870917e-06, |
| "loss": 1.1508, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.42921942921942924, |
| "grad_norm": 1.9002501964569092, |
| "learning_rate": 4.951651956925853e-06, |
| "loss": 1.097, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.4297864297864298, |
| "grad_norm": 2.087890625, |
| "learning_rate": 4.951505144747519e-06, |
| "loss": 1.1231, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.4303534303534304, |
| "grad_norm": 1.9995880126953125, |
| "learning_rate": 4.9513581121873665e-06, |
| "loss": 1.1392, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.4309204309204309, |
| "grad_norm": 2.1408588886260986, |
| "learning_rate": 4.9512108592586125e-06, |
| "loss": 1.1433, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.43148743148743146, |
| "grad_norm": 2.1597390174865723, |
| "learning_rate": 4.951063385974495e-06, |
| "loss": 1.063, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.43205443205443206, |
| "grad_norm": 2.084463357925415, |
| "learning_rate": 4.950915692348271e-06, |
| "loss": 1.1381, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.4326214326214326, |
| "grad_norm": 2.148618698120117, |
| "learning_rate": 4.95076777839322e-06, |
| "loss": 1.1597, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4331884331884332, |
| "grad_norm": 2.15020489692688, |
| "learning_rate": 4.9506196441226345e-06, |
| "loss": 1.09, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.43375543375543374, |
| "grad_norm": 2.059931516647339, |
| "learning_rate": 4.950471289549834e-06, |
| "loss": 1.1254, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.43432243432243434, |
| "grad_norm": 2.0239474773406982, |
| "learning_rate": 4.950322714688156e-06, |
| "loss": 1.1228, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4348894348894349, |
| "grad_norm": 2.121474027633667, |
| "learning_rate": 4.950173919550955e-06, |
| "loss": 1.1657, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4354564354564355, |
| "grad_norm": 2.2115628719329834, |
| "learning_rate": 4.950024904151607e-06, |
| "loss": 1.158, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.436023436023436, |
| "grad_norm": 2.045806646347046, |
| "learning_rate": 4.9498756685035095e-06, |
| "loss": 1.1509, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4365904365904366, |
| "grad_norm": 2.263608932495117, |
| "learning_rate": 4.949726212620077e-06, |
| "loss": 1.1491, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.43715743715743716, |
| "grad_norm": 2.2598161697387695, |
| "learning_rate": 4.949576536514747e-06, |
| "loss": 1.1495, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4377244377244377, |
| "grad_norm": 2.0730316638946533, |
| "learning_rate": 4.949426640200972e-06, |
| "loss": 1.1024, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4382914382914383, |
| "grad_norm": 2.0575971603393555, |
| "learning_rate": 4.949276523692228e-06, |
| "loss": 1.0622, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.43885843885843884, |
| "grad_norm": 1.9827250242233276, |
| "learning_rate": 4.949126187002012e-06, |
| "loss": 1.1452, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.43942543942543943, |
| "grad_norm": 2.11433482170105, |
| "learning_rate": 4.948975630143837e-06, |
| "loss": 1.1375, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.43999243999244, |
| "grad_norm": 2.214946985244751, |
| "learning_rate": 4.948824853131237e-06, |
| "loss": 1.0639, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4405594405594406, |
| "grad_norm": 2.0897412300109863, |
| "learning_rate": 4.948673855977767e-06, |
| "loss": 1.0991, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4411264411264411, |
| "grad_norm": 2.051321506500244, |
| "learning_rate": 4.948522638697002e-06, |
| "loss": 1.1743, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.4416934416934417, |
| "grad_norm": 2.1338798999786377, |
| "learning_rate": 4.9483712013025356e-06, |
| "loss": 1.1451, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.44226044226044225, |
| "grad_norm": 2.172214984893799, |
| "learning_rate": 4.94821954380798e-06, |
| "loss": 1.0999, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.44282744282744285, |
| "grad_norm": 2.14731502532959, |
| "learning_rate": 4.9480676662269704e-06, |
| "loss": 1.1238, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.4433944433944434, |
| "grad_norm": 2.144775867462158, |
| "learning_rate": 4.9479155685731595e-06, |
| "loss": 1.1571, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.44396144396144394, |
| "grad_norm": 2.0654232501983643, |
| "learning_rate": 4.94776325086022e-06, |
| "loss": 1.1644, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.44452844452844453, |
| "grad_norm": 2.08166241645813, |
| "learning_rate": 4.947610713101846e-06, |
| "loss": 1.1535, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.4450954450954451, |
| "grad_norm": 2.0543806552886963, |
| "learning_rate": 4.947457955311748e-06, |
| "loss": 1.1814, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.44566244566244567, |
| "grad_norm": 2.543776512145996, |
| "learning_rate": 4.94730497750366e-06, |
| "loss": 1.1607, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4462294462294462, |
| "grad_norm": 2.1171834468841553, |
| "learning_rate": 4.9471517796913325e-06, |
| "loss": 1.1503, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4467964467964468, |
| "grad_norm": 1.9275233745574951, |
| "learning_rate": 4.946998361888541e-06, |
| "loss": 1.1236, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.44736344736344735, |
| "grad_norm": 2.2657222747802734, |
| "learning_rate": 4.946844724109073e-06, |
| "loss": 1.171, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.44793044793044795, |
| "grad_norm": 2.3687210083007812, |
| "learning_rate": 4.9466908663667425e-06, |
| "loss": 1.1054, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4484974484974485, |
| "grad_norm": 2.2839155197143555, |
| "learning_rate": 4.94653678867538e-06, |
| "loss": 1.1713, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.4490644490644491, |
| "grad_norm": 2.1770317554473877, |
| "learning_rate": 4.946382491048836e-06, |
| "loss": 1.187, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.44963144963144963, |
| "grad_norm": 2.0334887504577637, |
| "learning_rate": 4.9462279735009835e-06, |
| "loss": 1.1199, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4501984501984502, |
| "grad_norm": 1.87135910987854, |
| "learning_rate": 4.946073236045712e-06, |
| "loss": 1.1181, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.45076545076545077, |
| "grad_norm": 2.0009896755218506, |
| "learning_rate": 4.945918278696929e-06, |
| "loss": 1.1139, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4513324513324513, |
| "grad_norm": 1.9485775232315063, |
| "learning_rate": 4.945763101468569e-06, |
| "loss": 1.1215, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4518994518994519, |
| "grad_norm": 2.148066282272339, |
| "learning_rate": 4.9456077043745805e-06, |
| "loss": 1.1767, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.45246645246645245, |
| "grad_norm": 2.1108059883117676, |
| "learning_rate": 4.945452087428931e-06, |
| "loss": 1.0699, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.45303345303345305, |
| "grad_norm": 2.168656826019287, |
| "learning_rate": 4.945296250645613e-06, |
| "loss": 1.1925, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4536004536004536, |
| "grad_norm": 2.0252230167388916, |
| "learning_rate": 4.945140194038633e-06, |
| "loss": 1.1474, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4541674541674542, |
| "grad_norm": 2.010436773300171, |
| "learning_rate": 4.944983917622023e-06, |
| "loss": 1.0687, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.45473445473445473, |
| "grad_norm": 2.039442539215088, |
| "learning_rate": 4.944827421409829e-06, |
| "loss": 1.2066, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4553014553014553, |
| "grad_norm": 2.1052887439727783, |
| "learning_rate": 4.94467070541612e-06, |
| "loss": 1.095, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.45586845586845587, |
| "grad_norm": 2.1052777767181396, |
| "learning_rate": 4.944513769654985e-06, |
| "loss": 1.1136, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.4564354564354564, |
| "grad_norm": 2.0587961673736572, |
| "learning_rate": 4.944356614140532e-06, |
| "loss": 1.1154, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.457002457002457, |
| "grad_norm": 2.0752010345458984, |
| "learning_rate": 4.9441992388868876e-06, |
| "loss": 1.1544, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.45756945756945755, |
| "grad_norm": 2.405973434448242, |
| "learning_rate": 4.9440416439082006e-06, |
| "loss": 1.1276, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.45813645813645815, |
| "grad_norm": 2.0728070735931396, |
| "learning_rate": 4.943883829218638e-06, |
| "loss": 1.1398, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4587034587034587, |
| "grad_norm": 2.0885121822357178, |
| "learning_rate": 4.943725794832386e-06, |
| "loss": 1.1287, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4592704592704593, |
| "grad_norm": 1.9414550065994263, |
| "learning_rate": 4.943567540763651e-06, |
| "loss": 1.1355, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4598374598374598, |
| "grad_norm": 2.2840662002563477, |
| "learning_rate": 4.943409067026662e-06, |
| "loss": 1.1273, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4604044604044604, |
| "grad_norm": 2.0449087619781494, |
| "learning_rate": 4.943250373635663e-06, |
| "loss": 1.0825, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.46097146097146097, |
| "grad_norm": 2.1993865966796875, |
| "learning_rate": 4.94309146060492e-06, |
| "loss": 1.2063, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 2.00508189201355, |
| "learning_rate": 4.942932327948719e-06, |
| "loss": 1.1049, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4621054621054621, |
| "grad_norm": 1.9916926622390747, |
| "learning_rate": 4.942772975681366e-06, |
| "loss": 1.0996, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.46267246267246265, |
| "grad_norm": 2.2713184356689453, |
| "learning_rate": 4.942613403817187e-06, |
| "loss": 1.1215, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.46323946323946324, |
| "grad_norm": 2.0838847160339355, |
| "learning_rate": 4.942453612370525e-06, |
| "loss": 1.1203, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4638064638064638, |
| "grad_norm": 2.103275775909424, |
| "learning_rate": 4.9422936013557454e-06, |
| "loss": 1.1335, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4643734643734644, |
| "grad_norm": 2.257582664489746, |
| "learning_rate": 4.9421333707872335e-06, |
| "loss": 1.1705, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4649404649404649, |
| "grad_norm": 2.2377452850341797, |
| "learning_rate": 4.941972920679393e-06, |
| "loss": 1.1128, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4655074655074655, |
| "grad_norm": 2.1154627799987793, |
| "learning_rate": 4.941812251046647e-06, |
| "loss": 1.203, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.46607446607446607, |
| "grad_norm": 2.1149961948394775, |
| "learning_rate": 4.94165136190344e-06, |
| "loss": 1.1938, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.46664146664146666, |
| "grad_norm": 2.020078659057617, |
| "learning_rate": 4.941490253264235e-06, |
| "loss": 1.1896, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.4672084672084672, |
| "grad_norm": 1.9953190088272095, |
| "learning_rate": 4.9413289251435156e-06, |
| "loss": 1.1152, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4677754677754678, |
| "grad_norm": 2.0713393688201904, |
| "learning_rate": 4.941167377555785e-06, |
| "loss": 1.1156, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.46834246834246834, |
| "grad_norm": 1.975783348083496, |
| "learning_rate": 4.941005610515563e-06, |
| "loss": 1.0871, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4689094689094689, |
| "grad_norm": 2.0003039836883545, |
| "learning_rate": 4.940843624037396e-06, |
| "loss": 1.0953, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4694764694764695, |
| "grad_norm": 2.0911831855773926, |
| "learning_rate": 4.940681418135843e-06, |
| "loss": 1.1284, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.47004347004347, |
| "grad_norm": 2.032801866531372, |
| "learning_rate": 4.940518992825487e-06, |
| "loss": 1.1489, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4706104706104706, |
| "grad_norm": 2.1229052543640137, |
| "learning_rate": 4.940356348120929e-06, |
| "loss": 1.0989, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.47117747117747116, |
| "grad_norm": 1.9044902324676514, |
| "learning_rate": 4.94019348403679e-06, |
| "loss": 1.1146, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.47174447174447176, |
| "grad_norm": 2.077392101287842, |
| "learning_rate": 4.940030400587712e-06, |
| "loss": 1.1295, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4723114723114723, |
| "grad_norm": 2.1112782955169678, |
| "learning_rate": 4.939867097788356e-06, |
| "loss": 1.1323, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4728784728784729, |
| "grad_norm": 2.0358989238739014, |
| "learning_rate": 4.9397035756534e-06, |
| "loss": 1.0978, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.47344547344547344, |
| "grad_norm": 2.0520427227020264, |
| "learning_rate": 4.939539834197545e-06, |
| "loss": 1.152, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.47401247401247404, |
| "grad_norm": 2.0846059322357178, |
| "learning_rate": 4.939375873435512e-06, |
| "loss": 1.107, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4745794745794746, |
| "grad_norm": 2.139598846435547, |
| "learning_rate": 4.93921169338204e-06, |
| "loss": 1.0811, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4751464751464751, |
| "grad_norm": 2.6360387802124023, |
| "learning_rate": 4.939047294051887e-06, |
| "loss": 1.1115, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.4757134757134757, |
| "grad_norm": 2.1863367557525635, |
| "learning_rate": 4.938882675459833e-06, |
| "loss": 1.0615, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.47628047628047626, |
| "grad_norm": 2.0317673683166504, |
| "learning_rate": 4.938717837620677e-06, |
| "loss": 1.1024, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.47684747684747686, |
| "grad_norm": 2.2008063793182373, |
| "learning_rate": 4.938552780549236e-06, |
| "loss": 1.149, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4774144774144774, |
| "grad_norm": 2.037165641784668, |
| "learning_rate": 4.9383875042603496e-06, |
| "loss": 1.1239, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.477981477981478, |
| "grad_norm": 1.9858256578445435, |
| "learning_rate": 4.9382220087688745e-06, |
| "loss": 1.0986, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.47854847854847854, |
| "grad_norm": 2.032320022583008, |
| "learning_rate": 4.938056294089689e-06, |
| "loss": 1.1053, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.47911547911547914, |
| "grad_norm": 2.054124355316162, |
| "learning_rate": 4.93789036023769e-06, |
| "loss": 1.1036, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4796824796824797, |
| "grad_norm": 2.1493234634399414, |
| "learning_rate": 4.937724207227793e-06, |
| "loss": 1.1359, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4802494802494803, |
| "grad_norm": 1.9878216981887817, |
| "learning_rate": 4.937557835074937e-06, |
| "loss": 1.143, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4808164808164808, |
| "grad_norm": 2.1001598834991455, |
| "learning_rate": 4.9373912437940765e-06, |
| "loss": 1.1545, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.48138348138348136, |
| "grad_norm": 2.1448514461517334, |
| "learning_rate": 4.9372244334001874e-06, |
| "loss": 1.1517, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.48195048195048196, |
| "grad_norm": 2.144200325012207, |
| "learning_rate": 4.937057403908266e-06, |
| "loss": 1.1104, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4825174825174825, |
| "grad_norm": 2.0703322887420654, |
| "learning_rate": 4.9368901553333296e-06, |
| "loss": 1.0506, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4830844830844831, |
| "grad_norm": 2.108504295349121, |
| "learning_rate": 4.936722687690409e-06, |
| "loss": 1.1578, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.48365148365148364, |
| "grad_norm": 2.0468504428863525, |
| "learning_rate": 4.936555000994563e-06, |
| "loss": 1.1275, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.48421848421848424, |
| "grad_norm": 1.9859349727630615, |
| "learning_rate": 4.9363870952608634e-06, |
| "loss": 1.1569, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4847854847854848, |
| "grad_norm": 2.0685060024261475, |
| "learning_rate": 4.936218970504406e-06, |
| "loss": 1.0957, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4853524853524854, |
| "grad_norm": 1.965552568435669, |
| "learning_rate": 4.936050626740303e-06, |
| "loss": 1.2005, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4859194859194859, |
| "grad_norm": 2.2556333541870117, |
| "learning_rate": 4.935882063983689e-06, |
| "loss": 1.1436, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4864864864864865, |
| "grad_norm": 4.329214096069336, |
| "learning_rate": 4.935713282249718e-06, |
| "loss": 1.1138, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.48705348705348706, |
| "grad_norm": 2.037137269973755, |
| "learning_rate": 4.935544281553561e-06, |
| "loss": 1.1449, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4876204876204876, |
| "grad_norm": 1.913141131401062, |
| "learning_rate": 4.935375061910412e-06, |
| "loss": 1.1467, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4881874881874882, |
| "grad_norm": 2.0674262046813965, |
| "learning_rate": 4.935205623335483e-06, |
| "loss": 1.099, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.48875448875448874, |
| "grad_norm": 2.456698179244995, |
| "learning_rate": 4.935035965844005e-06, |
| "loss": 1.179, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.48932148932148933, |
| "grad_norm": 1.964211106300354, |
| "learning_rate": 4.93486608945123e-06, |
| "loss": 1.1761, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4898884898884899, |
| "grad_norm": 2.1101772785186768, |
| "learning_rate": 4.9346959941724305e-06, |
| "loss": 1.1189, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4904554904554905, |
| "grad_norm": 1.9686706066131592, |
| "learning_rate": 4.934525680022897e-06, |
| "loss": 1.1225, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.491022491022491, |
| "grad_norm": 1.93234121799469, |
| "learning_rate": 4.934355147017939e-06, |
| "loss": 1.0838, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4915894915894916, |
| "grad_norm": 2.1068077087402344, |
| "learning_rate": 4.934184395172888e-06, |
| "loss": 1.1495, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.49215649215649215, |
| "grad_norm": 1.9966765642166138, |
| "learning_rate": 4.934013424503094e-06, |
| "loss": 1.1457, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.49272349272349275, |
| "grad_norm": 2.1224286556243896, |
| "learning_rate": 4.9338422350239245e-06, |
| "loss": 1.1205, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.4932904932904933, |
| "grad_norm": 2.2381792068481445, |
| "learning_rate": 4.9336708267507724e-06, |
| "loss": 1.177, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.49385749385749383, |
| "grad_norm": 1.9738177061080933, |
| "learning_rate": 4.933499199699044e-06, |
| "loss": 1.1008, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.49442449442449443, |
| "grad_norm": 2.031609058380127, |
| "learning_rate": 4.93332735388417e-06, |
| "loss": 1.1318, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.494991494991495, |
| "grad_norm": 2.3054850101470947, |
| "learning_rate": 4.933155289321596e-06, |
| "loss": 1.1877, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.49555849555849557, |
| "grad_norm": 2.2658283710479736, |
| "learning_rate": 4.932983006026792e-06, |
| "loss": 1.1726, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4961254961254961, |
| "grad_norm": 2.0128214359283447, |
| "learning_rate": 4.932810504015246e-06, |
| "loss": 1.1001, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.4966924966924967, |
| "grad_norm": 2.054161787033081, |
| "learning_rate": 4.932637783302465e-06, |
| "loss": 1.1191, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.49725949725949725, |
| "grad_norm": 1.9922339916229248, |
| "learning_rate": 4.932464843903976e-06, |
| "loss": 1.1631, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.49782649782649785, |
| "grad_norm": 2.0135786533355713, |
| "learning_rate": 4.932291685835324e-06, |
| "loss": 1.1587, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4983934983934984, |
| "grad_norm": 2.167001485824585, |
| "learning_rate": 4.932118309112077e-06, |
| "loss": 1.1278, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.498960498960499, |
| "grad_norm": 2.2132434844970703, |
| "learning_rate": 4.931944713749821e-06, |
| "loss": 1.1372, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.49952749952749953, |
| "grad_norm": 2.3871099948883057, |
| "learning_rate": 4.93177089976416e-06, |
| "loss": 1.1333, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5000945000945001, |
| "grad_norm": 2.0161664485931396, |
| "learning_rate": 4.931596867170721e-06, |
| "loss": 1.1531, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5006615006615006, |
| "grad_norm": 2.056384325027466, |
| "learning_rate": 4.93142261598515e-06, |
| "loss": 1.1791, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5012285012285013, |
| "grad_norm": 2.0523018836975098, |
| "learning_rate": 4.931248146223108e-06, |
| "loss": 1.1363, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5017955017955018, |
| "grad_norm": 2.1028332710266113, |
| "learning_rate": 4.9310734579002815e-06, |
| "loss": 1.1545, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5023625023625024, |
| "grad_norm": 2.1507184505462646, |
| "learning_rate": 4.9308985510323745e-06, |
| "loss": 1.1455, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5029295029295029, |
| "grad_norm": 2.3825719356536865, |
| "learning_rate": 4.93072342563511e-06, |
| "loss": 1.1448, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5034965034965035, |
| "grad_norm": 2.4547119140625, |
| "learning_rate": 4.930548081724232e-06, |
| "loss": 1.1699, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5040635040635041, |
| "grad_norm": 1.9959300756454468, |
| "learning_rate": 4.930372519315501e-06, |
| "loss": 1.1495, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5046305046305046, |
| "grad_norm": 2.0104527473449707, |
| "learning_rate": 4.930196738424703e-06, |
| "loss": 1.1168, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5051975051975052, |
| "grad_norm": 1.8669036626815796, |
| "learning_rate": 4.930020739067637e-06, |
| "loss": 1.0347, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5057645057645057, |
| "grad_norm": 2.0974411964416504, |
| "learning_rate": 4.929844521260125e-06, |
| "loss": 1.0764, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5063315063315064, |
| "grad_norm": 2.0043864250183105, |
| "learning_rate": 4.929668085018011e-06, |
| "loss": 1.1351, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5068985068985069, |
| "grad_norm": 2.148527145385742, |
| "learning_rate": 4.929491430357154e-06, |
| "loss": 1.1796, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5074655074655074, |
| "grad_norm": 1.9664125442504883, |
| "learning_rate": 4.929314557293434e-06, |
| "loss": 1.044, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.508032508032508, |
| "grad_norm": 2.263545513153076, |
| "learning_rate": 4.929137465842752e-06, |
| "loss": 1.1961, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5085995085995086, |
| "grad_norm": 2.004988193511963, |
| "learning_rate": 4.928960156021029e-06, |
| "loss": 1.118, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5091665091665092, |
| "grad_norm": 2.54754900932312, |
| "learning_rate": 4.928782627844202e-06, |
| "loss": 1.1106, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5097335097335097, |
| "grad_norm": 1.9804669618606567, |
| "learning_rate": 4.928604881328234e-06, |
| "loss": 1.0989, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5103005103005103, |
| "grad_norm": 2.1374270915985107, |
| "learning_rate": 4.9284269164891e-06, |
| "loss": 1.1583, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5108675108675109, |
| "grad_norm": 1.9350876808166504, |
| "learning_rate": 4.9282487333428e-06, |
| "loss": 1.1411, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5114345114345115, |
| "grad_norm": 2.1036617755889893, |
| "learning_rate": 4.928070331905352e-06, |
| "loss": 1.1273, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.512001512001512, |
| "grad_norm": 2.180227756500244, |
| "learning_rate": 4.927891712192795e-06, |
| "loss": 1.0769, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5125685125685125, |
| "grad_norm": 1.9973223209381104, |
| "learning_rate": 4.927712874221184e-06, |
| "loss": 1.0891, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5131355131355131, |
| "grad_norm": 2.426229953765869, |
| "learning_rate": 4.927533818006597e-06, |
| "loss": 1.151, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5137025137025137, |
| "grad_norm": 2.2276418209075928, |
| "learning_rate": 4.927354543565131e-06, |
| "loss": 1.1493, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5142695142695143, |
| "grad_norm": 2.0039780139923096, |
| "learning_rate": 4.9271750509129e-06, |
| "loss": 1.1024, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5148365148365148, |
| "grad_norm": 2.0033607482910156, |
| "learning_rate": 4.926995340066043e-06, |
| "loss": 1.1248, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5154035154035154, |
| "grad_norm": 2.007786750793457, |
| "learning_rate": 4.926815411040713e-06, |
| "loss": 1.0888, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.515970515970516, |
| "grad_norm": 2.130481481552124, |
| "learning_rate": 4.926635263853086e-06, |
| "loss": 1.2004, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5165375165375166, |
| "grad_norm": 1.9995726346969604, |
| "learning_rate": 4.926454898519356e-06, |
| "loss": 1.0705, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5171045171045171, |
| "grad_norm": 2.122728109359741, |
| "learning_rate": 4.926274315055738e-06, |
| "loss": 1.1051, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5176715176715176, |
| "grad_norm": 2.3630666732788086, |
| "learning_rate": 4.926093513478466e-06, |
| "loss": 1.1809, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5182385182385182, |
| "grad_norm": 2.1009976863861084, |
| "learning_rate": 4.925912493803792e-06, |
| "loss": 1.1338, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5188055188055188, |
| "grad_norm": 2.086754560470581, |
| "learning_rate": 4.9257312560479895e-06, |
| "loss": 1.1692, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5193725193725194, |
| "grad_norm": 1.97232186794281, |
| "learning_rate": 4.925549800227352e-06, |
| "loss": 1.1131, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5199395199395199, |
| "grad_norm": 2.0924556255340576, |
| "learning_rate": 4.925368126358191e-06, |
| "loss": 1.1051, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5205065205065205, |
| "grad_norm": 2.1015572547912598, |
| "learning_rate": 4.925186234456839e-06, |
| "loss": 1.1694, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5210735210735211, |
| "grad_norm": 2.045950412750244, |
| "learning_rate": 4.925004124539648e-06, |
| "loss": 1.0953, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5216405216405217, |
| "grad_norm": 2.4197933673858643, |
| "learning_rate": 4.9248217966229865e-06, |
| "loss": 1.1401, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5222075222075222, |
| "grad_norm": 2.0570056438446045, |
| "learning_rate": 4.924639250723247e-06, |
| "loss": 1.1512, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5227745227745227, |
| "grad_norm": 2.0190746784210205, |
| "learning_rate": 4.92445648685684e-06, |
| "loss": 1.0759, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5233415233415234, |
| "grad_norm": 2.069953680038452, |
| "learning_rate": 4.924273505040195e-06, |
| "loss": 1.1182, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5239085239085239, |
| "grad_norm": 2.082301378250122, |
| "learning_rate": 4.9240903052897605e-06, |
| "loss": 1.1443, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5244755244755245, |
| "grad_norm": 2.077989339828491, |
| "learning_rate": 4.9239068876220064e-06, |
| "loss": 1.068, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.525042525042525, |
| "grad_norm": 2.0633435249328613, |
| "learning_rate": 4.9237232520534216e-06, |
| "loss": 1.108, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5256095256095256, |
| "grad_norm": 2.0655646324157715, |
| "learning_rate": 4.9235393986005145e-06, |
| "loss": 1.1233, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5261765261765262, |
| "grad_norm": 2.005661725997925, |
| "learning_rate": 4.923355327279811e-06, |
| "loss": 1.1472, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5267435267435268, |
| "grad_norm": 2.087590217590332, |
| "learning_rate": 4.923171038107861e-06, |
| "loss": 1.1612, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5273105273105273, |
| "grad_norm": 1.943999171257019, |
| "learning_rate": 4.922986531101229e-06, |
| "loss": 1.1393, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5278775278775278, |
| "grad_norm": 2.139828681945801, |
| "learning_rate": 4.922801806276504e-06, |
| "loss": 1.1677, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5284445284445285, |
| "grad_norm": 2.0876002311706543, |
| "learning_rate": 4.92261686365029e-06, |
| "loss": 1.1315, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.529011529011529, |
| "grad_norm": 2.1113197803497314, |
| "learning_rate": 4.922431703239214e-06, |
| "loss": 1.0681, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5295785295785296, |
| "grad_norm": 1.9526565074920654, |
| "learning_rate": 4.922246325059922e-06, |
| "loss": 1.0847, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5301455301455301, |
| "grad_norm": 2.103508234024048, |
| "learning_rate": 4.922060729129076e-06, |
| "loss": 1.1318, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5307125307125307, |
| "grad_norm": 2.0978636741638184, |
| "learning_rate": 4.921874915463363e-06, |
| "loss": 1.1212, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5312795312795313, |
| "grad_norm": 2.017594575881958, |
| "learning_rate": 4.921688884079486e-06, |
| "loss": 1.1121, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5318465318465319, |
| "grad_norm": 2.0037190914154053, |
| "learning_rate": 4.921502634994169e-06, |
| "loss": 1.0777, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5324135324135324, |
| "grad_norm": 2.0331473350524902, |
| "learning_rate": 4.9213161682241546e-06, |
| "loss": 1.1632, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5329805329805329, |
| "grad_norm": 2.252798080444336, |
| "learning_rate": 4.9211294837862055e-06, |
| "loss": 1.1374, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5335475335475336, |
| "grad_norm": 2.1416819095611572, |
| "learning_rate": 4.920942581697105e-06, |
| "loss": 1.1136, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5341145341145341, |
| "grad_norm": 2.1041433811187744, |
| "learning_rate": 4.920755461973654e-06, |
| "loss": 1.133, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5346815346815347, |
| "grad_norm": 2.319733142852783, |
| "learning_rate": 4.920568124632674e-06, |
| "loss": 1.1501, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5352485352485352, |
| "grad_norm": 2.0705809593200684, |
| "learning_rate": 4.920380569691007e-06, |
| "loss": 1.1071, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5358155358155359, |
| "grad_norm": 2.1050591468811035, |
| "learning_rate": 4.920192797165511e-06, |
| "loss": 1.1014, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5363825363825364, |
| "grad_norm": 1.9876645803451538, |
| "learning_rate": 4.920004807073069e-06, |
| "loss": 1.1281, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.536949536949537, |
| "grad_norm": 1.9790834188461304, |
| "learning_rate": 4.919816599430579e-06, |
| "loss": 1.1586, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5375165375165375, |
| "grad_norm": 1.9907217025756836, |
| "learning_rate": 4.919628174254961e-06, |
| "loss": 1.0882, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.538083538083538, |
| "grad_norm": 2.121260166168213, |
| "learning_rate": 4.9194395315631535e-06, |
| "loss": 1.1715, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5386505386505387, |
| "grad_norm": 2.3340444564819336, |
| "learning_rate": 4.919250671372114e-06, |
| "loss": 1.2029, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5392175392175392, |
| "grad_norm": 1.9696297645568848, |
| "learning_rate": 4.919061593698822e-06, |
| "loss": 1.1487, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5397845397845398, |
| "grad_norm": 1.9165290594100952, |
| "learning_rate": 4.918872298560273e-06, |
| "loss": 1.077, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5403515403515403, |
| "grad_norm": 2.135812520980835, |
| "learning_rate": 4.918682785973486e-06, |
| "loss": 1.0452, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.540918540918541, |
| "grad_norm": 2.068650484085083, |
| "learning_rate": 4.918493055955497e-06, |
| "loss": 1.1309, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5414855414855415, |
| "grad_norm": 1.990132451057434, |
| "learning_rate": 4.91830310852336e-06, |
| "loss": 1.1296, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.542052542052542, |
| "grad_norm": 2.1037912368774414, |
| "learning_rate": 4.918112943694153e-06, |
| "loss": 1.1017, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5426195426195426, |
| "grad_norm": 2.4106600284576416, |
| "learning_rate": 4.917922561484971e-06, |
| "loss": 1.1424, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5431865431865431, |
| "grad_norm": 2.0177202224731445, |
| "learning_rate": 4.917731961912927e-06, |
| "loss": 1.1401, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5437535437535438, |
| "grad_norm": 2.0823750495910645, |
| "learning_rate": 4.917541144995157e-06, |
| "loss": 1.1077, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5443205443205443, |
| "grad_norm": 2.0904784202575684, |
| "learning_rate": 4.917350110748815e-06, |
| "loss": 1.1433, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5448875448875449, |
| "grad_norm": 2.1177074909210205, |
| "learning_rate": 4.917158859191072e-06, |
| "loss": 1.1711, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 2.118147373199463, |
| "learning_rate": 4.916967390339123e-06, |
| "loss": 1.1629, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5460215460215461, |
| "grad_norm": 1.9789443016052246, |
| "learning_rate": 4.916775704210179e-06, |
| "loss": 1.0943, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5465885465885466, |
| "grad_norm": 1.9533722400665283, |
| "learning_rate": 4.916583800821474e-06, |
| "loss": 1.1124, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5471555471555472, |
| "grad_norm": 2.237156867980957, |
| "learning_rate": 4.916391680190257e-06, |
| "loss": 1.0787, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5477225477225477, |
| "grad_norm": 2.0534963607788086, |
| "learning_rate": 4.9161993423338e-06, |
| "loss": 1.1373, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5482895482895482, |
| "grad_norm": 2.4482421875, |
| "learning_rate": 4.916006787269394e-06, |
| "loss": 1.1619, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5488565488565489, |
| "grad_norm": 2.11044979095459, |
| "learning_rate": 4.915814015014349e-06, |
| "loss": 1.1629, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5494235494235494, |
| "grad_norm": 2.1861917972564697, |
| "learning_rate": 4.915621025585993e-06, |
| "loss": 1.1492, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.54999054999055, |
| "grad_norm": 2.104365587234497, |
| "learning_rate": 4.915427819001676e-06, |
| "loss": 1.1158, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5505575505575505, |
| "grad_norm": 2.0724103450775146, |
| "learning_rate": 4.915234395278768e-06, |
| "loss": 1.1259, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5511245511245512, |
| "grad_norm": 1.9905637502670288, |
| "learning_rate": 4.915040754434655e-06, |
| "loss": 1.0791, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5516915516915517, |
| "grad_norm": 2.011446714401245, |
| "learning_rate": 4.914846896486746e-06, |
| "loss": 1.136, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5522585522585522, |
| "grad_norm": 2.1651599407196045, |
| "learning_rate": 4.914652821452468e-06, |
| "loss": 1.1253, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5528255528255528, |
| "grad_norm": 2.071033239364624, |
| "learning_rate": 4.914458529349267e-06, |
| "loss": 1.0814, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5533925533925534, |
| "grad_norm": 1.9809670448303223, |
| "learning_rate": 4.914264020194609e-06, |
| "loss": 1.1501, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.553959553959554, |
| "grad_norm": 1.9492532014846802, |
| "learning_rate": 4.914069294005982e-06, |
| "loss": 1.0487, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5545265545265545, |
| "grad_norm": 2.0437419414520264, |
| "learning_rate": 4.913874350800888e-06, |
| "loss": 1.0744, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5550935550935551, |
| "grad_norm": 2.072704553604126, |
| "learning_rate": 4.913679190596854e-06, |
| "loss": 1.0803, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5556605556605556, |
| "grad_norm": 1.9283461570739746, |
| "learning_rate": 4.913483813411423e-06, |
| "loss": 1.0459, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5562275562275563, |
| "grad_norm": 2.1060099601745605, |
| "learning_rate": 4.913288219262159e-06, |
| "loss": 1.0704, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5567945567945568, |
| "grad_norm": 2.0446958541870117, |
| "learning_rate": 4.913092408166646e-06, |
| "loss": 1.1412, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5573615573615573, |
| "grad_norm": 1.904240608215332, |
| "learning_rate": 4.912896380142486e-06, |
| "loss": 1.065, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5579285579285579, |
| "grad_norm": 1.9327867031097412, |
| "learning_rate": 4.912700135207301e-06, |
| "loss": 1.1088, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5584955584955585, |
| "grad_norm": 2.0221242904663086, |
| "learning_rate": 4.912503673378733e-06, |
| "loss": 1.1335, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5590625590625591, |
| "grad_norm": 2.1040868759155273, |
| "learning_rate": 4.912306994674444e-06, |
| "loss": 1.1691, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5596295596295596, |
| "grad_norm": 1.9867613315582275, |
| "learning_rate": 4.912110099112114e-06, |
| "loss": 1.0998, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5601965601965602, |
| "grad_norm": 2.1432271003723145, |
| "learning_rate": 4.911912986709444e-06, |
| "loss": 1.1162, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5607635607635607, |
| "grad_norm": 2.1761672496795654, |
| "learning_rate": 4.911715657484152e-06, |
| "loss": 1.1269, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5613305613305614, |
| "grad_norm": 2.068603038787842, |
| "learning_rate": 4.911518111453979e-06, |
| "loss": 1.1357, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5618975618975619, |
| "grad_norm": 2.151704788208008, |
| "learning_rate": 4.911320348636682e-06, |
| "loss": 1.0952, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5624645624645624, |
| "grad_norm": 2.053663492202759, |
| "learning_rate": 4.911122369050041e-06, |
| "loss": 1.0994, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.563031563031563, |
| "grad_norm": 2.0221524238586426, |
| "learning_rate": 4.910924172711852e-06, |
| "loss": 1.0787, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5635985635985636, |
| "grad_norm": 2.0243706703186035, |
| "learning_rate": 4.910725759639934e-06, |
| "loss": 1.0871, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5641655641655642, |
| "grad_norm": 2.173171281814575, |
| "learning_rate": 4.910527129852122e-06, |
| "loss": 1.142, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5647325647325647, |
| "grad_norm": 2.029360771179199, |
| "learning_rate": 4.910328283366274e-06, |
| "loss": 1.1157, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5652995652995653, |
| "grad_norm": 2.067548990249634, |
| "learning_rate": 4.910129220200263e-06, |
| "loss": 1.1589, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.5658665658665659, |
| "grad_norm": 1.99434232711792, |
| "learning_rate": 4.9099299403719855e-06, |
| "loss": 1.1006, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.5664335664335665, |
| "grad_norm": 2.186133623123169, |
| "learning_rate": 4.909730443899357e-06, |
| "loss": 1.1297, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.567000567000567, |
| "grad_norm": 2.068250894546509, |
| "learning_rate": 4.909530730800309e-06, |
| "loss": 1.1129, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5675675675675675, |
| "grad_norm": 1.9011445045471191, |
| "learning_rate": 4.909330801092798e-06, |
| "loss": 1.0905, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5681345681345681, |
| "grad_norm": 2.13041090965271, |
| "learning_rate": 4.909130654794795e-06, |
| "loss": 1.0878, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5687015687015687, |
| "grad_norm": 2.140449047088623, |
| "learning_rate": 4.908930291924294e-06, |
| "loss": 1.148, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5692685692685693, |
| "grad_norm": 2.160778045654297, |
| "learning_rate": 4.908729712499305e-06, |
| "loss": 1.1301, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5698355698355698, |
| "grad_norm": 2.007202625274658, |
| "learning_rate": 4.90852891653786e-06, |
| "loss": 1.1445, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5704025704025704, |
| "grad_norm": 2.148878335952759, |
| "learning_rate": 4.908327904058011e-06, |
| "loss": 1.1313, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.570969570969571, |
| "grad_norm": 2.013780117034912, |
| "learning_rate": 4.908126675077828e-06, |
| "loss": 1.0782, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5715365715365716, |
| "grad_norm": 2.027580976486206, |
| "learning_rate": 4.9079252296154e-06, |
| "loss": 1.0961, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5721035721035721, |
| "grad_norm": 2.006068706512451, |
| "learning_rate": 4.907723567688836e-06, |
| "loss": 1.1487, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5726705726705726, |
| "grad_norm": 2.081812858581543, |
| "learning_rate": 4.907521689316265e-06, |
| "loss": 1.1765, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5732375732375732, |
| "grad_norm": 1.988435983657837, |
| "learning_rate": 4.907319594515837e-06, |
| "loss": 1.0866, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.5738045738045738, |
| "grad_norm": 1.9649443626403809, |
| "learning_rate": 4.907117283305717e-06, |
| "loss": 1.101, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5743715743715744, |
| "grad_norm": 1.9727582931518555, |
| "learning_rate": 4.906914755704094e-06, |
| "loss": 1.14, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5749385749385749, |
| "grad_norm": 2.19130277633667, |
| "learning_rate": 4.906712011729173e-06, |
| "loss": 1.114, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.5755055755055755, |
| "grad_norm": 1.9982514381408691, |
| "learning_rate": 4.906509051399181e-06, |
| "loss": 1.1499, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5760725760725761, |
| "grad_norm": 1.844041347503662, |
| "learning_rate": 4.906305874732362e-06, |
| "loss": 1.0985, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5766395766395767, |
| "grad_norm": 2.0490691661834717, |
| "learning_rate": 4.9061024817469835e-06, |
| "loss": 1.1272, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5772065772065772, |
| "grad_norm": 2.1834962368011475, |
| "learning_rate": 4.905898872461328e-06, |
| "loss": 1.1504, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5777735777735777, |
| "grad_norm": 2.2521510124206543, |
| "learning_rate": 4.905695046893699e-06, |
| "loss": 1.1116, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5783405783405784, |
| "grad_norm": 2.023836135864258, |
| "learning_rate": 4.905491005062421e-06, |
| "loss": 1.1157, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5789075789075789, |
| "grad_norm": 2.420994520187378, |
| "learning_rate": 4.905286746985836e-06, |
| "loss": 1.1131, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5794745794745795, |
| "grad_norm": 2.1546456813812256, |
| "learning_rate": 4.905082272682305e-06, |
| "loss": 1.1565, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.58004158004158, |
| "grad_norm": 2.156719207763672, |
| "learning_rate": 4.904877582170212e-06, |
| "loss": 1.0556, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5806085806085806, |
| "grad_norm": 2.073331117630005, |
| "learning_rate": 4.904672675467956e-06, |
| "loss": 1.1314, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5811755811755812, |
| "grad_norm": 2.097214460372925, |
| "learning_rate": 4.9044675525939575e-06, |
| "loss": 1.1041, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5817425817425818, |
| "grad_norm": 2.1299214363098145, |
| "learning_rate": 4.904262213566657e-06, |
| "loss": 1.1585, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5823095823095823, |
| "grad_norm": 2.228649139404297, |
| "learning_rate": 4.904056658404514e-06, |
| "loss": 1.1194, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5828765828765828, |
| "grad_norm": 2.1192493438720703, |
| "learning_rate": 4.903850887126006e-06, |
| "loss": 1.0576, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5834435834435835, |
| "grad_norm": 2.076824903488159, |
| "learning_rate": 4.903644899749632e-06, |
| "loss": 1.1033, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.584010584010584, |
| "grad_norm": 2.0137550830841064, |
| "learning_rate": 4.90343869629391e-06, |
| "loss": 1.0799, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5845775845775846, |
| "grad_norm": 2.003573417663574, |
| "learning_rate": 4.903232276777376e-06, |
| "loss": 1.09, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5851445851445851, |
| "grad_norm": 2.0129973888397217, |
| "learning_rate": 4.9030256412185875e-06, |
| "loss": 1.1007, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5857115857115857, |
| "grad_norm": 2.07281494140625, |
| "learning_rate": 4.9028187896361185e-06, |
| "loss": 1.1368, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5862785862785863, |
| "grad_norm": 2.1313490867614746, |
| "learning_rate": 4.902611722048566e-06, |
| "loss": 1.1255, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5868455868455869, |
| "grad_norm": 2.160646438598633, |
| "learning_rate": 4.902404438474544e-06, |
| "loss": 1.083, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5874125874125874, |
| "grad_norm": 2.0317294597625732, |
| "learning_rate": 4.9021969389326866e-06, |
| "loss": 1.1161, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5879795879795879, |
| "grad_norm": 2.01206374168396, |
| "learning_rate": 4.901989223441647e-06, |
| "loss": 1.1186, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5885465885465886, |
| "grad_norm": 2.1760642528533936, |
| "learning_rate": 4.901781292020098e-06, |
| "loss": 1.1131, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5891135891135891, |
| "grad_norm": 2.3249378204345703, |
| "learning_rate": 4.9015731446867334e-06, |
| "loss": 1.1342, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5896805896805897, |
| "grad_norm": 1.980068325996399, |
| "learning_rate": 4.901364781460263e-06, |
| "loss": 1.1192, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5902475902475902, |
| "grad_norm": 1.993043303489685, |
| "learning_rate": 4.90115620235942e-06, |
| "loss": 1.0759, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5908145908145909, |
| "grad_norm": 1.9559072256088257, |
| "learning_rate": 4.900947407402952e-06, |
| "loss": 1.1278, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5913815913815914, |
| "grad_norm": 2.1052565574645996, |
| "learning_rate": 4.900738396609631e-06, |
| "loss": 1.1116, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.591948591948592, |
| "grad_norm": 2.000621795654297, |
| "learning_rate": 4.900529169998247e-06, |
| "loss": 1.1006, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5925155925155925, |
| "grad_norm": 2.008673906326294, |
| "learning_rate": 4.900319727587607e-06, |
| "loss": 1.0611, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.593082593082593, |
| "grad_norm": 2.092581272125244, |
| "learning_rate": 4.90011006939654e-06, |
| "loss": 1.1278, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5936495936495937, |
| "grad_norm": 2.192446708679199, |
| "learning_rate": 4.899900195443894e-06, |
| "loss": 1.1296, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5942165942165942, |
| "grad_norm": 2.0756747722625732, |
| "learning_rate": 4.899690105748534e-06, |
| "loss": 1.1812, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5947835947835948, |
| "grad_norm": 1.9756616353988647, |
| "learning_rate": 4.899479800329348e-06, |
| "loss": 1.1218, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5953505953505953, |
| "grad_norm": 2.0761704444885254, |
| "learning_rate": 4.899269279205243e-06, |
| "loss": 1.1347, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.595917595917596, |
| "grad_norm": 2.0106265544891357, |
| "learning_rate": 4.899058542395141e-06, |
| "loss": 1.087, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5964845964845965, |
| "grad_norm": 2.1320059299468994, |
| "learning_rate": 4.898847589917989e-06, |
| "loss": 1.1559, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.597051597051597, |
| "grad_norm": 2.087963104248047, |
| "learning_rate": 4.89863642179275e-06, |
| "loss": 1.1293, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5976185976185976, |
| "grad_norm": 2.0533034801483154, |
| "learning_rate": 4.898425038038406e-06, |
| "loss": 1.1516, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5981855981855981, |
| "grad_norm": 2.3996708393096924, |
| "learning_rate": 4.898213438673962e-06, |
| "loss": 1.0521, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5987525987525988, |
| "grad_norm": 2.2481696605682373, |
| "learning_rate": 4.898001623718439e-06, |
| "loss": 1.1748, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5993195993195993, |
| "grad_norm": 2.2351038455963135, |
| "learning_rate": 4.897789593190878e-06, |
| "loss": 1.1295, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5998865998865999, |
| "grad_norm": 2.008779764175415, |
| "learning_rate": 4.897577347110339e-06, |
| "loss": 1.1014, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6004536004536004, |
| "grad_norm": 2.0178744792938232, |
| "learning_rate": 4.897364885495905e-06, |
| "loss": 1.0628, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6010206010206011, |
| "grad_norm": 1.9279972314834595, |
| "learning_rate": 4.8971522083666735e-06, |
| "loss": 1.0835, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6015876015876016, |
| "grad_norm": 2.0269901752471924, |
| "learning_rate": 4.896939315741765e-06, |
| "loss": 1.097, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6021546021546021, |
| "grad_norm": 2.0053329467773438, |
| "learning_rate": 4.896726207640315e-06, |
| "loss": 1.1419, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6027216027216027, |
| "grad_norm": 2.123281478881836, |
| "learning_rate": 4.896512884081484e-06, |
| "loss": 1.1296, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6032886032886033, |
| "grad_norm": 2.836108922958374, |
| "learning_rate": 4.896299345084447e-06, |
| "loss": 1.0291, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6038556038556039, |
| "grad_norm": 2.102825880050659, |
| "learning_rate": 4.896085590668402e-06, |
| "loss": 1.1113, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6044226044226044, |
| "grad_norm": 2.155285358428955, |
| "learning_rate": 4.895871620852564e-06, |
| "loss": 1.1262, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.604989604989605, |
| "grad_norm": 2.055398464202881, |
| "learning_rate": 4.895657435656168e-06, |
| "loss": 1.0959, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.6055566055566055, |
| "grad_norm": 1.9637835025787354, |
| "learning_rate": 4.8954430350984685e-06, |
| "loss": 1.1196, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6061236061236062, |
| "grad_norm": 2.1283085346221924, |
| "learning_rate": 4.895228419198741e-06, |
| "loss": 1.1331, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6066906066906067, |
| "grad_norm": 1.9687258005142212, |
| "learning_rate": 4.895013587976276e-06, |
| "loss": 1.0903, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6072576072576072, |
| "grad_norm": 1.920413613319397, |
| "learning_rate": 4.8947985414503876e-06, |
| "loss": 1.0724, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6078246078246078, |
| "grad_norm": 2.2596044540405273, |
| "learning_rate": 4.894583279640408e-06, |
| "loss": 1.1474, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6083916083916084, |
| "grad_norm": 2.154895544052124, |
| "learning_rate": 4.894367802565688e-06, |
| "loss": 1.1321, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.608958608958609, |
| "grad_norm": 2.055975914001465, |
| "learning_rate": 4.894152110245599e-06, |
| "loss": 1.1589, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6095256095256095, |
| "grad_norm": 2.0067338943481445, |
| "learning_rate": 4.8939362026995295e-06, |
| "loss": 1.0869, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6100926100926101, |
| "grad_norm": 1.965383529663086, |
| "learning_rate": 4.89372007994689e-06, |
| "loss": 1.0597, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6106596106596106, |
| "grad_norm": 2.137803316116333, |
| "learning_rate": 4.893503742007108e-06, |
| "loss": 1.1084, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.6112266112266113, |
| "grad_norm": 1.892305850982666, |
| "learning_rate": 4.893287188899633e-06, |
| "loss": 1.0763, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6117936117936118, |
| "grad_norm": 2.238926649093628, |
| "learning_rate": 4.893070420643932e-06, |
| "loss": 1.1118, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6123606123606123, |
| "grad_norm": 2.2139806747436523, |
| "learning_rate": 4.892853437259491e-06, |
| "loss": 1.1198, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6129276129276129, |
| "grad_norm": 2.139768600463867, |
| "learning_rate": 4.892636238765817e-06, |
| "loss": 1.1724, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.6134946134946135, |
| "grad_norm": 2.024958848953247, |
| "learning_rate": 4.892418825182435e-06, |
| "loss": 1.1211, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.6140616140616141, |
| "grad_norm": 2.0417838096618652, |
| "learning_rate": 4.892201196528888e-06, |
| "loss": 1.1068, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6146286146286146, |
| "grad_norm": 2.1328155994415283, |
| "learning_rate": 4.891983352824744e-06, |
| "loss": 1.0458, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6151956151956152, |
| "grad_norm": 2.0734100341796875, |
| "learning_rate": 4.891765294089583e-06, |
| "loss": 1.0968, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6157626157626158, |
| "grad_norm": 2.066288709640503, |
| "learning_rate": 4.891547020343009e-06, |
| "loss": 1.0867, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6163296163296164, |
| "grad_norm": 2.0422234535217285, |
| "learning_rate": 4.891328531604643e-06, |
| "loss": 1.0878, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6168966168966169, |
| "grad_norm": 2.113037347793579, |
| "learning_rate": 4.891109827894129e-06, |
| "loss": 1.0847, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6174636174636174, |
| "grad_norm": 2.0433382987976074, |
| "learning_rate": 4.890890909231124e-06, |
| "loss": 1.0748, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.618030618030618, |
| "grad_norm": 2.0215182304382324, |
| "learning_rate": 4.890671775635311e-06, |
| "loss": 1.1735, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6185976185976186, |
| "grad_norm": 1.9553345441818237, |
| "learning_rate": 4.890452427126389e-06, |
| "loss": 1.1418, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6191646191646192, |
| "grad_norm": 2.063011646270752, |
| "learning_rate": 4.890232863724075e-06, |
| "loss": 1.1011, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6197316197316197, |
| "grad_norm": 1.9352067708969116, |
| "learning_rate": 4.890013085448108e-06, |
| "loss": 1.0389, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6202986202986203, |
| "grad_norm": 2.1764583587646484, |
| "learning_rate": 4.889793092318246e-06, |
| "loss": 1.1362, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6208656208656209, |
| "grad_norm": 1.887978434562683, |
| "learning_rate": 4.889572884354265e-06, |
| "loss": 1.0563, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6214326214326215, |
| "grad_norm": 2.02877140045166, |
| "learning_rate": 4.88935246157596e-06, |
| "loss": 1.1007, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.621999621999622, |
| "grad_norm": 1.9961140155792236, |
| "learning_rate": 4.889131824003147e-06, |
| "loss": 1.0799, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6225666225666225, |
| "grad_norm": 2.059858798980713, |
| "learning_rate": 4.888910971655662e-06, |
| "loss": 1.112, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6231336231336231, |
| "grad_norm": 1.9874125719070435, |
| "learning_rate": 4.888689904553356e-06, |
| "loss": 1.1, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6237006237006237, |
| "grad_norm": 1.9725184440612793, |
| "learning_rate": 4.8884686227161034e-06, |
| "loss": 1.0764, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6242676242676243, |
| "grad_norm": 2.049431562423706, |
| "learning_rate": 4.8882471261637985e-06, |
| "loss": 1.1104, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6248346248346248, |
| "grad_norm": 2.1070902347564697, |
| "learning_rate": 4.888025414916351e-06, |
| "loss": 1.0904, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6254016254016254, |
| "grad_norm": 2.054180145263672, |
| "learning_rate": 4.8878034889936924e-06, |
| "loss": 1.1429, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.625968625968626, |
| "grad_norm": 2.134850263595581, |
| "learning_rate": 4.887581348415773e-06, |
| "loss": 1.1664, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6265356265356266, |
| "grad_norm": 2.1475751399993896, |
| "learning_rate": 4.887358993202563e-06, |
| "loss": 1.1337, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6271026271026271, |
| "grad_norm": 2.002305507659912, |
| "learning_rate": 4.8871364233740505e-06, |
| "loss": 1.0891, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6276696276696276, |
| "grad_norm": 1.9193668365478516, |
| "learning_rate": 4.886913638950245e-06, |
| "loss": 1.0568, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6282366282366283, |
| "grad_norm": 2.232956886291504, |
| "learning_rate": 4.886690639951173e-06, |
| "loss": 1.1313, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6288036288036288, |
| "grad_norm": 2.0268003940582275, |
| "learning_rate": 4.8864674263968815e-06, |
| "loss": 1.0933, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6293706293706294, |
| "grad_norm": 2.0502026081085205, |
| "learning_rate": 4.886243998307436e-06, |
| "loss": 1.101, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6299376299376299, |
| "grad_norm": 2.0282256603240967, |
| "learning_rate": 4.886020355702924e-06, |
| "loss": 1.088, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6305046305046305, |
| "grad_norm": 2.0302987098693848, |
| "learning_rate": 4.885796498603448e-06, |
| "loss": 1.1083, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6310716310716311, |
| "grad_norm": 2.0896990299224854, |
| "learning_rate": 4.885572427029133e-06, |
| "loss": 1.1143, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6316386316386317, |
| "grad_norm": 1.9054166078567505, |
| "learning_rate": 4.8853481410001225e-06, |
| "loss": 1.0576, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6322056322056322, |
| "grad_norm": 2.0707859992980957, |
| "learning_rate": 4.885123640536579e-06, |
| "loss": 1.1889, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6327726327726327, |
| "grad_norm": 2.1405484676361084, |
| "learning_rate": 4.884898925658683e-06, |
| "loss": 1.0916, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6333396333396334, |
| "grad_norm": 2.050922155380249, |
| "learning_rate": 4.884673996386637e-06, |
| "loss": 1.0856, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6339066339066339, |
| "grad_norm": 2.1457486152648926, |
| "learning_rate": 4.884448852740661e-06, |
| "loss": 1.0539, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6344736344736345, |
| "grad_norm": 2.2316818237304688, |
| "learning_rate": 4.884223494740994e-06, |
| "loss": 1.094, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.635040635040635, |
| "grad_norm": 1.9859856367111206, |
| "learning_rate": 4.8839979224078955e-06, |
| "loss": 1.1308, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6356076356076356, |
| "grad_norm": 2.208192825317383, |
| "learning_rate": 4.883772135761644e-06, |
| "loss": 1.1239, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6361746361746362, |
| "grad_norm": 1.978887677192688, |
| "learning_rate": 4.8835461348225365e-06, |
| "loss": 1.0661, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6367416367416368, |
| "grad_norm": 2.0355641841888428, |
| "learning_rate": 4.88331991961089e-06, |
| "loss": 1.0705, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6373086373086373, |
| "grad_norm": 2.0058109760284424, |
| "learning_rate": 4.8830934901470405e-06, |
| "loss": 1.0777, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6378756378756378, |
| "grad_norm": 2.11564564704895, |
| "learning_rate": 4.882866846451342e-06, |
| "loss": 1.0747, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6384426384426385, |
| "grad_norm": 2.005120277404785, |
| "learning_rate": 4.88263998854417e-06, |
| "loss": 1.0923, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.639009639009639, |
| "grad_norm": 2.0766701698303223, |
| "learning_rate": 4.882412916445919e-06, |
| "loss": 1.1058, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6395766395766396, |
| "grad_norm": 2.267301082611084, |
| "learning_rate": 4.8821856301770004e-06, |
| "loss": 1.0833, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6401436401436401, |
| "grad_norm": 2.0669078826904297, |
| "learning_rate": 4.881958129757848e-06, |
| "loss": 1.1013, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6407106407106407, |
| "grad_norm": 2.0686910152435303, |
| "learning_rate": 4.8817304152089115e-06, |
| "loss": 1.0707, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6412776412776413, |
| "grad_norm": 2.045891523361206, |
| "learning_rate": 4.881502486550663e-06, |
| "loss": 1.0751, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6418446418446418, |
| "grad_norm": 2.202099323272705, |
| "learning_rate": 4.881274343803593e-06, |
| "loss": 1.1599, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6424116424116424, |
| "grad_norm": 2.1953632831573486, |
| "learning_rate": 4.881045986988209e-06, |
| "loss": 1.1134, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6429786429786429, |
| "grad_norm": 2.0201382637023926, |
| "learning_rate": 4.88081741612504e-06, |
| "loss": 1.1127, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6435456435456436, |
| "grad_norm": 2.0316286087036133, |
| "learning_rate": 4.880588631234635e-06, |
| "loss": 1.1194, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6441126441126441, |
| "grad_norm": 1.9625301361083984, |
| "learning_rate": 4.88035963233756e-06, |
| "loss": 1.1443, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6446796446796447, |
| "grad_norm": 1.9853287935256958, |
| "learning_rate": 4.8801304194544006e-06, |
| "loss": 1.0763, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6452466452466452, |
| "grad_norm": 2.0005953311920166, |
| "learning_rate": 4.879900992605764e-06, |
| "loss": 1.1616, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6458136458136459, |
| "grad_norm": 2.028704881668091, |
| "learning_rate": 4.879671351812273e-06, |
| "loss": 1.1073, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6463806463806464, |
| "grad_norm": 1.998048186302185, |
| "learning_rate": 4.879441497094572e-06, |
| "loss": 1.1021, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.646947646947647, |
| "grad_norm": 2.0080220699310303, |
| "learning_rate": 4.8792114284733264e-06, |
| "loss": 1.137, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6475146475146475, |
| "grad_norm": 2.1290740966796875, |
| "learning_rate": 4.878981145969215e-06, |
| "loss": 1.0943, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.648081648081648, |
| "grad_norm": 1.9962797164916992, |
| "learning_rate": 4.8787506496029416e-06, |
| "loss": 1.0646, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6486486486486487, |
| "grad_norm": 2.1631579399108887, |
| "learning_rate": 4.878519939395225e-06, |
| "loss": 1.219, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6492156492156492, |
| "grad_norm": 2.115211009979248, |
| "learning_rate": 4.8782890153668085e-06, |
| "loss": 1.1076, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6497826497826498, |
| "grad_norm": 2.169215440750122, |
| "learning_rate": 4.878057877538449e-06, |
| "loss": 1.1226, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6503496503496503, |
| "grad_norm": 1.9753289222717285, |
| "learning_rate": 4.877826525930925e-06, |
| "loss": 1.0658, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.650916650916651, |
| "grad_norm": 2.0990424156188965, |
| "learning_rate": 4.877594960565036e-06, |
| "loss": 1.1459, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6514836514836515, |
| "grad_norm": 1.9168016910552979, |
| "learning_rate": 4.877363181461598e-06, |
| "loss": 1.0719, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.652050652050652, |
| "grad_norm": 2.021908760070801, |
| "learning_rate": 4.877131188641445e-06, |
| "loss": 1.1096, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6526176526176526, |
| "grad_norm": 2.1016788482666016, |
| "learning_rate": 4.876898982125435e-06, |
| "loss": 1.1969, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6531846531846531, |
| "grad_norm": 2.069887399673462, |
| "learning_rate": 4.876666561934442e-06, |
| "loss": 1.1377, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6537516537516538, |
| "grad_norm": 2.016388416290283, |
| "learning_rate": 4.876433928089359e-06, |
| "loss": 1.0541, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6543186543186543, |
| "grad_norm": 2.005340814590454, |
| "learning_rate": 4.8762010806111e-06, |
| "loss": 1.102, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6548856548856549, |
| "grad_norm": 2.129586696624756, |
| "learning_rate": 4.875968019520596e-06, |
| "loss": 1.0463, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6554526554526554, |
| "grad_norm": 2.067054510116577, |
| "learning_rate": 4.8757347448388e-06, |
| "loss": 1.0828, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6560196560196561, |
| "grad_norm": 1.9568535089492798, |
| "learning_rate": 4.875501256586682e-06, |
| "loss": 1.1493, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6565866565866566, |
| "grad_norm": 2.120835542678833, |
| "learning_rate": 4.8752675547852304e-06, |
| "loss": 1.1571, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6571536571536571, |
| "grad_norm": 1.8922606706619263, |
| "learning_rate": 4.875033639455455e-06, |
| "loss": 1.1587, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6577206577206577, |
| "grad_norm": 1.973677635192871, |
| "learning_rate": 4.874799510618385e-06, |
| "loss": 1.0663, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6582876582876583, |
| "grad_norm": 1.9903017282485962, |
| "learning_rate": 4.874565168295067e-06, |
| "loss": 1.0962, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6588546588546589, |
| "grad_norm": 2.0548391342163086, |
| "learning_rate": 4.874330612506567e-06, |
| "loss": 1.1234, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6594216594216594, |
| "grad_norm": 2.2835071086883545, |
| "learning_rate": 4.874095843273972e-06, |
| "loss": 1.1057, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.65998865998866, |
| "grad_norm": 2.045727252960205, |
| "learning_rate": 4.873860860618386e-06, |
| "loss": 1.0984, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6605556605556605, |
| "grad_norm": 2.0151989459991455, |
| "learning_rate": 4.8736256645609325e-06, |
| "loss": 1.0752, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6611226611226612, |
| "grad_norm": 2.1338889598846436, |
| "learning_rate": 4.873390255122756e-06, |
| "loss": 1.1256, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6616896616896617, |
| "grad_norm": 2.144221782684326, |
| "learning_rate": 4.873154632325019e-06, |
| "loss": 1.1575, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6622566622566622, |
| "grad_norm": 1.9910509586334229, |
| "learning_rate": 4.872918796188903e-06, |
| "loss": 1.0589, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6628236628236628, |
| "grad_norm": 2.05189847946167, |
| "learning_rate": 4.872682746735609e-06, |
| "loss": 1.1265, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6633906633906634, |
| "grad_norm": 2.0158932209014893, |
| "learning_rate": 4.872446483986355e-06, |
| "loss": 1.0658, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.663957663957664, |
| "grad_norm": 2.1114933490753174, |
| "learning_rate": 4.872210007962384e-06, |
| "loss": 1.129, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6645246645246645, |
| "grad_norm": 1.9263200759887695, |
| "learning_rate": 4.871973318684951e-06, |
| "loss": 1.0575, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6650916650916651, |
| "grad_norm": 2.173271656036377, |
| "learning_rate": 4.871736416175335e-06, |
| "loss": 1.1171, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6656586656586656, |
| "grad_norm": 1.9181896448135376, |
| "learning_rate": 4.871499300454832e-06, |
| "loss": 1.0679, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6662256662256663, |
| "grad_norm": 2.257871627807617, |
| "learning_rate": 4.8712619715447596e-06, |
| "loss": 1.1906, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6667926667926668, |
| "grad_norm": 2.0567474365234375, |
| "learning_rate": 4.871024429466451e-06, |
| "loss": 1.1113, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6673596673596673, |
| "grad_norm": 2.1029961109161377, |
| "learning_rate": 4.870786674241262e-06, |
| "loss": 1.0982, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6679266679266679, |
| "grad_norm": 2.1588408946990967, |
| "learning_rate": 4.870548705890565e-06, |
| "loss": 1.1198, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6684936684936685, |
| "grad_norm": 2.129164695739746, |
| "learning_rate": 4.8703105244357504e-06, |
| "loss": 1.1259, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6690606690606691, |
| "grad_norm": 2.0273795127868652, |
| "learning_rate": 4.870072129898235e-06, |
| "loss": 1.1151, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6696276696276696, |
| "grad_norm": 2.060084342956543, |
| "learning_rate": 4.8698335222994446e-06, |
| "loss": 1.0883, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6701946701946702, |
| "grad_norm": 2.05039119720459, |
| "learning_rate": 4.869594701660832e-06, |
| "loss": 1.1613, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.6707616707616708, |
| "grad_norm": 1.8904789686203003, |
| "learning_rate": 4.869355668003866e-06, |
| "loss": 1.0545, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6713286713286714, |
| "grad_norm": 2.0806965827941895, |
| "learning_rate": 4.8691164213500345e-06, |
| "loss": 1.0701, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6718956718956719, |
| "grad_norm": 1.8995743989944458, |
| "learning_rate": 4.868876961720844e-06, |
| "loss": 1.117, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6724626724626724, |
| "grad_norm": 1.8391963243484497, |
| "learning_rate": 4.868637289137823e-06, |
| "loss": 1.0774, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.673029673029673, |
| "grad_norm": 1.979702115058899, |
| "learning_rate": 4.8683974036225165e-06, |
| "loss": 1.1233, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6735966735966736, |
| "grad_norm": 1.955850601196289, |
| "learning_rate": 4.868157305196489e-06, |
| "loss": 1.1159, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6741636741636742, |
| "grad_norm": 2.1158955097198486, |
| "learning_rate": 4.867916993881324e-06, |
| "loss": 1.1319, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6747306747306747, |
| "grad_norm": 2.1634299755096436, |
| "learning_rate": 4.867676469698627e-06, |
| "loss": 1.1027, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6752976752976753, |
| "grad_norm": 2.337096691131592, |
| "learning_rate": 4.867435732670017e-06, |
| "loss": 1.2458, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6758646758646759, |
| "grad_norm": 1.9703235626220703, |
| "learning_rate": 4.867194782817138e-06, |
| "loss": 1.0695, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.6764316764316765, |
| "grad_norm": 2.1863348484039307, |
| "learning_rate": 4.8669536201616495e-06, |
| "loss": 1.061, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.676998676998677, |
| "grad_norm": 2.2519869804382324, |
| "learning_rate": 4.866712244725232e-06, |
| "loss": 1.1289, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6775656775656775, |
| "grad_norm": 2.0396711826324463, |
| "learning_rate": 4.866470656529581e-06, |
| "loss": 1.0968, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6781326781326781, |
| "grad_norm": 2.0688552856445312, |
| "learning_rate": 4.86622885559642e-06, |
| "loss": 1.1406, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6786996786996787, |
| "grad_norm": 2.0836985111236572, |
| "learning_rate": 4.865986841947482e-06, |
| "loss": 1.0849, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6792666792666793, |
| "grad_norm": 2.044564962387085, |
| "learning_rate": 4.8657446156045245e-06, |
| "loss": 1.1471, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6798336798336798, |
| "grad_norm": 2.9498212337493896, |
| "learning_rate": 4.865502176589323e-06, |
| "loss": 1.1025, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.6804006804006804, |
| "grad_norm": 2.525343179702759, |
| "learning_rate": 4.865259524923671e-06, |
| "loss": 1.1382, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.680967680967681, |
| "grad_norm": 2.1106417179107666, |
| "learning_rate": 4.865016660629383e-06, |
| "loss": 1.1217, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6815346815346816, |
| "grad_norm": 1.9746856689453125, |
| "learning_rate": 4.864773583728291e-06, |
| "loss": 1.1246, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6821016821016821, |
| "grad_norm": 2.0932469367980957, |
| "learning_rate": 4.864530294242247e-06, |
| "loss": 1.0897, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6826686826686826, |
| "grad_norm": 2.091334342956543, |
| "learning_rate": 4.864286792193122e-06, |
| "loss": 1.0791, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.6832356832356833, |
| "grad_norm": 2.004572629928589, |
| "learning_rate": 4.864043077602807e-06, |
| "loss": 1.1148, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6838026838026838, |
| "grad_norm": 2.5519580841064453, |
| "learning_rate": 4.863799150493209e-06, |
| "loss": 1.0816, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.6843696843696844, |
| "grad_norm": 2.1835830211639404, |
| "learning_rate": 4.863555010886257e-06, |
| "loss": 1.158, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6849366849366849, |
| "grad_norm": 2.0027101039886475, |
| "learning_rate": 4.8633106588038995e-06, |
| "loss": 1.142, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6855036855036855, |
| "grad_norm": 2.0194194316864014, |
| "learning_rate": 4.8630660942681004e-06, |
| "loss": 1.0262, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6860706860706861, |
| "grad_norm": 1.973973274230957, |
| "learning_rate": 4.862821317300848e-06, |
| "loss": 1.0618, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6866376866376867, |
| "grad_norm": 2.0087716579437256, |
| "learning_rate": 4.862576327924145e-06, |
| "loss": 1.1234, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6872046872046872, |
| "grad_norm": 2.050034284591675, |
| "learning_rate": 4.862331126160017e-06, |
| "loss": 1.1161, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6877716877716877, |
| "grad_norm": 1.87214994430542, |
| "learning_rate": 4.8620857120305045e-06, |
| "loss": 1.1049, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6883386883386884, |
| "grad_norm": 2.108328104019165, |
| "learning_rate": 4.861840085557671e-06, |
| "loss": 1.0142, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6889056889056889, |
| "grad_norm": 1.8961143493652344, |
| "learning_rate": 4.861594246763596e-06, |
| "loss": 1.0867, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6894726894726895, |
| "grad_norm": 2.029620885848999, |
| "learning_rate": 4.861348195670381e-06, |
| "loss": 1.122, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.69003969003969, |
| "grad_norm": 1.9730783700942993, |
| "learning_rate": 4.861101932300144e-06, |
| "loss": 1.0595, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6906066906066906, |
| "grad_norm": 2.0151636600494385, |
| "learning_rate": 4.860855456675024e-06, |
| "loss": 1.1576, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6911736911736912, |
| "grad_norm": 1.9657617807388306, |
| "learning_rate": 4.8606087688171786e-06, |
| "loss": 1.0515, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6917406917406917, |
| "grad_norm": 2.0308215618133545, |
| "learning_rate": 4.860361868748783e-06, |
| "loss": 1.0984, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 2.583967685699463, |
| "learning_rate": 4.860114756492034e-06, |
| "loss": 1.1196, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.6928746928746928, |
| "grad_norm": 1.9201866388320923, |
| "learning_rate": 4.859867432069145e-06, |
| "loss": 1.0976, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.6934416934416935, |
| "grad_norm": 1.9514093399047852, |
| "learning_rate": 4.859619895502351e-06, |
| "loss": 1.1566, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.694008694008694, |
| "grad_norm": 2.0827386379241943, |
| "learning_rate": 4.859372146813903e-06, |
| "loss": 1.1449, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.6945756945756946, |
| "grad_norm": 2.1744086742401123, |
| "learning_rate": 4.859124186026074e-06, |
| "loss": 1.147, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6951426951426951, |
| "grad_norm": 1.9219239950180054, |
| "learning_rate": 4.858876013161153e-06, |
| "loss": 1.0744, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.6957096957096958, |
| "grad_norm": 2.0406641960144043, |
| "learning_rate": 4.858627628241453e-06, |
| "loss": 1.0826, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6962766962766963, |
| "grad_norm": 1.9926422834396362, |
| "learning_rate": 4.8583790312893005e-06, |
| "loss": 1.0833, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.6968436968436968, |
| "grad_norm": 2.0115749835968018, |
| "learning_rate": 4.858130222327044e-06, |
| "loss": 1.1028, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.6974106974106974, |
| "grad_norm": 2.016007900238037, |
| "learning_rate": 4.85788120137705e-06, |
| "loss": 1.0219, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6979776979776979, |
| "grad_norm": 1.9939322471618652, |
| "learning_rate": 4.8576319684617064e-06, |
| "loss": 1.1327, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.6985446985446986, |
| "grad_norm": 2.1431961059570312, |
| "learning_rate": 4.8573825236034175e-06, |
| "loss": 1.1653, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6991116991116991, |
| "grad_norm": 2.201347589492798, |
| "learning_rate": 4.857132866824607e-06, |
| "loss": 1.105, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.6996786996786997, |
| "grad_norm": 2.0271849632263184, |
| "learning_rate": 4.856882998147719e-06, |
| "loss": 1.1261, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7002457002457002, |
| "grad_norm": 1.9960558414459229, |
| "learning_rate": 4.856632917595214e-06, |
| "loss": 1.137, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.7008127008127009, |
| "grad_norm": 1.947149634361267, |
| "learning_rate": 4.856382625189576e-06, |
| "loss": 1.0938, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7013797013797014, |
| "grad_norm": 2.0694658756256104, |
| "learning_rate": 4.856132120953304e-06, |
| "loss": 1.1362, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.701946701946702, |
| "grad_norm": 1.90565025806427, |
| "learning_rate": 4.8558814049089174e-06, |
| "loss": 1.0873, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7025137025137025, |
| "grad_norm": 1.8799278736114502, |
| "learning_rate": 4.8556304770789545e-06, |
| "loss": 1.0283, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.703080703080703, |
| "grad_norm": 2.064629554748535, |
| "learning_rate": 4.855379337485973e-06, |
| "loss": 1.0587, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7036477036477037, |
| "grad_norm": 2.037976026535034, |
| "learning_rate": 4.8551279861525515e-06, |
| "loss": 1.0816, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.7042147042147042, |
| "grad_norm": 2.141584873199463, |
| "learning_rate": 4.854876423101283e-06, |
| "loss": 1.156, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7047817047817048, |
| "grad_norm": 1.953120470046997, |
| "learning_rate": 4.854624648354782e-06, |
| "loss": 1.1033, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7053487053487053, |
| "grad_norm": 2.0960190296173096, |
| "learning_rate": 4.8543726619356846e-06, |
| "loss": 1.1671, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.705915705915706, |
| "grad_norm": 2.338067054748535, |
| "learning_rate": 4.854120463866641e-06, |
| "loss": 1.1358, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7064827064827065, |
| "grad_norm": 2.08161997795105, |
| "learning_rate": 4.8538680541703245e-06, |
| "loss": 1.1562, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.707049707049707, |
| "grad_norm": 2.118591785430908, |
| "learning_rate": 4.853615432869425e-06, |
| "loss": 1.1358, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7076167076167076, |
| "grad_norm": 2.370490074157715, |
| "learning_rate": 4.853362599986653e-06, |
| "loss": 1.1824, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7081837081837082, |
| "grad_norm": 1.973972201347351, |
| "learning_rate": 4.853109555544737e-06, |
| "loss": 1.1056, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.7087507087507088, |
| "grad_norm": 2.1963696479797363, |
| "learning_rate": 4.852856299566425e-06, |
| "loss": 1.1267, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7093177093177093, |
| "grad_norm": 2.1779510974884033, |
| "learning_rate": 4.852602832074483e-06, |
| "loss": 1.1242, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7098847098847099, |
| "grad_norm": 1.9847594499588013, |
| "learning_rate": 4.852349153091699e-06, |
| "loss": 1.0993, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7104517104517104, |
| "grad_norm": 2.06015944480896, |
| "learning_rate": 4.852095262640875e-06, |
| "loss": 1.1279, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7110187110187111, |
| "grad_norm": 1.868611454963684, |
| "learning_rate": 4.851841160744836e-06, |
| "loss": 1.118, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7115857115857116, |
| "grad_norm": 2.069683074951172, |
| "learning_rate": 4.851586847426426e-06, |
| "loss": 1.109, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7121527121527121, |
| "grad_norm": 2.7425143718719482, |
| "learning_rate": 4.8513323227085055e-06, |
| "loss": 1.09, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7127197127197127, |
| "grad_norm": 2.0621962547302246, |
| "learning_rate": 4.8510775866139556e-06, |
| "loss": 1.1183, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.7132867132867133, |
| "grad_norm": 2.1513285636901855, |
| "learning_rate": 4.850822639165676e-06, |
| "loss": 1.0869, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7138537138537139, |
| "grad_norm": 1.9639217853546143, |
| "learning_rate": 4.850567480386586e-06, |
| "loss": 1.0739, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.7144207144207144, |
| "grad_norm": 2.014523983001709, |
| "learning_rate": 4.850312110299625e-06, |
| "loss": 1.1124, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.714987714987715, |
| "grad_norm": 1.9941346645355225, |
| "learning_rate": 4.850056528927748e-06, |
| "loss": 1.0457, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7155547155547155, |
| "grad_norm": 1.8968991041183472, |
| "learning_rate": 4.8498007362939304e-06, |
| "loss": 1.0871, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7161217161217162, |
| "grad_norm": 1.9723222255706787, |
| "learning_rate": 4.8495447324211685e-06, |
| "loss": 1.0315, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7166887166887167, |
| "grad_norm": 2.0764787197113037, |
| "learning_rate": 4.849288517332476e-06, |
| "loss": 1.1086, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7172557172557172, |
| "grad_norm": 2.0694315433502197, |
| "learning_rate": 4.849032091050885e-06, |
| "loss": 1.0452, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7178227178227178, |
| "grad_norm": 1.9298354387283325, |
| "learning_rate": 4.848775453599448e-06, |
| "loss": 1.0481, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7183897183897184, |
| "grad_norm": 2.0368900299072266, |
| "learning_rate": 4.848518605001235e-06, |
| "loss": 1.068, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.718956718956719, |
| "grad_norm": 1.9808545112609863, |
| "learning_rate": 4.848261545279337e-06, |
| "loss": 1.0679, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7195237195237195, |
| "grad_norm": 2.044835090637207, |
| "learning_rate": 4.848004274456861e-06, |
| "loss": 1.0736, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7200907200907201, |
| "grad_norm": 2.0716989040374756, |
| "learning_rate": 4.8477467925569365e-06, |
| "loss": 1.0793, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7206577206577207, |
| "grad_norm": 2.009671688079834, |
| "learning_rate": 4.84748909960271e-06, |
| "loss": 1.1016, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7212247212247213, |
| "grad_norm": 2.013240098953247, |
| "learning_rate": 4.847231195617346e-06, |
| "loss": 1.0615, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7217917217917218, |
| "grad_norm": 1.89451265335083, |
| "learning_rate": 4.8469730806240305e-06, |
| "loss": 1.0454, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.7223587223587223, |
| "grad_norm": 2.019988536834717, |
| "learning_rate": 4.846714754645967e-06, |
| "loss": 1.0882, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7229257229257229, |
| "grad_norm": 2.1510188579559326, |
| "learning_rate": 4.846456217706376e-06, |
| "loss": 1.1119, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7234927234927235, |
| "grad_norm": 2.1269772052764893, |
| "learning_rate": 4.846197469828503e-06, |
| "loss": 1.0481, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7240597240597241, |
| "grad_norm": 2.001478433609009, |
| "learning_rate": 4.845938511035605e-06, |
| "loss": 1.1756, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7246267246267246, |
| "grad_norm": 1.980617880821228, |
| "learning_rate": 4.845679341350963e-06, |
| "loss": 1.0841, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7251937251937252, |
| "grad_norm": 2.010340452194214, |
| "learning_rate": 4.845419960797876e-06, |
| "loss": 1.1233, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7257607257607258, |
| "grad_norm": 2.12774395942688, |
| "learning_rate": 4.84516036939966e-06, |
| "loss": 1.1496, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7263277263277264, |
| "grad_norm": 2.0324392318725586, |
| "learning_rate": 4.844900567179652e-06, |
| "loss": 1.1279, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7268947268947269, |
| "grad_norm": 2.0038788318634033, |
| "learning_rate": 4.844640554161209e-06, |
| "loss": 1.0345, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7274617274617274, |
| "grad_norm": 2.1524405479431152, |
| "learning_rate": 4.844380330367701e-06, |
| "loss": 1.1229, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.728028728028728, |
| "grad_norm": 1.9963539838790894, |
| "learning_rate": 4.8441198958225255e-06, |
| "loss": 1.1124, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7285957285957286, |
| "grad_norm": 2.119422197341919, |
| "learning_rate": 4.843859250549093e-06, |
| "loss": 1.117, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7291627291627292, |
| "grad_norm": 2.045699119567871, |
| "learning_rate": 4.8435983945708345e-06, |
| "loss": 1.1382, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7297297297297297, |
| "grad_norm": 1.9394176006317139, |
| "learning_rate": 4.8433373279112e-06, |
| "loss": 1.1363, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7302967302967303, |
| "grad_norm": 2.0747976303100586, |
| "learning_rate": 4.8430760505936596e-06, |
| "loss": 1.0777, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7308637308637309, |
| "grad_norm": 1.814608097076416, |
| "learning_rate": 4.842814562641699e-06, |
| "loss": 1.0878, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7314307314307315, |
| "grad_norm": 1.9501152038574219, |
| "learning_rate": 4.842552864078827e-06, |
| "loss": 1.1065, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.731997731997732, |
| "grad_norm": 1.955440878868103, |
| "learning_rate": 4.8422909549285686e-06, |
| "loss": 1.0804, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7325647325647325, |
| "grad_norm": 2.325143337249756, |
| "learning_rate": 4.842028835214469e-06, |
| "loss": 1.1443, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7331317331317331, |
| "grad_norm": 1.977352499961853, |
| "learning_rate": 4.841766504960091e-06, |
| "loss": 1.1329, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7336987336987337, |
| "grad_norm": 2.104295015335083, |
| "learning_rate": 4.8415039641890185e-06, |
| "loss": 1.0813, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7342657342657343, |
| "grad_norm": 2.1327080726623535, |
| "learning_rate": 4.841241212924851e-06, |
| "loss": 1.0582, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7348327348327348, |
| "grad_norm": 2.021190643310547, |
| "learning_rate": 4.840978251191212e-06, |
| "loss": 1.123, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7353997353997354, |
| "grad_norm": 2.0947611331939697, |
| "learning_rate": 4.840715079011738e-06, |
| "loss": 1.0579, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.735966735966736, |
| "grad_norm": 2.0282726287841797, |
| "learning_rate": 4.840451696410087e-06, |
| "loss": 1.1516, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7365337365337365, |
| "grad_norm": 2.0446856021881104, |
| "learning_rate": 4.840188103409939e-06, |
| "loss": 1.1577, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7371007371007371, |
| "grad_norm": 1.9985852241516113, |
| "learning_rate": 4.839924300034988e-06, |
| "loss": 0.9967, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7376677376677376, |
| "grad_norm": 2.1126623153686523, |
| "learning_rate": 4.839660286308951e-06, |
| "loss": 1.1316, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7382347382347383, |
| "grad_norm": 2.0512776374816895, |
| "learning_rate": 4.839396062255558e-06, |
| "loss": 1.1535, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.7388017388017388, |
| "grad_norm": 2.0764071941375732, |
| "learning_rate": 4.839131627898565e-06, |
| "loss": 1.1503, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7393687393687394, |
| "grad_norm": 1.9599411487579346, |
| "learning_rate": 4.838866983261745e-06, |
| "loss": 1.1099, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7399357399357399, |
| "grad_norm": 2.029916524887085, |
| "learning_rate": 4.838602128368885e-06, |
| "loss": 1.0437, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7405027405027405, |
| "grad_norm": 2.106447219848633, |
| "learning_rate": 4.838337063243797e-06, |
| "loss": 1.0959, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7410697410697411, |
| "grad_norm": 2.0462288856506348, |
| "learning_rate": 4.838071787910308e-06, |
| "loss": 1.0623, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7416367416367416, |
| "grad_norm": 1.8768260478973389, |
| "learning_rate": 4.837806302392266e-06, |
| "loss": 1.1131, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7422037422037422, |
| "grad_norm": 1.8805971145629883, |
| "learning_rate": 4.837540606713538e-06, |
| "loss": 1.1141, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7427707427707427, |
| "grad_norm": 2.0501909255981445, |
| "learning_rate": 4.837274700898007e-06, |
| "loss": 1.096, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7433377433377434, |
| "grad_norm": 2.987117052078247, |
| "learning_rate": 4.837008584969579e-06, |
| "loss": 1.182, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7439047439047439, |
| "grad_norm": 2.1888184547424316, |
| "learning_rate": 4.836742258952176e-06, |
| "loss": 1.0684, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7444717444717445, |
| "grad_norm": 2.088069438934326, |
| "learning_rate": 4.836475722869741e-06, |
| "loss": 1.1147, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.745038745038745, |
| "grad_norm": 1.9525327682495117, |
| "learning_rate": 4.836208976746233e-06, |
| "loss": 1.0685, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7456057456057456, |
| "grad_norm": 1.9905927181243896, |
| "learning_rate": 4.835942020605633e-06, |
| "loss": 1.1229, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.7461727461727462, |
| "grad_norm": 2.027682304382324, |
| "learning_rate": 4.835674854471938e-06, |
| "loss": 1.0984, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.7467397467397467, |
| "grad_norm": 1.9673049449920654, |
| "learning_rate": 4.835407478369166e-06, |
| "loss": 1.0853, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7473067473067473, |
| "grad_norm": 2.155024290084839, |
| "learning_rate": 4.835139892321353e-06, |
| "loss": 1.1211, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.7478737478737478, |
| "grad_norm": 2.1094534397125244, |
| "learning_rate": 4.834872096352554e-06, |
| "loss": 1.1137, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7484407484407485, |
| "grad_norm": 2.0188000202178955, |
| "learning_rate": 4.834604090486844e-06, |
| "loss": 1.0767, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.749007749007749, |
| "grad_norm": 2.0796914100646973, |
| "learning_rate": 4.834335874748315e-06, |
| "loss": 1.1056, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.7495747495747496, |
| "grad_norm": 1.9424731731414795, |
| "learning_rate": 4.8340674491610786e-06, |
| "loss": 1.0779, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7501417501417501, |
| "grad_norm": 2.0110368728637695, |
| "learning_rate": 4.833798813749265e-06, |
| "loss": 1.125, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7507087507087508, |
| "grad_norm": 2.0801455974578857, |
| "learning_rate": 4.833529968537024e-06, |
| "loss": 1.1088, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.7512757512757513, |
| "grad_norm": 2.0912394523620605, |
| "learning_rate": 4.833260913548524e-06, |
| "loss": 1.1316, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.7518427518427518, |
| "grad_norm": 1.9779878854751587, |
| "learning_rate": 4.832991648807951e-06, |
| "loss": 1.0939, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.7524097524097524, |
| "grad_norm": 2.2234911918640137, |
| "learning_rate": 4.832722174339513e-06, |
| "loss": 1.096, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.7529767529767529, |
| "grad_norm": 2.1075234413146973, |
| "learning_rate": 4.832452490167433e-06, |
| "loss": 1.1334, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7535437535437536, |
| "grad_norm": 2.2699432373046875, |
| "learning_rate": 4.832182596315956e-06, |
| "loss": 1.1511, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.7541107541107541, |
| "grad_norm": 1.9237340688705444, |
| "learning_rate": 4.8319124928093445e-06, |
| "loss": 1.0717, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7546777546777547, |
| "grad_norm": 1.976044774055481, |
| "learning_rate": 4.831642179671878e-06, |
| "loss": 1.0268, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.7552447552447552, |
| "grad_norm": 1.9518632888793945, |
| "learning_rate": 4.831371656927858e-06, |
| "loss": 1.1481, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7558117558117559, |
| "grad_norm": 2.0657694339752197, |
| "learning_rate": 4.831100924601604e-06, |
| "loss": 1.0781, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7563787563787564, |
| "grad_norm": 2.010305166244507, |
| "learning_rate": 4.830829982717454e-06, |
| "loss": 1.0608, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.7569457569457569, |
| "grad_norm": 1.9423797130584717, |
| "learning_rate": 4.8305588312997635e-06, |
| "loss": 1.1217, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7575127575127575, |
| "grad_norm": 1.993203043937683, |
| "learning_rate": 4.830287470372909e-06, |
| "loss": 1.1038, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.758079758079758, |
| "grad_norm": 1.8566621541976929, |
| "learning_rate": 4.830015899961285e-06, |
| "loss": 1.0882, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7586467586467587, |
| "grad_norm": 2.2911787033081055, |
| "learning_rate": 4.829744120089304e-06, |
| "loss": 1.1048, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7592137592137592, |
| "grad_norm": 2.038271188735962, |
| "learning_rate": 4.829472130781398e-06, |
| "loss": 1.0803, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7597807597807598, |
| "grad_norm": 2.142582893371582, |
| "learning_rate": 4.8291999320620185e-06, |
| "loss": 1.1509, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7603477603477603, |
| "grad_norm": 1.9460140466690063, |
| "learning_rate": 4.828927523955636e-06, |
| "loss": 1.0876, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.760914760914761, |
| "grad_norm": 1.8868871927261353, |
| "learning_rate": 4.828654906486737e-06, |
| "loss": 1.1114, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7614817614817615, |
| "grad_norm": 1.9232128858566284, |
| "learning_rate": 4.8283820796798305e-06, |
| "loss": 1.1119, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.762048762048762, |
| "grad_norm": 2.0091655254364014, |
| "learning_rate": 4.828109043559443e-06, |
| "loss": 1.0971, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.7626157626157626, |
| "grad_norm": 2.1124536991119385, |
| "learning_rate": 4.827835798150117e-06, |
| "loss": 1.0622, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.7631827631827632, |
| "grad_norm": 2.06380295753479, |
| "learning_rate": 4.827562343476419e-06, |
| "loss": 1.1451, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7637497637497638, |
| "grad_norm": 2.1077919006347656, |
| "learning_rate": 4.827288679562931e-06, |
| "loss": 1.0732, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.7643167643167643, |
| "grad_norm": 2.0187129974365234, |
| "learning_rate": 4.827014806434254e-06, |
| "loss": 1.1245, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7648837648837649, |
| "grad_norm": 1.9610968828201294, |
| "learning_rate": 4.826740724115007e-06, |
| "loss": 1.1366, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7654507654507654, |
| "grad_norm": 2.1612725257873535, |
| "learning_rate": 4.826466432629831e-06, |
| "loss": 1.1181, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.766017766017766, |
| "grad_norm": 2.034870147705078, |
| "learning_rate": 4.826191932003384e-06, |
| "loss": 1.0504, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7665847665847666, |
| "grad_norm": 2.0349764823913574, |
| "learning_rate": 4.825917222260342e-06, |
| "loss": 1.0562, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7671517671517671, |
| "grad_norm": 2.0796329975128174, |
| "learning_rate": 4.825642303425399e-06, |
| "loss": 1.123, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7677187677187677, |
| "grad_norm": 1.989540696144104, |
| "learning_rate": 4.825367175523272e-06, |
| "loss": 1.0993, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.7682857682857683, |
| "grad_norm": 1.9621320962905884, |
| "learning_rate": 4.825091838578691e-06, |
| "loss": 1.0958, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7688527688527689, |
| "grad_norm": 1.8925583362579346, |
| "learning_rate": 4.8248162926164115e-06, |
| "loss": 1.1285, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7694197694197694, |
| "grad_norm": 2.136880397796631, |
| "learning_rate": 4.824540537661201e-06, |
| "loss": 1.0884, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.76998676998677, |
| "grad_norm": 3.522716760635376, |
| "learning_rate": 4.824264573737849e-06, |
| "loss": 1.0651, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7705537705537705, |
| "grad_norm": 1.9552546739578247, |
| "learning_rate": 4.823988400871166e-06, |
| "loss": 1.0475, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7711207711207712, |
| "grad_norm": 2.041712999343872, |
| "learning_rate": 4.823712019085978e-06, |
| "loss": 1.091, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7716877716877717, |
| "grad_norm": 1.9892549514770508, |
| "learning_rate": 4.823435428407129e-06, |
| "loss": 1.0681, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7722547722547722, |
| "grad_norm": 2.180232524871826, |
| "learning_rate": 4.823158628859487e-06, |
| "loss": 1.0711, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7728217728217728, |
| "grad_norm": 2.0575640201568604, |
| "learning_rate": 4.822881620467932e-06, |
| "loss": 1.11, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7733887733887734, |
| "grad_norm": 2.0994784832000732, |
| "learning_rate": 4.822604403257367e-06, |
| "loss": 1.0946, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.773955773955774, |
| "grad_norm": 1.9343117475509644, |
| "learning_rate": 4.822326977252714e-06, |
| "loss": 1.1042, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7745227745227745, |
| "grad_norm": 1.8998512029647827, |
| "learning_rate": 4.822049342478912e-06, |
| "loss": 1.0746, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7750897750897751, |
| "grad_norm": 1.939725637435913, |
| "learning_rate": 4.821771498960919e-06, |
| "loss": 1.1468, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7756567756567757, |
| "grad_norm": 2.0788631439208984, |
| "learning_rate": 4.821493446723713e-06, |
| "loss": 1.1207, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7762237762237763, |
| "grad_norm": 1.9664413928985596, |
| "learning_rate": 4.821215185792288e-06, |
| "loss": 1.1096, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7767907767907768, |
| "grad_norm": 2.1204512119293213, |
| "learning_rate": 4.820936716191662e-06, |
| "loss": 1.1106, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7773577773577773, |
| "grad_norm": 2.1662580966949463, |
| "learning_rate": 4.8206580379468655e-06, |
| "loss": 1.1301, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7779247779247779, |
| "grad_norm": 2.049109697341919, |
| "learning_rate": 4.820379151082952e-06, |
| "loss": 1.0862, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7784917784917785, |
| "grad_norm": 2.087808132171631, |
| "learning_rate": 4.820100055624992e-06, |
| "loss": 1.1094, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.7790587790587791, |
| "grad_norm": 1.9093419313430786, |
| "learning_rate": 4.819820751598076e-06, |
| "loss": 1.1155, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7796257796257796, |
| "grad_norm": 2.1120524406433105, |
| "learning_rate": 4.819541239027311e-06, |
| "loss": 1.0826, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7801927801927802, |
| "grad_norm": 2.1566476821899414, |
| "learning_rate": 4.819261517937826e-06, |
| "loss": 1.1011, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7807597807597808, |
| "grad_norm": 2.032398223876953, |
| "learning_rate": 4.818981588354767e-06, |
| "loss": 1.0908, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7813267813267813, |
| "grad_norm": 1.8764480352401733, |
| "learning_rate": 4.8187014503032955e-06, |
| "loss": 1.0315, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7818937818937819, |
| "grad_norm": 2.0319604873657227, |
| "learning_rate": 4.818421103808599e-06, |
| "loss": 1.0873, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7824607824607824, |
| "grad_norm": 1.8628435134887695, |
| "learning_rate": 4.818140548895877e-06, |
| "loss": 1.0493, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.783027783027783, |
| "grad_norm": 2.18454647064209, |
| "learning_rate": 4.817859785590352e-06, |
| "loss": 1.1007, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7835947835947836, |
| "grad_norm": 1.9577960968017578, |
| "learning_rate": 4.817578813917262e-06, |
| "loss": 1.1401, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7841617841617842, |
| "grad_norm": 2.080314874649048, |
| "learning_rate": 4.817297633901867e-06, |
| "loss": 1.0414, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.7847287847287847, |
| "grad_norm": 1.928419828414917, |
| "learning_rate": 4.8170162455694435e-06, |
| "loss": 1.151, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.7852957852957853, |
| "grad_norm": 2.083796739578247, |
| "learning_rate": 4.816734648945287e-06, |
| "loss": 1.1054, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7858627858627859, |
| "grad_norm": 1.9626907110214233, |
| "learning_rate": 4.816452844054712e-06, |
| "loss": 1.1136, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7864297864297864, |
| "grad_norm": 2.0195517539978027, |
| "learning_rate": 4.816170830923053e-06, |
| "loss": 1.0842, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.786996786996787, |
| "grad_norm": 1.9990777969360352, |
| "learning_rate": 4.815888609575661e-06, |
| "loss": 1.0576, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.7875637875637875, |
| "grad_norm": 1.8750730752944946, |
| "learning_rate": 4.815606180037907e-06, |
| "loss": 1.0409, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.7881307881307882, |
| "grad_norm": 2.1351377964019775, |
| "learning_rate": 4.81532354233518e-06, |
| "loss": 1.0463, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7886977886977887, |
| "grad_norm": 1.896248698234558, |
| "learning_rate": 4.815040696492888e-06, |
| "loss": 1.1202, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.7892647892647893, |
| "grad_norm": 2.018113136291504, |
| "learning_rate": 4.814757642536459e-06, |
| "loss": 1.1117, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7898317898317898, |
| "grad_norm": 1.9583754539489746, |
| "learning_rate": 4.814474380491338e-06, |
| "loss": 1.1024, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7903987903987904, |
| "grad_norm": 2.1564619541168213, |
| "learning_rate": 4.814190910382988e-06, |
| "loss": 1.1485, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.790965790965791, |
| "grad_norm": 1.9851993322372437, |
| "learning_rate": 4.813907232236894e-06, |
| "loss": 1.0982, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7915327915327915, |
| "grad_norm": 1.9537993669509888, |
| "learning_rate": 4.813623346078557e-06, |
| "loss": 1.0622, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7920997920997921, |
| "grad_norm": 2.0332624912261963, |
| "learning_rate": 4.813339251933497e-06, |
| "loss": 1.0678, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7926667926667926, |
| "grad_norm": 1.9309813976287842, |
| "learning_rate": 4.8130549498272535e-06, |
| "loss": 1.0916, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7932337932337933, |
| "grad_norm": 2.0063681602478027, |
| "learning_rate": 4.812770439785383e-06, |
| "loss": 1.0946, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7938007938007938, |
| "grad_norm": 1.9828499555587769, |
| "learning_rate": 4.812485721833465e-06, |
| "loss": 1.0701, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7943677943677944, |
| "grad_norm": 1.8927239179611206, |
| "learning_rate": 4.812200795997091e-06, |
| "loss": 1.075, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.7949347949347949, |
| "grad_norm": 2.046656370162964, |
| "learning_rate": 4.811915662301877e-06, |
| "loss": 1.0895, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7955017955017955, |
| "grad_norm": 2.0462124347686768, |
| "learning_rate": 4.811630320773455e-06, |
| "loss": 1.0668, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.7960687960687961, |
| "grad_norm": 1.8638513088226318, |
| "learning_rate": 4.811344771437476e-06, |
| "loss": 1.102, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7966357966357966, |
| "grad_norm": 2.1097896099090576, |
| "learning_rate": 4.811059014319611e-06, |
| "loss": 1.0449, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.7972027972027972, |
| "grad_norm": 1.946921706199646, |
| "learning_rate": 4.8107730494455475e-06, |
| "loss": 1.0702, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.7977697977697977, |
| "grad_norm": 2.3324482440948486, |
| "learning_rate": 4.810486876840992e-06, |
| "loss": 1.0534, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7983367983367984, |
| "grad_norm": 2.0709335803985596, |
| "learning_rate": 4.810200496531673e-06, |
| "loss": 1.0584, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7989037989037989, |
| "grad_norm": 2.0188565254211426, |
| "learning_rate": 4.809913908543332e-06, |
| "loss": 1.0851, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7994707994707995, |
| "grad_norm": 2.03055739402771, |
| "learning_rate": 4.809627112901735e-06, |
| "loss": 1.1214, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8000378000378, |
| "grad_norm": 2.0110137462615967, |
| "learning_rate": 4.809340109632662e-06, |
| "loss": 1.1203, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.8006048006048007, |
| "grad_norm": 2.0457754135131836, |
| "learning_rate": 4.809052898761915e-06, |
| "loss": 1.0835, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8011718011718012, |
| "grad_norm": 2.07012939453125, |
| "learning_rate": 4.808765480315312e-06, |
| "loss": 1.1397, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.8017388017388017, |
| "grad_norm": 1.9364756345748901, |
| "learning_rate": 4.808477854318691e-06, |
| "loss": 1.0797, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8023058023058023, |
| "grad_norm": 1.9942682981491089, |
| "learning_rate": 4.80819002079791e-06, |
| "loss": 1.1026, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.8028728028728028, |
| "grad_norm": 2.145369529724121, |
| "learning_rate": 4.807901979778843e-06, |
| "loss": 1.0976, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8034398034398035, |
| "grad_norm": 1.9216880798339844, |
| "learning_rate": 4.807613731287384e-06, |
| "loss": 1.1241, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.804006804006804, |
| "grad_norm": 1.972402811050415, |
| "learning_rate": 4.807325275349446e-06, |
| "loss": 1.0899, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8045738045738046, |
| "grad_norm": 1.946427583694458, |
| "learning_rate": 4.80703661199096e-06, |
| "loss": 1.089, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.8051408051408051, |
| "grad_norm": 2.1797311305999756, |
| "learning_rate": 4.806747741237876e-06, |
| "loss": 1.1445, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8057078057078058, |
| "grad_norm": 2.0358588695526123, |
| "learning_rate": 4.806458663116161e-06, |
| "loss": 1.1113, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.8062748062748063, |
| "grad_norm": 2.0892183780670166, |
| "learning_rate": 4.806169377651805e-06, |
| "loss": 1.1153, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.8068418068418068, |
| "grad_norm": 1.9392249584197998, |
| "learning_rate": 4.805879884870811e-06, |
| "loss": 1.1191, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.8074088074088074, |
| "grad_norm": 1.9548989534378052, |
| "learning_rate": 4.805590184799206e-06, |
| "loss": 1.0886, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8079758079758079, |
| "grad_norm": 2.1020822525024414, |
| "learning_rate": 4.80530027746303e-06, |
| "loss": 1.0654, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8085428085428086, |
| "grad_norm": 1.968111276626587, |
| "learning_rate": 4.805010162888347e-06, |
| "loss": 1.0522, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8091098091098091, |
| "grad_norm": 2.0265655517578125, |
| "learning_rate": 4.804719841101237e-06, |
| "loss": 1.1122, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.8096768096768097, |
| "grad_norm": 2.0999677181243896, |
| "learning_rate": 4.8044293121277975e-06, |
| "loss": 1.075, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8102438102438102, |
| "grad_norm": 2.0023441314697266, |
| "learning_rate": 4.8041385759941475e-06, |
| "loss": 1.1009, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 2.0415596961975098, |
| "learning_rate": 4.803847632726422e-06, |
| "loss": 1.0809, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8113778113778114, |
| "grad_norm": 1.9322887659072876, |
| "learning_rate": 4.803556482350777e-06, |
| "loss": 1.0597, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.8119448119448119, |
| "grad_norm": 1.927483081817627, |
| "learning_rate": 4.8032651248933855e-06, |
| "loss": 1.0894, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.8125118125118125, |
| "grad_norm": 1.9914480447769165, |
| "learning_rate": 4.802973560380439e-06, |
| "loss": 1.0948, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.8130788130788131, |
| "grad_norm": 1.9616143703460693, |
| "learning_rate": 4.802681788838149e-06, |
| "loss": 1.096, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8136458136458137, |
| "grad_norm": 1.884313941001892, |
| "learning_rate": 4.802389810292744e-06, |
| "loss": 1.0895, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.8142128142128142, |
| "grad_norm": 1.8811097145080566, |
| "learning_rate": 4.802097624770472e-06, |
| "loss": 1.0194, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.8147798147798148, |
| "grad_norm": 1.9978089332580566, |
| "learning_rate": 4.8018052322976e-06, |
| "loss": 1.0993, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.8153468153468153, |
| "grad_norm": 2.0147740840911865, |
| "learning_rate": 4.801512632900413e-06, |
| "loss": 1.0659, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.815913815913816, |
| "grad_norm": 1.9632623195648193, |
| "learning_rate": 4.801219826605213e-06, |
| "loss": 1.079, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.8164808164808165, |
| "grad_norm": 1.959356665611267, |
| "learning_rate": 4.800926813438325e-06, |
| "loss": 1.133, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.817047817047817, |
| "grad_norm": 2.189089298248291, |
| "learning_rate": 4.8006335934260885e-06, |
| "loss": 1.1282, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.8176148176148176, |
| "grad_norm": 2.0199830532073975, |
| "learning_rate": 4.800340166594862e-06, |
| "loss": 1.0958, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 2.248572587966919, |
| "learning_rate": 4.800046532971025e-06, |
| "loss": 1.1197, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.8187488187488188, |
| "grad_norm": 2.16886043548584, |
| "learning_rate": 4.799752692580973e-06, |
| "loss": 1.1203, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8193158193158193, |
| "grad_norm": 1.995705008506775, |
| "learning_rate": 4.799458645451122e-06, |
| "loss": 1.0579, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8198828198828199, |
| "grad_norm": 2.0506200790405273, |
| "learning_rate": 4.799164391607908e-06, |
| "loss": 1.138, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.8204498204498204, |
| "grad_norm": 2.047494411468506, |
| "learning_rate": 4.798869931077779e-06, |
| "loss": 1.1272, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.821016821016821, |
| "grad_norm": 2.0765833854675293, |
| "learning_rate": 4.798575263887208e-06, |
| "loss": 1.0581, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8215838215838216, |
| "grad_norm": 2.1554183959960938, |
| "learning_rate": 4.798280390062685e-06, |
| "loss": 1.1212, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8221508221508221, |
| "grad_norm": 2.6534454822540283, |
| "learning_rate": 4.797985309630718e-06, |
| "loss": 1.1472, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8227178227178227, |
| "grad_norm": 2.0224509239196777, |
| "learning_rate": 4.797690022617834e-06, |
| "loss": 1.1336, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8232848232848233, |
| "grad_norm": 2.20784068107605, |
| "learning_rate": 4.797394529050577e-06, |
| "loss": 1.1146, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8238518238518239, |
| "grad_norm": 1.9155614376068115, |
| "learning_rate": 4.797098828955512e-06, |
| "loss": 1.0795, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8244188244188244, |
| "grad_norm": 2.068676233291626, |
| "learning_rate": 4.7968029223592205e-06, |
| "loss": 1.0609, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.824985824985825, |
| "grad_norm": 1.9581069946289062, |
| "learning_rate": 4.796506809288305e-06, |
| "loss": 1.1375, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.8255528255528255, |
| "grad_norm": 1.9850029945373535, |
| "learning_rate": 4.796210489769383e-06, |
| "loss": 1.0904, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8261198261198262, |
| "grad_norm": 2.549131155014038, |
| "learning_rate": 4.7959139638290945e-06, |
| "loss": 1.0845, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8266868266868267, |
| "grad_norm": 2.00335693359375, |
| "learning_rate": 4.7956172314940945e-06, |
| "loss": 1.1247, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8272538272538272, |
| "grad_norm": 2.0605878829956055, |
| "learning_rate": 4.795320292791059e-06, |
| "loss": 1.0974, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8278208278208278, |
| "grad_norm": 1.9676733016967773, |
| "learning_rate": 4.7950231477466825e-06, |
| "loss": 1.1461, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8283878283878284, |
| "grad_norm": 2.107635974884033, |
| "learning_rate": 4.794725796387677e-06, |
| "loss": 1.1107, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.828954828954829, |
| "grad_norm": 2.1952548027038574, |
| "learning_rate": 4.794428238740771e-06, |
| "loss": 1.0946, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8295218295218295, |
| "grad_norm": 2.205260992050171, |
| "learning_rate": 4.794130474832718e-06, |
| "loss": 1.1676, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.83008883008883, |
| "grad_norm": 2.144150733947754, |
| "learning_rate": 4.793832504690283e-06, |
| "loss": 1.1435, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8306558306558307, |
| "grad_norm": 1.9926881790161133, |
| "learning_rate": 4.793534328340253e-06, |
| "loss": 1.0526, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8312228312228312, |
| "grad_norm": 2.093427896499634, |
| "learning_rate": 4.7932359458094335e-06, |
| "loss": 1.15, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8317898317898318, |
| "grad_norm": 2.014958620071411, |
| "learning_rate": 4.792937357124647e-06, |
| "loss": 1.0616, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8323568323568323, |
| "grad_norm": 1.9660993814468384, |
| "learning_rate": 4.792638562312738e-06, |
| "loss": 1.0524, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8329238329238329, |
| "grad_norm": 2.1528244018554688, |
| "learning_rate": 4.792339561400565e-06, |
| "loss": 1.0605, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8334908334908335, |
| "grad_norm": 1.8813989162445068, |
| "learning_rate": 4.792040354415008e-06, |
| "loss": 1.0126, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8340578340578341, |
| "grad_norm": 2.1694982051849365, |
| "learning_rate": 4.791740941382963e-06, |
| "loss": 1.07, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8346248346248346, |
| "grad_norm": 2.0867135524749756, |
| "learning_rate": 4.7914413223313484e-06, |
| "loss": 1.0531, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8351918351918352, |
| "grad_norm": 2.2400665283203125, |
| "learning_rate": 4.791141497287098e-06, |
| "loss": 1.1123, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8357588357588358, |
| "grad_norm": 2.0336430072784424, |
| "learning_rate": 4.7908414662771655e-06, |
| "loss": 1.0809, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.8363258363258363, |
| "grad_norm": 2.1923985481262207, |
| "learning_rate": 4.790541229328522e-06, |
| "loss": 1.0294, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8368928368928369, |
| "grad_norm": 2.0450212955474854, |
| "learning_rate": 4.790240786468158e-06, |
| "loss": 1.135, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8374598374598374, |
| "grad_norm": 1.9666211605072021, |
| "learning_rate": 4.789940137723082e-06, |
| "loss": 1.0146, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.838026838026838, |
| "grad_norm": 1.9747493267059326, |
| "learning_rate": 4.789639283120323e-06, |
| "loss": 1.1008, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8385938385938386, |
| "grad_norm": 1.9848383665084839, |
| "learning_rate": 4.789338222686924e-06, |
| "loss": 1.1277, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8391608391608392, |
| "grad_norm": 2.0215046405792236, |
| "learning_rate": 4.789036956449951e-06, |
| "loss": 1.0636, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8397278397278397, |
| "grad_norm": 1.9995296001434326, |
| "learning_rate": 4.788735484436486e-06, |
| "loss": 1.0614, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8402948402948403, |
| "grad_norm": 1.9948177337646484, |
| "learning_rate": 4.7884338066736315e-06, |
| "loss": 1.1005, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8408618408618409, |
| "grad_norm": 2.206935405731201, |
| "learning_rate": 4.788131923188506e-06, |
| "loss": 1.1549, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8414288414288414, |
| "grad_norm": 2.1533777713775635, |
| "learning_rate": 4.787829834008248e-06, |
| "loss": 1.1103, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.841995841995842, |
| "grad_norm": 1.962776780128479, |
| "learning_rate": 4.787527539160016e-06, |
| "loss": 1.0235, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.8425628425628425, |
| "grad_norm": 1.902707576751709, |
| "learning_rate": 4.787225038670983e-06, |
| "loss": 1.095, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8431298431298432, |
| "grad_norm": 1.9712163209915161, |
| "learning_rate": 4.786922332568343e-06, |
| "loss": 1.1332, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.8436968436968437, |
| "grad_norm": 2.0679397583007812, |
| "learning_rate": 4.786619420879309e-06, |
| "loss": 1.0823, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8442638442638443, |
| "grad_norm": 2.060270071029663, |
| "learning_rate": 4.786316303631112e-06, |
| "loss": 1.1254, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.8448308448308448, |
| "grad_norm": 2.0360071659088135, |
| "learning_rate": 4.786012980851e-06, |
| "loss": 1.1093, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8453978453978453, |
| "grad_norm": 2.075654983520508, |
| "learning_rate": 4.785709452566243e-06, |
| "loss": 1.0913, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.845964845964846, |
| "grad_norm": 2.0330331325531006, |
| "learning_rate": 4.785405718804124e-06, |
| "loss": 1.1261, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.8465318465318465, |
| "grad_norm": 2.006742238998413, |
| "learning_rate": 4.78510177959195e-06, |
| "loss": 1.0898, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.8470988470988471, |
| "grad_norm": 1.96126127243042, |
| "learning_rate": 4.784797634957042e-06, |
| "loss": 1.1011, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.8476658476658476, |
| "grad_norm": 2.077693462371826, |
| "learning_rate": 4.784493284926743e-06, |
| "loss": 1.0646, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.8482328482328483, |
| "grad_norm": 2.056105375289917, |
| "learning_rate": 4.784188729528414e-06, |
| "loss": 1.0702, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.8487998487998488, |
| "grad_norm": 1.9399914741516113, |
| "learning_rate": 4.783883968789431e-06, |
| "loss": 1.0561, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.8493668493668494, |
| "grad_norm": 2.05819034576416, |
| "learning_rate": 4.783579002737193e-06, |
| "loss": 1.0837, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.8499338499338499, |
| "grad_norm": 2.0962612628936768, |
| "learning_rate": 4.783273831399114e-06, |
| "loss": 1.1275, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.8505008505008504, |
| "grad_norm": 2.1176199913024902, |
| "learning_rate": 4.782968454802629e-06, |
| "loss": 1.1522, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8510678510678511, |
| "grad_norm": 2.1833150386810303, |
| "learning_rate": 4.78266287297519e-06, |
| "loss": 1.0479, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.8516348516348516, |
| "grad_norm": 1.9732375144958496, |
| "learning_rate": 4.782357085944267e-06, |
| "loss": 1.1363, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.8522018522018522, |
| "grad_norm": 1.987606167793274, |
| "learning_rate": 4.782051093737349e-06, |
| "loss": 1.0978, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.8527688527688527, |
| "grad_norm": 1.9279766082763672, |
| "learning_rate": 4.781744896381945e-06, |
| "loss": 1.0906, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.8533358533358534, |
| "grad_norm": 1.9426352977752686, |
| "learning_rate": 4.78143849390558e-06, |
| "loss": 1.1195, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.8539028539028539, |
| "grad_norm": 1.9374680519104004, |
| "learning_rate": 4.781131886335799e-06, |
| "loss": 1.0835, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.8544698544698545, |
| "grad_norm": 2.1612491607666016, |
| "learning_rate": 4.780825073700166e-06, |
| "loss": 1.0892, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.855036855036855, |
| "grad_norm": 2.1090891361236572, |
| "learning_rate": 4.78051805602626e-06, |
| "loss": 1.1032, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.8556038556038557, |
| "grad_norm": 2.0436275005340576, |
| "learning_rate": 4.780210833341682e-06, |
| "loss": 1.1011, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.8561708561708562, |
| "grad_norm": 1.9176534414291382, |
| "learning_rate": 4.77990340567405e-06, |
| "loss": 1.0907, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8567378567378567, |
| "grad_norm": 2.086163282394409, |
| "learning_rate": 4.779595773051002e-06, |
| "loss": 1.1346, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.8573048573048573, |
| "grad_norm": 1.9749596118927002, |
| "learning_rate": 4.779287935500192e-06, |
| "loss": 1.0875, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.8578718578718578, |
| "grad_norm": 1.9084590673446655, |
| "learning_rate": 4.778979893049294e-06, |
| "loss": 1.0849, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.8584388584388585, |
| "grad_norm": 1.9401350021362305, |
| "learning_rate": 4.778671645725999e-06, |
| "loss": 1.1297, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.859005859005859, |
| "grad_norm": 2.069439649581909, |
| "learning_rate": 4.778363193558017e-06, |
| "loss": 1.0059, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.8595728595728596, |
| "grad_norm": 2.0672447681427, |
| "learning_rate": 4.77805453657308e-06, |
| "loss": 1.1441, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.8601398601398601, |
| "grad_norm": 1.9461488723754883, |
| "learning_rate": 4.777745674798931e-06, |
| "loss": 1.0962, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.8607068607068608, |
| "grad_norm": 2.1001944541931152, |
| "learning_rate": 4.777436608263338e-06, |
| "loss": 1.1358, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.8612738612738613, |
| "grad_norm": 1.9706493616104126, |
| "learning_rate": 4.777127336994085e-06, |
| "loss": 1.101, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.8618408618408618, |
| "grad_norm": 1.9673707485198975, |
| "learning_rate": 4.7768178610189744e-06, |
| "loss": 1.1193, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8624078624078624, |
| "grad_norm": 1.9464128017425537, |
| "learning_rate": 4.776508180365826e-06, |
| "loss": 1.0663, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.8629748629748629, |
| "grad_norm": 2.0577609539031982, |
| "learning_rate": 4.77619829506248e-06, |
| "loss": 1.0971, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.8635418635418636, |
| "grad_norm": 2.171048402786255, |
| "learning_rate": 4.775888205136793e-06, |
| "loss": 1.104, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.8641088641088641, |
| "grad_norm": 2.2168309688568115, |
| "learning_rate": 4.775577910616642e-06, |
| "loss": 1.0856, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.8646758646758647, |
| "grad_norm": 1.9648188352584839, |
| "learning_rate": 4.77526741152992e-06, |
| "loss": 1.1112, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.8652428652428652, |
| "grad_norm": 2.0772106647491455, |
| "learning_rate": 4.774956707904542e-06, |
| "loss": 1.0805, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.8658098658098659, |
| "grad_norm": 2.0109941959381104, |
| "learning_rate": 4.774645799768438e-06, |
| "loss": 1.1004, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.8663768663768664, |
| "grad_norm": 2.0720832347869873, |
| "learning_rate": 4.7743346871495575e-06, |
| "loss": 1.0671, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.8669438669438669, |
| "grad_norm": 2.011953830718994, |
| "learning_rate": 4.774023370075868e-06, |
| "loss": 1.0671, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.8675108675108675, |
| "grad_norm": 1.9619215726852417, |
| "learning_rate": 4.773711848575357e-06, |
| "loss": 1.1336, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8680778680778681, |
| "grad_norm": 1.8378658294677734, |
| "learning_rate": 4.773400122676028e-06, |
| "loss": 1.1083, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.8686448686448687, |
| "grad_norm": 1.9250093698501587, |
| "learning_rate": 4.7730881924059046e-06, |
| "loss": 1.1024, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.8692118692118692, |
| "grad_norm": 2.106729745864868, |
| "learning_rate": 4.772776057793029e-06, |
| "loss": 1.0636, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.8697788697788698, |
| "grad_norm": 1.9255436658859253, |
| "learning_rate": 4.77246371886546e-06, |
| "loss": 1.1246, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.8703458703458703, |
| "grad_norm": 1.9523237943649292, |
| "learning_rate": 4.772151175651275e-06, |
| "loss": 1.0753, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.870912870912871, |
| "grad_norm": 2.121988296508789, |
| "learning_rate": 4.771838428178574e-06, |
| "loss": 1.149, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8714798714798715, |
| "grad_norm": 2.094503879547119, |
| "learning_rate": 4.771525476475467e-06, |
| "loss": 1.0711, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.872046872046872, |
| "grad_norm": 2.0268783569335938, |
| "learning_rate": 4.771212320570091e-06, |
| "loss": 1.0787, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.8726138726138726, |
| "grad_norm": 2.261963129043579, |
| "learning_rate": 4.770898960490596e-06, |
| "loss": 1.0989, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.8731808731808732, |
| "grad_norm": 2.0570859909057617, |
| "learning_rate": 4.770585396265153e-06, |
| "loss": 1.0863, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8737478737478738, |
| "grad_norm": 2.00663423538208, |
| "learning_rate": 4.77027162792195e-06, |
| "loss": 1.0525, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.8743148743148743, |
| "grad_norm": 1.9876610040664673, |
| "learning_rate": 4.769957655489193e-06, |
| "loss": 1.0734, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.8748818748818749, |
| "grad_norm": 2.09036922454834, |
| "learning_rate": 4.7696434789951074e-06, |
| "loss": 1.0851, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8754488754488754, |
| "grad_norm": 1.9771044254302979, |
| "learning_rate": 4.769329098467937e-06, |
| "loss": 1.1107, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.876015876015876, |
| "grad_norm": 1.9424091577529907, |
| "learning_rate": 4.7690145139359435e-06, |
| "loss": 1.0558, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8765828765828766, |
| "grad_norm": 1.9221347570419312, |
| "learning_rate": 4.7686997254274056e-06, |
| "loss": 1.117, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8771498771498771, |
| "grad_norm": 2.007035493850708, |
| "learning_rate": 4.7683847329706236e-06, |
| "loss": 1.0622, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.8777168777168777, |
| "grad_norm": 2.046154499053955, |
| "learning_rate": 4.768069536593913e-06, |
| "loss": 1.0602, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.8782838782838783, |
| "grad_norm": 2.043686866760254, |
| "learning_rate": 4.76775413632561e-06, |
| "loss": 1.1248, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.8788508788508789, |
| "grad_norm": 1.8838635683059692, |
| "learning_rate": 4.767438532194066e-06, |
| "loss": 1.0887, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8794178794178794, |
| "grad_norm": 2.2751243114471436, |
| "learning_rate": 4.767122724227655e-06, |
| "loss": 1.0549, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.87998487998488, |
| "grad_norm": 1.9918965101242065, |
| "learning_rate": 4.766806712454766e-06, |
| "loss": 1.0428, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.8805518805518806, |
| "grad_norm": 2.0295073986053467, |
| "learning_rate": 4.7664904969038064e-06, |
| "loss": 1.0678, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.8811188811188811, |
| "grad_norm": 2.027252435684204, |
| "learning_rate": 4.766174077603204e-06, |
| "loss": 1.1304, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8816858816858817, |
| "grad_norm": 2.0209717750549316, |
| "learning_rate": 4.765857454581404e-06, |
| "loss": 1.0862, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8822528822528822, |
| "grad_norm": 2.1175334453582764, |
| "learning_rate": 4.76554062786687e-06, |
| "loss": 1.0914, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8828198828198828, |
| "grad_norm": 2.0487277507781982, |
| "learning_rate": 4.765223597488082e-06, |
| "loss": 1.0725, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8833868833868834, |
| "grad_norm": 2.0266189575195312, |
| "learning_rate": 4.764906363473542e-06, |
| "loss": 1.1061, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.883953883953884, |
| "grad_norm": 2.035072088241577, |
| "learning_rate": 4.764588925851766e-06, |
| "loss": 1.1178, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.8845208845208845, |
| "grad_norm": 2.010530948638916, |
| "learning_rate": 4.764271284651292e-06, |
| "loss": 1.0839, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.885087885087885, |
| "grad_norm": 2.035928964614868, |
| "learning_rate": 4.7639534399006745e-06, |
| "loss": 1.1263, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.8856548856548857, |
| "grad_norm": 1.9451258182525635, |
| "learning_rate": 4.763635391628487e-06, |
| "loss": 1.0582, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8862218862218862, |
| "grad_norm": 1.9632164239883423, |
| "learning_rate": 4.763317139863321e-06, |
| "loss": 1.0674, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8867888867888868, |
| "grad_norm": 1.9365487098693848, |
| "learning_rate": 4.762998684633785e-06, |
| "loss": 1.0518, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.8873558873558873, |
| "grad_norm": 1.942113995552063, |
| "learning_rate": 4.762680025968508e-06, |
| "loss": 1.0884, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.8879228879228879, |
| "grad_norm": 1.9525227546691895, |
| "learning_rate": 4.7623611638961365e-06, |
| "loss": 1.057, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8884898884898885, |
| "grad_norm": 1.9437819719314575, |
| "learning_rate": 4.762042098445334e-06, |
| "loss": 1.0831, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.8890568890568891, |
| "grad_norm": 1.907525897026062, |
| "learning_rate": 4.7617228296447846e-06, |
| "loss": 1.099, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8896238896238896, |
| "grad_norm": 1.9731396436691284, |
| "learning_rate": 4.76140335752319e-06, |
| "loss": 1.1293, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8901908901908901, |
| "grad_norm": 2.044581174850464, |
| "learning_rate": 4.761083682109268e-06, |
| "loss": 1.1112, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8907578907578908, |
| "grad_norm": 2.2614455223083496, |
| "learning_rate": 4.760763803431756e-06, |
| "loss": 1.1442, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8913248913248913, |
| "grad_norm": 1.9900983572006226, |
| "learning_rate": 4.760443721519412e-06, |
| "loss": 1.1082, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8918918918918919, |
| "grad_norm": 2.0624983310699463, |
| "learning_rate": 4.760123436401009e-06, |
| "loss": 1.0991, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8924588924588924, |
| "grad_norm": 1.9901314973831177, |
| "learning_rate": 4.75980294810534e-06, |
| "loss": 1.0694, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.8930258930258931, |
| "grad_norm": 2.052838087081909, |
| "learning_rate": 4.759482256661215e-06, |
| "loss": 1.1102, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8935928935928936, |
| "grad_norm": 1.9329949617385864, |
| "learning_rate": 4.759161362097463e-06, |
| "loss": 1.0261, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.8941598941598942, |
| "grad_norm": 2.1841771602630615, |
| "learning_rate": 4.7588402644429335e-06, |
| "loss": 1.0946, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.8947268947268947, |
| "grad_norm": 1.9439847469329834, |
| "learning_rate": 4.75851896372649e-06, |
| "loss": 1.0731, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8952938952938952, |
| "grad_norm": 2.0109915733337402, |
| "learning_rate": 4.758197459977015e-06, |
| "loss": 1.0552, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8958608958608959, |
| "grad_norm": 1.86398184299469, |
| "learning_rate": 4.7578757532234145e-06, |
| "loss": 1.0914, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8964278964278964, |
| "grad_norm": 1.985925555229187, |
| "learning_rate": 4.757553843494606e-06, |
| "loss": 1.0717, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.896994896994897, |
| "grad_norm": 2.2007815837860107, |
| "learning_rate": 4.757231730819528e-06, |
| "loss": 1.1239, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.8975618975618975, |
| "grad_norm": 2.279360055923462, |
| "learning_rate": 4.756909415227139e-06, |
| "loss": 1.1248, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8981288981288982, |
| "grad_norm": 2.0951502323150635, |
| "learning_rate": 4.7565868967464124e-06, |
| "loss": 1.0919, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8986958986958987, |
| "grad_norm": 2.100532054901123, |
| "learning_rate": 4.756264175406342e-06, |
| "loss": 1.0742, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8992628992628993, |
| "grad_norm": 2.1471176147460938, |
| "learning_rate": 4.75594125123594e-06, |
| "loss": 1.0993, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8998298998298998, |
| "grad_norm": 1.9409059286117554, |
| "learning_rate": 4.755618124264236e-06, |
| "loss": 1.0884, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.9003969003969003, |
| "grad_norm": 2.0202972888946533, |
| "learning_rate": 4.755294794520277e-06, |
| "loss": 1.1407, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.900963900963901, |
| "grad_norm": 1.8660567998886108, |
| "learning_rate": 4.75497126203313e-06, |
| "loss": 1.0864, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.9015309015309015, |
| "grad_norm": 2.0232956409454346, |
| "learning_rate": 4.7546475268318795e-06, |
| "loss": 1.0715, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9020979020979021, |
| "grad_norm": 1.869956612586975, |
| "learning_rate": 4.754323588945628e-06, |
| "loss": 1.0768, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.9026649026649026, |
| "grad_norm": 2.032975196838379, |
| "learning_rate": 4.753999448403497e-06, |
| "loss": 1.1404, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.9032319032319033, |
| "grad_norm": 2.062655448913574, |
| "learning_rate": 4.7536751052346244e-06, |
| "loss": 1.0803, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.9037989037989038, |
| "grad_norm": 2.068575143814087, |
| "learning_rate": 4.753350559468169e-06, |
| "loss": 1.1214, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.9043659043659044, |
| "grad_norm": 1.8464081287384033, |
| "learning_rate": 4.753025811133304e-06, |
| "loss": 1.0328, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.9049329049329049, |
| "grad_norm": 2.0097310543060303, |
| "learning_rate": 4.752700860259225e-06, |
| "loss": 1.063, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.9054999054999056, |
| "grad_norm": 2.3630588054656982, |
| "learning_rate": 4.7523757068751445e-06, |
| "loss": 1.1093, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.9060669060669061, |
| "grad_norm": 2.0079240798950195, |
| "learning_rate": 4.752050351010291e-06, |
| "loss": 1.0643, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.9066339066339066, |
| "grad_norm": 2.055734157562256, |
| "learning_rate": 4.751724792693914e-06, |
| "loss": 0.9884, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.9072009072009072, |
| "grad_norm": 2.0036795139312744, |
| "learning_rate": 4.751399031955279e-06, |
| "loss": 1.0962, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9077679077679077, |
| "grad_norm": 2.157174587249756, |
| "learning_rate": 4.751073068823673e-06, |
| "loss": 1.0921, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.9083349083349084, |
| "grad_norm": 2.27801251411438, |
| "learning_rate": 4.750746903328396e-06, |
| "loss": 1.1183, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.9089019089019089, |
| "grad_norm": 1.9836596250534058, |
| "learning_rate": 4.750420535498771e-06, |
| "loss": 1.0127, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.9094689094689095, |
| "grad_norm": 1.9263114929199219, |
| "learning_rate": 4.750093965364137e-06, |
| "loss": 1.0417, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.91003591003591, |
| "grad_norm": 1.8833136558532715, |
| "learning_rate": 4.749767192953852e-06, |
| "loss": 1.0528, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.9106029106029107, |
| "grad_norm": 2.0248804092407227, |
| "learning_rate": 4.74944021829729e-06, |
| "loss": 1.1033, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.9111699111699112, |
| "grad_norm": 2.1038992404937744, |
| "learning_rate": 4.749113041423846e-06, |
| "loss": 1.1072, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.9117369117369117, |
| "grad_norm": 2.1123745441436768, |
| "learning_rate": 4.7487856623629325e-06, |
| "loss": 1.1115, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.9123039123039123, |
| "grad_norm": 1.9227306842803955, |
| "learning_rate": 4.74845808114398e-06, |
| "loss": 1.0282, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.9128709128709128, |
| "grad_norm": 2.635802745819092, |
| "learning_rate": 4.748130297796435e-06, |
| "loss": 1.0809, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9134379134379135, |
| "grad_norm": 2.026642084121704, |
| "learning_rate": 4.747802312349767e-06, |
| "loss": 1.0672, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.914004914004914, |
| "grad_norm": 2.060119390487671, |
| "learning_rate": 4.747474124833456e-06, |
| "loss": 1.0266, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.9145719145719146, |
| "grad_norm": 1.9226115942001343, |
| "learning_rate": 4.747145735277011e-06, |
| "loss": 1.0446, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.9151389151389151, |
| "grad_norm": 2.0081734657287598, |
| "learning_rate": 4.746817143709949e-06, |
| "loss": 1.1019, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.9157059157059158, |
| "grad_norm": 1.902655005455017, |
| "learning_rate": 4.746488350161811e-06, |
| "loss": 1.0723, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.9162729162729163, |
| "grad_norm": 1.9542418718338013, |
| "learning_rate": 4.746159354662153e-06, |
| "loss": 1.0677, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.9168399168399168, |
| "grad_norm": 2.057506561279297, |
| "learning_rate": 4.745830157240551e-06, |
| "loss": 1.143, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.9174069174069174, |
| "grad_norm": 2.0979979038238525, |
| "learning_rate": 4.7455007579266e-06, |
| "loss": 1.0977, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.9179739179739179, |
| "grad_norm": 2.1611087322235107, |
| "learning_rate": 4.74517115674991e-06, |
| "loss": 1.049, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.9185409185409186, |
| "grad_norm": 1.9556939601898193, |
| "learning_rate": 4.744841353740112e-06, |
| "loss": 1.0329, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9191079191079191, |
| "grad_norm": 1.9895910024642944, |
| "learning_rate": 4.744511348926855e-06, |
| "loss": 1.0884, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.9196749196749197, |
| "grad_norm": 2.0237414836883545, |
| "learning_rate": 4.744181142339803e-06, |
| "loss": 1.1147, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.9202419202419202, |
| "grad_norm": 2.115481376647949, |
| "learning_rate": 4.743850734008643e-06, |
| "loss": 1.0856, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.9208089208089208, |
| "grad_norm": 2.0079002380371094, |
| "learning_rate": 4.743520123963075e-06, |
| "loss": 1.0896, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.9213759213759214, |
| "grad_norm": 1.9966343641281128, |
| "learning_rate": 4.743189312232821e-06, |
| "loss": 1.1146, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.9219429219429219, |
| "grad_norm": 2.072922468185425, |
| "learning_rate": 4.742858298847621e-06, |
| "loss": 1.1268, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.9225099225099225, |
| "grad_norm": 1.9800165891647339, |
| "learning_rate": 4.742527083837229e-06, |
| "loss": 1.0057, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 2.2015650272369385, |
| "learning_rate": 4.742195667231424e-06, |
| "loss": 1.0517, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.9236439236439237, |
| "grad_norm": 2.021609306335449, |
| "learning_rate": 4.741864049059995e-06, |
| "loss": 1.0832, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.9242109242109242, |
| "grad_norm": 1.9899101257324219, |
| "learning_rate": 4.741532229352756e-06, |
| "loss": 1.075, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9247779247779248, |
| "grad_norm": 2.0984103679656982, |
| "learning_rate": 4.741200208139537e-06, |
| "loss": 1.1141, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.9253449253449253, |
| "grad_norm": 2.0580196380615234, |
| "learning_rate": 4.740867985450184e-06, |
| "loss": 1.0965, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.925911925911926, |
| "grad_norm": 2.0495893955230713, |
| "learning_rate": 4.740535561314562e-06, |
| "loss": 1.0752, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.9264789264789265, |
| "grad_norm": 1.908860445022583, |
| "learning_rate": 4.740202935762557e-06, |
| "loss": 1.072, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.927045927045927, |
| "grad_norm": 1.9863654375076294, |
| "learning_rate": 4.739870108824069e-06, |
| "loss": 1.0827, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.9276129276129276, |
| "grad_norm": 2.0442192554473877, |
| "learning_rate": 4.739537080529019e-06, |
| "loss": 1.1489, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.9281799281799282, |
| "grad_norm": 1.8727167844772339, |
| "learning_rate": 4.739203850907345e-06, |
| "loss": 1.1274, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9287469287469288, |
| "grad_norm": 2.0538337230682373, |
| "learning_rate": 4.7388704199890025e-06, |
| "loss": 1.1224, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.9293139293139293, |
| "grad_norm": 1.9616420269012451, |
| "learning_rate": 4.738536787803967e-06, |
| "loss": 1.1123, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9298809298809299, |
| "grad_norm": 2.009235143661499, |
| "learning_rate": 4.738202954382228e-06, |
| "loss": 1.0661, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9304479304479304, |
| "grad_norm": 1.9128503799438477, |
| "learning_rate": 4.7378689197538005e-06, |
| "loss": 1.0612, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.931014931014931, |
| "grad_norm": 1.9708175659179688, |
| "learning_rate": 4.73753468394871e-06, |
| "loss": 1.122, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9315819315819316, |
| "grad_norm": 1.7882028818130493, |
| "learning_rate": 4.737200246997004e-06, |
| "loss": 1.0251, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.9321489321489321, |
| "grad_norm": 1.9868979454040527, |
| "learning_rate": 4.7368656089287455e-06, |
| "loss": 1.1074, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.9327159327159327, |
| "grad_norm": 4.167366981506348, |
| "learning_rate": 4.73653076977402e-06, |
| "loss": 1.1029, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.9332829332829333, |
| "grad_norm": 2.0651445388793945, |
| "learning_rate": 4.736195729562928e-06, |
| "loss": 1.1035, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.9338499338499339, |
| "grad_norm": 1.9564183950424194, |
| "learning_rate": 4.735860488325586e-06, |
| "loss": 1.0094, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.9344169344169344, |
| "grad_norm": 2.0358612537384033, |
| "learning_rate": 4.7355250460921346e-06, |
| "loss": 1.123, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.934983934983935, |
| "grad_norm": 2.1947906017303467, |
| "learning_rate": 4.735189402892726e-06, |
| "loss": 1.1135, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.9355509355509356, |
| "grad_norm": 2.077989101409912, |
| "learning_rate": 4.734853558757534e-06, |
| "loss": 1.0527, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9361179361179361, |
| "grad_norm": 1.989847183227539, |
| "learning_rate": 4.73451751371675e-06, |
| "loss": 1.0593, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.9366849366849367, |
| "grad_norm": 2.135556936264038, |
| "learning_rate": 4.734181267800584e-06, |
| "loss": 1.1115, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.9372519372519372, |
| "grad_norm": 2.0910916328430176, |
| "learning_rate": 4.733844821039263e-06, |
| "loss": 1.1552, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.9378189378189378, |
| "grad_norm": 2.14231276512146, |
| "learning_rate": 4.733508173463032e-06, |
| "loss": 1.1227, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.9383859383859384, |
| "grad_norm": 1.8958340883255005, |
| "learning_rate": 4.733171325102154e-06, |
| "loss": 1.0518, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.938952938952939, |
| "grad_norm": 2.190434694290161, |
| "learning_rate": 4.732834275986912e-06, |
| "loss": 1.0807, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.9395199395199395, |
| "grad_norm": 2.2391319274902344, |
| "learning_rate": 4.732497026147605e-06, |
| "loss": 1.1503, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.94008694008694, |
| "grad_norm": 2.0487253665924072, |
| "learning_rate": 4.732159575614549e-06, |
| "loss": 1.1004, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.9406539406539407, |
| "grad_norm": 2.263414144515991, |
| "learning_rate": 4.7318219244180816e-06, |
| "loss": 1.0249, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.9412209412209412, |
| "grad_norm": 2.0268044471740723, |
| "learning_rate": 4.731484072588556e-06, |
| "loss": 1.0823, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9417879417879418, |
| "grad_norm": 2.096123695373535, |
| "learning_rate": 4.731146020156343e-06, |
| "loss": 1.0718, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.9423549423549423, |
| "grad_norm": 2.0111961364746094, |
| "learning_rate": 4.730807767151834e-06, |
| "loss": 1.092, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.9429219429219429, |
| "grad_norm": 2.0895767211914062, |
| "learning_rate": 4.730469313605435e-06, |
| "loss": 1.1478, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.9434889434889435, |
| "grad_norm": 1.9681237936019897, |
| "learning_rate": 4.730130659547573e-06, |
| "loss": 1.0627, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.9440559440559441, |
| "grad_norm": 2.094294786453247, |
| "learning_rate": 4.729791805008691e-06, |
| "loss": 1.1258, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.9446229446229446, |
| "grad_norm": 2.05210542678833, |
| "learning_rate": 4.729452750019252e-06, |
| "loss": 1.1241, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.9451899451899451, |
| "grad_norm": 1.9600073099136353, |
| "learning_rate": 4.729113494609735e-06, |
| "loss": 1.0742, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.9457569457569458, |
| "grad_norm": 1.8465203046798706, |
| "learning_rate": 4.728774038810638e-06, |
| "loss": 1.0268, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.9463239463239463, |
| "grad_norm": 1.9174362421035767, |
| "learning_rate": 4.728434382652477e-06, |
| "loss": 1.0674, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.9468909468909469, |
| "grad_norm": 2.0088138580322266, |
| "learning_rate": 4.728094526165786e-06, |
| "loss": 1.0755, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9474579474579474, |
| "grad_norm": 1.899976372718811, |
| "learning_rate": 4.727754469381116e-06, |
| "loss": 1.0559, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.9480249480249481, |
| "grad_norm": 2.0253117084503174, |
| "learning_rate": 4.7274142123290386e-06, |
| "loss": 1.0736, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.9485919485919486, |
| "grad_norm": 2.2883076667785645, |
| "learning_rate": 4.72707375504014e-06, |
| "loss": 1.0642, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.9491589491589492, |
| "grad_norm": 2.1243669986724854, |
| "learning_rate": 4.726733097545028e-06, |
| "loss": 1.0961, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.9497259497259497, |
| "grad_norm": 2.0006613731384277, |
| "learning_rate": 4.726392239874325e-06, |
| "loss": 1.0739, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.9502929502929502, |
| "grad_norm": 2.0095293521881104, |
| "learning_rate": 4.726051182058673e-06, |
| "loss": 1.0438, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.9508599508599509, |
| "grad_norm": 2.0329766273498535, |
| "learning_rate": 4.725709924128733e-06, |
| "loss": 1.1254, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.9514269514269514, |
| "grad_norm": 2.016813278198242, |
| "learning_rate": 4.725368466115182e-06, |
| "loss": 1.0904, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.951993951993952, |
| "grad_norm": 1.9678806066513062, |
| "learning_rate": 4.725026808048716e-06, |
| "loss": 1.0721, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.9525609525609525, |
| "grad_norm": 1.994081735610962, |
| "learning_rate": 4.7246849499600485e-06, |
| "loss": 1.0723, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9531279531279532, |
| "grad_norm": 2.020341396331787, |
| "learning_rate": 4.724342891879913e-06, |
| "loss": 1.0537, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.9536949536949537, |
| "grad_norm": 1.9112085103988647, |
| "learning_rate": 4.724000633839057e-06, |
| "loss": 1.0588, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.9542619542619543, |
| "grad_norm": 1.9430484771728516, |
| "learning_rate": 4.723658175868251e-06, |
| "loss": 1.0341, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.9548289548289548, |
| "grad_norm": 1.9295274019241333, |
| "learning_rate": 4.723315517998278e-06, |
| "loss": 1.1012, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.9553959553959553, |
| "grad_norm": 1.913633108139038, |
| "learning_rate": 4.722972660259944e-06, |
| "loss": 1.1075, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.955962955962956, |
| "grad_norm": 2.0695457458496094, |
| "learning_rate": 4.722629602684069e-06, |
| "loss": 1.0851, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.9565299565299565, |
| "grad_norm": 2.084043502807617, |
| "learning_rate": 4.722286345301494e-06, |
| "loss": 1.1, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.9570969570969571, |
| "grad_norm": 2.147137403488159, |
| "learning_rate": 4.721942888143076e-06, |
| "loss": 1.089, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.9576639576639576, |
| "grad_norm": 2.0710439682006836, |
| "learning_rate": 4.721599231239691e-06, |
| "loss": 1.0876, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.9582309582309583, |
| "grad_norm": 2.159060478210449, |
| "learning_rate": 4.721255374622231e-06, |
| "loss": 1.0908, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9587979587979588, |
| "grad_norm": 2.000617742538452, |
| "learning_rate": 4.7209113183216105e-06, |
| "loss": 1.07, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.9593649593649594, |
| "grad_norm": 1.9861509799957275, |
| "learning_rate": 4.720567062368757e-06, |
| "loss": 1.0692, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.9599319599319599, |
| "grad_norm": 2.0842881202697754, |
| "learning_rate": 4.720222606794617e-06, |
| "loss": 1.1225, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.9604989604989606, |
| "grad_norm": 2.016493320465088, |
| "learning_rate": 4.719877951630158e-06, |
| "loss": 1.126, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.9610659610659611, |
| "grad_norm": 1.9814172983169556, |
| "learning_rate": 4.719533096906363e-06, |
| "loss": 1.0924, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.9616329616329616, |
| "grad_norm": 1.9399725198745728, |
| "learning_rate": 4.7191880426542306e-06, |
| "loss": 1.1072, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.9621999621999622, |
| "grad_norm": 1.9508792161941528, |
| "learning_rate": 4.718842788904784e-06, |
| "loss": 1.0957, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.9627669627669627, |
| "grad_norm": 1.943466305732727, |
| "learning_rate": 4.718497335689057e-06, |
| "loss": 1.1059, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.9633339633339634, |
| "grad_norm": 2.0598337650299072, |
| "learning_rate": 4.7181516830381065e-06, |
| "loss": 1.1545, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.9639009639009639, |
| "grad_norm": 2.0156242847442627, |
| "learning_rate": 4.717805830983005e-06, |
| "loss": 1.1019, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9644679644679645, |
| "grad_norm": 1.872069239616394, |
| "learning_rate": 4.717459779554843e-06, |
| "loss": 1.1587, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.965034965034965, |
| "grad_norm": 1.9101303815841675, |
| "learning_rate": 4.7171135287847295e-06, |
| "loss": 1.0322, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.9656019656019657, |
| "grad_norm": 2.0382840633392334, |
| "learning_rate": 4.716767078703793e-06, |
| "loss": 1.0802, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.9661689661689662, |
| "grad_norm": 2.093435049057007, |
| "learning_rate": 4.716420429343175e-06, |
| "loss": 1.1195, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.9667359667359667, |
| "grad_norm": 1.9311339855194092, |
| "learning_rate": 4.7160735807340395e-06, |
| "loss": 1.0978, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.9673029673029673, |
| "grad_norm": 1.9358426332473755, |
| "learning_rate": 4.7157265329075675e-06, |
| "loss": 1.0104, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.9678699678699678, |
| "grad_norm": 2.0609984397888184, |
| "learning_rate": 4.715379285894957e-06, |
| "loss": 1.0827, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.9684369684369685, |
| "grad_norm": 2.046546697616577, |
| "learning_rate": 4.715031839727424e-06, |
| "loss": 1.1349, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.969003969003969, |
| "grad_norm": 1.9720268249511719, |
| "learning_rate": 4.714684194436204e-06, |
| "loss": 1.0625, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.9695709695709696, |
| "grad_norm": 2.13085675239563, |
| "learning_rate": 4.714336350052547e-06, |
| "loss": 1.075, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9701379701379701, |
| "grad_norm": 2.105146884918213, |
| "learning_rate": 4.713988306607726e-06, |
| "loss": 1.12, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.9707049707049707, |
| "grad_norm": 2.1501526832580566, |
| "learning_rate": 4.7136400641330245e-06, |
| "loss": 1.0891, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.9712719712719713, |
| "grad_norm": 2.1131129264831543, |
| "learning_rate": 4.713291622659753e-06, |
| "loss": 1.1084, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.9718389718389718, |
| "grad_norm": 2.1475110054016113, |
| "learning_rate": 4.712942982219232e-06, |
| "loss": 1.1547, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.9724059724059724, |
| "grad_norm": 2.140531301498413, |
| "learning_rate": 4.712594142842804e-06, |
| "loss": 1.1253, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.972972972972973, |
| "grad_norm": 1.925569772720337, |
| "learning_rate": 4.712245104561829e-06, |
| "loss": 1.0605, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.9735399735399736, |
| "grad_norm": 2.097440242767334, |
| "learning_rate": 4.711895867407684e-06, |
| "loss": 1.0883, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.9741069741069741, |
| "grad_norm": 2.093440055847168, |
| "learning_rate": 4.711546431411763e-06, |
| "loss": 1.07, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.9746739746739747, |
| "grad_norm": 1.9718364477157593, |
| "learning_rate": 4.711196796605482e-06, |
| "loss": 1.0653, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.9752409752409752, |
| "grad_norm": 1.944761872291565, |
| "learning_rate": 4.710846963020268e-06, |
| "loss": 0.9752, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9758079758079758, |
| "grad_norm": 2.169654130935669, |
| "learning_rate": 4.710496930687574e-06, |
| "loss": 1.123, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.9763749763749764, |
| "grad_norm": 2.033576011657715, |
| "learning_rate": 4.710146699638864e-06, |
| "loss": 1.0782, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.9769419769419769, |
| "grad_norm": 1.983486533164978, |
| "learning_rate": 4.709796269905622e-06, |
| "loss": 1.0464, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.9775089775089775, |
| "grad_norm": 2.1388914585113525, |
| "learning_rate": 4.709445641519352e-06, |
| "loss": 1.0805, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.9780759780759781, |
| "grad_norm": 2.0662407875061035, |
| "learning_rate": 4.709094814511574e-06, |
| "loss": 1.0845, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.9786429786429787, |
| "grad_norm": 5.486571788787842, |
| "learning_rate": 4.708743788913827e-06, |
| "loss": 1.0293, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.9792099792099792, |
| "grad_norm": 2.0278141498565674, |
| "learning_rate": 4.708392564757665e-06, |
| "loss": 1.1524, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.9797769797769798, |
| "grad_norm": 2.124642848968506, |
| "learning_rate": 4.708041142074664e-06, |
| "loss": 1.1332, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.9803439803439803, |
| "grad_norm": 2.006274938583374, |
| "learning_rate": 4.707689520896413e-06, |
| "loss": 1.1231, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.980910980910981, |
| "grad_norm": 1.7690280675888062, |
| "learning_rate": 4.707337701254524e-06, |
| "loss": 1.0224, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9814779814779815, |
| "grad_norm": 2.001707077026367, |
| "learning_rate": 4.706985683180624e-06, |
| "loss": 1.0755, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.982044982044982, |
| "grad_norm": 1.9623841047286987, |
| "learning_rate": 4.706633466706356e-06, |
| "loss": 1.0818, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.9826119826119826, |
| "grad_norm": 1.9035817384719849, |
| "learning_rate": 4.706281051863386e-06, |
| "loss": 1.1194, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.9831789831789832, |
| "grad_norm": 1.875132441520691, |
| "learning_rate": 4.705928438683394e-06, |
| "loss": 1.0762, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.9837459837459838, |
| "grad_norm": 1.8791394233703613, |
| "learning_rate": 4.705575627198077e-06, |
| "loss": 1.0526, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.9843129843129843, |
| "grad_norm": 1.932252049446106, |
| "learning_rate": 4.705222617439152e-06, |
| "loss": 1.0841, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.9848799848799848, |
| "grad_norm": 1.9309083223342896, |
| "learning_rate": 4.7048694094383564e-06, |
| "loss": 1.1053, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.9854469854469855, |
| "grad_norm": 2.0522756576538086, |
| "learning_rate": 4.704516003227439e-06, |
| "loss": 1.0378, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.986013986013986, |
| "grad_norm": 2.725698232650757, |
| "learning_rate": 4.70416239883817e-06, |
| "loss": 1.0885, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9865809865809866, |
| "grad_norm": 1.9060684442520142, |
| "learning_rate": 4.703808596302339e-06, |
| "loss": 1.0453, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9871479871479871, |
| "grad_norm": 2.087420701980591, |
| "learning_rate": 4.703454595651752e-06, |
| "loss": 1.0884, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.9877149877149877, |
| "grad_norm": 1.921978235244751, |
| "learning_rate": 4.7031003969182295e-06, |
| "loss": 1.0927, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9882819882819883, |
| "grad_norm": 2.0271310806274414, |
| "learning_rate": 4.702746000133614e-06, |
| "loss": 1.1026, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9888489888489889, |
| "grad_norm": 1.9672328233718872, |
| "learning_rate": 4.702391405329766e-06, |
| "loss": 1.018, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.9894159894159894, |
| "grad_norm": 1.9041637182235718, |
| "learning_rate": 4.702036612538562e-06, |
| "loss": 1.078, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.98998298998299, |
| "grad_norm": 2.149569034576416, |
| "learning_rate": 4.701681621791895e-06, |
| "loss": 1.0468, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.9905499905499906, |
| "grad_norm": 2.0086848735809326, |
| "learning_rate": 4.701326433121678e-06, |
| "loss": 1.1154, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9911169911169911, |
| "grad_norm": 2.129842519760132, |
| "learning_rate": 4.700971046559842e-06, |
| "loss": 1.0542, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.9916839916839917, |
| "grad_norm": 2.165759801864624, |
| "learning_rate": 4.700615462138334e-06, |
| "loss": 1.1032, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.9922509922509922, |
| "grad_norm": 2.163757801055908, |
| "learning_rate": 4.700259679889122e-06, |
| "loss": 1.0914, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9928179928179928, |
| "grad_norm": 2.3616926670074463, |
| "learning_rate": 4.699903699844186e-06, |
| "loss": 1.0627, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9933849933849934, |
| "grad_norm": 1.9337599277496338, |
| "learning_rate": 4.69954752203553e-06, |
| "loss": 1.1096, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.993951993951994, |
| "grad_norm": 1.9729043245315552, |
| "learning_rate": 4.699191146495174e-06, |
| "loss": 1.0626, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9945189945189945, |
| "grad_norm": 1.9833909273147583, |
| "learning_rate": 4.698834573255152e-06, |
| "loss": 1.1127, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.995085995085995, |
| "grad_norm": 2.1493887901306152, |
| "learning_rate": 4.69847780234752e-06, |
| "loss": 1.0547, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9956529956529957, |
| "grad_norm": 2.0263702869415283, |
| "learning_rate": 4.698120833804352e-06, |
| "loss": 1.0631, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9962199962199962, |
| "grad_norm": 1.9223586320877075, |
| "learning_rate": 4.697763667657737e-06, |
| "loss": 1.1226, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.9967869967869968, |
| "grad_norm": 2.044142961502075, |
| "learning_rate": 4.697406303939781e-06, |
| "loss": 1.0361, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9973539973539973, |
| "grad_norm": 1.9353723526000977, |
| "learning_rate": 4.697048742682613e-06, |
| "loss": 1.0546, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.997920997920998, |
| "grad_norm": 3.121025800704956, |
| "learning_rate": 4.696690983918375e-06, |
| "loss": 1.0101, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9984879984879985, |
| "grad_norm": 2.016129732131958, |
| "learning_rate": 4.696333027679229e-06, |
| "loss": 1.0542, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.9990549990549991, |
| "grad_norm": 2.0094079971313477, |
| "learning_rate": 4.695974873997352e-06, |
| "loss": 1.0867, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9996219996219996, |
| "grad_norm": 1.9897685050964355, |
| "learning_rate": 4.695616522904943e-06, |
| "loss": 1.1321, |
| "step": 1763 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10578, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1763, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.453402696388444e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|