| { |
| "best_global_step": 1122, |
| "best_metric": 0.10693030804395676, |
| "best_model_checkpoint": "saves_multiple/lora/llama-3-8b-instruct/train_rte_789_1760637901/checkpoint-1122", |
| "epoch": 20.0, |
| "eval_steps": 561, |
| "global_step": 11220, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008912655971479501, |
| "grad_norm": 0.10324911028146744, |
| "learning_rate": 1.7825311942959003e-07, |
| "loss": 0.2859, |
| "num_input_tokens_seen": 2848, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.017825311942959002, |
| "grad_norm": 1.0517581701278687, |
| "learning_rate": 4.010695187165776e-07, |
| "loss": 0.098, |
| "num_input_tokens_seen": 6272, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.026737967914438502, |
| "grad_norm": 0.4652565121650696, |
| "learning_rate": 6.238859180035651e-07, |
| "loss": 0.0333, |
| "num_input_tokens_seen": 9344, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.035650623885918005, |
| "grad_norm": 3.560412645339966, |
| "learning_rate": 8.467023172905526e-07, |
| "loss": 0.2653, |
| "num_input_tokens_seen": 12672, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.044563279857397504, |
| "grad_norm": 9.704721450805664, |
| "learning_rate": 1.0695187165775401e-06, |
| "loss": 0.6547, |
| "num_input_tokens_seen": 16000, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.053475935828877004, |
| "grad_norm": 0.041648488491773605, |
| "learning_rate": 1.2923351158645277e-06, |
| "loss": 0.2366, |
| "num_input_tokens_seen": 19488, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.062388591800356503, |
| "grad_norm": 4.751997470855713, |
| "learning_rate": 1.5151515151515152e-06, |
| "loss": 0.3026, |
| "num_input_tokens_seen": 23008, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07130124777183601, |
| "grad_norm": 5.809197902679443, |
| "learning_rate": 1.7379679144385028e-06, |
| "loss": 0.1454, |
| "num_input_tokens_seen": 25728, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08021390374331551, |
| "grad_norm": 4.5152974128723145, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 0.2345, |
| "num_input_tokens_seen": 28672, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08912655971479501, |
| "grad_norm": 6.366511821746826, |
| "learning_rate": 2.1836007130124777e-06, |
| "loss": 0.2825, |
| "num_input_tokens_seen": 31360, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 0.06518510729074478, |
| "learning_rate": 2.4064171122994653e-06, |
| "loss": 0.3811, |
| "num_input_tokens_seen": 34432, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 3.4774608612060547, |
| "learning_rate": 2.629233511586453e-06, |
| "loss": 0.4573, |
| "num_input_tokens_seen": 37920, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11586452762923351, |
| "grad_norm": 8.832063674926758, |
| "learning_rate": 2.8520499108734404e-06, |
| "loss": 0.3591, |
| "num_input_tokens_seen": 41344, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12477718360071301, |
| "grad_norm": 6.110069274902344, |
| "learning_rate": 3.074866310160428e-06, |
| "loss": 0.0523, |
| "num_input_tokens_seen": 44448, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13368983957219252, |
| "grad_norm": 0.27768051624298096, |
| "learning_rate": 3.297682709447415e-06, |
| "loss": 0.3264, |
| "num_input_tokens_seen": 48000, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.14260249554367202, |
| "grad_norm": 6.778256416320801, |
| "learning_rate": 3.5204991087344027e-06, |
| "loss": 0.5562, |
| "num_input_tokens_seen": 50912, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 0.3445241153240204, |
| "learning_rate": 3.7433155080213903e-06, |
| "loss": 0.1069, |
| "num_input_tokens_seen": 54272, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 5.583451271057129, |
| "learning_rate": 3.966131907308377e-06, |
| "loss": 0.2782, |
| "num_input_tokens_seen": 57632, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16934046345811052, |
| "grad_norm": 4.155448913574219, |
| "learning_rate": 4.188948306595366e-06, |
| "loss": 0.1467, |
| "num_input_tokens_seen": 60736, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.17825311942959002, |
| "grad_norm": 6.108593940734863, |
| "learning_rate": 4.411764705882353e-06, |
| "loss": 0.2713, |
| "num_input_tokens_seen": 64032, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18716577540106952, |
| "grad_norm": 1.6728438138961792, |
| "learning_rate": 4.6345811051693405e-06, |
| "loss": 0.4227, |
| "num_input_tokens_seen": 67424, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 6.609097003936768, |
| "learning_rate": 4.8573975044563285e-06, |
| "loss": 0.1545, |
| "num_input_tokens_seen": 70048, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.20499108734402852, |
| "grad_norm": 2.8476388454437256, |
| "learning_rate": 5.080213903743316e-06, |
| "loss": 0.1301, |
| "num_input_tokens_seen": 73376, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 2.5001397132873535, |
| "learning_rate": 5.303030303030304e-06, |
| "loss": 0.2149, |
| "num_input_tokens_seen": 76224, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.22281639928698752, |
| "grad_norm": 0.32793930172920227, |
| "learning_rate": 5.525846702317291e-06, |
| "loss": 0.0202, |
| "num_input_tokens_seen": 79712, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.23172905525846701, |
| "grad_norm": 1.1511719226837158, |
| "learning_rate": 5.748663101604279e-06, |
| "loss": 0.1234, |
| "num_input_tokens_seen": 82336, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24064171122994651, |
| "grad_norm": 2.1722285747528076, |
| "learning_rate": 5.971479500891266e-06, |
| "loss": 0.2654, |
| "num_input_tokens_seen": 85376, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.24955436720142601, |
| "grad_norm": 1.2259153127670288, |
| "learning_rate": 6.194295900178253e-06, |
| "loss": 0.1989, |
| "num_input_tokens_seen": 88992, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25846702317290554, |
| "grad_norm": 2.9561991691589355, |
| "learning_rate": 6.417112299465242e-06, |
| "loss": 0.2072, |
| "num_input_tokens_seen": 91840, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.26737967914438504, |
| "grad_norm": 3.219242811203003, |
| "learning_rate": 6.639928698752229e-06, |
| "loss": 0.081, |
| "num_input_tokens_seen": 95424, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.27629233511586454, |
| "grad_norm": 2.12544584274292, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 0.1405, |
| "num_input_tokens_seen": 98016, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.28520499108734404, |
| "grad_norm": 2.4310574531555176, |
| "learning_rate": 7.085561497326204e-06, |
| "loss": 0.1359, |
| "num_input_tokens_seen": 101184, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 1.650681495666504, |
| "learning_rate": 7.308377896613191e-06, |
| "loss": 0.1142, |
| "num_input_tokens_seen": 103776, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 5.215426445007324, |
| "learning_rate": 7.531194295900179e-06, |
| "loss": 0.1139, |
| "num_input_tokens_seen": 107104, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.31194295900178254, |
| "grad_norm": 3.0023438930511475, |
| "learning_rate": 7.754010695187166e-06, |
| "loss": 0.1606, |
| "num_input_tokens_seen": 109728, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 0.5228773951530457, |
| "learning_rate": 7.976827094474154e-06, |
| "loss": 0.1457, |
| "num_input_tokens_seen": 112224, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.32976827094474154, |
| "grad_norm": 1.5815906524658203, |
| "learning_rate": 8.19964349376114e-06, |
| "loss": 0.1591, |
| "num_input_tokens_seen": 115776, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.33868092691622104, |
| "grad_norm": 1.2593677043914795, |
| "learning_rate": 8.42245989304813e-06, |
| "loss": 0.0762, |
| "num_input_tokens_seen": 118272, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.34759358288770054, |
| "grad_norm": 3.1801679134368896, |
| "learning_rate": 8.645276292335117e-06, |
| "loss": 0.1485, |
| "num_input_tokens_seen": 121120, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.35650623885918004, |
| "grad_norm": 13.186616897583008, |
| "learning_rate": 8.868092691622104e-06, |
| "loss": 0.1945, |
| "num_input_tokens_seen": 124096, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.36541889483065954, |
| "grad_norm": 1.0450903177261353, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.2069, |
| "num_input_tokens_seen": 127328, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.37433155080213903, |
| "grad_norm": 7.1771650314331055, |
| "learning_rate": 9.31372549019608e-06, |
| "loss": 0.1029, |
| "num_input_tokens_seen": 130400, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.38324420677361853, |
| "grad_norm": 1.999472975730896, |
| "learning_rate": 9.536541889483067e-06, |
| "loss": 0.1051, |
| "num_input_tokens_seen": 134272, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.8615163564682007, |
| "learning_rate": 9.759358288770054e-06, |
| "loss": 0.0401, |
| "num_input_tokens_seen": 136480, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.40106951871657753, |
| "grad_norm": 1.6216832399368286, |
| "learning_rate": 9.982174688057041e-06, |
| "loss": 0.0511, |
| "num_input_tokens_seen": 139872, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.40998217468805703, |
| "grad_norm": 1.0488457679748535, |
| "learning_rate": 1.0204991087344028e-05, |
| "loss": 0.0885, |
| "num_input_tokens_seen": 143200, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.41889483065953653, |
| "grad_norm": 1.4865139722824097, |
| "learning_rate": 1.0427807486631017e-05, |
| "loss": 0.1056, |
| "num_input_tokens_seen": 146752, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 8.77585506439209, |
| "learning_rate": 1.0650623885918004e-05, |
| "loss": 0.1464, |
| "num_input_tokens_seen": 149952, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.43672014260249553, |
| "grad_norm": 5.2574238777160645, |
| "learning_rate": 1.0873440285204992e-05, |
| "loss": 0.1099, |
| "num_input_tokens_seen": 153184, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.44563279857397503, |
| "grad_norm": 2.0072691440582275, |
| "learning_rate": 1.1096256684491979e-05, |
| "loss": 0.127, |
| "num_input_tokens_seen": 156864, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 1.7638945579528809, |
| "learning_rate": 1.1319073083778966e-05, |
| "loss": 0.0674, |
| "num_input_tokens_seen": 159680, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.46345811051693403, |
| "grad_norm": 1.7958506345748901, |
| "learning_rate": 1.1541889483065955e-05, |
| "loss": 0.0197, |
| "num_input_tokens_seen": 162912, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.47237076648841353, |
| "grad_norm": 7.391216278076172, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.0956, |
| "num_input_tokens_seen": 165632, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 0.057393141090869904, |
| "learning_rate": 1.1987522281639929e-05, |
| "loss": 0.1021, |
| "num_input_tokens_seen": 168960, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 9.154584884643555, |
| "learning_rate": 1.2210338680926916e-05, |
| "loss": 0.1136, |
| "num_input_tokens_seen": 171936, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.49910873440285203, |
| "grad_norm": 2.9709866046905518, |
| "learning_rate": 1.2433155080213903e-05, |
| "loss": 0.0711, |
| "num_input_tokens_seen": 175200, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5080213903743316, |
| "grad_norm": 2.884493589401245, |
| "learning_rate": 1.2655971479500892e-05, |
| "loss": 0.0415, |
| "num_input_tokens_seen": 177728, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5169340463458111, |
| "grad_norm": 3.1335482597351074, |
| "learning_rate": 1.287878787878788e-05, |
| "loss": 0.0946, |
| "num_input_tokens_seen": 180768, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5258467023172906, |
| "grad_norm": 4.962701320648193, |
| "learning_rate": 1.3101604278074866e-05, |
| "loss": 0.0913, |
| "num_input_tokens_seen": 183584, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 3.826408624649048, |
| "learning_rate": 1.3324420677361854e-05, |
| "loss": 0.0802, |
| "num_input_tokens_seen": 186304, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5436720142602496, |
| "grad_norm": 4.077633380889893, |
| "learning_rate": 1.3547237076648842e-05, |
| "loss": 0.0612, |
| "num_input_tokens_seen": 189472, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5525846702317291, |
| "grad_norm": 0.1405235230922699, |
| "learning_rate": 1.377005347593583e-05, |
| "loss": 0.0308, |
| "num_input_tokens_seen": 192384, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5614973262032086, |
| "grad_norm": 2.734724521636963, |
| "learning_rate": 1.3992869875222817e-05, |
| "loss": 0.0437, |
| "num_input_tokens_seen": 195328, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5704099821746881, |
| "grad_norm": 8.127835273742676, |
| "learning_rate": 1.4215686274509804e-05, |
| "loss": 0.3415, |
| "num_input_tokens_seen": 197888, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5793226381461676, |
| "grad_norm": 8.873647689819336, |
| "learning_rate": 1.4438502673796791e-05, |
| "loss": 0.0985, |
| "num_input_tokens_seen": 200672, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 1.7725785970687866, |
| "learning_rate": 1.466131907308378e-05, |
| "loss": 0.1717, |
| "num_input_tokens_seen": 203904, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5971479500891266, |
| "grad_norm": 1.473873257637024, |
| "learning_rate": 1.4884135472370767e-05, |
| "loss": 0.0824, |
| "num_input_tokens_seen": 206688, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.9841282963752747, |
| "learning_rate": 1.5106951871657754e-05, |
| "loss": 0.0436, |
| "num_input_tokens_seen": 208832, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6149732620320856, |
| "grad_norm": 4.264573097229004, |
| "learning_rate": 1.532976827094474e-05, |
| "loss": 0.1102, |
| "num_input_tokens_seen": 211488, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6238859180035651, |
| "grad_norm": 1.2473746538162231, |
| "learning_rate": 1.555258467023173e-05, |
| "loss": 0.0409, |
| "num_input_tokens_seen": 214080, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6327985739750446, |
| "grad_norm": 2.1777610778808594, |
| "learning_rate": 1.5775401069518716e-05, |
| "loss": 0.0346, |
| "num_input_tokens_seen": 217728, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 0.04029938206076622, |
| "learning_rate": 1.5998217468805704e-05, |
| "loss": 0.0371, |
| "num_input_tokens_seen": 220640, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6506238859180036, |
| "grad_norm": 1.2787014245986938, |
| "learning_rate": 1.6221033868092693e-05, |
| "loss": 0.1806, |
| "num_input_tokens_seen": 223328, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6595365418894831, |
| "grad_norm": 6.66487979888916, |
| "learning_rate": 1.644385026737968e-05, |
| "loss": 0.0279, |
| "num_input_tokens_seen": 226336, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6684491978609626, |
| "grad_norm": 1.7244598865509033, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.1167, |
| "num_input_tokens_seen": 228896, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6773618538324421, |
| "grad_norm": 0.15285556018352509, |
| "learning_rate": 1.6889483065953653e-05, |
| "loss": 0.1697, |
| "num_input_tokens_seen": 231584, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 7.496185779571533, |
| "learning_rate": 1.7112299465240642e-05, |
| "loss": 0.2399, |
| "num_input_tokens_seen": 235552, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6951871657754011, |
| "grad_norm": 2.4987404346466064, |
| "learning_rate": 1.733511586452763e-05, |
| "loss": 0.0541, |
| "num_input_tokens_seen": 238784, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7040998217468806, |
| "grad_norm": 0.9350168108940125, |
| "learning_rate": 1.7557932263814616e-05, |
| "loss": 0.1148, |
| "num_input_tokens_seen": 241728, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7130124777183601, |
| "grad_norm": 11.738545417785645, |
| "learning_rate": 1.7780748663101605e-05, |
| "loss": 0.1737, |
| "num_input_tokens_seen": 244544, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7219251336898396, |
| "grad_norm": 0.8024265170097351, |
| "learning_rate": 1.800356506238859e-05, |
| "loss": 0.0953, |
| "num_input_tokens_seen": 247008, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7308377896613191, |
| "grad_norm": 0.6156238317489624, |
| "learning_rate": 1.822638146167558e-05, |
| "loss": 0.0574, |
| "num_input_tokens_seen": 250368, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7397504456327986, |
| "grad_norm": 2.1485512256622314, |
| "learning_rate": 1.8449197860962568e-05, |
| "loss": 0.0385, |
| "num_input_tokens_seen": 253216, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 0.7714303135871887, |
| "learning_rate": 1.8672014260249553e-05, |
| "loss": 0.0446, |
| "num_input_tokens_seen": 257216, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.0683104544878006, |
| "learning_rate": 1.8894830659536542e-05, |
| "loss": 0.1535, |
| "num_input_tokens_seen": 261088, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7664884135472371, |
| "grad_norm": 0.10318350046873093, |
| "learning_rate": 1.9117647058823528e-05, |
| "loss": 0.057, |
| "num_input_tokens_seen": 264288, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7754010695187166, |
| "grad_norm": 3.8079121112823486, |
| "learning_rate": 1.9340463458110517e-05, |
| "loss": 0.1, |
| "num_input_tokens_seen": 266816, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 6.305472373962402, |
| "learning_rate": 1.9563279857397505e-05, |
| "loss": 0.0466, |
| "num_input_tokens_seen": 269760, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7932263814616756, |
| "grad_norm": 0.969581663608551, |
| "learning_rate": 1.9786096256684494e-05, |
| "loss": 0.0307, |
| "num_input_tokens_seen": 272512, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 0.7665776610374451, |
| "learning_rate": 2.0008912655971483e-05, |
| "loss": 0.1512, |
| "num_input_tokens_seen": 276256, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8110516934046346, |
| "grad_norm": 2.4583513736724854, |
| "learning_rate": 2.023172905525847e-05, |
| "loss": 0.0516, |
| "num_input_tokens_seen": 279456, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8199643493761141, |
| "grad_norm": 11.411388397216797, |
| "learning_rate": 2.0454545454545457e-05, |
| "loss": 0.1566, |
| "num_input_tokens_seen": 282496, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8288770053475936, |
| "grad_norm": 4.11398458480835, |
| "learning_rate": 2.0677361853832443e-05, |
| "loss": 0.139, |
| "num_input_tokens_seen": 286080, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8377896613190731, |
| "grad_norm": 0.013057722710072994, |
| "learning_rate": 2.090017825311943e-05, |
| "loss": 0.1, |
| "num_input_tokens_seen": 288640, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8467023172905526, |
| "grad_norm": 0.1136278584599495, |
| "learning_rate": 2.112299465240642e-05, |
| "loss": 0.0078, |
| "num_input_tokens_seen": 291840, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 3.418334722518921, |
| "learning_rate": 2.1345811051693406e-05, |
| "loss": 0.0275, |
| "num_input_tokens_seen": 294496, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8645276292335116, |
| "grad_norm": 3.2661521434783936, |
| "learning_rate": 2.1568627450980395e-05, |
| "loss": 0.1015, |
| "num_input_tokens_seen": 297664, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8734402852049911, |
| "grad_norm": 4.954894542694092, |
| "learning_rate": 2.179144385026738e-05, |
| "loss": 0.0496, |
| "num_input_tokens_seen": 300608, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 3.7999918460845947, |
| "learning_rate": 2.201426024955437e-05, |
| "loss": 0.2225, |
| "num_input_tokens_seen": 303808, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.8912655971479501, |
| "grad_norm": 0.635353684425354, |
| "learning_rate": 2.2237076648841358e-05, |
| "loss": 0.0955, |
| "num_input_tokens_seen": 307904, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9001782531194296, |
| "grad_norm": 0.8217610120773315, |
| "learning_rate": 2.2459893048128343e-05, |
| "loss": 0.1362, |
| "num_input_tokens_seen": 311968, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 1.324615716934204, |
| "learning_rate": 2.2682709447415332e-05, |
| "loss": 0.0888, |
| "num_input_tokens_seen": 315232, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9180035650623886, |
| "grad_norm": 0.03214741870760918, |
| "learning_rate": 2.2905525846702318e-05, |
| "loss": 0.0141, |
| "num_input_tokens_seen": 317792, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9269162210338681, |
| "grad_norm": 5.245635032653809, |
| "learning_rate": 2.3128342245989306e-05, |
| "loss": 0.0916, |
| "num_input_tokens_seen": 320160, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9358288770053476, |
| "grad_norm": 3.70466685295105, |
| "learning_rate": 2.3351158645276295e-05, |
| "loss": 0.1095, |
| "num_input_tokens_seen": 323680, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9447415329768271, |
| "grad_norm": 0.4552616477012634, |
| "learning_rate": 2.357397504456328e-05, |
| "loss": 0.0635, |
| "num_input_tokens_seen": 326464, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9536541889483066, |
| "grad_norm": 0.10007640719413757, |
| "learning_rate": 2.379679144385027e-05, |
| "loss": 0.0561, |
| "num_input_tokens_seen": 329632, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 0.3329647183418274, |
| "learning_rate": 2.401960784313726e-05, |
| "loss": 0.0307, |
| "num_input_tokens_seen": 333344, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9714795008912656, |
| "grad_norm": 0.3274289071559906, |
| "learning_rate": 2.4242424242424244e-05, |
| "loss": 0.0247, |
| "num_input_tokens_seen": 337120, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 0.05923886224627495, |
| "learning_rate": 2.4465240641711233e-05, |
| "loss": 0.002, |
| "num_input_tokens_seen": 340416, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9893048128342246, |
| "grad_norm": 4.643728733062744, |
| "learning_rate": 2.4688057040998218e-05, |
| "loss": 0.1167, |
| "num_input_tokens_seen": 344160, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.9982174688057041, |
| "grad_norm": 0.12344362586736679, |
| "learning_rate": 2.4910873440285207e-05, |
| "loss": 0.0084, |
| "num_input_tokens_seen": 347744, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.1087946742773056, |
| "eval_runtime": 4.5784, |
| "eval_samples_per_second": 54.386, |
| "eval_steps_per_second": 13.76, |
| "num_input_tokens_seen": 347936, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.0071301247771836, |
| "grad_norm": 8.679508209228516, |
| "learning_rate": 2.5133689839572196e-05, |
| "loss": 0.1602, |
| "num_input_tokens_seen": 350816, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.0160427807486632, |
| "grad_norm": 4.490102291107178, |
| "learning_rate": 2.5356506238859178e-05, |
| "loss": 0.157, |
| "num_input_tokens_seen": 353824, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0249554367201426, |
| "grad_norm": 5.047804355621338, |
| "learning_rate": 2.557932263814617e-05, |
| "loss": 0.1128, |
| "num_input_tokens_seen": 356960, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0338680926916222, |
| "grad_norm": 1.7584697008132935, |
| "learning_rate": 2.5802139037433156e-05, |
| "loss": 0.151, |
| "num_input_tokens_seen": 360288, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.0427807486631016, |
| "grad_norm": 6.616455554962158, |
| "learning_rate": 2.6024955436720144e-05, |
| "loss": 0.0656, |
| "num_input_tokens_seen": 363552, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.0516934046345812, |
| "grad_norm": 0.09188894182443619, |
| "learning_rate": 2.624777183600713e-05, |
| "loss": 0.0211, |
| "num_input_tokens_seen": 366976, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 5.818865776062012, |
| "learning_rate": 2.647058823529412e-05, |
| "loss": 0.1186, |
| "num_input_tokens_seen": 369952, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0695187165775402, |
| "grad_norm": 0.9368240833282471, |
| "learning_rate": 2.6693404634581104e-05, |
| "loss": 0.0526, |
| "num_input_tokens_seen": 373248, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0784313725490196, |
| "grad_norm": 4.885313034057617, |
| "learning_rate": 2.6916221033868093e-05, |
| "loss": 0.0932, |
| "num_input_tokens_seen": 376512, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.0873440285204992, |
| "grad_norm": 0.019336791709065437, |
| "learning_rate": 2.713903743315508e-05, |
| "loss": 0.0464, |
| "num_input_tokens_seen": 379200, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0962566844919786, |
| "grad_norm": 6.228531360626221, |
| "learning_rate": 2.736185383244207e-05, |
| "loss": 0.118, |
| "num_input_tokens_seen": 382656, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.1051693404634582, |
| "grad_norm": 8.659507751464844, |
| "learning_rate": 2.7584670231729053e-05, |
| "loss": 0.0363, |
| "num_input_tokens_seen": 384928, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.1140819964349375, |
| "grad_norm": 5.95408821105957, |
| "learning_rate": 2.7807486631016045e-05, |
| "loss": 0.0361, |
| "num_input_tokens_seen": 387968, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.1229946524064172, |
| "grad_norm": 1.6606276035308838, |
| "learning_rate": 2.803030303030303e-05, |
| "loss": 0.0721, |
| "num_input_tokens_seen": 390848, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1319073083778965, |
| "grad_norm": 0.025730885565280914, |
| "learning_rate": 2.825311942959002e-05, |
| "loss": 0.0982, |
| "num_input_tokens_seen": 393952, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.1408199643493762, |
| "grad_norm": 0.515564501285553, |
| "learning_rate": 2.8475935828877005e-05, |
| "loss": 0.0538, |
| "num_input_tokens_seen": 397344, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1497326203208555, |
| "grad_norm": 1.605162262916565, |
| "learning_rate": 2.8698752228163994e-05, |
| "loss": 0.0718, |
| "num_input_tokens_seen": 400512, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.1586452762923352, |
| "grad_norm": 4.248813629150391, |
| "learning_rate": 2.8921568627450986e-05, |
| "loss": 0.0573, |
| "num_input_tokens_seen": 403872, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1675579322638145, |
| "grad_norm": 4.230849266052246, |
| "learning_rate": 2.9144385026737968e-05, |
| "loss": 0.0858, |
| "num_input_tokens_seen": 406752, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.38777992129325867, |
| "learning_rate": 2.936720142602496e-05, |
| "loss": 0.0643, |
| "num_input_tokens_seen": 410080, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1853832442067735, |
| "grad_norm": 8.435359001159668, |
| "learning_rate": 2.9590017825311946e-05, |
| "loss": 0.0513, |
| "num_input_tokens_seen": 413184, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.1942959001782532, |
| "grad_norm": 0.2321002185344696, |
| "learning_rate": 2.9812834224598934e-05, |
| "loss": 0.0781, |
| "num_input_tokens_seen": 416224, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2032085561497325, |
| "grad_norm": 0.8981883525848389, |
| "learning_rate": 3.003565062388592e-05, |
| "loss": 0.0792, |
| "num_input_tokens_seen": 419776, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 4.635167121887207, |
| "learning_rate": 3.025846702317291e-05, |
| "loss": 0.065, |
| "num_input_tokens_seen": 423008, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.2210338680926915, |
| "grad_norm": 0.7758586406707764, |
| "learning_rate": 3.0481283422459894e-05, |
| "loss": 0.0854, |
| "num_input_tokens_seen": 426112, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.2299465240641712, |
| "grad_norm": 0.11127970367670059, |
| "learning_rate": 3.0704099821746886e-05, |
| "loss": 0.1187, |
| "num_input_tokens_seen": 429376, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2388591800356505, |
| "grad_norm": 2.0615482330322266, |
| "learning_rate": 3.092691622103387e-05, |
| "loss": 0.0345, |
| "num_input_tokens_seen": 432480, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.2477718360071302, |
| "grad_norm": 0.4409101605415344, |
| "learning_rate": 3.114973262032086e-05, |
| "loss": 0.0525, |
| "num_input_tokens_seen": 435392, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2566844919786098, |
| "grad_norm": 5.350732326507568, |
| "learning_rate": 3.137254901960784e-05, |
| "loss": 0.0874, |
| "num_input_tokens_seen": 438816, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.2655971479500892, |
| "grad_norm": 1.308171033859253, |
| "learning_rate": 3.1595365418894835e-05, |
| "loss": 0.0303, |
| "num_input_tokens_seen": 441792, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.2745098039215685, |
| "grad_norm": 1.4284766912460327, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.0395, |
| "num_input_tokens_seen": 445216, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.2834224598930482, |
| "grad_norm": 2.7852184772491455, |
| "learning_rate": 3.204099821746881e-05, |
| "loss": 0.0206, |
| "num_input_tokens_seen": 447136, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2923351158645278, |
| "grad_norm": 0.03415602445602417, |
| "learning_rate": 3.226381461675579e-05, |
| "loss": 0.0704, |
| "num_input_tokens_seen": 449728, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.3012477718360071, |
| "grad_norm": 0.20296916365623474, |
| "learning_rate": 3.2486631016042783e-05, |
| "loss": 0.0941, |
| "num_input_tokens_seen": 452704, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3101604278074865, |
| "grad_norm": 0.4193146824836731, |
| "learning_rate": 3.270944741532977e-05, |
| "loss": 0.0214, |
| "num_input_tokens_seen": 455488, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.3190730837789661, |
| "grad_norm": 0.00828193873167038, |
| "learning_rate": 3.293226381461676e-05, |
| "loss": 0.0095, |
| "num_input_tokens_seen": 458592, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.3279857397504458, |
| "grad_norm": 0.07712722569704056, |
| "learning_rate": 3.3155080213903747e-05, |
| "loss": 0.0574, |
| "num_input_tokens_seen": 461440, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.3368983957219251, |
| "grad_norm": 6.220320224761963, |
| "learning_rate": 3.337789661319073e-05, |
| "loss": 0.1225, |
| "num_input_tokens_seen": 465184, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3458110516934045, |
| "grad_norm": 6.957297325134277, |
| "learning_rate": 3.360071301247772e-05, |
| "loss": 0.1108, |
| "num_input_tokens_seen": 468320, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.3547237076648841, |
| "grad_norm": 0.24312429130077362, |
| "learning_rate": 3.382352941176471e-05, |
| "loss": 0.0746, |
| "num_input_tokens_seen": 471456, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 4.236689567565918, |
| "learning_rate": 3.4046345811051695e-05, |
| "loss": 0.0884, |
| "num_input_tokens_seen": 474400, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.3725490196078431, |
| "grad_norm": 1.9785674810409546, |
| "learning_rate": 3.426916221033869e-05, |
| "loss": 0.0374, |
| "num_input_tokens_seen": 476928, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3814616755793225, |
| "grad_norm": 3.5035243034362793, |
| "learning_rate": 3.4491978609625666e-05, |
| "loss": 0.0541, |
| "num_input_tokens_seen": 480192, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.3903743315508021, |
| "grad_norm": 2.0421881675720215, |
| "learning_rate": 3.471479500891266e-05, |
| "loss": 0.0793, |
| "num_input_tokens_seen": 483296, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3992869875222818, |
| "grad_norm": 0.9459624886512756, |
| "learning_rate": 3.4937611408199644e-05, |
| "loss": 0.0154, |
| "num_input_tokens_seen": 486368, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.4081996434937611, |
| "grad_norm": 8.17954158782959, |
| "learning_rate": 3.5160427807486636e-05, |
| "loss": 0.0469, |
| "num_input_tokens_seen": 489280, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4171122994652405, |
| "grad_norm": 0.35897743701934814, |
| "learning_rate": 3.538324420677362e-05, |
| "loss": 0.013, |
| "num_input_tokens_seen": 492704, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.4260249554367201, |
| "grad_norm": 3.8780298233032227, |
| "learning_rate": 3.560606060606061e-05, |
| "loss": 0.033, |
| "num_input_tokens_seen": 495808, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4349376114081998, |
| "grad_norm": 0.00485863396897912, |
| "learning_rate": 3.582887700534759e-05, |
| "loss": 0.0335, |
| "num_input_tokens_seen": 499552, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.4438502673796791, |
| "grad_norm": 0.05903235450387001, |
| "learning_rate": 3.6051693404634585e-05, |
| "loss": 0.1034, |
| "num_input_tokens_seen": 502592, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.4527629233511585, |
| "grad_norm": 0.00699259340763092, |
| "learning_rate": 3.627450980392157e-05, |
| "loss": 0.0126, |
| "num_input_tokens_seen": 505408, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.4616755793226381, |
| "grad_norm": 7.649658679962158, |
| "learning_rate": 3.649732620320856e-05, |
| "loss": 0.1106, |
| "num_input_tokens_seen": 507904, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.009012856520712376, |
| "learning_rate": 3.672014260249554e-05, |
| "loss": 0.0749, |
| "num_input_tokens_seen": 510688, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.4795008912655971, |
| "grad_norm": 0.829964280128479, |
| "learning_rate": 3.694295900178253e-05, |
| "loss": 0.0826, |
| "num_input_tokens_seen": 514240, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4884135472370765, |
| "grad_norm": 12.730154037475586, |
| "learning_rate": 3.716577540106952e-05, |
| "loss": 0.063, |
| "num_input_tokens_seen": 517568, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.4973262032085561, |
| "grad_norm": 6.636897563934326, |
| "learning_rate": 3.738859180035651e-05, |
| "loss": 0.0932, |
| "num_input_tokens_seen": 520704, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5062388591800357, |
| "grad_norm": 5.837046146392822, |
| "learning_rate": 3.7611408199643496e-05, |
| "loss": 0.0382, |
| "num_input_tokens_seen": 524320, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 2.597928285598755, |
| "learning_rate": 3.783422459893048e-05, |
| "loss": 0.0211, |
| "num_input_tokens_seen": 527104, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.5240641711229945, |
| "grad_norm": 1.7771317958831787, |
| "learning_rate": 3.805704099821747e-05, |
| "loss": 0.0126, |
| "num_input_tokens_seen": 530624, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.5329768270944741, |
| "grad_norm": 11.486368179321289, |
| "learning_rate": 3.827985739750446e-05, |
| "loss": 0.1684, |
| "num_input_tokens_seen": 533504, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5418894830659537, |
| "grad_norm": 0.2925088107585907, |
| "learning_rate": 3.8502673796791445e-05, |
| "loss": 0.0023, |
| "num_input_tokens_seen": 536992, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.5508021390374331, |
| "grad_norm": 5.498477458953857, |
| "learning_rate": 3.872549019607844e-05, |
| "loss": 0.117, |
| "num_input_tokens_seen": 539840, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5597147950089125, |
| "grad_norm": 8.601882934570312, |
| "learning_rate": 3.894830659536542e-05, |
| "loss": 0.1269, |
| "num_input_tokens_seen": 543008, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.5686274509803921, |
| "grad_norm": 0.4930436909198761, |
| "learning_rate": 3.917112299465241e-05, |
| "loss": 0.076, |
| "num_input_tokens_seen": 546144, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.5775401069518717, |
| "grad_norm": 4.17765474319458, |
| "learning_rate": 3.939393939393939e-05, |
| "loss": 0.0449, |
| "num_input_tokens_seen": 549440, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.5864527629233511, |
| "grad_norm": 0.14472661912441254, |
| "learning_rate": 3.9616755793226386e-05, |
| "loss": 0.0066, |
| "num_input_tokens_seen": 552608, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.5953654188948305, |
| "grad_norm": 0.04021904617547989, |
| "learning_rate": 3.983957219251337e-05, |
| "loss": 0.0097, |
| "num_input_tokens_seen": 555424, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.6042780748663101, |
| "grad_norm": 0.017523914575576782, |
| "learning_rate": 4.0062388591800356e-05, |
| "loss": 0.047, |
| "num_input_tokens_seen": 558720, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6131907308377897, |
| "grad_norm": 0.019056592136621475, |
| "learning_rate": 4.028520499108734e-05, |
| "loss": 0.0817, |
| "num_input_tokens_seen": 560992, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.6221033868092691, |
| "grad_norm": 11.486791610717773, |
| "learning_rate": 4.0508021390374334e-05, |
| "loss": 0.0296, |
| "num_input_tokens_seen": 563808, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.6310160427807485, |
| "grad_norm": 0.019042672589421272, |
| "learning_rate": 4.073083778966132e-05, |
| "loss": 0.0296, |
| "num_input_tokens_seen": 566752, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.6399286987522281, |
| "grad_norm": 0.024380512535572052, |
| "learning_rate": 4.095365418894831e-05, |
| "loss": 0.0492, |
| "num_input_tokens_seen": 570048, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6488413547237077, |
| "grad_norm": 0.14979250729084015, |
| "learning_rate": 4.11764705882353e-05, |
| "loss": 0.0183, |
| "num_input_tokens_seen": 573312, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.6577540106951871, |
| "grad_norm": 0.011035463772714138, |
| "learning_rate": 4.139928698752228e-05, |
| "loss": 0.0249, |
| "num_input_tokens_seen": 576160, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.15351063013076782, |
| "learning_rate": 4.162210338680927e-05, |
| "loss": 0.0384, |
| "num_input_tokens_seen": 578976, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.6755793226381461, |
| "grad_norm": 7.670846462249756, |
| "learning_rate": 4.184491978609626e-05, |
| "loss": 0.1065, |
| "num_input_tokens_seen": 581856, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.6844919786096257, |
| "grad_norm": 0.29702576994895935, |
| "learning_rate": 4.2067736185383246e-05, |
| "loss": 0.068, |
| "num_input_tokens_seen": 584704, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.6934046345811051, |
| "grad_norm": 0.04708877205848694, |
| "learning_rate": 4.229055258467023e-05, |
| "loss": 0.0205, |
| "num_input_tokens_seen": 587776, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.7023172905525845, |
| "grad_norm": 1.0828979015350342, |
| "learning_rate": 4.251336898395722e-05, |
| "loss": 0.0402, |
| "num_input_tokens_seen": 590848, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.7112299465240641, |
| "grad_norm": 13.534217834472656, |
| "learning_rate": 4.273618538324421e-05, |
| "loss": 0.2889, |
| "num_input_tokens_seen": 594048, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.7201426024955437, |
| "grad_norm": 0.07279043644666672, |
| "learning_rate": 4.2959001782531194e-05, |
| "loss": 0.0494, |
| "num_input_tokens_seen": 596800, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.7290552584670231, |
| "grad_norm": 6.436086177825928, |
| "learning_rate": 4.318181818181819e-05, |
| "loss": 0.0643, |
| "num_input_tokens_seen": 599552, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7379679144385025, |
| "grad_norm": 0.8411062955856323, |
| "learning_rate": 4.340463458110517e-05, |
| "loss": 0.0762, |
| "num_input_tokens_seen": 602816, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.7468805704099821, |
| "grad_norm": 11.923518180847168, |
| "learning_rate": 4.362745098039216e-05, |
| "loss": 0.1196, |
| "num_input_tokens_seen": 605280, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7557932263814617, |
| "grad_norm": 2.0461044311523438, |
| "learning_rate": 4.385026737967914e-05, |
| "loss": 0.0464, |
| "num_input_tokens_seen": 608096, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 0.1000722125172615, |
| "learning_rate": 4.4073083778966135e-05, |
| "loss": 0.0015, |
| "num_input_tokens_seen": 611328, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.7736185383244205, |
| "grad_norm": 6.380837917327881, |
| "learning_rate": 4.429590017825312e-05, |
| "loss": 0.0903, |
| "num_input_tokens_seen": 614304, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.7825311942959001, |
| "grad_norm": 2.4291083812713623, |
| "learning_rate": 4.4518716577540106e-05, |
| "loss": 0.0074, |
| "num_input_tokens_seen": 617952, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7914438502673797, |
| "grad_norm": 11.327752113342285, |
| "learning_rate": 4.474153297682709e-05, |
| "loss": 0.0638, |
| "num_input_tokens_seen": 621120, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.8003565062388591, |
| "grad_norm": 0.16634012758731842, |
| "learning_rate": 4.4964349376114084e-05, |
| "loss": 0.0011, |
| "num_input_tokens_seen": 624416, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.8092691622103387, |
| "grad_norm": 8.74438762664795, |
| "learning_rate": 4.518716577540107e-05, |
| "loss": 0.0811, |
| "num_input_tokens_seen": 627040, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.05186864361166954, |
| "learning_rate": 4.540998217468806e-05, |
| "loss": 0.1683, |
| "num_input_tokens_seen": 630432, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.8270944741532977, |
| "grad_norm": 7.079963207244873, |
| "learning_rate": 4.563279857397505e-05, |
| "loss": 0.1726, |
| "num_input_tokens_seen": 633600, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.8360071301247771, |
| "grad_norm": 1.006164312362671, |
| "learning_rate": 4.585561497326203e-05, |
| "loss": 0.0053, |
| "num_input_tokens_seen": 636640, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.8449197860962567, |
| "grad_norm": 9.37168025970459, |
| "learning_rate": 4.607843137254902e-05, |
| "loss": 0.1272, |
| "num_input_tokens_seen": 639776, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.8538324420677363, |
| "grad_norm": 7.00251579284668, |
| "learning_rate": 4.630124777183601e-05, |
| "loss": 0.0867, |
| "num_input_tokens_seen": 643072, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8627450980392157, |
| "grad_norm": 2.162777900695801, |
| "learning_rate": 4.6524064171123e-05, |
| "loss": 0.0372, |
| "num_input_tokens_seen": 646048, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.8716577540106951, |
| "grad_norm": 1.0629892349243164, |
| "learning_rate": 4.674688057040999e-05, |
| "loss": 0.05, |
| "num_input_tokens_seen": 649472, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.8805704099821747, |
| "grad_norm": 0.0947604700922966, |
| "learning_rate": 4.696969696969697e-05, |
| "loss": 0.0477, |
| "num_input_tokens_seen": 652800, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.8894830659536543, |
| "grad_norm": 0.02225646935403347, |
| "learning_rate": 4.719251336898396e-05, |
| "loss": 0.1286, |
| "num_input_tokens_seen": 655904, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.8983957219251337, |
| "grad_norm": 0.07933518290519714, |
| "learning_rate": 4.741532976827095e-05, |
| "loss": 0.0116, |
| "num_input_tokens_seen": 659072, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.9073083778966131, |
| "grad_norm": 4.901011943817139, |
| "learning_rate": 4.7638146167557936e-05, |
| "loss": 0.0222, |
| "num_input_tokens_seen": 662656, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.9162210338680927, |
| "grad_norm": 1.2279400825500488, |
| "learning_rate": 4.786096256684492e-05, |
| "loss": 0.0725, |
| "num_input_tokens_seen": 665856, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.9251336898395723, |
| "grad_norm": 6.082876205444336, |
| "learning_rate": 4.808377896613191e-05, |
| "loss": 0.226, |
| "num_input_tokens_seen": 669728, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9340463458110517, |
| "grad_norm": 10.361590385437012, |
| "learning_rate": 4.83065953654189e-05, |
| "loss": 0.0448, |
| "num_input_tokens_seen": 672384, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.9429590017825311, |
| "grad_norm": 5.1663713455200195, |
| "learning_rate": 4.8529411764705885e-05, |
| "loss": 0.0127, |
| "num_input_tokens_seen": 675392, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.9518716577540107, |
| "grad_norm": 0.454103946685791, |
| "learning_rate": 4.875222816399288e-05, |
| "loss": 0.0531, |
| "num_input_tokens_seen": 678592, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.9607843137254903, |
| "grad_norm": 0.015812421217560768, |
| "learning_rate": 4.897504456327986e-05, |
| "loss": 0.0549, |
| "num_input_tokens_seen": 682080, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 7.40580940246582, |
| "learning_rate": 4.919786096256685e-05, |
| "loss": 0.0428, |
| "num_input_tokens_seen": 684448, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.9786096256684491, |
| "grad_norm": 5.290456771850586, |
| "learning_rate": 4.9420677361853833e-05, |
| "loss": 0.0128, |
| "num_input_tokens_seen": 688032, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.9875222816399287, |
| "grad_norm": 2.195844888687134, |
| "learning_rate": 4.9643493761140826e-05, |
| "loss": 0.0973, |
| "num_input_tokens_seen": 691520, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.9964349376114083, |
| "grad_norm": 0.004842815455049276, |
| "learning_rate": 4.986631016042781e-05, |
| "loss": 0.0807, |
| "num_input_tokens_seen": 694080, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.10693030804395676, |
| "eval_runtime": 4.5835, |
| "eval_samples_per_second": 54.325, |
| "eval_steps_per_second": 13.745, |
| "num_input_tokens_seen": 694664, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.0053475935828877, |
| "grad_norm": 1.4178153276443481, |
| "learning_rate": 4.999999516051662e-05, |
| "loss": 0.0538, |
| "num_input_tokens_seen": 696360, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.014260249554367, |
| "grad_norm": 1.4418376684188843, |
| "learning_rate": 4.999994071635008e-05, |
| "loss": 0.0031, |
| "num_input_tokens_seen": 700456, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0231729055258465, |
| "grad_norm": 0.04916919022798538, |
| "learning_rate": 4.999982577879495e-05, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 703304, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.0320855614973263, |
| "grad_norm": 0.00426515331491828, |
| "learning_rate": 4.999965034812935e-05, |
| "loss": 0.0583, |
| "num_input_tokens_seen": 706472, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.0409982174688057, |
| "grad_norm": 0.01999180018901825, |
| "learning_rate": 4.9999414424777766e-05, |
| "loss": 0.0316, |
| "num_input_tokens_seen": 709512, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.049910873440285, |
| "grad_norm": 1.4245250225067139, |
| "learning_rate": 4.9999118009311084e-05, |
| "loss": 0.1014, |
| "num_input_tokens_seen": 712328, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.0588235294117645, |
| "grad_norm": 0.011511596851050854, |
| "learning_rate": 4.9998761102446554e-05, |
| "loss": 0.003, |
| "num_input_tokens_seen": 714888, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.0677361853832443, |
| "grad_norm": 20.848102569580078, |
| "learning_rate": 4.999834370504779e-05, |
| "loss": 0.0615, |
| "num_input_tokens_seen": 717192, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.0766488413547237, |
| "grad_norm": 4.06023645401001, |
| "learning_rate": 4.99978658181248e-05, |
| "loss": 0.0976, |
| "num_input_tokens_seen": 720616, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.085561497326203, |
| "grad_norm": 0.18716619908809662, |
| "learning_rate": 4.999732744283393e-05, |
| "loss": 0.0859, |
| "num_input_tokens_seen": 723400, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.0944741532976825, |
| "grad_norm": 0.1339133232831955, |
| "learning_rate": 4.999672858047791e-05, |
| "loss": 0.0051, |
| "num_input_tokens_seen": 726856, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.1033868092691623, |
| "grad_norm": 6.125833034515381, |
| "learning_rate": 4.999606923250585e-05, |
| "loss": 0.0108, |
| "num_input_tokens_seen": 730312, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.1122994652406417, |
| "grad_norm": 0.3494812846183777, |
| "learning_rate": 4.999534940051317e-05, |
| "loss": 0.0027, |
| "num_input_tokens_seen": 733480, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 0.4654238224029541, |
| "learning_rate": 4.9994569086241716e-05, |
| "loss": 0.0633, |
| "num_input_tokens_seen": 736936, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.1301247771836005, |
| "grad_norm": 0.0284738652408123, |
| "learning_rate": 4.999372829157962e-05, |
| "loss": 0.0038, |
| "num_input_tokens_seen": 740328, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.1390374331550803, |
| "grad_norm": 0.025350507348775864, |
| "learning_rate": 4.9992827018561386e-05, |
| "loss": 0.0767, |
| "num_input_tokens_seen": 743496, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.1479500891265597, |
| "grad_norm": 12.299341201782227, |
| "learning_rate": 4.999186526936788e-05, |
| "loss": 0.0968, |
| "num_input_tokens_seen": 746888, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.156862745098039, |
| "grad_norm": 0.029630400240421295, |
| "learning_rate": 4.999084304632627e-05, |
| "loss": 0.034, |
| "num_input_tokens_seen": 749704, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.165775401069519, |
| "grad_norm": 1.3330607414245605, |
| "learning_rate": 4.9989760351910074e-05, |
| "loss": 0.067, |
| "num_input_tokens_seen": 753000, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.1746880570409983, |
| "grad_norm": 0.08385679125785828, |
| "learning_rate": 4.998861718873915e-05, |
| "loss": 0.0046, |
| "num_input_tokens_seen": 756104, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.1836007130124777, |
| "grad_norm": 14.622919082641602, |
| "learning_rate": 4.9987413559579636e-05, |
| "loss": 0.1344, |
| "num_input_tokens_seen": 759176, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.192513368983957, |
| "grad_norm": 0.14707021415233612, |
| "learning_rate": 4.9986149467344004e-05, |
| "loss": 0.0029, |
| "num_input_tokens_seen": 762248, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.2014260249554365, |
| "grad_norm": 0.8391829133033752, |
| "learning_rate": 4.998482491509104e-05, |
| "loss": 0.0056, |
| "num_input_tokens_seen": 765320, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.2103386809269163, |
| "grad_norm": 0.046552758663892746, |
| "learning_rate": 4.998343990602582e-05, |
| "loss": 0.031, |
| "num_input_tokens_seen": 768008, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.2192513368983957, |
| "grad_norm": 0.09806407988071442, |
| "learning_rate": 4.998199444349969e-05, |
| "loss": 0.051, |
| "num_input_tokens_seen": 771496, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.228163992869875, |
| "grad_norm": 0.853639543056488, |
| "learning_rate": 4.998048853101031e-05, |
| "loss": 0.1757, |
| "num_input_tokens_seen": 775048, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.237076648841355, |
| "grad_norm": 0.11520189791917801, |
| "learning_rate": 4.99789221722016e-05, |
| "loss": 0.0012, |
| "num_input_tokens_seen": 778312, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.2459893048128343, |
| "grad_norm": 0.007044652942568064, |
| "learning_rate": 4.997729537086373e-05, |
| "loss": 0.0165, |
| "num_input_tokens_seen": 781224, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.2549019607843137, |
| "grad_norm": 0.011247215792536736, |
| "learning_rate": 4.997560813093316e-05, |
| "loss": 0.0043, |
| "num_input_tokens_seen": 784520, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.263814616755793, |
| "grad_norm": 0.6388627290725708, |
| "learning_rate": 4.997386045649255e-05, |
| "loss": 0.002, |
| "num_input_tokens_seen": 787880, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 12.410165786743164, |
| "learning_rate": 4.9972052351770836e-05, |
| "loss": 0.1314, |
| "num_input_tokens_seen": 791240, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.2816399286987523, |
| "grad_norm": 0.06250201910734177, |
| "learning_rate": 4.997018382114316e-05, |
| "loss": 0.032, |
| "num_input_tokens_seen": 794408, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.2905525846702317, |
| "grad_norm": 0.0694412887096405, |
| "learning_rate": 4.996825486913088e-05, |
| "loss": 0.0025, |
| "num_input_tokens_seen": 797800, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.299465240641711, |
| "grad_norm": 0.05269337072968483, |
| "learning_rate": 4.996626550040157e-05, |
| "loss": 0.0549, |
| "num_input_tokens_seen": 801160, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.308377896613191, |
| "grad_norm": 0.044849712401628494, |
| "learning_rate": 4.9964215719768964e-05, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 804008, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.3172905525846703, |
| "grad_norm": 0.7724558711051941, |
| "learning_rate": 4.9962105532193024e-05, |
| "loss": 0.0246, |
| "num_input_tokens_seen": 807080, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.3262032085561497, |
| "grad_norm": 5.825749397277832, |
| "learning_rate": 4.995993494277985e-05, |
| "loss": 0.0343, |
| "num_input_tokens_seen": 810056, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.335115864527629, |
| "grad_norm": 0.011077574454247952, |
| "learning_rate": 4.995770395678171e-05, |
| "loss": 0.0477, |
| "num_input_tokens_seen": 813544, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.344028520499109, |
| "grad_norm": 6.1359782218933105, |
| "learning_rate": 4.9955412579597004e-05, |
| "loss": 0.0576, |
| "num_input_tokens_seen": 816200, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 0.015752514824271202, |
| "learning_rate": 4.995306081677028e-05, |
| "loss": 0.001, |
| "num_input_tokens_seen": 818792, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.3618538324420677, |
| "grad_norm": 0.03618216887116432, |
| "learning_rate": 4.99506486739922e-05, |
| "loss": 0.0123, |
| "num_input_tokens_seen": 821736, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.370766488413547, |
| "grad_norm": 0.06211644038558006, |
| "learning_rate": 4.994817615709951e-05, |
| "loss": 0.0018, |
| "num_input_tokens_seen": 825032, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.379679144385027, |
| "grad_norm": 3.1051697731018066, |
| "learning_rate": 4.994564327207508e-05, |
| "loss": 0.2383, |
| "num_input_tokens_seen": 827912, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.3885918003565063, |
| "grad_norm": 0.001248644315637648, |
| "learning_rate": 4.9943050025047824e-05, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 831080, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.3975044563279857, |
| "grad_norm": 20.717248916625977, |
| "learning_rate": 4.994039642229274e-05, |
| "loss": 0.0835, |
| "num_input_tokens_seen": 834440, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.406417112299465, |
| "grad_norm": 3.5113887786865234, |
| "learning_rate": 4.993768247023084e-05, |
| "loss": 0.185, |
| "num_input_tokens_seen": 837096, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.415329768270945, |
| "grad_norm": 0.20629923045635223, |
| "learning_rate": 4.9934908175429194e-05, |
| "loss": 0.0054, |
| "num_input_tokens_seen": 840520, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.4242424242424243, |
| "grad_norm": 0.030959784984588623, |
| "learning_rate": 4.993207354460089e-05, |
| "loss": 0.0024, |
| "num_input_tokens_seen": 843400, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.4331550802139037, |
| "grad_norm": 0.0362035296857357, |
| "learning_rate": 4.9929178584605e-05, |
| "loss": 0.0362, |
| "num_input_tokens_seen": 846088, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.442067736185383, |
| "grad_norm": 0.13232892751693726, |
| "learning_rate": 4.992622330244656e-05, |
| "loss": 0.0356, |
| "num_input_tokens_seen": 848776, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.450980392156863, |
| "grad_norm": 0.03575564920902252, |
| "learning_rate": 4.99232077052766e-05, |
| "loss": 0.0025, |
| "num_input_tokens_seen": 852328, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.4598930481283423, |
| "grad_norm": 6.295513153076172, |
| "learning_rate": 4.992013180039209e-05, |
| "loss": 0.0426, |
| "num_input_tokens_seen": 854952, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.4688057040998217, |
| "grad_norm": 0.1265345811843872, |
| "learning_rate": 4.991699559523591e-05, |
| "loss": 0.0016, |
| "num_input_tokens_seen": 858536, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.477718360071301, |
| "grad_norm": 6.578834533691406, |
| "learning_rate": 4.9913799097396877e-05, |
| "loss": 0.0049, |
| "num_input_tokens_seen": 861864, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.486631016042781, |
| "grad_norm": 0.011173945851624012, |
| "learning_rate": 4.9910542314609684e-05, |
| "loss": 0.0108, |
| "num_input_tokens_seen": 864104, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.4955436720142603, |
| "grad_norm": 6.777383327484131, |
| "learning_rate": 4.990722525475491e-05, |
| "loss": 0.0097, |
| "num_input_tokens_seen": 867336, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.5044563279857397, |
| "grad_norm": 0.019024401903152466, |
| "learning_rate": 4.990384792585897e-05, |
| "loss": 0.041, |
| "num_input_tokens_seen": 870376, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.5133689839572195, |
| "grad_norm": 0.026918886229395866, |
| "learning_rate": 4.990041033609413e-05, |
| "loss": 0.0134, |
| "num_input_tokens_seen": 872456, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.522281639928699, |
| "grad_norm": 0.0020757236052304506, |
| "learning_rate": 4.989691249377847e-05, |
| "loss": 0.0461, |
| "num_input_tokens_seen": 874888, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.5311942959001783, |
| "grad_norm": 0.0028826724737882614, |
| "learning_rate": 4.989335440737586e-05, |
| "loss": 0.1087, |
| "num_input_tokens_seen": 877992, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.5401069518716577, |
| "grad_norm": 0.02220228686928749, |
| "learning_rate": 4.9889736085495965e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 880776, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.549019607843137, |
| "grad_norm": 0.06741692870855331, |
| "learning_rate": 4.988605753689416e-05, |
| "loss": 0.1482, |
| "num_input_tokens_seen": 883944, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.557932263814617, |
| "grad_norm": 0.012142196297645569, |
| "learning_rate": 4.98823187704716e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 886632, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.5668449197860963, |
| "grad_norm": 0.021630438044667244, |
| "learning_rate": 4.9878519795275133e-05, |
| "loss": 0.0134, |
| "num_input_tokens_seen": 889448, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.5757575757575757, |
| "grad_norm": 16.687862396240234, |
| "learning_rate": 4.987466062049728e-05, |
| "loss": 0.021, |
| "num_input_tokens_seen": 892744, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.5846702317290555, |
| "grad_norm": 0.5119276642799377, |
| "learning_rate": 4.9870741255476266e-05, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 895816, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.593582887700535, |
| "grad_norm": 0.02749607339501381, |
| "learning_rate": 4.986676170969593e-05, |
| "loss": 0.0562, |
| "num_input_tokens_seen": 899784, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.6024955436720143, |
| "grad_norm": 1.1509554386138916, |
| "learning_rate": 4.986272199278574e-05, |
| "loss": 0.0698, |
| "num_input_tokens_seen": 902984, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.6114081996434937, |
| "grad_norm": 0.04116099700331688, |
| "learning_rate": 4.985862211452077e-05, |
| "loss": 0.0216, |
| "num_input_tokens_seen": 905800, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.620320855614973, |
| "grad_norm": 0.025392096489667892, |
| "learning_rate": 4.985446208482166e-05, |
| "loss": 0.0026, |
| "num_input_tokens_seen": 908488, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.629233511586453, |
| "grad_norm": 0.03849076107144356, |
| "learning_rate": 4.985024191375462e-05, |
| "loss": 0.152, |
| "num_input_tokens_seen": 911528, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.6381461675579323, |
| "grad_norm": 3.8424088954925537, |
| "learning_rate": 4.984596161153136e-05, |
| "loss": 0.0847, |
| "num_input_tokens_seen": 914536, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.6470588235294117, |
| "grad_norm": 3.8528010845184326, |
| "learning_rate": 4.9841621188509105e-05, |
| "loss": 0.0324, |
| "num_input_tokens_seen": 917384, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.6559714795008915, |
| "grad_norm": 1.6923863887786865, |
| "learning_rate": 4.983722065519055e-05, |
| "loss": 0.0044, |
| "num_input_tokens_seen": 920072, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.664884135472371, |
| "grad_norm": 0.2570834457874298, |
| "learning_rate": 4.983276002222386e-05, |
| "loss": 0.066, |
| "num_input_tokens_seen": 923304, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.6737967914438503, |
| "grad_norm": 0.06823292374610901, |
| "learning_rate": 4.9828239300402605e-05, |
| "loss": 0.0289, |
| "num_input_tokens_seen": 925480, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.6827094474153297, |
| "grad_norm": 0.1330983191728592, |
| "learning_rate": 4.982365850066576e-05, |
| "loss": 0.018, |
| "num_input_tokens_seen": 928264, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.691622103386809, |
| "grad_norm": 0.988633394241333, |
| "learning_rate": 4.9819017634097685e-05, |
| "loss": 0.0038, |
| "num_input_tokens_seen": 931784, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.700534759358289, |
| "grad_norm": 0.1021723598241806, |
| "learning_rate": 4.981431671192807e-05, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 935336, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.7094474153297683, |
| "grad_norm": 0.029408583417534828, |
| "learning_rate": 4.9809555745531934e-05, |
| "loss": 0.07, |
| "num_input_tokens_seen": 938472, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.7183600713012477, |
| "grad_norm": 2.873579978942871, |
| "learning_rate": 4.980473474642957e-05, |
| "loss": 0.1587, |
| "num_input_tokens_seen": 941288, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.38247668743133545, |
| "learning_rate": 4.979985372628657e-05, |
| "loss": 0.0555, |
| "num_input_tokens_seen": 944616, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.736185383244207, |
| "grad_norm": 0.10064245760440826, |
| "learning_rate": 4.979491269691372e-05, |
| "loss": 0.0012, |
| "num_input_tokens_seen": 947880, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.7450980392156863, |
| "grad_norm": 7.594188213348389, |
| "learning_rate": 4.978991167026705e-05, |
| "loss": 0.1035, |
| "num_input_tokens_seen": 950440, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.7540106951871657, |
| "grad_norm": 2.325514078140259, |
| "learning_rate": 4.9784850658447745e-05, |
| "loss": 0.0049, |
| "num_input_tokens_seen": 953576, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.762923351158645, |
| "grad_norm": 7.605442523956299, |
| "learning_rate": 4.9779729673702135e-05, |
| "loss": 0.1588, |
| "num_input_tokens_seen": 956456, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.771836007130125, |
| "grad_norm": 0.043773096054792404, |
| "learning_rate": 4.977454872842169e-05, |
| "loss": 0.003, |
| "num_input_tokens_seen": 959496, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.7807486631016043, |
| "grad_norm": 5.104632377624512, |
| "learning_rate": 4.9769307835142946e-05, |
| "loss": 0.0405, |
| "num_input_tokens_seen": 962440, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.7896613190730837, |
| "grad_norm": 1.4859371185302734, |
| "learning_rate": 4.9764007006547516e-05, |
| "loss": 0.0479, |
| "num_input_tokens_seen": 965576, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.7985739750445635, |
| "grad_norm": 0.03639693185687065, |
| "learning_rate": 4.975864625546204e-05, |
| "loss": 0.0031, |
| "num_input_tokens_seen": 969160, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.807486631016043, |
| "grad_norm": 3.4172000885009766, |
| "learning_rate": 4.975322559485814e-05, |
| "loss": 0.0214, |
| "num_input_tokens_seen": 972232, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.8163992869875223, |
| "grad_norm": 0.13606803119182587, |
| "learning_rate": 4.974774503785241e-05, |
| "loss": 0.0031, |
| "num_input_tokens_seen": 975592, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.8253119429590017, |
| "grad_norm": 0.018815385177731514, |
| "learning_rate": 4.974220459770639e-05, |
| "loss": 0.0165, |
| "num_input_tokens_seen": 979304, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.834224598930481, |
| "grad_norm": 0.00781914871186018, |
| "learning_rate": 4.9736604287826497e-05, |
| "loss": 0.0122, |
| "num_input_tokens_seen": 983176, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.843137254901961, |
| "grad_norm": 0.008310927078127861, |
| "learning_rate": 4.9730944121764045e-05, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 986280, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.8520499108734403, |
| "grad_norm": 0.09141358733177185, |
| "learning_rate": 4.9725224113215164e-05, |
| "loss": 0.0173, |
| "num_input_tokens_seen": 989064, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8609625668449197, |
| "grad_norm": 0.36811965703964233, |
| "learning_rate": 4.971944427602081e-05, |
| "loss": 0.0182, |
| "num_input_tokens_seen": 992456, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.8698752228163995, |
| "grad_norm": 0.006202696356922388, |
| "learning_rate": 4.971360462416667e-05, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 995336, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.878787878787879, |
| "grad_norm": 0.01375326793640852, |
| "learning_rate": 4.97077051717832e-05, |
| "loss": 0.0812, |
| "num_input_tokens_seen": 998248, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.8877005347593583, |
| "grad_norm": 0.005168983247131109, |
| "learning_rate": 4.970174593314556e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1001512, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.8966131907308377, |
| "grad_norm": 0.16203118860721588, |
| "learning_rate": 4.969572692267355e-05, |
| "loss": 0.0738, |
| "num_input_tokens_seen": 1004616, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.905525846702317, |
| "grad_norm": 0.006419321522116661, |
| "learning_rate": 4.968964815493162e-05, |
| "loss": 0.0614, |
| "num_input_tokens_seen": 1007336, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.914438502673797, |
| "grad_norm": 8.11766529083252, |
| "learning_rate": 4.968350964462883e-05, |
| "loss": 0.0619, |
| "num_input_tokens_seen": 1010120, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.9233511586452763, |
| "grad_norm": 4.019729137420654, |
| "learning_rate": 4.967731140661878e-05, |
| "loss": 0.0659, |
| "num_input_tokens_seen": 1013384, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.9322638146167557, |
| "grad_norm": 11.93336296081543, |
| "learning_rate": 4.9671053455899584e-05, |
| "loss": 0.0643, |
| "num_input_tokens_seen": 1016168, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 0.25298967957496643, |
| "learning_rate": 4.966473580761389e-05, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 1019336, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.950089126559715, |
| "grad_norm": 8.617263793945312, |
| "learning_rate": 4.965835847704876e-05, |
| "loss": 0.0267, |
| "num_input_tokens_seen": 1021704, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.9590017825311943, |
| "grad_norm": 11.657393455505371, |
| "learning_rate": 4.965192147963568e-05, |
| "loss": 0.0074, |
| "num_input_tokens_seen": 1025352, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.9679144385026737, |
| "grad_norm": 2.9546124935150146, |
| "learning_rate": 4.9645424830950526e-05, |
| "loss": 0.0319, |
| "num_input_tokens_seen": 1028712, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.976827094474153, |
| "grad_norm": 0.027331123128533363, |
| "learning_rate": 4.963886854671351e-05, |
| "loss": 0.0657, |
| "num_input_tokens_seen": 1031592, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.985739750445633, |
| "grad_norm": 11.084074020385742, |
| "learning_rate": 4.963225264278914e-05, |
| "loss": 0.0646, |
| "num_input_tokens_seen": 1034728, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.9946524064171123, |
| "grad_norm": 9.177580833435059, |
| "learning_rate": 4.962557713518617e-05, |
| "loss": 0.0532, |
| "num_input_tokens_seen": 1038920, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.11031711101531982, |
| "eval_runtime": 4.581, |
| "eval_samples_per_second": 54.356, |
| "eval_steps_per_second": 13.753, |
| "num_input_tokens_seen": 1039864, |
| "step": 1683 |
| }, |
| { |
| "epoch": 3.0035650623885917, |
| "grad_norm": 0.3040387034416199, |
| "learning_rate": 4.961884204005764e-05, |
| "loss": 0.3259, |
| "num_input_tokens_seen": 1041016, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.0124777183600715, |
| "grad_norm": 0.030186321586370468, |
| "learning_rate": 4.961204737370071e-05, |
| "loss": 0.0567, |
| "num_input_tokens_seen": 1043704, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.021390374331551, |
| "grad_norm": 14.996708869934082, |
| "learning_rate": 4.960519315255673e-05, |
| "loss": 0.0911, |
| "num_input_tokens_seen": 1046296, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.0303030303030303, |
| "grad_norm": 0.9796962141990662, |
| "learning_rate": 4.959827939321113e-05, |
| "loss": 0.047, |
| "num_input_tokens_seen": 1049528, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.0392156862745097, |
| "grad_norm": 0.07655226439237595, |
| "learning_rate": 4.959130611239343e-05, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 1052408, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.0481283422459895, |
| "grad_norm": 0.0062500229105353355, |
| "learning_rate": 4.958427332697716e-05, |
| "loss": 0.0054, |
| "num_input_tokens_seen": 1056088, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.057040998217469, |
| "grad_norm": 15.905081748962402, |
| "learning_rate": 4.9577181053979836e-05, |
| "loss": 0.0085, |
| "num_input_tokens_seen": 1059544, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.0659536541889483, |
| "grad_norm": 1.3312292098999023, |
| "learning_rate": 4.957002931056293e-05, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 1062776, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.0748663101604277, |
| "grad_norm": 0.00571996346116066, |
| "learning_rate": 4.956281811403181e-05, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 1065432, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.0837789661319075, |
| "grad_norm": 0.04415470361709595, |
| "learning_rate": 4.955554748183571e-05, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 1068312, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.092691622103387, |
| "grad_norm": 0.014133289456367493, |
| "learning_rate": 4.9548217431567665e-05, |
| "loss": 0.0037, |
| "num_input_tokens_seen": 1070616, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.1016042780748663, |
| "grad_norm": 0.006229729391634464, |
| "learning_rate": 4.954082798096452e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1074200, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.1105169340463457, |
| "grad_norm": 0.0035163508728146553, |
| "learning_rate": 4.9533379147906825e-05, |
| "loss": 0.0137, |
| "num_input_tokens_seen": 1077656, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.1194295900178255, |
| "grad_norm": 0.008305568248033524, |
| "learning_rate": 4.952587095041882e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1080440, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.128342245989305, |
| "grad_norm": 0.0018318976508453488, |
| "learning_rate": 4.9518303406668404e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1083288, |
| "step": 1755 |
| }, |
| { |
| "epoch": 3.1372549019607843, |
| "grad_norm": 0.0020866121631115675, |
| "learning_rate": 4.9510676534967085e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1086616, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.1461675579322637, |
| "grad_norm": 3.221034526824951, |
| "learning_rate": 4.950299035376991e-05, |
| "loss": 0.0738, |
| "num_input_tokens_seen": 1090360, |
| "step": 1765 |
| }, |
| { |
| "epoch": 3.1550802139037435, |
| "grad_norm": 0.016158247366547585, |
| "learning_rate": 4.949524488167545e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1093176, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.163992869875223, |
| "grad_norm": 0.021480686962604523, |
| "learning_rate": 4.9487440137425755e-05, |
| "loss": 0.0329, |
| "num_input_tokens_seen": 1096088, |
| "step": 1775 |
| }, |
| { |
| "epoch": 3.1729055258467023, |
| "grad_norm": 0.024992266669869423, |
| "learning_rate": 4.947957613990627e-05, |
| "loss": 0.0059, |
| "num_input_tokens_seen": 1098872, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 0.025295181199908257, |
| "learning_rate": 4.947165290814584e-05, |
| "loss": 0.0061, |
| "num_input_tokens_seen": 1102360, |
| "step": 1785 |
| }, |
| { |
| "epoch": 3.1907308377896615, |
| "grad_norm": 0.044885795563459396, |
| "learning_rate": 4.9463670461316644e-05, |
| "loss": 0.0273, |
| "num_input_tokens_seen": 1105592, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.199643493761141, |
| "grad_norm": 0.012321226298809052, |
| "learning_rate": 4.945562881873412e-05, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 1109432, |
| "step": 1795 |
| }, |
| { |
| "epoch": 3.2085561497326203, |
| "grad_norm": 0.004980860278010368, |
| "learning_rate": 4.944752799985699e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1112664, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.2174688057040997, |
| "grad_norm": 0.00631902227178216, |
| "learning_rate": 4.943936802428712e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1115928, |
| "step": 1805 |
| }, |
| { |
| "epoch": 3.2263814616755795, |
| "grad_norm": 0.017392151057720184, |
| "learning_rate": 4.9431148911769534e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1119576, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.235294117647059, |
| "grad_norm": 0.03878406435251236, |
| "learning_rate": 4.942287068219238e-05, |
| "loss": 0.0018, |
| "num_input_tokens_seen": 1123256, |
| "step": 1815 |
| }, |
| { |
| "epoch": 3.2442067736185383, |
| "grad_norm": 15.287306785583496, |
| "learning_rate": 4.941453335558681e-05, |
| "loss": 0.0258, |
| "num_input_tokens_seen": 1126872, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.2531194295900177, |
| "grad_norm": 24.802167892456055, |
| "learning_rate": 4.9406136952127015e-05, |
| "loss": 0.0088, |
| "num_input_tokens_seen": 1129496, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.2620320855614975, |
| "grad_norm": 0.0961998999118805, |
| "learning_rate": 4.9397681492130104e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1132280, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.270944741532977, |
| "grad_norm": 0.00621894421055913, |
| "learning_rate": 4.9389166996056114e-05, |
| "loss": 0.0024, |
| "num_input_tokens_seen": 1135640, |
| "step": 1835 |
| }, |
| { |
| "epoch": 3.2798573975044563, |
| "grad_norm": 0.005441363900899887, |
| "learning_rate": 4.938059348450792e-05, |
| "loss": 0.0678, |
| "num_input_tokens_seen": 1138264, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.2887700534759357, |
| "grad_norm": 0.07161960005760193, |
| "learning_rate": 4.937196097823119e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1141432, |
| "step": 1845 |
| }, |
| { |
| "epoch": 3.2976827094474155, |
| "grad_norm": 0.010849026031792164, |
| "learning_rate": 4.936326949811437e-05, |
| "loss": 0.0015, |
| "num_input_tokens_seen": 1143736, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.306595365418895, |
| "grad_norm": 0.01427000667899847, |
| "learning_rate": 4.93545190651886e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1146392, |
| "step": 1855 |
| }, |
| { |
| "epoch": 3.3155080213903743, |
| "grad_norm": 19.148462295532227, |
| "learning_rate": 4.934570970062765e-05, |
| "loss": 0.0625, |
| "num_input_tokens_seen": 1149656, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.3244206773618536, |
| "grad_norm": 22.86386489868164, |
| "learning_rate": 4.93368414257479e-05, |
| "loss": 0.0068, |
| "num_input_tokens_seen": 1152696, |
| "step": 1865 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 1.5289530754089355, |
| "learning_rate": 4.932791426200829e-05, |
| "loss": 0.0015, |
| "num_input_tokens_seen": 1155480, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.342245989304813, |
| "grad_norm": 20.363109588623047, |
| "learning_rate": 4.931892823101024e-05, |
| "loss": 0.0252, |
| "num_input_tokens_seen": 1159288, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.3511586452762923, |
| "grad_norm": 6.934281826019287, |
| "learning_rate": 4.930988335449762e-05, |
| "loss": 0.0456, |
| "num_input_tokens_seen": 1161848, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.3600713012477716, |
| "grad_norm": 0.005279934033751488, |
| "learning_rate": 4.9300779654356706e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1165048, |
| "step": 1885 |
| }, |
| { |
| "epoch": 3.3689839572192515, |
| "grad_norm": 0.005097201559692621, |
| "learning_rate": 4.929161715261608e-05, |
| "loss": 0.014, |
| "num_input_tokens_seen": 1167928, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.377896613190731, |
| "grad_norm": 0.0021505567710846663, |
| "learning_rate": 4.9282395871446626e-05, |
| "loss": 0.0048, |
| "num_input_tokens_seen": 1171928, |
| "step": 1895 |
| }, |
| { |
| "epoch": 3.3868092691622103, |
| "grad_norm": 0.08554159104824066, |
| "learning_rate": 4.927311583316148e-05, |
| "loss": 0.0915, |
| "num_input_tokens_seen": 1174200, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.3957219251336896, |
| "grad_norm": 0.024701252579689026, |
| "learning_rate": 4.92637770602159e-05, |
| "loss": 0.0749, |
| "num_input_tokens_seen": 1177368, |
| "step": 1905 |
| }, |
| { |
| "epoch": 3.4046345811051695, |
| "grad_norm": 0.5353766083717346, |
| "learning_rate": 4.925437957520733e-05, |
| "loss": 0.0578, |
| "num_input_tokens_seen": 1180600, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.413547237076649, |
| "grad_norm": 0.5619534850120544, |
| "learning_rate": 4.9244923400875245e-05, |
| "loss": 0.0067, |
| "num_input_tokens_seen": 1183416, |
| "step": 1915 |
| }, |
| { |
| "epoch": 3.4224598930481283, |
| "grad_norm": 0.03234964981675148, |
| "learning_rate": 4.923540856010113e-05, |
| "loss": 0.0009, |
| "num_input_tokens_seen": 1186904, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.431372549019608, |
| "grad_norm": 0.010062027722597122, |
| "learning_rate": 4.922583507590843e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1190360, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.4402852049910875, |
| "grad_norm": 0.0606231763958931, |
| "learning_rate": 4.921620297146253e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1193912, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.449197860962567, |
| "grad_norm": 0.0035294261761009693, |
| "learning_rate": 4.920651227007062e-05, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 1197432, |
| "step": 1935 |
| }, |
| { |
| "epoch": 3.4581105169340463, |
| "grad_norm": 0.00476317061111331, |
| "learning_rate": 4.919676299518167e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1200728, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.4670231729055256, |
| "grad_norm": 0.000967069179750979, |
| "learning_rate": 4.918695517038643e-05, |
| "loss": 0.0018, |
| "num_input_tokens_seen": 1204408, |
| "step": 1945 |
| }, |
| { |
| "epoch": 3.4759358288770055, |
| "grad_norm": 0.009276410564780235, |
| "learning_rate": 4.917708881941728e-05, |
| "loss": 0.0093, |
| "num_input_tokens_seen": 1207512, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.484848484848485, |
| "grad_norm": 0.0032905612606555223, |
| "learning_rate": 4.916716396614824e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1210520, |
| "step": 1955 |
| }, |
| { |
| "epoch": 3.4937611408199643, |
| "grad_norm": 0.00179088837467134, |
| "learning_rate": 4.91571806345949e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1213784, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.502673796791444, |
| "grad_norm": 3.6233127117156982, |
| "learning_rate": 4.914713884891433e-05, |
| "loss": 0.0028, |
| "num_input_tokens_seen": 1216504, |
| "step": 1965 |
| }, |
| { |
| "epoch": 3.5115864527629235, |
| "grad_norm": 0.0005736930761486292, |
| "learning_rate": 4.913703863340504e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1219160, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.520499108734403, |
| "grad_norm": 0.018492160364985466, |
| "learning_rate": 4.912688001250697e-05, |
| "loss": 0.0042, |
| "num_input_tokens_seen": 1222296, |
| "step": 1975 |
| }, |
| { |
| "epoch": 3.5294117647058822, |
| "grad_norm": 12.424592018127441, |
| "learning_rate": 4.9116663010801326e-05, |
| "loss": 0.1497, |
| "num_input_tokens_seen": 1225400, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.5383244206773616, |
| "grad_norm": 0.0016981695080175996, |
| "learning_rate": 4.910638765301062e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1229112, |
| "step": 1985 |
| }, |
| { |
| "epoch": 3.5472370766488415, |
| "grad_norm": 0.30625298619270325, |
| "learning_rate": 4.909605396399856e-05, |
| "loss": 0.0615, |
| "num_input_tokens_seen": 1232088, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.556149732620321, |
| "grad_norm": 0.03002898208796978, |
| "learning_rate": 4.908566196876999e-05, |
| "loss": 0.006, |
| "num_input_tokens_seen": 1234680, |
| "step": 1995 |
| }, |
| { |
| "epoch": 3.5650623885918002, |
| "grad_norm": 0.7189759016036987, |
| "learning_rate": 4.9075211692470865e-05, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 1238648, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.57397504456328, |
| "grad_norm": 0.020788084715604782, |
| "learning_rate": 4.906470316038814e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1242104, |
| "step": 2005 |
| }, |
| { |
| "epoch": 3.5828877005347595, |
| "grad_norm": 0.000429228093707934, |
| "learning_rate": 4.9054136397949753e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1244472, |
| "step": 2010 |
| }, |
| { |
| "epoch": 3.591800356506239, |
| "grad_norm": 0.005330861080437899, |
| "learning_rate": 4.904351143072452e-05, |
| "loss": 0.0291, |
| "num_input_tokens_seen": 1247864, |
| "step": 2015 |
| }, |
| { |
| "epoch": 3.6007130124777182, |
| "grad_norm": 0.00216846214607358, |
| "learning_rate": 4.903282828442213e-05, |
| "loss": 0.0035, |
| "num_input_tokens_seen": 1251192, |
| "step": 2020 |
| }, |
| { |
| "epoch": 3.6096256684491976, |
| "grad_norm": 0.004735489841550589, |
| "learning_rate": 4.902208698489302e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1253688, |
| "step": 2025 |
| }, |
| { |
| "epoch": 3.6185383244206775, |
| "grad_norm": 0.0038757645525038242, |
| "learning_rate": 4.9011287558128366e-05, |
| "loss": 0.1682, |
| "num_input_tokens_seen": 1256728, |
| "step": 2030 |
| }, |
| { |
| "epoch": 3.627450980392157, |
| "grad_norm": 0.8156978487968445, |
| "learning_rate": 4.900043003025998e-05, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 1259960, |
| "step": 2035 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 6.951355457305908, |
| "learning_rate": 4.898951442756027e-05, |
| "loss": 0.0179, |
| "num_input_tokens_seen": 1262136, |
| "step": 2040 |
| }, |
| { |
| "epoch": 3.645276292335116, |
| "grad_norm": 1.6026536226272583, |
| "learning_rate": 4.897854077644217e-05, |
| "loss": 0.0058, |
| "num_input_tokens_seen": 1264696, |
| "step": 2045 |
| }, |
| { |
| "epoch": 3.6541889483065955, |
| "grad_norm": 0.007580269128084183, |
| "learning_rate": 4.8967509103459084e-05, |
| "loss": 0.0022, |
| "num_input_tokens_seen": 1267768, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.663101604278075, |
| "grad_norm": 0.027493992820382118, |
| "learning_rate": 4.8956419435304804e-05, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 1270776, |
| "step": 2055 |
| }, |
| { |
| "epoch": 3.6720142602495542, |
| "grad_norm": 0.008505471050739288, |
| "learning_rate": 4.894527179881345e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1273848, |
| "step": 2060 |
| }, |
| { |
| "epoch": 3.6809269162210336, |
| "grad_norm": 0.011075139045715332, |
| "learning_rate": 4.893406622095943e-05, |
| "loss": 0.0412, |
| "num_input_tokens_seen": 1275896, |
| "step": 2065 |
| }, |
| { |
| "epoch": 3.6898395721925135, |
| "grad_norm": 0.018276818096637726, |
| "learning_rate": 4.8922802728857334e-05, |
| "loss": 0.1426, |
| "num_input_tokens_seen": 1278552, |
| "step": 2070 |
| }, |
| { |
| "epoch": 3.698752228163993, |
| "grad_norm": 0.0035789543762803078, |
| "learning_rate": 4.89114813497619e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1281592, |
| "step": 2075 |
| }, |
| { |
| "epoch": 3.7076648841354722, |
| "grad_norm": 0.0011354973539710045, |
| "learning_rate": 4.890010211106795e-05, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 1284504, |
| "step": 2080 |
| }, |
| { |
| "epoch": 3.716577540106952, |
| "grad_norm": 0.1557454615831375, |
| "learning_rate": 4.8888665040310273e-05, |
| "loss": 0.0069, |
| "num_input_tokens_seen": 1287256, |
| "step": 2085 |
| }, |
| { |
| "epoch": 3.7254901960784315, |
| "grad_norm": 0.017272913828492165, |
| "learning_rate": 4.887717016516363e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1290808, |
| "step": 2090 |
| }, |
| { |
| "epoch": 3.734402852049911, |
| "grad_norm": 0.007790794596076012, |
| "learning_rate": 4.886561751344266e-05, |
| "loss": 0.062, |
| "num_input_tokens_seen": 1294040, |
| "step": 2095 |
| }, |
| { |
| "epoch": 3.7433155080213902, |
| "grad_norm": 0.0025972179137170315, |
| "learning_rate": 4.885400711310178e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1296568, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.7522281639928696, |
| "grad_norm": 0.03971244767308235, |
| "learning_rate": 4.8842338992235146e-05, |
| "loss": 0.0119, |
| "num_input_tokens_seen": 1299704, |
| "step": 2105 |
| }, |
| { |
| "epoch": 3.7611408199643495, |
| "grad_norm": 0.014764413237571716, |
| "learning_rate": 4.883061317907661e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1301816, |
| "step": 2110 |
| }, |
| { |
| "epoch": 3.770053475935829, |
| "grad_norm": 0.02033417299389839, |
| "learning_rate": 4.8818829701999596e-05, |
| "loss": 0.0446, |
| "num_input_tokens_seen": 1304728, |
| "step": 2115 |
| }, |
| { |
| "epoch": 3.7789661319073082, |
| "grad_norm": 0.0014599093701690435, |
| "learning_rate": 4.880698858951707e-05, |
| "loss": 0.0268, |
| "num_input_tokens_seen": 1307736, |
| "step": 2120 |
| }, |
| { |
| "epoch": 3.787878787878788, |
| "grad_norm": 0.03279818594455719, |
| "learning_rate": 4.879508987028146e-05, |
| "loss": 0.0444, |
| "num_input_tokens_seen": 1311320, |
| "step": 2125 |
| }, |
| { |
| "epoch": 3.7967914438502675, |
| "grad_norm": 23.06256103515625, |
| "learning_rate": 4.87831335730846e-05, |
| "loss": 0.0369, |
| "num_input_tokens_seen": 1314136, |
| "step": 2130 |
| }, |
| { |
| "epoch": 3.805704099821747, |
| "grad_norm": 0.00789332389831543, |
| "learning_rate": 4.877111972685762e-05, |
| "loss": 0.026, |
| "num_input_tokens_seen": 1317176, |
| "step": 2135 |
| }, |
| { |
| "epoch": 3.8146167557932262, |
| "grad_norm": 0.002600538544356823, |
| "learning_rate": 4.875904836067092e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1320632, |
| "step": 2140 |
| }, |
| { |
| "epoch": 3.8235294117647056, |
| "grad_norm": 0.18604746460914612, |
| "learning_rate": 4.874691950373409e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1323512, |
| "step": 2145 |
| }, |
| { |
| "epoch": 3.8324420677361855, |
| "grad_norm": 0.0031476416625082493, |
| "learning_rate": 4.873473318539583e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1327160, |
| "step": 2150 |
| }, |
| { |
| "epoch": 3.841354723707665, |
| "grad_norm": 0.0015608452958986163, |
| "learning_rate": 4.872248943514387e-05, |
| "loss": 0.0005, |
| "num_input_tokens_seen": 1329912, |
| "step": 2155 |
| }, |
| { |
| "epoch": 3.8502673796791442, |
| "grad_norm": 0.008163553662598133, |
| "learning_rate": 4.871018828260492e-05, |
| "loss": 0.007, |
| "num_input_tokens_seen": 1332760, |
| "step": 2160 |
| }, |
| { |
| "epoch": 3.859180035650624, |
| "grad_norm": 0.005292457994073629, |
| "learning_rate": 4.869782975754458e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1335608, |
| "step": 2165 |
| }, |
| { |
| "epoch": 3.8680926916221035, |
| "grad_norm": 0.002038877457380295, |
| "learning_rate": 4.86854138898673e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1339064, |
| "step": 2170 |
| }, |
| { |
| "epoch": 3.877005347593583, |
| "grad_norm": 0.0013985374243929982, |
| "learning_rate": 4.867294070961625e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1342264, |
| "step": 2175 |
| }, |
| { |
| "epoch": 3.8859180035650622, |
| "grad_norm": 0.19727768003940582, |
| "learning_rate": 4.8660410246973306e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1345496, |
| "step": 2180 |
| }, |
| { |
| "epoch": 3.8948306595365416, |
| "grad_norm": 0.15647095441818237, |
| "learning_rate": 4.8647822532258955e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1347896, |
| "step": 2185 |
| }, |
| { |
| "epoch": 3.9037433155080214, |
| "grad_norm": 30.87330436706543, |
| "learning_rate": 4.86351775959322e-05, |
| "loss": 0.0068, |
| "num_input_tokens_seen": 1350808, |
| "step": 2190 |
| }, |
| { |
| "epoch": 3.912655971479501, |
| "grad_norm": 0.0015438764821738005, |
| "learning_rate": 4.8622475468590514e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1353080, |
| "step": 2195 |
| }, |
| { |
| "epoch": 3.9215686274509802, |
| "grad_norm": 0.001988581381738186, |
| "learning_rate": 4.8609716180969755e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1356728, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.93048128342246, |
| "grad_norm": 0.002157883020117879, |
| "learning_rate": 4.859689976394412e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1360152, |
| "step": 2205 |
| }, |
| { |
| "epoch": 3.9393939393939394, |
| "grad_norm": 0.0009718194487504661, |
| "learning_rate": 4.858402624852599e-05, |
| "loss": 0.001, |
| "num_input_tokens_seen": 1364152, |
| "step": 2210 |
| }, |
| { |
| "epoch": 3.948306595365419, |
| "grad_norm": 0.022542769089341164, |
| "learning_rate": 4.8571095665865976e-05, |
| "loss": 0.0866, |
| "num_input_tokens_seen": 1367512, |
| "step": 2215 |
| }, |
| { |
| "epoch": 3.9572192513368982, |
| "grad_norm": 0.001373310573399067, |
| "learning_rate": 4.855810804725271e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1370520, |
| "step": 2220 |
| }, |
| { |
| "epoch": 3.966131907308378, |
| "grad_norm": 0.014193962328135967, |
| "learning_rate": 4.854506342411289e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1373016, |
| "step": 2225 |
| }, |
| { |
| "epoch": 3.9750445632798574, |
| "grad_norm": 0.04215296730399132, |
| "learning_rate": 4.8531961828011124e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1375960, |
| "step": 2230 |
| }, |
| { |
| "epoch": 3.983957219251337, |
| "grad_norm": 0.0009914386318996549, |
| "learning_rate": 4.8518803290649885e-05, |
| "loss": 0.0439, |
| "num_input_tokens_seen": 1379160, |
| "step": 2235 |
| }, |
| { |
| "epoch": 3.9928698752228167, |
| "grad_norm": 0.0015359356766566634, |
| "learning_rate": 4.8505587843869425e-05, |
| "loss": 0.1269, |
| "num_input_tokens_seen": 1382744, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.13699407875537872, |
| "eval_runtime": 4.5818, |
| "eval_samples_per_second": 54.345, |
| "eval_steps_per_second": 13.75, |
| "num_input_tokens_seen": 1384096, |
| "step": 2244 |
| }, |
| { |
| "epoch": 4.001782531194296, |
| "grad_norm": 0.003554535796865821, |
| "learning_rate": 4.849231551964771e-05, |
| "loss": 0.001, |
| "num_input_tokens_seen": 1384736, |
| "step": 2245 |
| }, |
| { |
| "epoch": 4.010695187165775, |
| "grad_norm": 0.006082756910473108, |
| "learning_rate": 4.847898635010033e-05, |
| "loss": 0.0011, |
| "num_input_tokens_seen": 1387552, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.019607843137255, |
| "grad_norm": 0.0006636533071286976, |
| "learning_rate": 4.846560036748043e-05, |
| "loss": 0.0026, |
| "num_input_tokens_seen": 1390944, |
| "step": 2255 |
| }, |
| { |
| "epoch": 4.028520499108734, |
| "grad_norm": 0.014104689471423626, |
| "learning_rate": 4.8452157604178626e-05, |
| "loss": 0.0008, |
| "num_input_tokens_seen": 1393728, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.037433155080214, |
| "grad_norm": 0.005312174558639526, |
| "learning_rate": 4.8438658092722914e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1396288, |
| "step": 2265 |
| }, |
| { |
| "epoch": 4.046345811051693, |
| "grad_norm": 0.0029452915769070387, |
| "learning_rate": 4.8425101865778634e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1399424, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.055258467023173, |
| "grad_norm": 0.0014865044504404068, |
| "learning_rate": 4.8411488956148344e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1402720, |
| "step": 2275 |
| }, |
| { |
| "epoch": 4.064171122994653, |
| "grad_norm": 0.0015569854294881225, |
| "learning_rate": 4.839781939677176e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1406208, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.073083778966132, |
| "grad_norm": 0.002135837683454156, |
| "learning_rate": 4.838409322072568e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1409952, |
| "step": 2285 |
| }, |
| { |
| "epoch": 4.081996434937611, |
| "grad_norm": 0.0058957538567483425, |
| "learning_rate": 4.8370310461223894e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1413056, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.090909090909091, |
| "grad_norm": 0.001857185736298561, |
| "learning_rate": 4.835647115161712e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1415712, |
| "step": 2295 |
| }, |
| { |
| "epoch": 4.09982174688057, |
| "grad_norm": 0.0014896428911015391, |
| "learning_rate": 4.8342575325392916e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1418368, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.10873440285205, |
| "grad_norm": 0.002626370871439576, |
| "learning_rate": 4.832862301617557e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1421248, |
| "step": 2305 |
| }, |
| { |
| "epoch": 4.117647058823529, |
| "grad_norm": 0.0008361501968465745, |
| "learning_rate": 4.8314614257726076e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1424320, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.126559714795009, |
| "grad_norm": 0.0006268385332077742, |
| "learning_rate": 4.8300549083941985e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1427040, |
| "step": 2315 |
| }, |
| { |
| "epoch": 4.135472370766489, |
| "grad_norm": 0.0009334477363154292, |
| "learning_rate": 4.82864275288574e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1430048, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.144385026737968, |
| "grad_norm": 0.0012060723965987563, |
| "learning_rate": 4.827224962664282e-05, |
| "loss": 0.0169, |
| "num_input_tokens_seen": 1433376, |
| "step": 2325 |
| }, |
| { |
| "epoch": 4.153297682709447, |
| "grad_norm": 0.0006173542933538556, |
| "learning_rate": 4.8258015411605095e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1436416, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.162210338680927, |
| "grad_norm": 0.0023394590243697166, |
| "learning_rate": 4.824372491818735e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1439392, |
| "step": 2335 |
| }, |
| { |
| "epoch": 4.171122994652406, |
| "grad_norm": 0.02793467603623867, |
| "learning_rate": 4.822937818096888e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1442368, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.180035650623886, |
| "grad_norm": 0.006997761782258749, |
| "learning_rate": 4.821497523466508e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1445088, |
| "step": 2345 |
| }, |
| { |
| "epoch": 4.188948306595365, |
| "grad_norm": 0.0006095584249123931, |
| "learning_rate": 4.820051611412736e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1447904, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.197860962566845, |
| "grad_norm": 0.0007617850787937641, |
| "learning_rate": 4.8186000854343034e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1451616, |
| "step": 2355 |
| }, |
| { |
| "epoch": 4.206773618538325, |
| "grad_norm": 0.9482964277267456, |
| "learning_rate": 4.8171429490435285e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1454976, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.215686274509804, |
| "grad_norm": 0.0003435949329286814, |
| "learning_rate": 4.815680205766304e-05, |
| "loss": 0.1751, |
| "num_input_tokens_seen": 1458624, |
| "step": 2365 |
| }, |
| { |
| "epoch": 4.224598930481283, |
| "grad_norm": 0.013822204433381557, |
| "learning_rate": 4.814211859142092e-05, |
| "loss": 0.0028, |
| "num_input_tokens_seen": 1462048, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.233511586452763, |
| "grad_norm": 0.024504275992512703, |
| "learning_rate": 4.812737912723908e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1465248, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.242424242424242, |
| "grad_norm": 0.00452042929828167, |
| "learning_rate": 4.811258370078324e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1467584, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.251336898395722, |
| "grad_norm": 2.1872692108154297, |
| "learning_rate": 4.809773234785449e-05, |
| "loss": 0.089, |
| "num_input_tokens_seen": 1470592, |
| "step": 2385 |
| }, |
| { |
| "epoch": 4.260249554367201, |
| "grad_norm": 2.717146158218384, |
| "learning_rate": 4.8082825104389264e-05, |
| "loss": 0.0027, |
| "num_input_tokens_seen": 1473696, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.269162210338681, |
| "grad_norm": 0.06081445887684822, |
| "learning_rate": 4.806786200645924e-05, |
| "loss": 0.0012, |
| "num_input_tokens_seen": 1477504, |
| "step": 2395 |
| }, |
| { |
| "epoch": 4.278074866310161, |
| "grad_norm": 0.05218374356627464, |
| "learning_rate": 4.8052843090271235e-05, |
| "loss": 0.0012, |
| "num_input_tokens_seen": 1481600, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.28698752228164, |
| "grad_norm": 0.012032375670969486, |
| "learning_rate": 4.803776839216715e-05, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 1484768, |
| "step": 2405 |
| }, |
| { |
| "epoch": 4.295900178253119, |
| "grad_norm": 0.3018365204334259, |
| "learning_rate": 4.802263794862385e-05, |
| "loss": 0.0023, |
| "num_input_tokens_seen": 1488160, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.304812834224599, |
| "grad_norm": 0.006996179930865765, |
| "learning_rate": 4.8007451796253075e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1491072, |
| "step": 2415 |
| }, |
| { |
| "epoch": 4.313725490196078, |
| "grad_norm": 0.006158452481031418, |
| "learning_rate": 4.7992209971801425e-05, |
| "loss": 0.0034, |
| "num_input_tokens_seen": 1494400, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.322638146167558, |
| "grad_norm": 0.005736066959798336, |
| "learning_rate": 4.797691251215014e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1497504, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.331550802139038, |
| "grad_norm": 0.007838579826056957, |
| "learning_rate": 4.7961559454315126e-05, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 1500928, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.340463458110517, |
| "grad_norm": 0.003752675373107195, |
| "learning_rate": 4.7946150835446805e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1503936, |
| "step": 2435 |
| }, |
| { |
| "epoch": 4.349376114081997, |
| "grad_norm": 0.04063476249575615, |
| "learning_rate": 4.7930686692830064e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1506624, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.358288770053476, |
| "grad_norm": 0.003907045815140009, |
| "learning_rate": 4.79151670638841e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1509792, |
| "step": 2445 |
| }, |
| { |
| "epoch": 4.367201426024955, |
| "grad_norm": 0.0013991671148687601, |
| "learning_rate": 4.789959198616243e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1513024, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.376114081996435, |
| "grad_norm": 0.002485500182956457, |
| "learning_rate": 4.7883961497352686e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1515936, |
| "step": 2455 |
| }, |
| { |
| "epoch": 4.385026737967914, |
| "grad_norm": 0.00234747352078557, |
| "learning_rate": 4.786827563527663e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1519424, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.393939393939394, |
| "grad_norm": 0.003072569379583001, |
| "learning_rate": 4.785253443788997e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1521856, |
| "step": 2465 |
| }, |
| { |
| "epoch": 4.402852049910873, |
| "grad_norm": 0.001119230524636805, |
| "learning_rate": 4.783673794328234e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1525056, |
| "step": 2470 |
| }, |
| { |
| "epoch": 4.411764705882353, |
| "grad_norm": 0.003584908088669181, |
| "learning_rate": 4.7820886189677175e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1528640, |
| "step": 2475 |
| }, |
| { |
| "epoch": 4.420677361853833, |
| "grad_norm": 0.0011027141008526087, |
| "learning_rate": 4.780497921543161e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1531424, |
| "step": 2480 |
| }, |
| { |
| "epoch": 4.429590017825312, |
| "grad_norm": 0.0016615098575130105, |
| "learning_rate": 4.7789017059036413e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1534848, |
| "step": 2485 |
| }, |
| { |
| "epoch": 4.438502673796791, |
| "grad_norm": 0.0021322835236787796, |
| "learning_rate": 4.777299975911587e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1537568, |
| "step": 2490 |
| }, |
| { |
| "epoch": 4.447415329768271, |
| "grad_norm": 0.0021837984677404165, |
| "learning_rate": 4.775692735442769e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1540192, |
| "step": 2495 |
| }, |
| { |
| "epoch": 4.45632798573975, |
| "grad_norm": 0.002096204087138176, |
| "learning_rate": 4.774079988386296e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1543552, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.46524064171123, |
| "grad_norm": 4.282310485839844, |
| "learning_rate": 4.772461738644597e-05, |
| "loss": 0.0018, |
| "num_input_tokens_seen": 1546368, |
| "step": 2505 |
| }, |
| { |
| "epoch": 4.47415329768271, |
| "grad_norm": 0.0036549854557961226, |
| "learning_rate": 4.7708379901334184e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1549440, |
| "step": 2510 |
| }, |
| { |
| "epoch": 4.483065953654189, |
| "grad_norm": 0.0010477956384420395, |
| "learning_rate": 4.76920874678181e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1552864, |
| "step": 2515 |
| }, |
| { |
| "epoch": 4.491978609625669, |
| "grad_norm": 0.005400074180215597, |
| "learning_rate": 4.767574012532122e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1555968, |
| "step": 2520 |
| }, |
| { |
| "epoch": 4.500891265597148, |
| "grad_norm": 0.0010502905352041125, |
| "learning_rate": 4.765933791339985e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1558464, |
| "step": 2525 |
| }, |
| { |
| "epoch": 4.509803921568627, |
| "grad_norm": 0.0023418355267494917, |
| "learning_rate": 4.7642880871743124e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1561472, |
| "step": 2530 |
| }, |
| { |
| "epoch": 4.518716577540107, |
| "grad_norm": 0.0029139232356101274, |
| "learning_rate": 4.762636904017281e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1564960, |
| "step": 2535 |
| }, |
| { |
| "epoch": 4.527629233511586, |
| "grad_norm": 0.001190957729704678, |
| "learning_rate": 4.760980245864329e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1567744, |
| "step": 2540 |
| }, |
| { |
| "epoch": 4.536541889483066, |
| "grad_norm": 0.009031428024172783, |
| "learning_rate": 4.759318116724138e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1571552, |
| "step": 2545 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.003633075626567006, |
| "learning_rate": 4.757650520618632e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1575072, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.554367201426025, |
| "grad_norm": 0.005398147739470005, |
| "learning_rate": 4.755977461582961e-05, |
| "loss": 0.0598, |
| "num_input_tokens_seen": 1578272, |
| "step": 2555 |
| }, |
| { |
| "epoch": 4.563279857397505, |
| "grad_norm": 0.06160321831703186, |
| "learning_rate": 4.754298943665496e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1581504, |
| "step": 2560 |
| }, |
| { |
| "epoch": 4.572192513368984, |
| "grad_norm": 0.0008511216146871448, |
| "learning_rate": 4.752614970927817e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1584896, |
| "step": 2565 |
| }, |
| { |
| "epoch": 4.581105169340463, |
| "grad_norm": 0.0062278094701468945, |
| "learning_rate": 4.750925547444699e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1588384, |
| "step": 2570 |
| }, |
| { |
| "epoch": 4.590017825311943, |
| "grad_norm": 34.354591369628906, |
| "learning_rate": 4.749230677304114e-05, |
| "loss": 0.0169, |
| "num_input_tokens_seen": 1590976, |
| "step": 2575 |
| }, |
| { |
| "epoch": 4.598930481283422, |
| "grad_norm": 0.0010839140741154552, |
| "learning_rate": 4.7475303646072054e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1594144, |
| "step": 2580 |
| }, |
| { |
| "epoch": 4.607843137254902, |
| "grad_norm": 0.0006140993209555745, |
| "learning_rate": 4.7458246134682926e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1597664, |
| "step": 2585 |
| }, |
| { |
| "epoch": 4.616755793226382, |
| "grad_norm": 0.0008959631086327136, |
| "learning_rate": 4.744113428014851e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1601216, |
| "step": 2590 |
| }, |
| { |
| "epoch": 4.625668449197861, |
| "grad_norm": 0.010111883282661438, |
| "learning_rate": 4.7423968123875076e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1604288, |
| "step": 2595 |
| }, |
| { |
| "epoch": 4.634581105169341, |
| "grad_norm": 0.006966453976929188, |
| "learning_rate": 4.740674770740027e-05, |
| "loss": 0.0013, |
| "num_input_tokens_seen": 1607840, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.64349376114082, |
| "grad_norm": 0.006527449004352093, |
| "learning_rate": 4.738947307239305e-05, |
| "loss": 0.0987, |
| "num_input_tokens_seen": 1610752, |
| "step": 2605 |
| }, |
| { |
| "epoch": 4.652406417112299, |
| "grad_norm": 0.005451792385429144, |
| "learning_rate": 4.737214426065355e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1613536, |
| "step": 2610 |
| }, |
| { |
| "epoch": 4.661319073083779, |
| "grad_norm": 0.015521319583058357, |
| "learning_rate": 4.735476131411304e-05, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 1616416, |
| "step": 2615 |
| }, |
| { |
| "epoch": 4.670231729055258, |
| "grad_norm": 0.016396639868617058, |
| "learning_rate": 4.733732427483373e-05, |
| "loss": 0.0349, |
| "num_input_tokens_seen": 1619840, |
| "step": 2620 |
| }, |
| { |
| "epoch": 4.6791443850267385, |
| "grad_norm": 0.004544154740869999, |
| "learning_rate": 4.731983318500875e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1622752, |
| "step": 2625 |
| }, |
| { |
| "epoch": 4.688057040998218, |
| "grad_norm": 0.009829314425587654, |
| "learning_rate": 4.730228808696201e-05, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 1625120, |
| "step": 2630 |
| }, |
| { |
| "epoch": 4.696969696969697, |
| "grad_norm": 0.008239485323429108, |
| "learning_rate": 4.728468902314812e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1628896, |
| "step": 2635 |
| }, |
| { |
| "epoch": 4.705882352941177, |
| "grad_norm": 3.9137890338897705, |
| "learning_rate": 4.726703603615224e-05, |
| "loss": 0.0019, |
| "num_input_tokens_seen": 1632352, |
| "step": 2640 |
| }, |
| { |
| "epoch": 4.714795008912656, |
| "grad_norm": 0.002351184142753482, |
| "learning_rate": 4.724932916869005e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1635616, |
| "step": 2645 |
| }, |
| { |
| "epoch": 4.723707664884135, |
| "grad_norm": 0.02050667069852352, |
| "learning_rate": 4.7231568463607576e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1638304, |
| "step": 2650 |
| }, |
| { |
| "epoch": 4.732620320855615, |
| "grad_norm": 0.0012839463306590915, |
| "learning_rate": 4.721375396388113e-05, |
| "loss": 0.0474, |
| "num_input_tokens_seen": 1642496, |
| "step": 2655 |
| }, |
| { |
| "epoch": 4.741532976827094, |
| "grad_norm": 0.0013566635316237807, |
| "learning_rate": 4.719588571261721e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1645344, |
| "step": 2660 |
| }, |
| { |
| "epoch": 4.750445632798574, |
| "grad_norm": 0.0050344159826636314, |
| "learning_rate": 4.7177963753052345e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1647552, |
| "step": 2665 |
| }, |
| { |
| "epoch": 4.759358288770054, |
| "grad_norm": 0.0009222656372003257, |
| "learning_rate": 4.715998812855305e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1650528, |
| "step": 2670 |
| }, |
| { |
| "epoch": 4.768270944741533, |
| "grad_norm": 0.004403305239975452, |
| "learning_rate": 4.7141958882615665e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1654016, |
| "step": 2675 |
| }, |
| { |
| "epoch": 4.777183600713013, |
| "grad_norm": 0.0012677984777837992, |
| "learning_rate": 4.7123876058866315e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1656832, |
| "step": 2680 |
| }, |
| { |
| "epoch": 4.786096256684492, |
| "grad_norm": 0.00171990180388093, |
| "learning_rate": 4.710573970106076e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1659552, |
| "step": 2685 |
| }, |
| { |
| "epoch": 4.795008912655971, |
| "grad_norm": 10.418574333190918, |
| "learning_rate": 4.7087549853084286e-05, |
| "loss": 0.0027, |
| "num_input_tokens_seen": 1662944, |
| "step": 2690 |
| }, |
| { |
| "epoch": 4.803921568627451, |
| "grad_norm": 0.0004727148625534028, |
| "learning_rate": 4.706930655895163e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1665312, |
| "step": 2695 |
| }, |
| { |
| "epoch": 4.81283422459893, |
| "grad_norm": 0.000861368898767978, |
| "learning_rate": 4.7051009862806834e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1669312, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.8217468805704105, |
| "grad_norm": 0.0034621551167219877, |
| "learning_rate": 4.703265980892316e-05, |
| "loss": 0.0014, |
| "num_input_tokens_seen": 1672384, |
| "step": 2705 |
| }, |
| { |
| "epoch": 4.83065953654189, |
| "grad_norm": 0.0034683283884078264, |
| "learning_rate": 4.701425644170302e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1675552, |
| "step": 2710 |
| }, |
| { |
| "epoch": 4.839572192513369, |
| "grad_norm": 0.00047382700722664595, |
| "learning_rate": 4.699579980567776e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1678208, |
| "step": 2715 |
| }, |
| { |
| "epoch": 4.848484848484849, |
| "grad_norm": 0.007945065386593342, |
| "learning_rate": 4.697728994550771e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1681120, |
| "step": 2720 |
| }, |
| { |
| "epoch": 4.857397504456328, |
| "grad_norm": 0.0005510263727046549, |
| "learning_rate": 4.6958726905981906e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1684256, |
| "step": 2725 |
| }, |
| { |
| "epoch": 4.866310160427807, |
| "grad_norm": 0.0004645136359613389, |
| "learning_rate": 4.694011073201812e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1687328, |
| "step": 2730 |
| }, |
| { |
| "epoch": 4.875222816399287, |
| "grad_norm": 0.0005562491132877767, |
| "learning_rate": 4.6921441468662666e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1689824, |
| "step": 2735 |
| }, |
| { |
| "epoch": 4.884135472370766, |
| "grad_norm": 0.00042231017141602933, |
| "learning_rate": 4.6902719161090345e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1692896, |
| "step": 2740 |
| }, |
| { |
| "epoch": 4.893048128342246, |
| "grad_norm": 0.0009743515984155238, |
| "learning_rate": 4.688394385460428e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1695584, |
| "step": 2745 |
| }, |
| { |
| "epoch": 4.901960784313726, |
| "grad_norm": 0.00040473334956914186, |
| "learning_rate": 4.6865115594635866e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1698784, |
| "step": 2750 |
| }, |
| { |
| "epoch": 4.910873440285205, |
| "grad_norm": 0.0005721452180296183, |
| "learning_rate": 4.684623442674463e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1702144, |
| "step": 2755 |
| }, |
| { |
| "epoch": 4.919786096256685, |
| "grad_norm": 0.0032837442122399807, |
| "learning_rate": 4.682730039661809e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1704608, |
| "step": 2760 |
| }, |
| { |
| "epoch": 4.928698752228164, |
| "grad_norm": 0.0010922816582024097, |
| "learning_rate": 4.680831355007172e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1707584, |
| "step": 2765 |
| }, |
| { |
| "epoch": 4.937611408199643, |
| "grad_norm": 0.000615122087765485, |
| "learning_rate": 4.6789273933048766e-05, |
| "loss": 0.0458, |
| "num_input_tokens_seen": 1711552, |
| "step": 2770 |
| }, |
| { |
| "epoch": 4.946524064171123, |
| "grad_norm": 0.0006509011727757752, |
| "learning_rate": 4.677018159162018e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1714944, |
| "step": 2775 |
| }, |
| { |
| "epoch": 4.955436720142602, |
| "grad_norm": 0.0003759993414860219, |
| "learning_rate": 4.675103657198449e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1718336, |
| "step": 2780 |
| }, |
| { |
| "epoch": 4.9643493761140824, |
| "grad_norm": 0.09884090721607208, |
| "learning_rate": 4.6731838920467684e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1721728, |
| "step": 2785 |
| }, |
| { |
| "epoch": 4.973262032085562, |
| "grad_norm": 0.0024623344652354717, |
| "learning_rate": 4.6712588683523114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1724352, |
| "step": 2790 |
| }, |
| { |
| "epoch": 4.982174688057041, |
| "grad_norm": 0.00023198811686597764, |
| "learning_rate": 4.669328590773139e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1727552, |
| "step": 2795 |
| }, |
| { |
| "epoch": 4.991087344028521, |
| "grad_norm": 0.000825934752356261, |
| "learning_rate": 4.66739306398002e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1730144, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.0022655415814369917, |
| "learning_rate": 4.665452292656431e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1732712, |
| "step": 2805 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.1608305424451828, |
| "eval_runtime": 4.5855, |
| "eval_samples_per_second": 54.302, |
| "eval_steps_per_second": 13.739, |
| "num_input_tokens_seen": 1732712, |
| "step": 2805 |
| }, |
| { |
| "epoch": 5.008912655971479, |
| "grad_norm": 7.032632129266858e-05, |
| "learning_rate": 4.6635062814985374e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1736840, |
| "step": 2810 |
| }, |
| { |
| "epoch": 5.017825311942959, |
| "grad_norm": 0.00010113899043062702, |
| "learning_rate": 4.6615550352151804e-05, |
| "loss": 0.0912, |
| "num_input_tokens_seen": 1739720, |
| "step": 2815 |
| }, |
| { |
| "epoch": 5.026737967914438, |
| "grad_norm": 0.008052799850702286, |
| "learning_rate": 4.659598558527872e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1742568, |
| "step": 2820 |
| }, |
| { |
| "epoch": 5.035650623885918, |
| "grad_norm": 0.004323708824813366, |
| "learning_rate": 4.6576368561707794e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1746088, |
| "step": 2825 |
| }, |
| { |
| "epoch": 5.044563279857398, |
| "grad_norm": 0.0015223105438053608, |
| "learning_rate": 4.6556699328907154e-05, |
| "loss": 0.0078, |
| "num_input_tokens_seen": 1749224, |
| "step": 2830 |
| }, |
| { |
| "epoch": 5.053475935828877, |
| "grad_norm": 0.020057303830981255, |
| "learning_rate": 4.653697793447125e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1752392, |
| "step": 2835 |
| }, |
| { |
| "epoch": 5.062388591800357, |
| "grad_norm": 0.0012005399912595749, |
| "learning_rate": 4.651720442612076e-05, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 1755912, |
| "step": 2840 |
| }, |
| { |
| "epoch": 5.071301247771836, |
| "grad_norm": 0.005722702946513891, |
| "learning_rate": 4.6497378851702456e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1758792, |
| "step": 2845 |
| }, |
| { |
| "epoch": 5.080213903743315, |
| "grad_norm": 0.00282498961314559, |
| "learning_rate": 4.6477501259189086e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1761960, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.089126559714795, |
| "grad_norm": 0.002525976160541177, |
| "learning_rate": 4.64575716966793e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1765064, |
| "step": 2855 |
| }, |
| { |
| "epoch": 5.098039215686274, |
| "grad_norm": 0.0041107297874987125, |
| "learning_rate": 4.643759021239747e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1767720, |
| "step": 2860 |
| }, |
| { |
| "epoch": 5.106951871657754, |
| "grad_norm": 0.004421675577759743, |
| "learning_rate": 4.6417556854693636e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1770312, |
| "step": 2865 |
| }, |
| { |
| "epoch": 5.115864527629234, |
| "grad_norm": 0.001751912641339004, |
| "learning_rate": 4.639747167204332e-05, |
| "loss": 0.0267, |
| "num_input_tokens_seen": 1773352, |
| "step": 2870 |
| }, |
| { |
| "epoch": 5.124777183600713, |
| "grad_norm": 0.004455928225070238, |
| "learning_rate": 4.6377334713047473e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1775720, |
| "step": 2875 |
| }, |
| { |
| "epoch": 5.133689839572193, |
| "grad_norm": 0.0021583575289696455, |
| "learning_rate": 4.635714602643234e-05, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 1779368, |
| "step": 2880 |
| }, |
| { |
| "epoch": 5.142602495543672, |
| "grad_norm": 0.0018912540981546044, |
| "learning_rate": 4.633690566104929e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1782664, |
| "step": 2885 |
| }, |
| { |
| "epoch": 5.151515151515151, |
| "grad_norm": 0.0013182173715904355, |
| "learning_rate": 4.631661366587481e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1785480, |
| "step": 2890 |
| }, |
| { |
| "epoch": 5.160427807486631, |
| "grad_norm": 0.01736585982143879, |
| "learning_rate": 4.629627009001024e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1789032, |
| "step": 2895 |
| }, |
| { |
| "epoch": 5.16934046345811, |
| "grad_norm": 0.000722629192750901, |
| "learning_rate": 4.62758749826818e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1792008, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.17825311942959, |
| "grad_norm": 0.0014722676714882255, |
| "learning_rate": 4.625542839324036e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1794984, |
| "step": 2905 |
| }, |
| { |
| "epoch": 5.18716577540107, |
| "grad_norm": 0.0006010913057252765, |
| "learning_rate": 4.623493037116137e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1798728, |
| "step": 2910 |
| }, |
| { |
| "epoch": 5.196078431372549, |
| "grad_norm": 0.0027942806482315063, |
| "learning_rate": 4.621438096604475e-05, |
| "loss": 0.0751, |
| "num_input_tokens_seen": 1801896, |
| "step": 2915 |
| }, |
| { |
| "epoch": 5.204991087344029, |
| "grad_norm": 0.014903007075190544, |
| "learning_rate": 4.6193780227614744e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1804424, |
| "step": 2920 |
| }, |
| { |
| "epoch": 5.213903743315508, |
| "grad_norm": 0.0002501923299860209, |
| "learning_rate": 4.61731282057198e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1806952, |
| "step": 2925 |
| }, |
| { |
| "epoch": 5.222816399286987, |
| "grad_norm": 0.2811971604824066, |
| "learning_rate": 4.6152424950332486e-05, |
| "loss": 0.0002, |
| "num_input_tokens_seen": 1810696, |
| "step": 2930 |
| }, |
| { |
| "epoch": 5.231729055258467, |
| "grad_norm": 0.001552988076582551, |
| "learning_rate": 4.613167051154931e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1813480, |
| "step": 2935 |
| }, |
| { |
| "epoch": 5.240641711229946, |
| "grad_norm": 0.0007487252005375922, |
| "learning_rate": 4.6110864939590644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1816808, |
| "step": 2940 |
| }, |
| { |
| "epoch": 5.249554367201426, |
| "grad_norm": 0.0006787029560655355, |
| "learning_rate": 4.609000828480059e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1819816, |
| "step": 2945 |
| }, |
| { |
| "epoch": 5.258467023172906, |
| "grad_norm": 0.002901742234826088, |
| "learning_rate": 4.606910059764687e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1822408, |
| "step": 2950 |
| }, |
| { |
| "epoch": 5.267379679144385, |
| "grad_norm": 0.000984704471193254, |
| "learning_rate": 4.604814192872065e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1825800, |
| "step": 2955 |
| }, |
| { |
| "epoch": 5.276292335115865, |
| "grad_norm": 0.0003694720799103379, |
| "learning_rate": 4.602713232873651e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1828904, |
| "step": 2960 |
| }, |
| { |
| "epoch": 5.285204991087344, |
| "grad_norm": 0.00025981373619288206, |
| "learning_rate": 4.600607184853224e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1832168, |
| "step": 2965 |
| }, |
| { |
| "epoch": 5.294117647058823, |
| "grad_norm": 0.00047363084740936756, |
| "learning_rate": 4.5984960539068754e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1835944, |
| "step": 2970 |
| }, |
| { |
| "epoch": 5.303030303030303, |
| "grad_norm": 0.0005336463218554854, |
| "learning_rate": 4.596379845142995e-05, |
| "loss": 0.0068, |
| "num_input_tokens_seen": 1839240, |
| "step": 2975 |
| }, |
| { |
| "epoch": 5.311942959001782, |
| "grad_norm": 0.0069955759681761265, |
| "learning_rate": 4.594258563682262e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1843176, |
| "step": 2980 |
| }, |
| { |
| "epoch": 5.320855614973262, |
| "grad_norm": 0.004931231494992971, |
| "learning_rate": 4.592132214657628e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1846632, |
| "step": 2985 |
| }, |
| { |
| "epoch": 5.329768270944742, |
| "grad_norm": 0.00035663697053678334, |
| "learning_rate": 4.590000803214307e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1849928, |
| "step": 2990 |
| }, |
| { |
| "epoch": 5.338680926916221, |
| "grad_norm": 5.2898729336448014e-05, |
| "learning_rate": 4.5878643345097644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1853000, |
| "step": 2995 |
| }, |
| { |
| "epoch": 5.347593582887701, |
| "grad_norm": 0.04160798713564873, |
| "learning_rate": 4.585722813713701e-05, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 1856040, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.35650623885918, |
| "grad_norm": 0.003238047007471323, |
| "learning_rate": 4.583576246008043e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1859400, |
| "step": 3005 |
| }, |
| { |
| "epoch": 5.365418894830659, |
| "grad_norm": 0.0002586895425338298, |
| "learning_rate": 4.581424636586929e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1863048, |
| "step": 3010 |
| }, |
| { |
| "epoch": 5.374331550802139, |
| "grad_norm": 0.00018354503845330328, |
| "learning_rate": 4.579267990656697e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1866376, |
| "step": 3015 |
| }, |
| { |
| "epoch": 5.383244206773618, |
| "grad_norm": 0.00047791743418201804, |
| "learning_rate": 4.577106313435873e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1869192, |
| "step": 3020 |
| }, |
| { |
| "epoch": 5.392156862745098, |
| "grad_norm": 0.000250328826950863, |
| "learning_rate": 4.574939610155155e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1872616, |
| "step": 3025 |
| }, |
| { |
| "epoch": 5.401069518716578, |
| "grad_norm": 0.0004770901578012854, |
| "learning_rate": 4.5727678860574055e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1875496, |
| "step": 3030 |
| }, |
| { |
| "epoch": 5.409982174688057, |
| "grad_norm": 0.00023706883075647056, |
| "learning_rate": 4.570591146397635e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1878760, |
| "step": 3035 |
| }, |
| { |
| "epoch": 5.418894830659537, |
| "grad_norm": 0.0003681717498693615, |
| "learning_rate": 4.568409396442991e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1881704, |
| "step": 3040 |
| }, |
| { |
| "epoch": 5.427807486631016, |
| "grad_norm": 0.00028255791403353214, |
| "learning_rate": 4.566222641472742e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1884744, |
| "step": 3045 |
| }, |
| { |
| "epoch": 5.436720142602495, |
| "grad_norm": 9.651204163674265e-05, |
| "learning_rate": 4.564030886778271e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1887144, |
| "step": 3050 |
| }, |
| { |
| "epoch": 5.445632798573975, |
| "grad_norm": 0.0002257092419313267, |
| "learning_rate": 4.561834137663056e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1889896, |
| "step": 3055 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.021864645183086395, |
| "learning_rate": 4.5596323994426626e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1893256, |
| "step": 3060 |
| }, |
| { |
| "epoch": 5.463458110516934, |
| "grad_norm": 0.00025736223324202, |
| "learning_rate": 4.557425677444727e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1896712, |
| "step": 3065 |
| }, |
| { |
| "epoch": 5.472370766488414, |
| "grad_norm": 0.0003680383670143783, |
| "learning_rate": 4.555213977008946e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1899240, |
| "step": 3070 |
| }, |
| { |
| "epoch": 5.481283422459893, |
| "grad_norm": 0.0001519775396445766, |
| "learning_rate": 4.5529973034870624e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1901608, |
| "step": 3075 |
| }, |
| { |
| "epoch": 5.490196078431373, |
| "grad_norm": 0.0001408984389854595, |
| "learning_rate": 4.550775662242852e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1903848, |
| "step": 3080 |
| }, |
| { |
| "epoch": 5.499108734402852, |
| "grad_norm": 0.0014439078513532877, |
| "learning_rate": 4.5485490586521116e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1907400, |
| "step": 3085 |
| }, |
| { |
| "epoch": 5.508021390374331, |
| "grad_norm": 0.00016677154053468257, |
| "learning_rate": 4.546317498102648e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1910248, |
| "step": 3090 |
| }, |
| { |
| "epoch": 5.516934046345811, |
| "grad_norm": 0.00015338376397266984, |
| "learning_rate": 4.544080985994258e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1913544, |
| "step": 3095 |
| }, |
| { |
| "epoch": 5.52584670231729, |
| "grad_norm": 0.0011399354552850127, |
| "learning_rate": 4.541839527738723e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1916104, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.53475935828877, |
| "grad_norm": 0.00014922766422387213, |
| "learning_rate": 4.539593128759792e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1919464, |
| "step": 3105 |
| }, |
| { |
| "epoch": 5.54367201426025, |
| "grad_norm": 0.0003580522316042334, |
| "learning_rate": 4.53734179449317e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1922152, |
| "step": 3110 |
| }, |
| { |
| "epoch": 5.552584670231729, |
| "grad_norm": 0.0010872179409489036, |
| "learning_rate": 4.535085530386503e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1925512, |
| "step": 3115 |
| }, |
| { |
| "epoch": 5.561497326203209, |
| "grad_norm": 0.0004132896719966084, |
| "learning_rate": 4.5328243418993665e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1928712, |
| "step": 3120 |
| }, |
| { |
| "epoch": 5.570409982174688, |
| "grad_norm": 0.0005687960074283183, |
| "learning_rate": 4.5305582345032514e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1931784, |
| "step": 3125 |
| }, |
| { |
| "epoch": 5.579322638146167, |
| "grad_norm": 0.00024347477301489562, |
| "learning_rate": 4.5282872136815516e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1934888, |
| "step": 3130 |
| }, |
| { |
| "epoch": 5.588235294117647, |
| "grad_norm": 0.06657089293003082, |
| "learning_rate": 4.526011284929549e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1937576, |
| "step": 3135 |
| }, |
| { |
| "epoch": 5.597147950089127, |
| "grad_norm": 0.00020688715449068695, |
| "learning_rate": 4.523730453754405e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1940392, |
| "step": 3140 |
| }, |
| { |
| "epoch": 5.606060606060606, |
| "grad_norm": 0.00039311559521593153, |
| "learning_rate": 4.521444725675137e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1944040, |
| "step": 3145 |
| }, |
| { |
| "epoch": 5.614973262032086, |
| "grad_norm": 0.0001793958363123238, |
| "learning_rate": 4.5191541062226186e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1946920, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.623885918003565, |
| "grad_norm": 0.0002715518930926919, |
| "learning_rate": 4.5168586009395555e-05, |
| "loss": 0.0001, |
| "num_input_tokens_seen": 1950856, |
| "step": 3155 |
| }, |
| { |
| "epoch": 5.632798573975045, |
| "grad_norm": 0.0005141710862517357, |
| "learning_rate": 4.514558215380476e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1954312, |
| "step": 3160 |
| }, |
| { |
| "epoch": 5.641711229946524, |
| "grad_norm": 0.00017174534150399268, |
| "learning_rate": 4.512252955111719e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1957480, |
| "step": 3165 |
| }, |
| { |
| "epoch": 5.650623885918003, |
| "grad_norm": 9.127831435762346e-05, |
| "learning_rate": 4.5099428257114175e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1960840, |
| "step": 3170 |
| }, |
| { |
| "epoch": 5.659536541889483, |
| "grad_norm": 0.0001852322748163715, |
| "learning_rate": 4.507627832769486e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1964328, |
| "step": 3175 |
| }, |
| { |
| "epoch": 5.668449197860962, |
| "grad_norm": 0.00013597046199720353, |
| "learning_rate": 4.50530798188761e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1966920, |
| "step": 3180 |
| }, |
| { |
| "epoch": 5.677361853832442, |
| "grad_norm": 9.64289138210006e-05, |
| "learning_rate": 4.502983278679227e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1970696, |
| "step": 3185 |
| }, |
| { |
| "epoch": 5.686274509803922, |
| "grad_norm": 0.0005941576091572642, |
| "learning_rate": 4.5006537287695186e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1973800, |
| "step": 3190 |
| }, |
| { |
| "epoch": 5.695187165775401, |
| "grad_norm": 0.0005220057209953666, |
| "learning_rate": 4.498319337795392e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1976744, |
| "step": 3195 |
| }, |
| { |
| "epoch": 5.704099821746881, |
| "grad_norm": 0.0002706492959987372, |
| "learning_rate": 4.495980111405471e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1980232, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.71301247771836, |
| "grad_norm": 0.00028276382363401353, |
| "learning_rate": 4.493636055260077e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1982984, |
| "step": 3205 |
| }, |
| { |
| "epoch": 5.721925133689839, |
| "grad_norm": 0.0006307061994448304, |
| "learning_rate": 4.491287175031218e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1986504, |
| "step": 3210 |
| }, |
| { |
| "epoch": 5.730837789661319, |
| "grad_norm": 0.00034339685225859284, |
| "learning_rate": 4.488933476402579e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1989384, |
| "step": 3215 |
| }, |
| { |
| "epoch": 5.739750445632799, |
| "grad_norm": 0.0002526050084270537, |
| "learning_rate": 4.4865749650695e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1992168, |
| "step": 3220 |
| }, |
| { |
| "epoch": 5.748663101604278, |
| "grad_norm": 0.0007074022432789207, |
| "learning_rate": 4.4842116467389696e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1996008, |
| "step": 3225 |
| }, |
| { |
| "epoch": 5.757575757575758, |
| "grad_norm": 0.0002924671280197799, |
| "learning_rate": 4.4818435271296054e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 1999144, |
| "step": 3230 |
| }, |
| { |
| "epoch": 5.766488413547237, |
| "grad_norm": 9.665504330769181e-05, |
| "learning_rate": 4.4794706119716455e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2002504, |
| "step": 3235 |
| }, |
| { |
| "epoch": 5.775401069518717, |
| "grad_norm": 0.0006428760825656354, |
| "learning_rate": 4.47709290700693e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2004520, |
| "step": 3240 |
| }, |
| { |
| "epoch": 5.784313725490196, |
| "grad_norm": 0.00017735113215167075, |
| "learning_rate": 4.474710417988889e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2007368, |
| "step": 3245 |
| }, |
| { |
| "epoch": 5.793226381461675, |
| "grad_norm": 0.0002311782300239429, |
| "learning_rate": 4.4723231506825305e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2010600, |
| "step": 3250 |
| }, |
| { |
| "epoch": 5.802139037433155, |
| "grad_norm": 0.06747859716415405, |
| "learning_rate": 4.469931110864424e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2013928, |
| "step": 3255 |
| }, |
| { |
| "epoch": 5.811051693404634, |
| "grad_norm": 0.0002444138517603278, |
| "learning_rate": 4.4675343043226856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2016968, |
| "step": 3260 |
| }, |
| { |
| "epoch": 5.819964349376114, |
| "grad_norm": 0.00021666847169399261, |
| "learning_rate": 4.465132736856969e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2019784, |
| "step": 3265 |
| }, |
| { |
| "epoch": 5.828877005347594, |
| "grad_norm": 5.657947258441709e-05, |
| "learning_rate": 4.462726414278444e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2022888, |
| "step": 3270 |
| }, |
| { |
| "epoch": 5.837789661319073, |
| "grad_norm": 0.00012392383359838277, |
| "learning_rate": 4.460315342409791e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2025512, |
| "step": 3275 |
| }, |
| { |
| "epoch": 5.846702317290553, |
| "grad_norm": 0.00028178084176033735, |
| "learning_rate": 4.457899527085178e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2028456, |
| "step": 3280 |
| }, |
| { |
| "epoch": 5.855614973262032, |
| "grad_norm": 0.0003765980654861778, |
| "learning_rate": 4.455478974150255e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2031048, |
| "step": 3285 |
| }, |
| { |
| "epoch": 5.864527629233511, |
| "grad_norm": 0.00023475800117012113, |
| "learning_rate": 4.453053689462131e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2034824, |
| "step": 3290 |
| }, |
| { |
| "epoch": 5.873440285204991, |
| "grad_norm": 0.00026866758707910776, |
| "learning_rate": 4.4506236788893706e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2037320, |
| "step": 3295 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.0006037325947545469, |
| "learning_rate": 4.44818894831197e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2040392, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.89126559714795, |
| "grad_norm": 0.0043501826003193855, |
| "learning_rate": 4.4457495036213456e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2042952, |
| "step": 3305 |
| }, |
| { |
| "epoch": 5.90017825311943, |
| "grad_norm": 7.099113281583413e-05, |
| "learning_rate": 4.443305350720324e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2046664, |
| "step": 3310 |
| }, |
| { |
| "epoch": 5.909090909090909, |
| "grad_norm": 0.00012569865793921053, |
| "learning_rate": 4.440856495523122e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2049256, |
| "step": 3315 |
| }, |
| { |
| "epoch": 5.918003565062389, |
| "grad_norm": 0.00024479979765601456, |
| "learning_rate": 4.438402943955336e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2052488, |
| "step": 3320 |
| }, |
| { |
| "epoch": 5.926916221033868, |
| "grad_norm": 0.000506595300976187, |
| "learning_rate": 4.4359447019539264e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2055592, |
| "step": 3325 |
| }, |
| { |
| "epoch": 5.935828877005347, |
| "grad_norm": 0.0002587987983133644, |
| "learning_rate": 4.433481775467202e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2058952, |
| "step": 3330 |
| }, |
| { |
| "epoch": 5.944741532976827, |
| "grad_norm": 0.00153339805547148, |
| "learning_rate": 4.4310141704548094e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2061768, |
| "step": 3335 |
| }, |
| { |
| "epoch": 5.953654188948306, |
| "grad_norm": 0.0005379040958359838, |
| "learning_rate": 4.428541892887712e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2064936, |
| "step": 3340 |
| }, |
| { |
| "epoch": 5.962566844919786, |
| "grad_norm": 0.0003168246184941381, |
| "learning_rate": 4.426064948748183e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2068424, |
| "step": 3345 |
| }, |
| { |
| "epoch": 5.971479500891266, |
| "grad_norm": 0.00040106987580657005, |
| "learning_rate": 4.423583344029786e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2071528, |
| "step": 3350 |
| }, |
| { |
| "epoch": 5.980392156862745, |
| "grad_norm": 0.0001550520391901955, |
| "learning_rate": 4.4210970847373636e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2074600, |
| "step": 3355 |
| }, |
| { |
| "epoch": 5.989304812834225, |
| "grad_norm": 0.00010328602365916595, |
| "learning_rate": 4.4186061768870184e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2077544, |
| "step": 3360 |
| }, |
| { |
| "epoch": 5.998217468805704, |
| "grad_norm": 4.394166899146512e-05, |
| "learning_rate": 4.416110626506105e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2080008, |
| "step": 3365 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.18822118639945984, |
| "eval_runtime": 4.5853, |
| "eval_samples_per_second": 54.303, |
| "eval_steps_per_second": 13.739, |
| "num_input_tokens_seen": 2080184, |
| "step": 3366 |
| }, |
| { |
| "epoch": 6.007130124777183, |
| "grad_norm": 8.21559369796887e-05, |
| "learning_rate": 4.4136104396332066e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2083224, |
| "step": 3370 |
| }, |
| { |
| "epoch": 6.016042780748663, |
| "grad_norm": 0.00019038471509702504, |
| "learning_rate": 4.4111056223181315e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2086008, |
| "step": 3375 |
| }, |
| { |
| "epoch": 6.024955436720143, |
| "grad_norm": 5.39618413313292e-05, |
| "learning_rate": 4.408596180621889e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2089880, |
| "step": 3380 |
| }, |
| { |
| "epoch": 6.033868092691622, |
| "grad_norm": 0.00011664695193758234, |
| "learning_rate": 4.406082120616677e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2093336, |
| "step": 3385 |
| }, |
| { |
| "epoch": 6.042780748663102, |
| "grad_norm": 0.00011814136814791709, |
| "learning_rate": 4.403563448385872e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2095800, |
| "step": 3390 |
| }, |
| { |
| "epoch": 6.051693404634581, |
| "grad_norm": 0.00034572480944916606, |
| "learning_rate": 4.401040170024009e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2099000, |
| "step": 3395 |
| }, |
| { |
| "epoch": 6.0606060606060606, |
| "grad_norm": 0.00010606726573314518, |
| "learning_rate": 4.398512291636768e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2102264, |
| "step": 3400 |
| }, |
| { |
| "epoch": 6.06951871657754, |
| "grad_norm": 0.00011406264820834622, |
| "learning_rate": 4.395979819340961e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2105048, |
| "step": 3405 |
| }, |
| { |
| "epoch": 6.078431372549019, |
| "grad_norm": 0.00042664259672164917, |
| "learning_rate": 4.3934427592645166e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2107480, |
| "step": 3410 |
| }, |
| { |
| "epoch": 6.087344028520499, |
| "grad_norm": 0.00013146565470378846, |
| "learning_rate": 4.390901117546463e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2109688, |
| "step": 3415 |
| }, |
| { |
| "epoch": 6.096256684491979, |
| "grad_norm": 0.00023358648468274623, |
| "learning_rate": 4.388354900336916e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2113464, |
| "step": 3420 |
| }, |
| { |
| "epoch": 6.105169340463458, |
| "grad_norm": 4.177187292953022e-05, |
| "learning_rate": 4.385804113797062e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2116408, |
| "step": 3425 |
| }, |
| { |
| "epoch": 6.114081996434938, |
| "grad_norm": 0.00010631237091729417, |
| "learning_rate": 4.3832487640991446e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2119768, |
| "step": 3430 |
| }, |
| { |
| "epoch": 6.122994652406417, |
| "grad_norm": 4.5598495489684865e-05, |
| "learning_rate": 4.3806888574264495e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2122680, |
| "step": 3435 |
| }, |
| { |
| "epoch": 6.1319073083778965, |
| "grad_norm": 7.063472730806097e-05, |
| "learning_rate": 4.378124399973287e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2125816, |
| "step": 3440 |
| }, |
| { |
| "epoch": 6.140819964349376, |
| "grad_norm": 0.00018453726079314947, |
| "learning_rate": 4.375555397944983e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2128472, |
| "step": 3445 |
| }, |
| { |
| "epoch": 6.149732620320855, |
| "grad_norm": 0.0016097086481750011, |
| "learning_rate": 4.372981857557856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2130776, |
| "step": 3450 |
| }, |
| { |
| "epoch": 6.158645276292335, |
| "grad_norm": 0.00010451207344885916, |
| "learning_rate": 4.3704037850392085e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2134488, |
| "step": 3455 |
| }, |
| { |
| "epoch": 6.167557932263815, |
| "grad_norm": 0.00021727255079895258, |
| "learning_rate": 4.367821186627309e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2137528, |
| "step": 3460 |
| }, |
| { |
| "epoch": 6.176470588235294, |
| "grad_norm": 0.00022375237313099205, |
| "learning_rate": 4.365234068571377e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2141400, |
| "step": 3465 |
| }, |
| { |
| "epoch": 6.185383244206774, |
| "grad_norm": 0.00022818404249846935, |
| "learning_rate": 4.36264243713157e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2144792, |
| "step": 3470 |
| }, |
| { |
| "epoch": 6.194295900178253, |
| "grad_norm": 0.00013080937787890434, |
| "learning_rate": 4.360046298578965e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2148536, |
| "step": 3475 |
| }, |
| { |
| "epoch": 6.2032085561497325, |
| "grad_norm": 8.40335269458592e-05, |
| "learning_rate": 4.357445659195545e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2151736, |
| "step": 3480 |
| }, |
| { |
| "epoch": 6.212121212121212, |
| "grad_norm": 0.00010684873268473893, |
| "learning_rate": 4.354840525274185e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2154744, |
| "step": 3485 |
| }, |
| { |
| "epoch": 6.221033868092691, |
| "grad_norm": 0.00023128798056859523, |
| "learning_rate": 4.352230903118636e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2157784, |
| "step": 3490 |
| }, |
| { |
| "epoch": 6.229946524064171, |
| "grad_norm": 0.00011345902748871595, |
| "learning_rate": 4.3496167990435065e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2161112, |
| "step": 3495 |
| }, |
| { |
| "epoch": 6.238859180035651, |
| "grad_norm": 5.867306754225865e-05, |
| "learning_rate": 4.346998219374253e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2164056, |
| "step": 3500 |
| }, |
| { |
| "epoch": 6.24777183600713, |
| "grad_norm": 0.00016827252693474293, |
| "learning_rate": 4.344375170447162e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2167352, |
| "step": 3505 |
| }, |
| { |
| "epoch": 6.25668449197861, |
| "grad_norm": 0.0012681673979386687, |
| "learning_rate": 4.341747658609331e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2170008, |
| "step": 3510 |
| }, |
| { |
| "epoch": 6.265597147950089, |
| "grad_norm": 0.00011075458314735442, |
| "learning_rate": 4.3391156902186615e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2173912, |
| "step": 3515 |
| }, |
| { |
| "epoch": 6.2745098039215685, |
| "grad_norm": 0.0001631032646400854, |
| "learning_rate": 4.336479271643833e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2177208, |
| "step": 3520 |
| }, |
| { |
| "epoch": 6.283422459893048, |
| "grad_norm": 0.00010968335845973343, |
| "learning_rate": 4.333838409264299e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2180568, |
| "step": 3525 |
| }, |
| { |
| "epoch": 6.292335115864527, |
| "grad_norm": 0.00011602386803133413, |
| "learning_rate": 4.331193109470262e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2183640, |
| "step": 3530 |
| }, |
| { |
| "epoch": 6.301247771836007, |
| "grad_norm": 9.8639284260571e-05, |
| "learning_rate": 4.328543378662664e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2186840, |
| "step": 3535 |
| }, |
| { |
| "epoch": 6.310160427807487, |
| "grad_norm": 0.0004059053317178041, |
| "learning_rate": 4.3258892232531664e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2189400, |
| "step": 3540 |
| }, |
| { |
| "epoch": 6.319073083778966, |
| "grad_norm": 4.721015284303576e-05, |
| "learning_rate": 4.3232306496641396e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2192600, |
| "step": 3545 |
| }, |
| { |
| "epoch": 6.327985739750446, |
| "grad_norm": 0.00016030446568038315, |
| "learning_rate": 4.320567664328644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2195480, |
| "step": 3550 |
| }, |
| { |
| "epoch": 6.336898395721925, |
| "grad_norm": 0.0020986474119126797, |
| "learning_rate": 4.317900273690415e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2198488, |
| "step": 3555 |
| }, |
| { |
| "epoch": 6.3458110516934045, |
| "grad_norm": 0.00016660125402268022, |
| "learning_rate": 4.315228484203848e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2201816, |
| "step": 3560 |
| }, |
| { |
| "epoch": 6.354723707664884, |
| "grad_norm": 6.600496999453753e-05, |
| "learning_rate": 4.312552302333982e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2205048, |
| "step": 3565 |
| }, |
| { |
| "epoch": 6.363636363636363, |
| "grad_norm": 7.55599103285931e-05, |
| "learning_rate": 4.3098717345564846e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2208408, |
| "step": 3570 |
| }, |
| { |
| "epoch": 6.372549019607844, |
| "grad_norm": 0.007104981690645218, |
| "learning_rate": 4.3071867873576364e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2211128, |
| "step": 3575 |
| }, |
| { |
| "epoch": 6.381461675579323, |
| "grad_norm": 0.00032805779483169317, |
| "learning_rate": 4.3044974672343164e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2214360, |
| "step": 3580 |
| }, |
| { |
| "epoch": 6.390374331550802, |
| "grad_norm": 8.772522414801642e-05, |
| "learning_rate": 4.301803780693982e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2217592, |
| "step": 3585 |
| }, |
| { |
| "epoch": 6.399286987522282, |
| "grad_norm": 7.108946010703221e-05, |
| "learning_rate": 4.299105734254657e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2220216, |
| "step": 3590 |
| }, |
| { |
| "epoch": 6.408199643493761, |
| "grad_norm": 0.0006188752595335245, |
| "learning_rate": 4.2964033344449174e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2223608, |
| "step": 3595 |
| }, |
| { |
| "epoch": 6.4171122994652405, |
| "grad_norm": 0.0004142081888858229, |
| "learning_rate": 4.293696587803871e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2226520, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.42602495543672, |
| "grad_norm": 6.948486407054588e-05, |
| "learning_rate": 4.290985500881143e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2229752, |
| "step": 3605 |
| }, |
| { |
| "epoch": 6.434937611408199, |
| "grad_norm": 0.00030549734947271645, |
| "learning_rate": 4.2882700802368644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2232088, |
| "step": 3610 |
| }, |
| { |
| "epoch": 6.443850267379679, |
| "grad_norm": 8.570123463869095e-05, |
| "learning_rate": 4.285550332441651e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2235320, |
| "step": 3615 |
| }, |
| { |
| "epoch": 6.452762923351159, |
| "grad_norm": 7.770225056447089e-05, |
| "learning_rate": 4.282826264076587e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2238200, |
| "step": 3620 |
| }, |
| { |
| "epoch": 6.461675579322638, |
| "grad_norm": 0.0005393362371250987, |
| "learning_rate": 4.2800978817332136e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2241080, |
| "step": 3625 |
| }, |
| { |
| "epoch": 6.470588235294118, |
| "grad_norm": 0.0004659408878069371, |
| "learning_rate": 4.27736519201351e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2244280, |
| "step": 3630 |
| }, |
| { |
| "epoch": 6.479500891265597, |
| "grad_norm": 0.00013535030302591622, |
| "learning_rate": 4.27462820152988e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2247864, |
| "step": 3635 |
| }, |
| { |
| "epoch": 6.4884135472370765, |
| "grad_norm": 0.02784336358308792, |
| "learning_rate": 4.27188691690513e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2250520, |
| "step": 3640 |
| }, |
| { |
| "epoch": 6.497326203208556, |
| "grad_norm": 7.759433356113732e-05, |
| "learning_rate": 4.269141344772461e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2253016, |
| "step": 3645 |
| }, |
| { |
| "epoch": 6.506238859180035, |
| "grad_norm": 0.0012859473936259747, |
| "learning_rate": 4.2663914917754474e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2256536, |
| "step": 3650 |
| }, |
| { |
| "epoch": 6.515151515151516, |
| "grad_norm": 6.0505506553454325e-05, |
| "learning_rate": 4.263637364568021e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2259736, |
| "step": 3655 |
| }, |
| { |
| "epoch": 6.524064171122995, |
| "grad_norm": 0.00010412324627395719, |
| "learning_rate": 4.260878969814458e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2262776, |
| "step": 3660 |
| }, |
| { |
| "epoch": 6.532976827094474, |
| "grad_norm": 5.067836900707334e-05, |
| "learning_rate": 4.25811631418936e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2265944, |
| "step": 3665 |
| }, |
| { |
| "epoch": 6.541889483065954, |
| "grad_norm": 6.499042501673102e-05, |
| "learning_rate": 4.255349404377638e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2268824, |
| "step": 3670 |
| }, |
| { |
| "epoch": 6.550802139037433, |
| "grad_norm": 0.00021577828738372773, |
| "learning_rate": 4.252578247074499e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2271992, |
| "step": 3675 |
| }, |
| { |
| "epoch": 6.5597147950089125, |
| "grad_norm": 6.629472773056477e-05, |
| "learning_rate": 4.249802848985426e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2275320, |
| "step": 3680 |
| }, |
| { |
| "epoch": 6.568627450980392, |
| "grad_norm": 9.349354513688013e-05, |
| "learning_rate": 4.247023216826164e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2277784, |
| "step": 3685 |
| }, |
| { |
| "epoch": 6.577540106951871, |
| "grad_norm": 4.333916513132863e-05, |
| "learning_rate": 4.2442393573227046e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2280152, |
| "step": 3690 |
| }, |
| { |
| "epoch": 6.586452762923351, |
| "grad_norm": 7.514766184613109e-05, |
| "learning_rate": 4.241451277211268e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2283480, |
| "step": 3695 |
| }, |
| { |
| "epoch": 6.595365418894831, |
| "grad_norm": 0.0007041016942821443, |
| "learning_rate": 4.238658983238284e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2286328, |
| "step": 3700 |
| }, |
| { |
| "epoch": 6.60427807486631, |
| "grad_norm": 0.0005451409379020333, |
| "learning_rate": 4.2358624821603856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2289112, |
| "step": 3705 |
| }, |
| { |
| "epoch": 6.61319073083779, |
| "grad_norm": 5.5295840866165236e-05, |
| "learning_rate": 4.2330617807443783e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2292312, |
| "step": 3710 |
| }, |
| { |
| "epoch": 6.622103386809269, |
| "grad_norm": 0.0012082583270967007, |
| "learning_rate": 4.2302568857672375e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2295576, |
| "step": 3715 |
| }, |
| { |
| "epoch": 6.6310160427807485, |
| "grad_norm": 0.0004467536637093872, |
| "learning_rate": 4.2274478040160823e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2298392, |
| "step": 3720 |
| }, |
| { |
| "epoch": 6.639928698752228, |
| "grad_norm": 3.002311132149771e-05, |
| "learning_rate": 4.224634542288163e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2301496, |
| "step": 3725 |
| }, |
| { |
| "epoch": 6.648841354723707, |
| "grad_norm": 0.00013791839592158794, |
| "learning_rate": 4.221817107390847e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2304120, |
| "step": 3730 |
| }, |
| { |
| "epoch": 6.657754010695188, |
| "grad_norm": 4.801477552973665e-05, |
| "learning_rate": 4.2189955061415965e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2307160, |
| "step": 3735 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.00012627999240066856, |
| "learning_rate": 4.216169745367956e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2310648, |
| "step": 3740 |
| }, |
| { |
| "epoch": 6.675579322638146, |
| "grad_norm": 4.972055830876343e-05, |
| "learning_rate": 4.2133398319075366e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2313976, |
| "step": 3745 |
| }, |
| { |
| "epoch": 6.684491978609626, |
| "grad_norm": 0.0004118916403967887, |
| "learning_rate": 4.210505772607997e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2316984, |
| "step": 3750 |
| }, |
| { |
| "epoch": 6.693404634581105, |
| "grad_norm": 3.760185791179538e-05, |
| "learning_rate": 4.207667574327027e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2319576, |
| "step": 3755 |
| }, |
| { |
| "epoch": 6.7023172905525845, |
| "grad_norm": 5.7546676544006914e-05, |
| "learning_rate": 4.204825243932331e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2322936, |
| "step": 3760 |
| }, |
| { |
| "epoch": 6.711229946524064, |
| "grad_norm": 5.261941623757593e-05, |
| "learning_rate": 4.2019787883016145e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2325784, |
| "step": 3765 |
| }, |
| { |
| "epoch": 6.720142602495543, |
| "grad_norm": 3.78849363187328e-05, |
| "learning_rate": 4.199128214322564e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2328952, |
| "step": 3770 |
| }, |
| { |
| "epoch": 6.729055258467023, |
| "grad_norm": 5.117354521644302e-05, |
| "learning_rate": 4.1962735288928305e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2332088, |
| "step": 3775 |
| }, |
| { |
| "epoch": 6.737967914438503, |
| "grad_norm": 0.00011116250971099362, |
| "learning_rate": 4.193414738920014e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2335384, |
| "step": 3780 |
| }, |
| { |
| "epoch": 6.746880570409982, |
| "grad_norm": 0.0002807832497637719, |
| "learning_rate": 4.1905518513216466e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2338168, |
| "step": 3785 |
| }, |
| { |
| "epoch": 6.755793226381462, |
| "grad_norm": 2.829926052072551e-05, |
| "learning_rate": 4.187684873025176e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2341400, |
| "step": 3790 |
| }, |
| { |
| "epoch": 6.764705882352941, |
| "grad_norm": 9.852102084551007e-05, |
| "learning_rate": 4.184813810967947e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2344280, |
| "step": 3795 |
| }, |
| { |
| "epoch": 6.7736185383244205, |
| "grad_norm": 0.00025870383251458406, |
| "learning_rate": 4.181938672097189e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2347288, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.7825311942959, |
| "grad_norm": 7.604555139550939e-05, |
| "learning_rate": 4.1790594633699917e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2350232, |
| "step": 3805 |
| }, |
| { |
| "epoch": 6.791443850267379, |
| "grad_norm": 4.824809366255067e-05, |
| "learning_rate": 4.1761761917532974e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2352856, |
| "step": 3810 |
| }, |
| { |
| "epoch": 6.80035650623886, |
| "grad_norm": 8.800329669611529e-05, |
| "learning_rate": 4.173288864223876e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2356024, |
| "step": 3815 |
| }, |
| { |
| "epoch": 6.809269162210339, |
| "grad_norm": 8.250436803791672e-05, |
| "learning_rate": 4.170397487768314e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2359224, |
| "step": 3820 |
| }, |
| { |
| "epoch": 6.818181818181818, |
| "grad_norm": 3.099319656030275e-05, |
| "learning_rate": 4.1675020693829933e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2362744, |
| "step": 3825 |
| }, |
| { |
| "epoch": 6.827094474153298, |
| "grad_norm": 8.77875936566852e-05, |
| "learning_rate": 4.164602616074079e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2366072, |
| "step": 3830 |
| }, |
| { |
| "epoch": 6.836007130124777, |
| "grad_norm": 7.556960918009281e-05, |
| "learning_rate": 4.161699134857497e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2368600, |
| "step": 3835 |
| }, |
| { |
| "epoch": 6.8449197860962565, |
| "grad_norm": 0.0002625234774313867, |
| "learning_rate": 4.1587916327589205e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2372056, |
| "step": 3840 |
| }, |
| { |
| "epoch": 6.853832442067736, |
| "grad_norm": 0.00016840100579429418, |
| "learning_rate": 4.1558801168137526e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2374360, |
| "step": 3845 |
| }, |
| { |
| "epoch": 6.862745098039216, |
| "grad_norm": 7.558833749499172e-05, |
| "learning_rate": 4.152964594067108e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2376920, |
| "step": 3850 |
| }, |
| { |
| "epoch": 6.871657754010696, |
| "grad_norm": 3.281732278992422e-05, |
| "learning_rate": 4.150045071573798e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2380120, |
| "step": 3855 |
| }, |
| { |
| "epoch": 6.880570409982175, |
| "grad_norm": 0.00011538410763023421, |
| "learning_rate": 4.147121556398312e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2383448, |
| "step": 3860 |
| }, |
| { |
| "epoch": 6.889483065953654, |
| "grad_norm": 0.00011612474190769717, |
| "learning_rate": 4.1441940556148006e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2386520, |
| "step": 3865 |
| }, |
| { |
| "epoch": 6.898395721925134, |
| "grad_norm": 2.983517879329156e-05, |
| "learning_rate": 4.141262576307058e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2389688, |
| "step": 3870 |
| }, |
| { |
| "epoch": 6.907308377896613, |
| "grad_norm": 0.0001038245391100645, |
| "learning_rate": 4.138327125568505e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2392856, |
| "step": 3875 |
| }, |
| { |
| "epoch": 6.9162210338680925, |
| "grad_norm": 0.0001221461861860007, |
| "learning_rate": 4.1353877105021726e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2396088, |
| "step": 3880 |
| }, |
| { |
| "epoch": 6.925133689839572, |
| "grad_norm": 0.00037911630352027714, |
| "learning_rate": 4.1324443382206864e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2398840, |
| "step": 3885 |
| }, |
| { |
| "epoch": 6.934046345811051, |
| "grad_norm": 5.34054015588481e-05, |
| "learning_rate": 4.129497015846245e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2401528, |
| "step": 3890 |
| }, |
| { |
| "epoch": 6.942959001782532, |
| "grad_norm": 9.827831672737375e-05, |
| "learning_rate": 4.126545750510605e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2405016, |
| "step": 3895 |
| }, |
| { |
| "epoch": 6.951871657754011, |
| "grad_norm": 0.00012127986701671034, |
| "learning_rate": 4.123590549355067e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2408440, |
| "step": 3900 |
| }, |
| { |
| "epoch": 6.96078431372549, |
| "grad_norm": 5.946194869466126e-05, |
| "learning_rate": 4.1206314195304524e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2411864, |
| "step": 3905 |
| }, |
| { |
| "epoch": 6.96969696969697, |
| "grad_norm": 5.441272151074372e-05, |
| "learning_rate": 4.117668368197089e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2414904, |
| "step": 3910 |
| }, |
| { |
| "epoch": 6.978609625668449, |
| "grad_norm": 8.909405005397275e-05, |
| "learning_rate": 4.1147014025247954e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2418616, |
| "step": 3915 |
| }, |
| { |
| "epoch": 6.9875222816399285, |
| "grad_norm": 2.683120510482695e-05, |
| "learning_rate": 4.111730529692861e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2421976, |
| "step": 3920 |
| }, |
| { |
| "epoch": 6.996434937611408, |
| "grad_norm": 5.379191861720756e-05, |
| "learning_rate": 4.108755756890028e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2424152, |
| "step": 3925 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.20610958337783813, |
| "eval_runtime": 4.5805, |
| "eval_samples_per_second": 54.361, |
| "eval_steps_per_second": 13.754, |
| "num_input_tokens_seen": 2425192, |
| "step": 3927 |
| }, |
| { |
| "epoch": 7.005347593582887, |
| "grad_norm": 4.8729463742347434e-05, |
| "learning_rate": 4.105777091314478e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2427720, |
| "step": 3930 |
| }, |
| { |
| "epoch": 7.0142602495543676, |
| "grad_norm": 3.457432831055485e-05, |
| "learning_rate": 4.102794540173812e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2431432, |
| "step": 3935 |
| }, |
| { |
| "epoch": 7.023172905525847, |
| "grad_norm": 0.0002528139157220721, |
| "learning_rate": 4.09980811068503e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2434088, |
| "step": 3940 |
| }, |
| { |
| "epoch": 7.032085561497326, |
| "grad_norm": 4.039124178234488e-05, |
| "learning_rate": 4.09681781007452e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2437192, |
| "step": 3945 |
| }, |
| { |
| "epoch": 7.040998217468806, |
| "grad_norm": 5.282991332933307e-05, |
| "learning_rate": 4.0938236455780364e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2439816, |
| "step": 3950 |
| }, |
| { |
| "epoch": 7.049910873440285, |
| "grad_norm": 0.00011657732102321461, |
| "learning_rate": 4.090825624440682e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2442632, |
| "step": 3955 |
| }, |
| { |
| "epoch": 7.0588235294117645, |
| "grad_norm": 0.00018239057681057602, |
| "learning_rate": 4.0878237539168915e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2445640, |
| "step": 3960 |
| }, |
| { |
| "epoch": 7.067736185383244, |
| "grad_norm": 4.836150765186176e-05, |
| "learning_rate": 4.084818041270416e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2448264, |
| "step": 3965 |
| }, |
| { |
| "epoch": 7.076648841354723, |
| "grad_norm": 8.737414464121684e-05, |
| "learning_rate": 4.081808493774302e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2450856, |
| "step": 3970 |
| }, |
| { |
| "epoch": 7.0855614973262036, |
| "grad_norm": 0.00010665191803127527, |
| "learning_rate": 4.0787951187108754e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2453640, |
| "step": 3975 |
| }, |
| { |
| "epoch": 7.094474153297683, |
| "grad_norm": 7.286696200026199e-05, |
| "learning_rate": 4.0757779233717255e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2456392, |
| "step": 3980 |
| }, |
| { |
| "epoch": 7.103386809269162, |
| "grad_norm": 3.0005081498529762e-05, |
| "learning_rate": 4.072756915057683e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2459656, |
| "step": 3985 |
| }, |
| { |
| "epoch": 7.112299465240642, |
| "grad_norm": 3.010162436112296e-05, |
| "learning_rate": 4.069732101078808e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2462632, |
| "step": 3990 |
| }, |
| { |
| "epoch": 7.121212121212121, |
| "grad_norm": 5.150728247826919e-05, |
| "learning_rate": 4.066703488754366e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2465480, |
| "step": 3995 |
| }, |
| { |
| "epoch": 7.1301247771836005, |
| "grad_norm": 4.586328941513784e-05, |
| "learning_rate": 4.063671085412817e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2468456, |
| "step": 4000 |
| }, |
| { |
| "epoch": 7.13903743315508, |
| "grad_norm": 0.0002808906720019877, |
| "learning_rate": 4.060634898391792e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2472328, |
| "step": 4005 |
| }, |
| { |
| "epoch": 7.14795008912656, |
| "grad_norm": 6.467744969995692e-05, |
| "learning_rate": 4.057594935038077e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2475944, |
| "step": 4010 |
| }, |
| { |
| "epoch": 7.1568627450980395, |
| "grad_norm": 4.673215516959317e-05, |
| "learning_rate": 4.054551202707597e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2478760, |
| "step": 4015 |
| }, |
| { |
| "epoch": 7.165775401069519, |
| "grad_norm": 3.4133565350202844e-05, |
| "learning_rate": 4.051503708765399e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2482408, |
| "step": 4020 |
| }, |
| { |
| "epoch": 7.174688057040998, |
| "grad_norm": 3.378849942237139e-05, |
| "learning_rate": 4.048452460585627e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2485416, |
| "step": 4025 |
| }, |
| { |
| "epoch": 7.183600713012478, |
| "grad_norm": 3.80839264835231e-05, |
| "learning_rate": 4.045397465551513e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2488712, |
| "step": 4030 |
| }, |
| { |
| "epoch": 7.192513368983957, |
| "grad_norm": 3.3100717701017857e-05, |
| "learning_rate": 4.042338731055356e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2491944, |
| "step": 4035 |
| }, |
| { |
| "epoch": 7.2014260249554365, |
| "grad_norm": 3.59532205038704e-05, |
| "learning_rate": 4.039276264498501e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2494568, |
| "step": 4040 |
| }, |
| { |
| "epoch": 7.210338680926916, |
| "grad_norm": 2.274327925988473e-05, |
| "learning_rate": 4.0362100732913246e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2497544, |
| "step": 4045 |
| }, |
| { |
| "epoch": 7.219251336898395, |
| "grad_norm": 2.8736509193549864e-05, |
| "learning_rate": 4.0331401648532166e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2500840, |
| "step": 4050 |
| }, |
| { |
| "epoch": 7.2281639928698755, |
| "grad_norm": 0.00025308437761850655, |
| "learning_rate": 4.030066546612562e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2504168, |
| "step": 4055 |
| }, |
| { |
| "epoch": 7.237076648841355, |
| "grad_norm": 2.420450618956238e-05, |
| "learning_rate": 4.02698922600672e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2507016, |
| "step": 4060 |
| }, |
| { |
| "epoch": 7.245989304812834, |
| "grad_norm": 4.901519059785642e-05, |
| "learning_rate": 4.0239082104820114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2509672, |
| "step": 4065 |
| }, |
| { |
| "epoch": 7.254901960784314, |
| "grad_norm": 4.465365782380104e-05, |
| "learning_rate": 4.020823507493696e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2512584, |
| "step": 4070 |
| }, |
| { |
| "epoch": 7.263814616755793, |
| "grad_norm": 0.000388812884921208, |
| "learning_rate": 4.017735124505958e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2515592, |
| "step": 4075 |
| }, |
| { |
| "epoch": 7.2727272727272725, |
| "grad_norm": 2.8893498893012293e-05, |
| "learning_rate": 4.014643068991885e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2518664, |
| "step": 4080 |
| }, |
| { |
| "epoch": 7.281639928698752, |
| "grad_norm": 2.9145874577807263e-05, |
| "learning_rate": 4.0115473484334495e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2522184, |
| "step": 4085 |
| }, |
| { |
| "epoch": 7.290552584670232, |
| "grad_norm": 3.5548859159462154e-05, |
| "learning_rate": 4.008447970321497e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2525448, |
| "step": 4090 |
| }, |
| { |
| "epoch": 7.2994652406417115, |
| "grad_norm": 8.155805699061602e-05, |
| "learning_rate": 4.005344942155719e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2529160, |
| "step": 4095 |
| }, |
| { |
| "epoch": 7.308377896613191, |
| "grad_norm": 3.165958696627058e-05, |
| "learning_rate": 4.0022382714446415e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2532840, |
| "step": 4100 |
| }, |
| { |
| "epoch": 7.31729055258467, |
| "grad_norm": 3.817788092419505e-05, |
| "learning_rate": 3.9991279657056034e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2535400, |
| "step": 4105 |
| }, |
| { |
| "epoch": 7.32620320855615, |
| "grad_norm": 7.300837023649365e-05, |
| "learning_rate": 3.996014032464741e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2538888, |
| "step": 4110 |
| }, |
| { |
| "epoch": 7.335115864527629, |
| "grad_norm": 8.633810648461804e-05, |
| "learning_rate": 3.9928964792569655e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2542376, |
| "step": 4115 |
| }, |
| { |
| "epoch": 7.3440285204991085, |
| "grad_norm": 4.182643897365779e-05, |
| "learning_rate": 3.98977531362595e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2544712, |
| "step": 4120 |
| }, |
| { |
| "epoch": 7.352941176470588, |
| "grad_norm": 3.7395973777165636e-05, |
| "learning_rate": 3.9866505431241084e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2547784, |
| "step": 4125 |
| }, |
| { |
| "epoch": 7.361853832442068, |
| "grad_norm": 2.852731449820567e-05, |
| "learning_rate": 3.983522175312576e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2551272, |
| "step": 4130 |
| }, |
| { |
| "epoch": 7.3707664884135475, |
| "grad_norm": 0.00015264320245478302, |
| "learning_rate": 3.980390217761193e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2555176, |
| "step": 4135 |
| }, |
| { |
| "epoch": 7.379679144385027, |
| "grad_norm": 4.525822077994235e-05, |
| "learning_rate": 3.9772546780484874e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2559144, |
| "step": 4140 |
| }, |
| { |
| "epoch": 7.388591800356506, |
| "grad_norm": 2.5928084141924046e-05, |
| "learning_rate": 3.974115563761655e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2561640, |
| "step": 4145 |
| }, |
| { |
| "epoch": 7.397504456327986, |
| "grad_norm": 0.00020593231602106243, |
| "learning_rate": 3.970972882496537e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2564424, |
| "step": 4150 |
| }, |
| { |
| "epoch": 7.406417112299465, |
| "grad_norm": 4.3078893213532865e-05, |
| "learning_rate": 3.967826641857612e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2567048, |
| "step": 4155 |
| }, |
| { |
| "epoch": 7.4153297682709445, |
| "grad_norm": 0.0001659034751355648, |
| "learning_rate": 3.964676849457968e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2569736, |
| "step": 4160 |
| }, |
| { |
| "epoch": 7.424242424242424, |
| "grad_norm": 0.00016778869030531496, |
| "learning_rate": 3.961523512919286e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2573096, |
| "step": 4165 |
| }, |
| { |
| "epoch": 7.433155080213904, |
| "grad_norm": 2.2783213353250176e-05, |
| "learning_rate": 3.958366639871826e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2576232, |
| "step": 4170 |
| }, |
| { |
| "epoch": 7.4420677361853835, |
| "grad_norm": 9.495346603216603e-05, |
| "learning_rate": 3.955206237954404e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2579816, |
| "step": 4175 |
| }, |
| { |
| "epoch": 7.450980392156863, |
| "grad_norm": 0.00015493450337089598, |
| "learning_rate": 3.952042314814375e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2583240, |
| "step": 4180 |
| }, |
| { |
| "epoch": 7.459893048128342, |
| "grad_norm": 4.0488488593837246e-05, |
| "learning_rate": 3.9488748781076136e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2586536, |
| "step": 4185 |
| }, |
| { |
| "epoch": 7.468805704099822, |
| "grad_norm": 0.00010972293239319697, |
| "learning_rate": 3.9457039354984974e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2588712, |
| "step": 4190 |
| }, |
| { |
| "epoch": 7.477718360071301, |
| "grad_norm": 7.80750997364521e-05, |
| "learning_rate": 3.942529494659888e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2591368, |
| "step": 4195 |
| }, |
| { |
| "epoch": 7.4866310160427805, |
| "grad_norm": 3.180309067829512e-05, |
| "learning_rate": 3.93935156327311e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2594408, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.49554367201426, |
| "grad_norm": 2.1605219444609247e-05, |
| "learning_rate": 3.9361701490279355e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2597256, |
| "step": 4205 |
| }, |
| { |
| "epoch": 7.50445632798574, |
| "grad_norm": 4.804494892596267e-05, |
| "learning_rate": 3.9329852596225644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2600136, |
| "step": 4210 |
| }, |
| { |
| "epoch": 7.5133689839572195, |
| "grad_norm": 3.224185638828203e-05, |
| "learning_rate": 3.929796902763604e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2603592, |
| "step": 4215 |
| }, |
| { |
| "epoch": 7.522281639928699, |
| "grad_norm": 3.549808025127277e-05, |
| "learning_rate": 3.926605086166054e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2606472, |
| "step": 4220 |
| }, |
| { |
| "epoch": 7.531194295900178, |
| "grad_norm": 2.821335510816425e-05, |
| "learning_rate": 3.923409817553284e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2609448, |
| "step": 4225 |
| }, |
| { |
| "epoch": 7.540106951871658, |
| "grad_norm": 0.00011925551370950416, |
| "learning_rate": 3.9202111046570175e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2612168, |
| "step": 4230 |
| }, |
| { |
| "epoch": 7.549019607843137, |
| "grad_norm": 4.418912067194469e-05, |
| "learning_rate": 3.917008955217314e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2615464, |
| "step": 4235 |
| }, |
| { |
| "epoch": 7.5579322638146165, |
| "grad_norm": 2.4098915673675947e-05, |
| "learning_rate": 3.9138033769825434e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2618088, |
| "step": 4240 |
| }, |
| { |
| "epoch": 7.566844919786096, |
| "grad_norm": 9.82450510491617e-05, |
| "learning_rate": 3.910594377709378e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2621064, |
| "step": 4245 |
| }, |
| { |
| "epoch": 7.575757575757576, |
| "grad_norm": 6.234718603082001e-05, |
| "learning_rate": 3.9073819651627654e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2624104, |
| "step": 4250 |
| }, |
| { |
| "epoch": 7.5846702317290555, |
| "grad_norm": 6.715167546644807e-05, |
| "learning_rate": 3.904166147115912e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2626984, |
| "step": 4255 |
| }, |
| { |
| "epoch": 7.593582887700535, |
| "grad_norm": 4.9607984692556784e-05, |
| "learning_rate": 3.9009469313502664e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2630440, |
| "step": 4260 |
| }, |
| { |
| "epoch": 7.602495543672014, |
| "grad_norm": 4.675610762205906e-05, |
| "learning_rate": 3.897724325655497e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2633192, |
| "step": 4265 |
| }, |
| { |
| "epoch": 7.611408199643494, |
| "grad_norm": 0.00048179851728491485, |
| "learning_rate": 3.8944983378294775e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2636584, |
| "step": 4270 |
| }, |
| { |
| "epoch": 7.620320855614973, |
| "grad_norm": 0.0001708936906652525, |
| "learning_rate": 3.8912689756782624e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2639560, |
| "step": 4275 |
| }, |
| { |
| "epoch": 7.6292335115864525, |
| "grad_norm": 0.0001099999135476537, |
| "learning_rate": 3.888036247016073e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2642792, |
| "step": 4280 |
| }, |
| { |
| "epoch": 7.638146167557933, |
| "grad_norm": 3.335474684718065e-05, |
| "learning_rate": 3.884800159665276e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2645064, |
| "step": 4285 |
| }, |
| { |
| "epoch": 7.647058823529412, |
| "grad_norm": 5.769542622147128e-05, |
| "learning_rate": 3.881560721456365e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2647912, |
| "step": 4290 |
| }, |
| { |
| "epoch": 7.6559714795008915, |
| "grad_norm": 2.4842016500770114e-05, |
| "learning_rate": 3.8783179402279454e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2651048, |
| "step": 4295 |
| }, |
| { |
| "epoch": 7.664884135472371, |
| "grad_norm": 8.799693750916049e-05, |
| "learning_rate": 3.8750718238267045e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2654312, |
| "step": 4300 |
| }, |
| { |
| "epoch": 7.67379679144385, |
| "grad_norm": 2.1094869225635193e-05, |
| "learning_rate": 3.871822380107407e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2658184, |
| "step": 4305 |
| }, |
| { |
| "epoch": 7.68270944741533, |
| "grad_norm": 4.154935959377326e-05, |
| "learning_rate": 3.868569616932865e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2661352, |
| "step": 4310 |
| }, |
| { |
| "epoch": 7.691622103386809, |
| "grad_norm": 0.0002805989934131503, |
| "learning_rate": 3.865313542173925e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2663944, |
| "step": 4315 |
| }, |
| { |
| "epoch": 7.7005347593582885, |
| "grad_norm": 0.00014767648826818913, |
| "learning_rate": 3.862054163709444e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2667496, |
| "step": 4320 |
| }, |
| { |
| "epoch": 7.709447415329768, |
| "grad_norm": 9.234154276782647e-05, |
| "learning_rate": 3.8587914894262754e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2670536, |
| "step": 4325 |
| }, |
| { |
| "epoch": 7.718360071301248, |
| "grad_norm": 0.0003653615422081202, |
| "learning_rate": 3.8555255272192456e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2674184, |
| "step": 4330 |
| }, |
| { |
| "epoch": 7.7272727272727275, |
| "grad_norm": 6.81550518493168e-05, |
| "learning_rate": 3.85225628499114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2677224, |
| "step": 4335 |
| }, |
| { |
| "epoch": 7.736185383244207, |
| "grad_norm": 2.63045949395746e-05, |
| "learning_rate": 3.848983770652679e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2679880, |
| "step": 4340 |
| }, |
| { |
| "epoch": 7.745098039215686, |
| "grad_norm": 7.03669065842405e-05, |
| "learning_rate": 3.8457079921224994e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2682600, |
| "step": 4345 |
| }, |
| { |
| "epoch": 7.754010695187166, |
| "grad_norm": 3.367187309777364e-05, |
| "learning_rate": 3.842428957327138e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2685512, |
| "step": 4350 |
| }, |
| { |
| "epoch": 7.762923351158645, |
| "grad_norm": 0.00010811091487994418, |
| "learning_rate": 3.8391466742010105e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2688008, |
| "step": 4355 |
| }, |
| { |
| "epoch": 7.7718360071301245, |
| "grad_norm": 3.1085244700079784e-05, |
| "learning_rate": 3.835861150686393e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2690760, |
| "step": 4360 |
| }, |
| { |
| "epoch": 7.780748663101605, |
| "grad_norm": 6.103352643549442e-05, |
| "learning_rate": 3.8325723947334036e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2694856, |
| "step": 4365 |
| }, |
| { |
| "epoch": 7.789661319073084, |
| "grad_norm": 4.178649760433473e-05, |
| "learning_rate": 3.82928041429998e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2697928, |
| "step": 4370 |
| }, |
| { |
| "epoch": 7.7985739750445635, |
| "grad_norm": 7.200521213235334e-05, |
| "learning_rate": 3.825985217351862e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2701224, |
| "step": 4375 |
| }, |
| { |
| "epoch": 7.807486631016043, |
| "grad_norm": 0.00012286264973226935, |
| "learning_rate": 3.822686811862575e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2704104, |
| "step": 4380 |
| }, |
| { |
| "epoch": 7.816399286987522, |
| "grad_norm": 3.214297612430528e-05, |
| "learning_rate": 3.819385205813407e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2707528, |
| "step": 4385 |
| }, |
| { |
| "epoch": 7.825311942959002, |
| "grad_norm": 3.2054165785666555e-05, |
| "learning_rate": 3.81608040719339e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2710824, |
| "step": 4390 |
| }, |
| { |
| "epoch": 7.834224598930481, |
| "grad_norm": 4.8690933908801526e-05, |
| "learning_rate": 3.812772423999281e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2714280, |
| "step": 4395 |
| }, |
| { |
| "epoch": 7.8431372549019605, |
| "grad_norm": 3.2846561225596815e-05, |
| "learning_rate": 3.809461264235545e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2717416, |
| "step": 4400 |
| }, |
| { |
| "epoch": 7.85204991087344, |
| "grad_norm": 0.0007487075054086745, |
| "learning_rate": 3.806146935914331e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2720744, |
| "step": 4405 |
| }, |
| { |
| "epoch": 7.86096256684492, |
| "grad_norm": 0.00022331879881676286, |
| "learning_rate": 3.8028294470554565e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2723592, |
| "step": 4410 |
| }, |
| { |
| "epoch": 7.8698752228163995, |
| "grad_norm": 5.3162981203058735e-05, |
| "learning_rate": 3.799508805686386e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2725960, |
| "step": 4415 |
| }, |
| { |
| "epoch": 7.878787878787879, |
| "grad_norm": 1.9468281607259996e-05, |
| "learning_rate": 3.796185019842212e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2728936, |
| "step": 4420 |
| }, |
| { |
| "epoch": 7.887700534759358, |
| "grad_norm": 3.672900129458867e-05, |
| "learning_rate": 3.792858097565637e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2732264, |
| "step": 4425 |
| }, |
| { |
| "epoch": 7.896613190730838, |
| "grad_norm": 4.778657967108302e-05, |
| "learning_rate": 3.789528046906953e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2734408, |
| "step": 4430 |
| }, |
| { |
| "epoch": 7.905525846702317, |
| "grad_norm": 6.20819118921645e-05, |
| "learning_rate": 3.786194875924019e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2738184, |
| "step": 4435 |
| }, |
| { |
| "epoch": 7.9144385026737964, |
| "grad_norm": 0.00018136748985853046, |
| "learning_rate": 3.7828585926822466e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2742056, |
| "step": 4440 |
| }, |
| { |
| "epoch": 7.923351158645277, |
| "grad_norm": 2.4137643777066842e-05, |
| "learning_rate": 3.77951920525458e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2745512, |
| "step": 4445 |
| }, |
| { |
| "epoch": 7.932263814616756, |
| "grad_norm": 2.8110442144679837e-05, |
| "learning_rate": 3.776176721721472e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2749064, |
| "step": 4450 |
| }, |
| { |
| "epoch": 7.9411764705882355, |
| "grad_norm": 6.0824535466963425e-05, |
| "learning_rate": 3.772831150170868e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2751368, |
| "step": 4455 |
| }, |
| { |
| "epoch": 7.950089126559715, |
| "grad_norm": 5.143785892869346e-05, |
| "learning_rate": 3.769482498698185e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2753768, |
| "step": 4460 |
| }, |
| { |
| "epoch": 7.959001782531194, |
| "grad_norm": 5.6326894991798326e-05, |
| "learning_rate": 3.766130775406293e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2756712, |
| "step": 4465 |
| }, |
| { |
| "epoch": 7.967914438502674, |
| "grad_norm": 5.954270454822108e-05, |
| "learning_rate": 3.7627759884054955e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2759528, |
| "step": 4470 |
| }, |
| { |
| "epoch": 7.976827094474153, |
| "grad_norm": 4.2564461182337254e-05, |
| "learning_rate": 3.7594181458135105e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2763464, |
| "step": 4475 |
| }, |
| { |
| "epoch": 7.9857397504456324, |
| "grad_norm": 2.670029607543256e-05, |
| "learning_rate": 3.756057255755446e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2766696, |
| "step": 4480 |
| }, |
| { |
| "epoch": 7.994652406417112, |
| "grad_norm": 2.5107356123044156e-05, |
| "learning_rate": 3.752693326363789e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2770952, |
| "step": 4485 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.21877846121788025, |
| "eval_runtime": 4.5861, |
| "eval_samples_per_second": 54.294, |
| "eval_steps_per_second": 13.737, |
| "num_input_tokens_seen": 2772384, |
| "step": 4488 |
| }, |
| { |
| "epoch": 8.003565062388592, |
| "grad_norm": 3.1642932299291715e-05, |
| "learning_rate": 3.749326365778376e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2773728, |
| "step": 4490 |
| }, |
| { |
| "epoch": 8.01247771836007, |
| "grad_norm": 0.0001709481730358675, |
| "learning_rate": 3.7459563821463816e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2776800, |
| "step": 4495 |
| }, |
| { |
| "epoch": 8.02139037433155, |
| "grad_norm": 2.5652454496594146e-05, |
| "learning_rate": 3.7425833836222944e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2779872, |
| "step": 4500 |
| }, |
| { |
| "epoch": 8.030303030303031, |
| "grad_norm": 3.859802382066846e-05, |
| "learning_rate": 3.739207378367898e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2782752, |
| "step": 4505 |
| }, |
| { |
| "epoch": 8.03921568627451, |
| "grad_norm": 2.2162514142110012e-05, |
| "learning_rate": 3.735828374552252e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2786144, |
| "step": 4510 |
| }, |
| { |
| "epoch": 8.04812834224599, |
| "grad_norm": 1.882631295302417e-05, |
| "learning_rate": 3.73244638035167e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2789024, |
| "step": 4515 |
| }, |
| { |
| "epoch": 8.057040998217468, |
| "grad_norm": 2.0714554921141826e-05, |
| "learning_rate": 3.7290614039497055e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2792096, |
| "step": 4520 |
| }, |
| { |
| "epoch": 8.065953654188949, |
| "grad_norm": 2.3915859856060706e-05, |
| "learning_rate": 3.7256734535371225e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2795424, |
| "step": 4525 |
| }, |
| { |
| "epoch": 8.074866310160427, |
| "grad_norm": 2.8331269277259707e-05, |
| "learning_rate": 3.722282537311887e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2798432, |
| "step": 4530 |
| }, |
| { |
| "epoch": 8.083778966131907, |
| "grad_norm": 2.6493120458326302e-05, |
| "learning_rate": 3.7188886634791374e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2801664, |
| "step": 4535 |
| }, |
| { |
| "epoch": 8.092691622103386, |
| "grad_norm": 4.4756827264791355e-05, |
| "learning_rate": 3.715491840251172e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2804640, |
| "step": 4540 |
| }, |
| { |
| "epoch": 8.101604278074866, |
| "grad_norm": 4.7346729843411595e-05, |
| "learning_rate": 3.712092075847423e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2807424, |
| "step": 4545 |
| }, |
| { |
| "epoch": 8.110516934046347, |
| "grad_norm": 5.598060306510888e-05, |
| "learning_rate": 3.708689378494441e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2810496, |
| "step": 4550 |
| }, |
| { |
| "epoch": 8.119429590017825, |
| "grad_norm": 3.0805953429080546e-05, |
| "learning_rate": 3.705283756425872e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2813632, |
| "step": 4555 |
| }, |
| { |
| "epoch": 8.128342245989305, |
| "grad_norm": 0.000747096084523946, |
| "learning_rate": 3.701875217882443e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2817152, |
| "step": 4560 |
| }, |
| { |
| "epoch": 8.137254901960784, |
| "grad_norm": 3.350910992594436e-05, |
| "learning_rate": 3.698463771111933e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2820960, |
| "step": 4565 |
| }, |
| { |
| "epoch": 8.146167557932264, |
| "grad_norm": 2.3410793801303953e-05, |
| "learning_rate": 3.695049424369162e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2823616, |
| "step": 4570 |
| }, |
| { |
| "epoch": 8.155080213903743, |
| "grad_norm": 2.2134110622573644e-05, |
| "learning_rate": 3.6916321859159655e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2827424, |
| "step": 4575 |
| }, |
| { |
| "epoch": 8.163992869875223, |
| "grad_norm": 2.254007813462522e-05, |
| "learning_rate": 3.6882120640211745e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2830368, |
| "step": 4580 |
| }, |
| { |
| "epoch": 8.172905525846703, |
| "grad_norm": 1.957350832526572e-05, |
| "learning_rate": 3.684789066960602e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2833984, |
| "step": 4585 |
| }, |
| { |
| "epoch": 8.181818181818182, |
| "grad_norm": 3.5591012419899926e-05, |
| "learning_rate": 3.6813632030170145e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2837312, |
| "step": 4590 |
| }, |
| { |
| "epoch": 8.190730837789662, |
| "grad_norm": 2.466439582349267e-05, |
| "learning_rate": 3.677934480480116e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2840448, |
| "step": 4595 |
| }, |
| { |
| "epoch": 8.19964349376114, |
| "grad_norm": 0.0002501521375961602, |
| "learning_rate": 3.674502907646527e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2843520, |
| "step": 4600 |
| }, |
| { |
| "epoch": 8.20855614973262, |
| "grad_norm": 0.0016060250345617533, |
| "learning_rate": 3.6710684928197674e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2846304, |
| "step": 4605 |
| }, |
| { |
| "epoch": 8.2174688057041, |
| "grad_norm": 1.862948738562409e-05, |
| "learning_rate": 3.667631244310232e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2849344, |
| "step": 4610 |
| }, |
| { |
| "epoch": 8.22638146167558, |
| "grad_norm": 2.1913916498306207e-05, |
| "learning_rate": 3.6641911704351734e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2852704, |
| "step": 4615 |
| }, |
| { |
| "epoch": 8.235294117647058, |
| "grad_norm": 0.0001454515295336023, |
| "learning_rate": 3.66074827951868e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2856032, |
| "step": 4620 |
| }, |
| { |
| "epoch": 8.244206773618538, |
| "grad_norm": 0.00010574868792900816, |
| "learning_rate": 3.657302579891657e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2858720, |
| "step": 4625 |
| }, |
| { |
| "epoch": 8.253119429590019, |
| "grad_norm": 4.3842454033438116e-05, |
| "learning_rate": 3.653854079891805e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2861824, |
| "step": 4630 |
| }, |
| { |
| "epoch": 8.262032085561497, |
| "grad_norm": 3.260843368479982e-05, |
| "learning_rate": 3.650402787863605e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2864992, |
| "step": 4635 |
| }, |
| { |
| "epoch": 8.270944741532977, |
| "grad_norm": 3.535457290126942e-05, |
| "learning_rate": 3.646948712158287e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2868736, |
| "step": 4640 |
| }, |
| { |
| "epoch": 8.279857397504456, |
| "grad_norm": 3.386079697520472e-05, |
| "learning_rate": 3.643491861133822e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2871552, |
| "step": 4645 |
| }, |
| { |
| "epoch": 8.288770053475936, |
| "grad_norm": 2.413662150502205e-05, |
| "learning_rate": 3.640032243154896e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2874976, |
| "step": 4650 |
| }, |
| { |
| "epoch": 8.297682709447415, |
| "grad_norm": 6.0050530009903014e-05, |
| "learning_rate": 3.636569866592889e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2877504, |
| "step": 4655 |
| }, |
| { |
| "epoch": 8.306595365418895, |
| "grad_norm": 2.1612911950796843e-05, |
| "learning_rate": 3.633104739825856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2880096, |
| "step": 4660 |
| }, |
| { |
| "epoch": 8.315508021390375, |
| "grad_norm": 9.056284034159034e-05, |
| "learning_rate": 3.629636871238508e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2883040, |
| "step": 4665 |
| }, |
| { |
| "epoch": 8.324420677361854, |
| "grad_norm": 0.001587208709679544, |
| "learning_rate": 3.626166269222189e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2885888, |
| "step": 4670 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 3.2096664654091e-05, |
| "learning_rate": 3.622692942174858e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2889472, |
| "step": 4675 |
| }, |
| { |
| "epoch": 8.342245989304812, |
| "grad_norm": 2.563195812399499e-05, |
| "learning_rate": 3.6192168985010685e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2892544, |
| "step": 4680 |
| }, |
| { |
| "epoch": 8.351158645276293, |
| "grad_norm": 1.978753243747633e-05, |
| "learning_rate": 3.6157381466119475e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2896288, |
| "step": 4685 |
| }, |
| { |
| "epoch": 8.360071301247771, |
| "grad_norm": 8.230508683482185e-05, |
| "learning_rate": 3.6122566949251724e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2899296, |
| "step": 4690 |
| }, |
| { |
| "epoch": 8.368983957219251, |
| "grad_norm": 2.73281129921088e-05, |
| "learning_rate": 3.6087725518649575e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2902208, |
| "step": 4695 |
| }, |
| { |
| "epoch": 8.37789661319073, |
| "grad_norm": 2.3481750758946873e-05, |
| "learning_rate": 3.6052857258620264e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2905472, |
| "step": 4700 |
| }, |
| { |
| "epoch": 8.38680926916221, |
| "grad_norm": 2.9414770324365236e-05, |
| "learning_rate": 3.6017962253535964e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2908320, |
| "step": 4705 |
| }, |
| { |
| "epoch": 8.39572192513369, |
| "grad_norm": 3.249278597650118e-05, |
| "learning_rate": 3.598304058783357e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2911104, |
| "step": 4710 |
| }, |
| { |
| "epoch": 8.404634581105169, |
| "grad_norm": 4.262402580934577e-05, |
| "learning_rate": 3.594809234601445e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2914400, |
| "step": 4715 |
| }, |
| { |
| "epoch": 8.41354723707665, |
| "grad_norm": 1.8245505998493172e-05, |
| "learning_rate": 3.5913117612644335e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2917824, |
| "step": 4720 |
| }, |
| { |
| "epoch": 8.422459893048128, |
| "grad_norm": 9.287180000683293e-05, |
| "learning_rate": 3.587811647235302e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2920800, |
| "step": 4725 |
| }, |
| { |
| "epoch": 8.431372549019608, |
| "grad_norm": 2.9012597224209458e-05, |
| "learning_rate": 3.5843089009834214e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2923744, |
| "step": 4730 |
| }, |
| { |
| "epoch": 8.440285204991087, |
| "grad_norm": 3.052468673558906e-05, |
| "learning_rate": 3.5808035309845305e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2926496, |
| "step": 4735 |
| }, |
| { |
| "epoch": 8.449197860962567, |
| "grad_norm": 0.0005738939507864416, |
| "learning_rate": 3.5772955457207183e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2928704, |
| "step": 4740 |
| }, |
| { |
| "epoch": 8.458110516934047, |
| "grad_norm": 3.2330990507034585e-05, |
| "learning_rate": 3.5737849536804016e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2931200, |
| "step": 4745 |
| }, |
| { |
| "epoch": 8.467023172905526, |
| "grad_norm": 0.0014580815332010388, |
| "learning_rate": 3.570271763358305e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2933568, |
| "step": 4750 |
| }, |
| { |
| "epoch": 8.475935828877006, |
| "grad_norm": 2.72096131084254e-05, |
| "learning_rate": 3.56675598325544e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2936672, |
| "step": 4755 |
| }, |
| { |
| "epoch": 8.484848484848484, |
| "grad_norm": 3.214793832739815e-05, |
| "learning_rate": 3.563237621879085e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2939104, |
| "step": 4760 |
| }, |
| { |
| "epoch": 8.493761140819965, |
| "grad_norm": 4.211974373902194e-05, |
| "learning_rate": 3.559716687742763e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2941440, |
| "step": 4765 |
| }, |
| { |
| "epoch": 8.502673796791443, |
| "grad_norm": 1.873859582701698e-05, |
| "learning_rate": 3.556193189366227e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2944704, |
| "step": 4770 |
| }, |
| { |
| "epoch": 8.511586452762923, |
| "grad_norm": 0.0002100716665154323, |
| "learning_rate": 3.5526671352754285e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2947680, |
| "step": 4775 |
| }, |
| { |
| "epoch": 8.520499108734402, |
| "grad_norm": 5.308235631673597e-05, |
| "learning_rate": 3.5491385340025055e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2951232, |
| "step": 4780 |
| }, |
| { |
| "epoch": 8.529411764705882, |
| "grad_norm": 5.459520980366506e-05, |
| "learning_rate": 3.545607394085763e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2954432, |
| "step": 4785 |
| }, |
| { |
| "epoch": 8.538324420677363, |
| "grad_norm": 9.727604629006237e-05, |
| "learning_rate": 3.542073724069644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2958208, |
| "step": 4790 |
| }, |
| { |
| "epoch": 8.547237076648841, |
| "grad_norm": 8.639811130706221e-05, |
| "learning_rate": 3.5385375325047166e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2961152, |
| "step": 4795 |
| }, |
| { |
| "epoch": 8.556149732620321, |
| "grad_norm": 0.00015109198284335434, |
| "learning_rate": 3.5349988279476494e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2964704, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.5650623885918, |
| "grad_norm": 2.8421334718586877e-05, |
| "learning_rate": 3.5314576189611906e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2968064, |
| "step": 4805 |
| }, |
| { |
| "epoch": 8.57397504456328, |
| "grad_norm": 2.659242454683408e-05, |
| "learning_rate": 3.527913914114152e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2971552, |
| "step": 4810 |
| }, |
| { |
| "epoch": 8.582887700534759, |
| "grad_norm": 2.7345558919478208e-05, |
| "learning_rate": 3.524367721981381e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2974592, |
| "step": 4815 |
| }, |
| { |
| "epoch": 8.591800356506239, |
| "grad_norm": 4.943818203173578e-05, |
| "learning_rate": 3.520819051143747e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2977376, |
| "step": 4820 |
| }, |
| { |
| "epoch": 8.60071301247772, |
| "grad_norm": 7.073458982631564e-05, |
| "learning_rate": 3.517267910188112e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2980544, |
| "step": 4825 |
| }, |
| { |
| "epoch": 8.609625668449198, |
| "grad_norm": 1.9516908650984988e-05, |
| "learning_rate": 3.513714307707321e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2984352, |
| "step": 4830 |
| }, |
| { |
| "epoch": 8.618538324420678, |
| "grad_norm": 4.920436185784638e-05, |
| "learning_rate": 3.510158252300171e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2987328, |
| "step": 4835 |
| }, |
| { |
| "epoch": 8.627450980392156, |
| "grad_norm": 2.403493272140622e-05, |
| "learning_rate": 3.506599752571398e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2989888, |
| "step": 4840 |
| }, |
| { |
| "epoch": 8.636363636363637, |
| "grad_norm": 1.945541771419812e-05, |
| "learning_rate": 3.503038817131649e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2992960, |
| "step": 4845 |
| }, |
| { |
| "epoch": 8.645276292335115, |
| "grad_norm": 1.9857079678331502e-05, |
| "learning_rate": 3.499475454597467e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2995200, |
| "step": 4850 |
| }, |
| { |
| "epoch": 8.654188948306595, |
| "grad_norm": 2.849534575943835e-05, |
| "learning_rate": 3.495909673591268e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 2997920, |
| "step": 4855 |
| }, |
| { |
| "epoch": 8.663101604278076, |
| "grad_norm": 9.624276572139934e-05, |
| "learning_rate": 3.492341482741319e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3001056, |
| "step": 4860 |
| }, |
| { |
| "epoch": 8.672014260249554, |
| "grad_norm": 4.670900307246484e-05, |
| "learning_rate": 3.488770890681718e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3004064, |
| "step": 4865 |
| }, |
| { |
| "epoch": 8.680926916221035, |
| "grad_norm": 5.9905400121351704e-05, |
| "learning_rate": 3.485197906052376e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3007456, |
| "step": 4870 |
| }, |
| { |
| "epoch": 8.689839572192513, |
| "grad_norm": 0.00032246284536086023, |
| "learning_rate": 3.4816225374989884e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3010688, |
| "step": 4875 |
| }, |
| { |
| "epoch": 8.698752228163993, |
| "grad_norm": 3.181647480232641e-05, |
| "learning_rate": 3.4780447936730245e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3014048, |
| "step": 4880 |
| }, |
| { |
| "epoch": 8.707664884135472, |
| "grad_norm": 2.5759185518836603e-05, |
| "learning_rate": 3.474464683231698e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3017056, |
| "step": 4885 |
| }, |
| { |
| "epoch": 8.716577540106952, |
| "grad_norm": 7.166628347476944e-05, |
| "learning_rate": 3.4708822148379514e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3020128, |
| "step": 4890 |
| }, |
| { |
| "epoch": 8.72549019607843, |
| "grad_norm": 1.9339231585036032e-05, |
| "learning_rate": 3.4672973971604285e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3023168, |
| "step": 4895 |
| }, |
| { |
| "epoch": 8.73440285204991, |
| "grad_norm": 1.5031446309876628e-05, |
| "learning_rate": 3.463710238873462e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3026560, |
| "step": 4900 |
| }, |
| { |
| "epoch": 8.743315508021391, |
| "grad_norm": 8.983182488009334e-05, |
| "learning_rate": 3.4601207486570476e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3030336, |
| "step": 4905 |
| }, |
| { |
| "epoch": 8.75222816399287, |
| "grad_norm": 0.00017463577387388796, |
| "learning_rate": 3.456528935196821e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3034016, |
| "step": 4910 |
| }, |
| { |
| "epoch": 8.76114081996435, |
| "grad_norm": 1.955249535967596e-05, |
| "learning_rate": 3.452934807184044e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3037024, |
| "step": 4915 |
| }, |
| { |
| "epoch": 8.770053475935828, |
| "grad_norm": 1.943771530932281e-05, |
| "learning_rate": 3.449338373315575e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3041184, |
| "step": 4920 |
| }, |
| { |
| "epoch": 8.778966131907309, |
| "grad_norm": 4.8170099034905434e-05, |
| "learning_rate": 3.4457396422938535e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3044064, |
| "step": 4925 |
| }, |
| { |
| "epoch": 8.787878787878787, |
| "grad_norm": 6.884737376822159e-05, |
| "learning_rate": 3.442138622826879e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3047712, |
| "step": 4930 |
| }, |
| { |
| "epoch": 8.796791443850267, |
| "grad_norm": 2.13040184462443e-05, |
| "learning_rate": 3.438535323628185e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3050304, |
| "step": 4935 |
| }, |
| { |
| "epoch": 8.805704099821746, |
| "grad_norm": 5.2907158533344045e-05, |
| "learning_rate": 3.434929753416824e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3052864, |
| "step": 4940 |
| }, |
| { |
| "epoch": 8.814616755793226, |
| "grad_norm": 2.187153404520359e-05, |
| "learning_rate": 3.431321920917343e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3056352, |
| "step": 4945 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 1.702853296592366e-05, |
| "learning_rate": 3.427711834859764e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3059424, |
| "step": 4950 |
| }, |
| { |
| "epoch": 8.832442067736185, |
| "grad_norm": 1.7070908143068664e-05, |
| "learning_rate": 3.4240995039795606e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3062752, |
| "step": 4955 |
| }, |
| { |
| "epoch": 8.841354723707665, |
| "grad_norm": 1.8176526282331906e-05, |
| "learning_rate": 3.420484937017639e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3065120, |
| "step": 4960 |
| }, |
| { |
| "epoch": 8.850267379679144, |
| "grad_norm": 2.3698372388025746e-05, |
| "learning_rate": 3.416868142720316e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3067872, |
| "step": 4965 |
| }, |
| { |
| "epoch": 8.859180035650624, |
| "grad_norm": 3.496624412946403e-05, |
| "learning_rate": 3.413249129839298e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3071040, |
| "step": 4970 |
| }, |
| { |
| "epoch": 8.868092691622103, |
| "grad_norm": 2.3722381229163148e-05, |
| "learning_rate": 3.4096279071316606e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3074560, |
| "step": 4975 |
| }, |
| { |
| "epoch": 8.877005347593583, |
| "grad_norm": 0.00022836957941763103, |
| "learning_rate": 3.4060044833598255e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3077760, |
| "step": 4980 |
| }, |
| { |
| "epoch": 8.885918003565063, |
| "grad_norm": 0.0004945816472172737, |
| "learning_rate": 3.40237886729154e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3080512, |
| "step": 4985 |
| }, |
| { |
| "epoch": 8.894830659536542, |
| "grad_norm": 1.4382736480911262e-05, |
| "learning_rate": 3.398751067699858e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3083904, |
| "step": 4990 |
| }, |
| { |
| "epoch": 8.903743315508022, |
| "grad_norm": 1.5413037544931285e-05, |
| "learning_rate": 3.395121093363116e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3087264, |
| "step": 4995 |
| }, |
| { |
| "epoch": 8.9126559714795, |
| "grad_norm": 1.8121845641871914e-05, |
| "learning_rate": 3.3914889530649105e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3089536, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.92156862745098, |
| "grad_norm": 2.3494427296100184e-05, |
| "learning_rate": 3.387854655594085e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3092608, |
| "step": 5005 |
| }, |
| { |
| "epoch": 8.93048128342246, |
| "grad_norm": 1.5714915207354352e-05, |
| "learning_rate": 3.384218209744697e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3095968, |
| "step": 5010 |
| }, |
| { |
| "epoch": 8.93939393939394, |
| "grad_norm": 2.0781550119863823e-05, |
| "learning_rate": 3.3805796243160035e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3099584, |
| "step": 5015 |
| }, |
| { |
| "epoch": 8.94830659536542, |
| "grad_norm": 6.0617785493377596e-05, |
| "learning_rate": 3.376938908112443e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3102880, |
| "step": 5020 |
| }, |
| { |
| "epoch": 8.957219251336898, |
| "grad_norm": 1.7760969058144838e-05, |
| "learning_rate": 3.373296069943605e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3105888, |
| "step": 5025 |
| }, |
| { |
| "epoch": 8.966131907308379, |
| "grad_norm": 4.19039570260793e-05, |
| "learning_rate": 3.3696511186242144e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3108608, |
| "step": 5030 |
| }, |
| { |
| "epoch": 8.975044563279857, |
| "grad_norm": 1.3767701602773741e-05, |
| "learning_rate": 3.3660040629741114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3111584, |
| "step": 5035 |
| }, |
| { |
| "epoch": 8.983957219251337, |
| "grad_norm": 1.66555073519703e-05, |
| "learning_rate": 3.3623549118182274e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3114976, |
| "step": 5040 |
| }, |
| { |
| "epoch": 8.992869875222816, |
| "grad_norm": 3.6471657949732617e-05, |
| "learning_rate": 3.358703673986564e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3117568, |
| "step": 5045 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.22779197990894318, |
| "eval_runtime": 4.5839, |
| "eval_samples_per_second": 54.32, |
| "eval_steps_per_second": 13.744, |
| "num_input_tokens_seen": 3119968, |
| "step": 5049 |
| }, |
| { |
| "epoch": 9.001782531194296, |
| "grad_norm": 2.258282074762974e-05, |
| "learning_rate": 3.355050358314172e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3120544, |
| "step": 5050 |
| }, |
| { |
| "epoch": 9.010695187165775, |
| "grad_norm": 2.1495643522939645e-05, |
| "learning_rate": 3.3513949736411297e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3123360, |
| "step": 5055 |
| }, |
| { |
| "epoch": 9.019607843137255, |
| "grad_norm": 2.0894487533951178e-05, |
| "learning_rate": 3.347737528812523e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3126720, |
| "step": 5060 |
| }, |
| { |
| "epoch": 9.028520499108735, |
| "grad_norm": 1.4006104720465373e-05, |
| "learning_rate": 3.344078032678422e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3129760, |
| "step": 5065 |
| }, |
| { |
| "epoch": 9.037433155080214, |
| "grad_norm": 2.4866374587872997e-05, |
| "learning_rate": 3.340416494093861e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3132928, |
| "step": 5070 |
| }, |
| { |
| "epoch": 9.046345811051694, |
| "grad_norm": 2.3607737603015266e-05, |
| "learning_rate": 3.336752921918814e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3136224, |
| "step": 5075 |
| }, |
| { |
| "epoch": 9.055258467023172, |
| "grad_norm": 2.0439423678908497e-05, |
| "learning_rate": 3.33308732501818e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3139296, |
| "step": 5080 |
| }, |
| { |
| "epoch": 9.064171122994653, |
| "grad_norm": 2.25005169340875e-05, |
| "learning_rate": 3.329419712261754e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3142272, |
| "step": 5085 |
| }, |
| { |
| "epoch": 9.073083778966131, |
| "grad_norm": 0.00012200982018839568, |
| "learning_rate": 3.3257500925242106e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3145440, |
| "step": 5090 |
| }, |
| { |
| "epoch": 9.081996434937611, |
| "grad_norm": 0.00030202369089238346, |
| "learning_rate": 3.322078474685081e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3149280, |
| "step": 5095 |
| }, |
| { |
| "epoch": 9.090909090909092, |
| "grad_norm": 1.6156487617990933e-05, |
| "learning_rate": 3.3184048676287284e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3152480, |
| "step": 5100 |
| }, |
| { |
| "epoch": 9.09982174688057, |
| "grad_norm": 0.0004181478579994291, |
| "learning_rate": 3.314729280244332e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3156032, |
| "step": 5105 |
| }, |
| { |
| "epoch": 9.10873440285205, |
| "grad_norm": 1.4357733562064823e-05, |
| "learning_rate": 3.311051721425864e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3159488, |
| "step": 5110 |
| }, |
| { |
| "epoch": 9.117647058823529, |
| "grad_norm": 8.172341040335596e-05, |
| "learning_rate": 3.3073722000720644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3162272, |
| "step": 5115 |
| }, |
| { |
| "epoch": 9.12655971479501, |
| "grad_norm": 2.1385545551311225e-05, |
| "learning_rate": 3.303690725086421e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3165184, |
| "step": 5120 |
| }, |
| { |
| "epoch": 9.135472370766488, |
| "grad_norm": 2.0441351807676256e-05, |
| "learning_rate": 3.300007305377153e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3167744, |
| "step": 5125 |
| }, |
| { |
| "epoch": 9.144385026737968, |
| "grad_norm": 1.8742101019597612e-05, |
| "learning_rate": 3.296321949857183e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3170464, |
| "step": 5130 |
| }, |
| { |
| "epoch": 9.153297682709447, |
| "grad_norm": 1.82445965037914e-05, |
| "learning_rate": 3.292634667444117e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3173952, |
| "step": 5135 |
| }, |
| { |
| "epoch": 9.162210338680927, |
| "grad_norm": 1.8075352272717282e-05, |
| "learning_rate": 3.288945467060226e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3177792, |
| "step": 5140 |
| }, |
| { |
| "epoch": 9.171122994652407, |
| "grad_norm": 5.484546272782609e-05, |
| "learning_rate": 3.285254357632418e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3180544, |
| "step": 5145 |
| }, |
| { |
| "epoch": 9.180035650623886, |
| "grad_norm": 2.4764098270679824e-05, |
| "learning_rate": 3.281561348092225e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3183808, |
| "step": 5150 |
| }, |
| { |
| "epoch": 9.188948306595366, |
| "grad_norm": 0.0002967322070617229, |
| "learning_rate": 3.277866447375774e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3186688, |
| "step": 5155 |
| }, |
| { |
| "epoch": 9.197860962566844, |
| "grad_norm": 1.93959513126174e-05, |
| "learning_rate": 3.274169664423768e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3190592, |
| "step": 5160 |
| }, |
| { |
| "epoch": 9.206773618538325, |
| "grad_norm": 1.803379927878268e-05, |
| "learning_rate": 3.270471008181466e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3193824, |
| "step": 5165 |
| }, |
| { |
| "epoch": 9.215686274509803, |
| "grad_norm": 0.0001877088361652568, |
| "learning_rate": 3.26677048759866e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3196416, |
| "step": 5170 |
| }, |
| { |
| "epoch": 9.224598930481283, |
| "grad_norm": 2.6759424144984223e-05, |
| "learning_rate": 3.26306811162965e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3199488, |
| "step": 5175 |
| }, |
| { |
| "epoch": 9.233511586452764, |
| "grad_norm": 2.7462174330139533e-05, |
| "learning_rate": 3.259363889233231e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3202080, |
| "step": 5180 |
| }, |
| { |
| "epoch": 9.242424242424242, |
| "grad_norm": 1.7499840396340005e-05, |
| "learning_rate": 3.255657829372662e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3205216, |
| "step": 5185 |
| }, |
| { |
| "epoch": 9.251336898395722, |
| "grad_norm": 3.1354134989669546e-05, |
| "learning_rate": 3.251949941015646e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3208096, |
| "step": 5190 |
| }, |
| { |
| "epoch": 9.260249554367201, |
| "grad_norm": 1.662471731833648e-05, |
| "learning_rate": 3.248240233134317e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3211136, |
| "step": 5195 |
| }, |
| { |
| "epoch": 9.269162210338681, |
| "grad_norm": 2.0622837837436236e-05, |
| "learning_rate": 3.2445287147052086e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3213952, |
| "step": 5200 |
| }, |
| { |
| "epoch": 9.27807486631016, |
| "grad_norm": 3.4175893233623356e-05, |
| "learning_rate": 3.240815394709234e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3217888, |
| "step": 5205 |
| }, |
| { |
| "epoch": 9.28698752228164, |
| "grad_norm": 5.586072074947879e-05, |
| "learning_rate": 3.237100282131665e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3221120, |
| "step": 5210 |
| }, |
| { |
| "epoch": 9.29590017825312, |
| "grad_norm": 2.8636835850193165e-05, |
| "learning_rate": 3.2333833859621153e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3224032, |
| "step": 5215 |
| }, |
| { |
| "epoch": 9.304812834224599, |
| "grad_norm": 9.697127825347707e-05, |
| "learning_rate": 3.2296647151945114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3227072, |
| "step": 5220 |
| }, |
| { |
| "epoch": 9.313725490196079, |
| "grad_norm": 2.4525064873159863e-05, |
| "learning_rate": 3.225944278827074e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3230208, |
| "step": 5225 |
| }, |
| { |
| "epoch": 9.322638146167558, |
| "grad_norm": 1.6048215911723673e-05, |
| "learning_rate": 3.222222085862297e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3232736, |
| "step": 5230 |
| }, |
| { |
| "epoch": 9.331550802139038, |
| "grad_norm": 1.599715687916614e-05, |
| "learning_rate": 3.218498145306925e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3235808, |
| "step": 5235 |
| }, |
| { |
| "epoch": 9.340463458110516, |
| "grad_norm": 2.0266739738872275e-05, |
| "learning_rate": 3.21477246617193e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3239648, |
| "step": 5240 |
| }, |
| { |
| "epoch": 9.349376114081997, |
| "grad_norm": 1.4434924196393695e-05, |
| "learning_rate": 3.211045057472491e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3242848, |
| "step": 5245 |
| }, |
| { |
| "epoch": 9.358288770053475, |
| "grad_norm": 3.0308527129818685e-05, |
| "learning_rate": 3.207315928227974e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3245600, |
| "step": 5250 |
| }, |
| { |
| "epoch": 9.367201426024955, |
| "grad_norm": 1.3195046449254733e-05, |
| "learning_rate": 3.2035850874619055e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3249088, |
| "step": 5255 |
| }, |
| { |
| "epoch": 9.376114081996436, |
| "grad_norm": 3.300649404991418e-05, |
| "learning_rate": 3.199852544201955e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3252800, |
| "step": 5260 |
| }, |
| { |
| "epoch": 9.385026737967914, |
| "grad_norm": 2.0082863557036035e-05, |
| "learning_rate": 3.1961183074799143e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3255968, |
| "step": 5265 |
| }, |
| { |
| "epoch": 9.393939393939394, |
| "grad_norm": 2.281305569340475e-05, |
| "learning_rate": 3.192382386331667e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3258336, |
| "step": 5270 |
| }, |
| { |
| "epoch": 9.402852049910873, |
| "grad_norm": 4.2561547161312774e-05, |
| "learning_rate": 3.188644789797177e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3261440, |
| "step": 5275 |
| }, |
| { |
| "epoch": 9.411764705882353, |
| "grad_norm": 1.503498151578242e-05, |
| "learning_rate": 3.1849055269204604e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3264192, |
| "step": 5280 |
| }, |
| { |
| "epoch": 9.420677361853832, |
| "grad_norm": 4.1438135667704046e-05, |
| "learning_rate": 3.181164606749566e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3267328, |
| "step": 5285 |
| }, |
| { |
| "epoch": 9.429590017825312, |
| "grad_norm": 5.512424104381353e-05, |
| "learning_rate": 3.177422038336554e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3269984, |
| "step": 5290 |
| }, |
| { |
| "epoch": 9.43850267379679, |
| "grad_norm": 6.817230314482003e-05, |
| "learning_rate": 3.17367783073747e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3272960, |
| "step": 5295 |
| }, |
| { |
| "epoch": 9.44741532976827, |
| "grad_norm": 1.5790932593517937e-05, |
| "learning_rate": 3.169931993012328e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3275744, |
| "step": 5300 |
| }, |
| { |
| "epoch": 9.456327985739751, |
| "grad_norm": 1.866510501713492e-05, |
| "learning_rate": 3.166184534225087e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3278656, |
| "step": 5305 |
| }, |
| { |
| "epoch": 9.46524064171123, |
| "grad_norm": 0.00014416482008527964, |
| "learning_rate": 3.162435463443628e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3282272, |
| "step": 5310 |
| }, |
| { |
| "epoch": 9.47415329768271, |
| "grad_norm": 5.762660293839872e-05, |
| "learning_rate": 3.158684789739731e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3284992, |
| "step": 5315 |
| }, |
| { |
| "epoch": 9.483065953654188, |
| "grad_norm": 2.13386447285302e-05, |
| "learning_rate": 3.1549325221890575e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3288256, |
| "step": 5320 |
| }, |
| { |
| "epoch": 9.491978609625669, |
| "grad_norm": 2.8539412596728653e-05, |
| "learning_rate": 3.1511786698711224e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3291104, |
| "step": 5325 |
| }, |
| { |
| "epoch": 9.500891265597147, |
| "grad_norm": 2.0268642401788384e-05, |
| "learning_rate": 3.147423241869278e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3294336, |
| "step": 5330 |
| }, |
| { |
| "epoch": 9.509803921568627, |
| "grad_norm": 1.5605648513883352e-05, |
| "learning_rate": 3.1436662472706895e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3297568, |
| "step": 5335 |
| }, |
| { |
| "epoch": 9.518716577540108, |
| "grad_norm": 4.1615989175625145e-05, |
| "learning_rate": 3.139907695166311e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3300256, |
| "step": 5340 |
| }, |
| { |
| "epoch": 9.527629233511586, |
| "grad_norm": 4.602934859576635e-05, |
| "learning_rate": 3.1361475946508645e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3303648, |
| "step": 5345 |
| }, |
| { |
| "epoch": 9.536541889483066, |
| "grad_norm": 3.193959128111601e-05, |
| "learning_rate": 3.132385954822823e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3307264, |
| "step": 5350 |
| }, |
| { |
| "epoch": 9.545454545454545, |
| "grad_norm": 1.5343108316301368e-05, |
| "learning_rate": 3.128622784784381e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3310624, |
| "step": 5355 |
| }, |
| { |
| "epoch": 9.554367201426025, |
| "grad_norm": 2.4617331291665323e-05, |
| "learning_rate": 3.1248580936414354e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3313376, |
| "step": 5360 |
| }, |
| { |
| "epoch": 9.563279857397504, |
| "grad_norm": 6.980136095080525e-05, |
| "learning_rate": 3.1210918905035655e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3316576, |
| "step": 5365 |
| }, |
| { |
| "epoch": 9.572192513368984, |
| "grad_norm": 2.2588457795791328e-05, |
| "learning_rate": 3.117324184484008e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3319264, |
| "step": 5370 |
| }, |
| { |
| "epoch": 9.581105169340464, |
| "grad_norm": 1.3750898688158486e-05, |
| "learning_rate": 3.1135549846996384e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3321920, |
| "step": 5375 |
| }, |
| { |
| "epoch": 9.590017825311943, |
| "grad_norm": 2.8685486540780403e-05, |
| "learning_rate": 3.109784300270943e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3324832, |
| "step": 5380 |
| }, |
| { |
| "epoch": 9.598930481283423, |
| "grad_norm": 6.582101195817813e-05, |
| "learning_rate": 3.106012140322004e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3327616, |
| "step": 5385 |
| }, |
| { |
| "epoch": 9.607843137254902, |
| "grad_norm": 1.9889133909600787e-05, |
| "learning_rate": 3.102238513980471e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3329984, |
| "step": 5390 |
| }, |
| { |
| "epoch": 9.616755793226382, |
| "grad_norm": 3.023408498847857e-05, |
| "learning_rate": 3.098463430377544e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3333312, |
| "step": 5395 |
| }, |
| { |
| "epoch": 9.62566844919786, |
| "grad_norm": 1.3414460227068048e-05, |
| "learning_rate": 3.09468689864795e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3336384, |
| "step": 5400 |
| }, |
| { |
| "epoch": 9.63458110516934, |
| "grad_norm": 1.646774762775749e-05, |
| "learning_rate": 3.090908927929917e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3339520, |
| "step": 5405 |
| }, |
| { |
| "epoch": 9.643493761140821, |
| "grad_norm": 8.691311813890934e-05, |
| "learning_rate": 3.087129527365158e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3342528, |
| "step": 5410 |
| }, |
| { |
| "epoch": 9.6524064171123, |
| "grad_norm": 3.321999975014478e-05, |
| "learning_rate": 3.083348706098844e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3345536, |
| "step": 5415 |
| }, |
| { |
| "epoch": 9.66131907308378, |
| "grad_norm": 2.6839175916393287e-05, |
| "learning_rate": 3.0795664732795825e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3349184, |
| "step": 5420 |
| }, |
| { |
| "epoch": 9.670231729055258, |
| "grad_norm": 2.5091510906349868e-05, |
| "learning_rate": 3.075782838059402e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3351904, |
| "step": 5425 |
| }, |
| { |
| "epoch": 9.679144385026738, |
| "grad_norm": 0.000148052436998114, |
| "learning_rate": 3.071997809593719e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3355424, |
| "step": 5430 |
| }, |
| { |
| "epoch": 9.688057040998217, |
| "grad_norm": 3.8398997276090086e-05, |
| "learning_rate": 3.068211397041322e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3358208, |
| "step": 5435 |
| }, |
| { |
| "epoch": 9.696969696969697, |
| "grad_norm": 1.5549350791843608e-05, |
| "learning_rate": 3.064423609564352e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3361472, |
| "step": 5440 |
| }, |
| { |
| "epoch": 9.705882352941176, |
| "grad_norm": 5.20404391863849e-05, |
| "learning_rate": 3.060634456328273e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3364640, |
| "step": 5445 |
| }, |
| { |
| "epoch": 9.714795008912656, |
| "grad_norm": 0.00016949191922321916, |
| "learning_rate": 3.056843946501856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3367520, |
| "step": 5450 |
| }, |
| { |
| "epoch": 9.723707664884136, |
| "grad_norm": 1.3725932149100117e-05, |
| "learning_rate": 3.053052089257154e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3370656, |
| "step": 5455 |
| }, |
| { |
| "epoch": 9.732620320855615, |
| "grad_norm": 1.540976700198371e-05, |
| "learning_rate": 3.0492588937694814e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3373984, |
| "step": 5460 |
| }, |
| { |
| "epoch": 9.741532976827095, |
| "grad_norm": 1.4343509064929094e-05, |
| "learning_rate": 3.0454643692173883e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3377312, |
| "step": 5465 |
| }, |
| { |
| "epoch": 9.750445632798574, |
| "grad_norm": 2.721649252634961e-05, |
| "learning_rate": 3.0416685247826443e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3380224, |
| "step": 5470 |
| }, |
| { |
| "epoch": 9.759358288770054, |
| "grad_norm": 2.0400497305672616e-05, |
| "learning_rate": 3.0378713696502097e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3383424, |
| "step": 5475 |
| }, |
| { |
| "epoch": 9.768270944741532, |
| "grad_norm": 2.7101803425466642e-05, |
| "learning_rate": 3.0340729130082175e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3387104, |
| "step": 5480 |
| }, |
| { |
| "epoch": 9.777183600713013, |
| "grad_norm": 1.8909197024186142e-05, |
| "learning_rate": 3.03027316404795e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3389792, |
| "step": 5485 |
| }, |
| { |
| "epoch": 9.786096256684491, |
| "grad_norm": 1.3240232874522917e-05, |
| "learning_rate": 3.026472131963817e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3392704, |
| "step": 5490 |
| }, |
| { |
| "epoch": 9.795008912655971, |
| "grad_norm": 4.4982502004131675e-05, |
| "learning_rate": 3.0226698259533332e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3396640, |
| "step": 5495 |
| }, |
| { |
| "epoch": 9.803921568627452, |
| "grad_norm": 2.582524757599458e-05, |
| "learning_rate": 3.0188662552170943e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3400256, |
| "step": 5500 |
| }, |
| { |
| "epoch": 9.81283422459893, |
| "grad_norm": 2.0919447706546634e-05, |
| "learning_rate": 3.0150614289587585e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3403456, |
| "step": 5505 |
| }, |
| { |
| "epoch": 9.82174688057041, |
| "grad_norm": 1.901478935906198e-05, |
| "learning_rate": 3.0112553563850197e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3406528, |
| "step": 5510 |
| }, |
| { |
| "epoch": 9.830659536541889, |
| "grad_norm": 3.364668009453453e-05, |
| "learning_rate": 3.0074480467055905e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3409280, |
| "step": 5515 |
| }, |
| { |
| "epoch": 9.83957219251337, |
| "grad_norm": 1.191617593576666e-05, |
| "learning_rate": 3.0036395091331743e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3412384, |
| "step": 5520 |
| }, |
| { |
| "epoch": 9.848484848484848, |
| "grad_norm": 0.00011046986764995381, |
| "learning_rate": 2.999829752883446e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3416384, |
| "step": 5525 |
| }, |
| { |
| "epoch": 9.857397504456328, |
| "grad_norm": 1.4666607057733927e-05, |
| "learning_rate": 2.996018787175031e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3418720, |
| "step": 5530 |
| }, |
| { |
| "epoch": 9.866310160427808, |
| "grad_norm": 4.578886000672355e-05, |
| "learning_rate": 2.9922066212294808e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3422240, |
| "step": 5535 |
| }, |
| { |
| "epoch": 9.875222816399287, |
| "grad_norm": 6.549031240865588e-05, |
| "learning_rate": 2.988393264271249e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3425408, |
| "step": 5540 |
| }, |
| { |
| "epoch": 9.884135472370767, |
| "grad_norm": 3.163235305692069e-05, |
| "learning_rate": 2.9845787255276753e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3427936, |
| "step": 5545 |
| }, |
| { |
| "epoch": 9.893048128342246, |
| "grad_norm": 7.271839422173798e-05, |
| "learning_rate": 2.980763014228955e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3431392, |
| "step": 5550 |
| }, |
| { |
| "epoch": 9.901960784313726, |
| "grad_norm": 1.4547945283993613e-05, |
| "learning_rate": 2.9769461396081216e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3434624, |
| "step": 5555 |
| }, |
| { |
| "epoch": 9.910873440285204, |
| "grad_norm": 2.0756122466991656e-05, |
| "learning_rate": 2.9731281109010256e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3437760, |
| "step": 5560 |
| }, |
| { |
| "epoch": 9.919786096256685, |
| "grad_norm": 3.5389031836530194e-05, |
| "learning_rate": 2.9693089373463083e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3440992, |
| "step": 5565 |
| }, |
| { |
| "epoch": 9.928698752228165, |
| "grad_norm": 1.5621943020960316e-05, |
| "learning_rate": 2.965488628185381e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3443936, |
| "step": 5570 |
| }, |
| { |
| "epoch": 9.937611408199643, |
| "grad_norm": 1.6809039152576588e-05, |
| "learning_rate": 2.9616671926624047e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3446208, |
| "step": 5575 |
| }, |
| { |
| "epoch": 9.946524064171124, |
| "grad_norm": 1.767824505805038e-05, |
| "learning_rate": 2.957844640024263e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3449152, |
| "step": 5580 |
| }, |
| { |
| "epoch": 9.955436720142602, |
| "grad_norm": 4.033447476103902e-05, |
| "learning_rate": 2.9540209795205458e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3452000, |
| "step": 5585 |
| }, |
| { |
| "epoch": 9.964349376114082, |
| "grad_norm": 5.009349843021482e-05, |
| "learning_rate": 2.9501962204035217e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3455712, |
| "step": 5590 |
| }, |
| { |
| "epoch": 9.973262032085561, |
| "grad_norm": 1.348033401882276e-05, |
| "learning_rate": 2.9463703719281187e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3458880, |
| "step": 5595 |
| }, |
| { |
| "epoch": 9.982174688057041, |
| "grad_norm": 2.4049759304034524e-05, |
| "learning_rate": 2.9425434433518985e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3461632, |
| "step": 5600 |
| }, |
| { |
| "epoch": 9.99108734402852, |
| "grad_norm": 1.4484941857517697e-05, |
| "learning_rate": 2.9387154439350406e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3464064, |
| "step": 5605 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.7108382962760516e-05, |
| "learning_rate": 2.9348863829403117e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3466384, |
| "step": 5610 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.23532113432884216, |
| "eval_runtime": 4.5836, |
| "eval_samples_per_second": 54.324, |
| "eval_steps_per_second": 13.745, |
| "num_input_tokens_seen": 3466384, |
| "step": 5610 |
| }, |
| { |
| "epoch": 10.00891265597148, |
| "grad_norm": 1.203343526867684e-05, |
| "learning_rate": 2.931056269633049e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3468944, |
| "step": 5615 |
| }, |
| { |
| "epoch": 10.017825311942959, |
| "grad_norm": 1.2630572200578172e-05, |
| "learning_rate": 2.9272251132811368e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3472144, |
| "step": 5620 |
| }, |
| { |
| "epoch": 10.026737967914439, |
| "grad_norm": 1.4490614375972655e-05, |
| "learning_rate": 2.9233929231549806e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3474608, |
| "step": 5625 |
| }, |
| { |
| "epoch": 10.035650623885918, |
| "grad_norm": 8.628850628156215e-05, |
| "learning_rate": 2.9195597085274893e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3477552, |
| "step": 5630 |
| }, |
| { |
| "epoch": 10.044563279857398, |
| "grad_norm": 3.0749939469387755e-05, |
| "learning_rate": 2.915725478674053e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3480592, |
| "step": 5635 |
| }, |
| { |
| "epoch": 10.053475935828876, |
| "grad_norm": 1.7167909390991554e-05, |
| "learning_rate": 2.9118902428725132e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3484656, |
| "step": 5640 |
| }, |
| { |
| "epoch": 10.062388591800357, |
| "grad_norm": 9.126300574280322e-05, |
| "learning_rate": 2.9080540104031485e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3487440, |
| "step": 5645 |
| }, |
| { |
| "epoch": 10.071301247771837, |
| "grad_norm": 2.4468277842970565e-05, |
| "learning_rate": 2.9042167905486506e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3490832, |
| "step": 5650 |
| }, |
| { |
| "epoch": 10.080213903743315, |
| "grad_norm": 1.621020601305645e-05, |
| "learning_rate": 2.9003785925940975e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3493424, |
| "step": 5655 |
| }, |
| { |
| "epoch": 10.089126559714796, |
| "grad_norm": 0.00030905677704140544, |
| "learning_rate": 2.896539425826935e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3497008, |
| "step": 5660 |
| }, |
| { |
| "epoch": 10.098039215686274, |
| "grad_norm": 1.6341586160706356e-05, |
| "learning_rate": 2.8926992995369556e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3500720, |
| "step": 5665 |
| }, |
| { |
| "epoch": 10.106951871657754, |
| "grad_norm": 1.4716282748850062e-05, |
| "learning_rate": 2.8888582230162688e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3503696, |
| "step": 5670 |
| }, |
| { |
| "epoch": 10.115864527629233, |
| "grad_norm": 1.3503812624549028e-05, |
| "learning_rate": 2.8850162055592866e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3506704, |
| "step": 5675 |
| }, |
| { |
| "epoch": 10.124777183600713, |
| "grad_norm": 0.000634489580988884, |
| "learning_rate": 2.8811732564626987e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3510352, |
| "step": 5680 |
| }, |
| { |
| "epoch": 10.133689839572192, |
| "grad_norm": 1.2275093467906117e-05, |
| "learning_rate": 2.8773293850254463e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3513360, |
| "step": 5685 |
| }, |
| { |
| "epoch": 10.142602495543672, |
| "grad_norm": 1.3238013707450591e-05, |
| "learning_rate": 2.8734846005487036e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3516528, |
| "step": 5690 |
| }, |
| { |
| "epoch": 10.151515151515152, |
| "grad_norm": 1.2889286153949797e-05, |
| "learning_rate": 2.8696389123358553e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3519664, |
| "step": 5695 |
| }, |
| { |
| "epoch": 10.16042780748663, |
| "grad_norm": 3.7722624256275594e-05, |
| "learning_rate": 2.865792329692472e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3522608, |
| "step": 5700 |
| }, |
| { |
| "epoch": 10.169340463458111, |
| "grad_norm": 1.4927626580174547e-05, |
| "learning_rate": 2.8619448619262874e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3525360, |
| "step": 5705 |
| }, |
| { |
| "epoch": 10.17825311942959, |
| "grad_norm": 1.6124704416142777e-05, |
| "learning_rate": 2.8580965183471792e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3528752, |
| "step": 5710 |
| }, |
| { |
| "epoch": 10.18716577540107, |
| "grad_norm": 1.633623833185993e-05, |
| "learning_rate": 2.854247308267142e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3532560, |
| "step": 5715 |
| }, |
| { |
| "epoch": 10.196078431372548, |
| "grad_norm": 0.0005267616361379623, |
| "learning_rate": 2.8503972410002693e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3535984, |
| "step": 5720 |
| }, |
| { |
| "epoch": 10.204991087344029, |
| "grad_norm": 0.00010131792805623263, |
| "learning_rate": 2.8465463258627283e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3538864, |
| "step": 5725 |
| }, |
| { |
| "epoch": 10.213903743315509, |
| "grad_norm": 2.3571072233607993e-05, |
| "learning_rate": 2.8426945721727366e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3541680, |
| "step": 5730 |
| }, |
| { |
| "epoch": 10.222816399286987, |
| "grad_norm": 1.532697024231311e-05, |
| "learning_rate": 2.838841989250541e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3544592, |
| "step": 5735 |
| }, |
| { |
| "epoch": 10.231729055258468, |
| "grad_norm": 1.6518961274414323e-05, |
| "learning_rate": 2.8349885864183955e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3548016, |
| "step": 5740 |
| }, |
| { |
| "epoch": 10.240641711229946, |
| "grad_norm": 2.0978848624508828e-05, |
| "learning_rate": 2.8311343730005397e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3550800, |
| "step": 5745 |
| }, |
| { |
| "epoch": 10.249554367201426, |
| "grad_norm": 1.3249901712697465e-05, |
| "learning_rate": 2.827279358323171e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3553840, |
| "step": 5750 |
| }, |
| { |
| "epoch": 10.258467023172905, |
| "grad_norm": 2.9433726012939587e-05, |
| "learning_rate": 2.823423551714429e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3557008, |
| "step": 5755 |
| }, |
| { |
| "epoch": 10.267379679144385, |
| "grad_norm": 6.580901390407234e-05, |
| "learning_rate": 2.819566962504367e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3560432, |
| "step": 5760 |
| }, |
| { |
| "epoch": 10.276292335115864, |
| "grad_norm": 1.4316668966785073e-05, |
| "learning_rate": 2.8157096000249334e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3563472, |
| "step": 5765 |
| }, |
| { |
| "epoch": 10.285204991087344, |
| "grad_norm": 2.383297214691993e-05, |
| "learning_rate": 2.8118514736099482e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3567088, |
| "step": 5770 |
| }, |
| { |
| "epoch": 10.294117647058824, |
| "grad_norm": 4.281475776224397e-05, |
| "learning_rate": 2.8079925925950784e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3569584, |
| "step": 5775 |
| }, |
| { |
| "epoch": 10.303030303030303, |
| "grad_norm": 2.8406180717865936e-05, |
| "learning_rate": 2.8041329663178173e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3572464, |
| "step": 5780 |
| }, |
| { |
| "epoch": 10.311942959001783, |
| "grad_norm": 1.3338190910872072e-05, |
| "learning_rate": 2.800272604117463e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3575120, |
| "step": 5785 |
| }, |
| { |
| "epoch": 10.320855614973262, |
| "grad_norm": 1.4634756553277839e-05, |
| "learning_rate": 2.7964115153350927e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3578128, |
| "step": 5790 |
| }, |
| { |
| "epoch": 10.329768270944742, |
| "grad_norm": 1.9146786144119687e-05, |
| "learning_rate": 2.7925497093135424e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3581648, |
| "step": 5795 |
| }, |
| { |
| "epoch": 10.33868092691622, |
| "grad_norm": 2.9808454200974666e-05, |
| "learning_rate": 2.7886871953973838e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3585264, |
| "step": 5800 |
| }, |
| { |
| "epoch": 10.3475935828877, |
| "grad_norm": 1.4228238796931691e-05, |
| "learning_rate": 2.7848239829329002e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3588048, |
| "step": 5805 |
| }, |
| { |
| "epoch": 10.35650623885918, |
| "grad_norm": 1.5599915059283376e-05, |
| "learning_rate": 2.7809600812680674e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3591216, |
| "step": 5810 |
| }, |
| { |
| "epoch": 10.36541889483066, |
| "grad_norm": 0.0007058135233819485, |
| "learning_rate": 2.7770954997525277e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3594800, |
| "step": 5815 |
| }, |
| { |
| "epoch": 10.37433155080214, |
| "grad_norm": 1.2978567610844038e-05, |
| "learning_rate": 2.7732302477375688e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3597264, |
| "step": 5820 |
| }, |
| { |
| "epoch": 10.383244206773618, |
| "grad_norm": 1.385369341733167e-05, |
| "learning_rate": 2.769364334576099e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3601136, |
| "step": 5825 |
| }, |
| { |
| "epoch": 10.392156862745098, |
| "grad_norm": 1.804764542612247e-05, |
| "learning_rate": 2.7654977696226292e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3604432, |
| "step": 5830 |
| }, |
| { |
| "epoch": 10.401069518716577, |
| "grad_norm": 1.2603826689883135e-05, |
| "learning_rate": 2.7616305622332466e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3607024, |
| "step": 5835 |
| }, |
| { |
| "epoch": 10.409982174688057, |
| "grad_norm": 1.2305051313887816e-05, |
| "learning_rate": 2.7577627217655916e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3610640, |
| "step": 5840 |
| }, |
| { |
| "epoch": 10.418894830659536, |
| "grad_norm": 1.807206353987567e-05, |
| "learning_rate": 2.7538942575788386e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3613424, |
| "step": 5845 |
| }, |
| { |
| "epoch": 10.427807486631016, |
| "grad_norm": 1.0147291504836176e-05, |
| "learning_rate": 2.7500251790336683e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3617008, |
| "step": 5850 |
| }, |
| { |
| "epoch": 10.436720142602496, |
| "grad_norm": 0.00023956519726198167, |
| "learning_rate": 2.7461554954922514e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3620048, |
| "step": 5855 |
| }, |
| { |
| "epoch": 10.445632798573975, |
| "grad_norm": 4.6874189138179645e-05, |
| "learning_rate": 2.7422852163182205e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3624080, |
| "step": 5860 |
| }, |
| { |
| "epoch": 10.454545454545455, |
| "grad_norm": 1.3826291251461953e-05, |
| "learning_rate": 2.7384143508766496e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3627664, |
| "step": 5865 |
| }, |
| { |
| "epoch": 10.463458110516934, |
| "grad_norm": 2.025993489951361e-05, |
| "learning_rate": 2.7345429085340314e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3630384, |
| "step": 5870 |
| }, |
| { |
| "epoch": 10.472370766488414, |
| "grad_norm": 1.1854433068947401e-05, |
| "learning_rate": 2.7306708986582553e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3633936, |
| "step": 5875 |
| }, |
| { |
| "epoch": 10.481283422459892, |
| "grad_norm": 1.50766263686819e-05, |
| "learning_rate": 2.7267983306185836e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3637552, |
| "step": 5880 |
| }, |
| { |
| "epoch": 10.490196078431373, |
| "grad_norm": 1.3261160347610712e-05, |
| "learning_rate": 2.722925213785628e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3640560, |
| "step": 5885 |
| }, |
| { |
| "epoch": 10.499108734402853, |
| "grad_norm": 4.532691673375666e-05, |
| "learning_rate": 2.7190515575313307e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3643920, |
| "step": 5890 |
| }, |
| { |
| "epoch": 10.508021390374331, |
| "grad_norm": 1.3621403013530653e-05, |
| "learning_rate": 2.7151773712289358e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3647088, |
| "step": 5895 |
| }, |
| { |
| "epoch": 10.516934046345812, |
| "grad_norm": 1.2079704902134836e-05, |
| "learning_rate": 2.711302664252973e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3650192, |
| "step": 5900 |
| }, |
| { |
| "epoch": 10.52584670231729, |
| "grad_norm": 1.122187950386433e-05, |
| "learning_rate": 2.707427445979232e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3653552, |
| "step": 5905 |
| }, |
| { |
| "epoch": 10.53475935828877, |
| "grad_norm": 2.3511658582719974e-05, |
| "learning_rate": 2.7035517257847358e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3655760, |
| "step": 5910 |
| }, |
| { |
| "epoch": 10.543672014260249, |
| "grad_norm": 1.2023041563224979e-05, |
| "learning_rate": 2.699675513047726e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3658640, |
| "step": 5915 |
| }, |
| { |
| "epoch": 10.55258467023173, |
| "grad_norm": 1.289930150960572e-05, |
| "learning_rate": 2.6957988171476344e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3661392, |
| "step": 5920 |
| }, |
| { |
| "epoch": 10.56149732620321, |
| "grad_norm": 3.980232577305287e-05, |
| "learning_rate": 2.691921647465062e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3665040, |
| "step": 5925 |
| }, |
| { |
| "epoch": 10.570409982174688, |
| "grad_norm": 7.775369158480316e-05, |
| "learning_rate": 2.6880440133817562e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3667824, |
| "step": 5930 |
| }, |
| { |
| "epoch": 10.579322638146168, |
| "grad_norm": 1.4728250789630692e-05, |
| "learning_rate": 2.684165924280589e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3671152, |
| "step": 5935 |
| }, |
| { |
| "epoch": 10.588235294117647, |
| "grad_norm": 1.8075328625855036e-05, |
| "learning_rate": 2.6802873895455317e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3674576, |
| "step": 5940 |
| }, |
| { |
| "epoch": 10.597147950089127, |
| "grad_norm": 4.87805918965023e-05, |
| "learning_rate": 2.676408418561635e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3677808, |
| "step": 5945 |
| }, |
| { |
| "epoch": 10.606060606060606, |
| "grad_norm": 6.62174352328293e-05, |
| "learning_rate": 2.672529020715006e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3681648, |
| "step": 5950 |
| }, |
| { |
| "epoch": 10.614973262032086, |
| "grad_norm": 2.1246925825835206e-05, |
| "learning_rate": 2.6686492053927837e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3684592, |
| "step": 5955 |
| }, |
| { |
| "epoch": 10.623885918003564, |
| "grad_norm": 2.8126887627877295e-05, |
| "learning_rate": 2.664768981983116e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3688144, |
| "step": 5960 |
| }, |
| { |
| "epoch": 10.632798573975045, |
| "grad_norm": 0.00012585851072799414, |
| "learning_rate": 2.660888359875141e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3690864, |
| "step": 5965 |
| }, |
| { |
| "epoch": 10.641711229946525, |
| "grad_norm": 1.5733108739368618e-05, |
| "learning_rate": 2.6570073484589607e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3693968, |
| "step": 5970 |
| }, |
| { |
| "epoch": 10.650623885918003, |
| "grad_norm": 1.848669307946693e-05, |
| "learning_rate": 2.6531259571256166e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3697520, |
| "step": 5975 |
| }, |
| { |
| "epoch": 10.659536541889484, |
| "grad_norm": 1.7645805201027542e-05, |
| "learning_rate": 2.649244195267074e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3700848, |
| "step": 5980 |
| }, |
| { |
| "epoch": 10.668449197860962, |
| "grad_norm": 1.3177233086025808e-05, |
| "learning_rate": 2.6453620722761896e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3703376, |
| "step": 5985 |
| }, |
| { |
| "epoch": 10.677361853832442, |
| "grad_norm": 1.3371476597967558e-05, |
| "learning_rate": 2.6414795975466987e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3705776, |
| "step": 5990 |
| }, |
| { |
| "epoch": 10.686274509803921, |
| "grad_norm": 1.0603682312648743e-05, |
| "learning_rate": 2.637596780473186e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3708592, |
| "step": 5995 |
| }, |
| { |
| "epoch": 10.695187165775401, |
| "grad_norm": 1.2599515684996732e-05, |
| "learning_rate": 2.633713630451063e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3712080, |
| "step": 6000 |
| }, |
| { |
| "epoch": 10.70409982174688, |
| "grad_norm": 9.517248145129997e-06, |
| "learning_rate": 2.6298301568765478e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3715120, |
| "step": 6005 |
| }, |
| { |
| "epoch": 10.71301247771836, |
| "grad_norm": 2.19264293264132e-05, |
| "learning_rate": 2.6259463691466423e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3717936, |
| "step": 6010 |
| }, |
| { |
| "epoch": 10.72192513368984, |
| "grad_norm": 3.806079621426761e-05, |
| "learning_rate": 2.622062276659109e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3720688, |
| "step": 6015 |
| }, |
| { |
| "epoch": 10.730837789661319, |
| "grad_norm": 1.4707470654684585e-05, |
| "learning_rate": 2.6181778888124454e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3723920, |
| "step": 6020 |
| }, |
| { |
| "epoch": 10.739750445632799, |
| "grad_norm": 1.9322525986353867e-05, |
| "learning_rate": 2.6142932150058657e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3727440, |
| "step": 6025 |
| }, |
| { |
| "epoch": 10.748663101604278, |
| "grad_norm": 2.9112976335454732e-05, |
| "learning_rate": 2.6104082646392754e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3730480, |
| "step": 6030 |
| }, |
| { |
| "epoch": 10.757575757575758, |
| "grad_norm": 1.0991312592523172e-05, |
| "learning_rate": 2.606523047113249e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3733392, |
| "step": 6035 |
| }, |
| { |
| "epoch": 10.766488413547236, |
| "grad_norm": 1.333354703092482e-05, |
| "learning_rate": 2.6026375718290086e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3736112, |
| "step": 6040 |
| }, |
| { |
| "epoch": 10.775401069518717, |
| "grad_norm": 2.259712891827803e-05, |
| "learning_rate": 2.5987518481883987e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3739696, |
| "step": 6045 |
| }, |
| { |
| "epoch": 10.784313725490197, |
| "grad_norm": 1.3553658391174395e-05, |
| "learning_rate": 2.5948658855938644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3742576, |
| "step": 6050 |
| }, |
| { |
| "epoch": 10.793226381461675, |
| "grad_norm": 1.4138406186248176e-05, |
| "learning_rate": 2.5909796934484308e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3745360, |
| "step": 6055 |
| }, |
| { |
| "epoch": 10.802139037433156, |
| "grad_norm": 0.0002746598329395056, |
| "learning_rate": 2.587093281155677e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3748464, |
| "step": 6060 |
| }, |
| { |
| "epoch": 10.811051693404634, |
| "grad_norm": 1.3880183360015508e-05, |
| "learning_rate": 2.5832066581197162e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3751920, |
| "step": 6065 |
| }, |
| { |
| "epoch": 10.819964349376114, |
| "grad_norm": 1.6997690181597136e-05, |
| "learning_rate": 2.5793198337451696e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3755088, |
| "step": 6070 |
| }, |
| { |
| "epoch": 10.828877005347593, |
| "grad_norm": 1.9864462956320494e-05, |
| "learning_rate": 2.575432817437146e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3757648, |
| "step": 6075 |
| }, |
| { |
| "epoch": 10.837789661319073, |
| "grad_norm": 1.3433314961730503e-05, |
| "learning_rate": 2.571545618601221e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3760912, |
| "step": 6080 |
| }, |
| { |
| "epoch": 10.846702317290553, |
| "grad_norm": 0.00028330503846518695, |
| "learning_rate": 2.567658246643409e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3764144, |
| "step": 6085 |
| }, |
| { |
| "epoch": 10.855614973262032, |
| "grad_norm": 1.4432083844440058e-05, |
| "learning_rate": 2.5637707109701442e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3767088, |
| "step": 6090 |
| }, |
| { |
| "epoch": 10.864527629233512, |
| "grad_norm": 9.103316551772878e-05, |
| "learning_rate": 2.559883020988258e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3770992, |
| "step": 6095 |
| }, |
| { |
| "epoch": 10.87344028520499, |
| "grad_norm": 1.3692199900106061e-05, |
| "learning_rate": 2.5559951861049532e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3773744, |
| "step": 6100 |
| }, |
| { |
| "epoch": 10.882352941176471, |
| "grad_norm": 2.3006872652331367e-05, |
| "learning_rate": 2.552107215727785e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3777904, |
| "step": 6105 |
| }, |
| { |
| "epoch": 10.89126559714795, |
| "grad_norm": 0.0002752006403170526, |
| "learning_rate": 2.5482191192646365e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3780912, |
| "step": 6110 |
| }, |
| { |
| "epoch": 10.90017825311943, |
| "grad_norm": 9.974956810765434e-06, |
| "learning_rate": 2.544330906123694e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3784016, |
| "step": 6115 |
| }, |
| { |
| "epoch": 10.909090909090908, |
| "grad_norm": 2.1135621864232235e-05, |
| "learning_rate": 2.5404425857134285e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3787024, |
| "step": 6120 |
| }, |
| { |
| "epoch": 10.918003565062389, |
| "grad_norm": 1.515540589025477e-05, |
| "learning_rate": 2.536554167442568e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3789584, |
| "step": 6125 |
| }, |
| { |
| "epoch": 10.926916221033869, |
| "grad_norm": 1.2581827832036652e-05, |
| "learning_rate": 2.53266566072008e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3792720, |
| "step": 6130 |
| }, |
| { |
| "epoch": 10.935828877005347, |
| "grad_norm": 0.0006793736247345805, |
| "learning_rate": 2.5287770749551442e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3795376, |
| "step": 6135 |
| }, |
| { |
| "epoch": 10.944741532976828, |
| "grad_norm": 1.5780657122377306e-05, |
| "learning_rate": 2.5248884195571326e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3798480, |
| "step": 6140 |
| }, |
| { |
| "epoch": 10.953654188948306, |
| "grad_norm": 1.0621036381053273e-05, |
| "learning_rate": 2.5209997039355837e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3801808, |
| "step": 6145 |
| }, |
| { |
| "epoch": 10.962566844919786, |
| "grad_norm": 9.538345693727024e-06, |
| "learning_rate": 2.517110937500185e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3804848, |
| "step": 6150 |
| }, |
| { |
| "epoch": 10.971479500891265, |
| "grad_norm": 0.00019309086201246828, |
| "learning_rate": 2.5132221296607445e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3808208, |
| "step": 6155 |
| }, |
| { |
| "epoch": 10.980392156862745, |
| "grad_norm": 0.0001642105489736423, |
| "learning_rate": 2.509333289827171e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3810800, |
| "step": 6160 |
| }, |
| { |
| "epoch": 10.989304812834224, |
| "grad_norm": 1.139036157837836e-05, |
| "learning_rate": 2.5054444274094507e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3813584, |
| "step": 6165 |
| }, |
| { |
| "epoch": 10.998217468805704, |
| "grad_norm": 1.1301727681711782e-05, |
| "learning_rate": 2.5015555518176243e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3817040, |
| "step": 6170 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.24064487218856812, |
| "eval_runtime": 4.585, |
| "eval_samples_per_second": 54.308, |
| "eval_steps_per_second": 13.741, |
| "num_input_tokens_seen": 3817120, |
| "step": 6171 |
| }, |
| { |
| "epoch": 11.007130124777184, |
| "grad_norm": 1.7638394638197497e-05, |
| "learning_rate": 2.4976666724617657e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3819872, |
| "step": 6175 |
| }, |
| { |
| "epoch": 11.016042780748663, |
| "grad_norm": 1.0595828825898934e-05, |
| "learning_rate": 2.493777798751956e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3823296, |
| "step": 6180 |
| }, |
| { |
| "epoch": 11.024955436720143, |
| "grad_norm": 3.515037678880617e-05, |
| "learning_rate": 2.489888940098263e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3826208, |
| "step": 6185 |
| }, |
| { |
| "epoch": 11.033868092691621, |
| "grad_norm": 1.735261503199581e-05, |
| "learning_rate": 2.4860001059107187e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3829760, |
| "step": 6190 |
| }, |
| { |
| "epoch": 11.042780748663102, |
| "grad_norm": 1.1009148693119641e-05, |
| "learning_rate": 2.4821113055992965e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3833216, |
| "step": 6195 |
| }, |
| { |
| "epoch": 11.05169340463458, |
| "grad_norm": 1.4147810361464508e-05, |
| "learning_rate": 2.478222548573887e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3835840, |
| "step": 6200 |
| }, |
| { |
| "epoch": 11.06060606060606, |
| "grad_norm": 1.4920704415999353e-05, |
| "learning_rate": 2.4743338442442755e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3838560, |
| "step": 6205 |
| }, |
| { |
| "epoch": 11.06951871657754, |
| "grad_norm": 1.137426897912519e-05, |
| "learning_rate": 2.4704452020201206e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3841792, |
| "step": 6210 |
| }, |
| { |
| "epoch": 11.07843137254902, |
| "grad_norm": 1.3182347174733877e-05, |
| "learning_rate": 2.4665566313109307e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3844352, |
| "step": 6215 |
| }, |
| { |
| "epoch": 11.0873440285205, |
| "grad_norm": 1.325432094745338e-05, |
| "learning_rate": 2.4626681415260393e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3847552, |
| "step": 6220 |
| }, |
| { |
| "epoch": 11.096256684491978, |
| "grad_norm": 1.2028423952870071e-05, |
| "learning_rate": 2.4587797420745883e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3850016, |
| "step": 6225 |
| }, |
| { |
| "epoch": 11.105169340463458, |
| "grad_norm": 1.213027280755341e-05, |
| "learning_rate": 2.4548914423654973e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3852512, |
| "step": 6230 |
| }, |
| { |
| "epoch": 11.114081996434937, |
| "grad_norm": 1.2425961358530913e-05, |
| "learning_rate": 2.4510032518074443e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3855872, |
| "step": 6235 |
| }, |
| { |
| "epoch": 11.122994652406417, |
| "grad_norm": 1.2018854249618016e-05, |
| "learning_rate": 2.4471151798088466e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3859648, |
| "step": 6240 |
| }, |
| { |
| "epoch": 11.131907308377897, |
| "grad_norm": 7.846080552553758e-05, |
| "learning_rate": 2.4432272357778314e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3862208, |
| "step": 6245 |
| }, |
| { |
| "epoch": 11.140819964349376, |
| "grad_norm": 1.3241695342003368e-05, |
| "learning_rate": 2.439339429122216e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3864672, |
| "step": 6250 |
| }, |
| { |
| "epoch": 11.149732620320856, |
| "grad_norm": 6.398496770998463e-05, |
| "learning_rate": 2.4354517692494895e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3867872, |
| "step": 6255 |
| }, |
| { |
| "epoch": 11.158645276292335, |
| "grad_norm": 1.46424317790661e-05, |
| "learning_rate": 2.431564265566781e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3871264, |
| "step": 6260 |
| }, |
| { |
| "epoch": 11.167557932263815, |
| "grad_norm": 1.3262514585221652e-05, |
| "learning_rate": 2.427676927480845e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3874464, |
| "step": 6265 |
| }, |
| { |
| "epoch": 11.176470588235293, |
| "grad_norm": 1.7728940292727202e-05, |
| "learning_rate": 2.4237897643980328e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3878016, |
| "step": 6270 |
| }, |
| { |
| "epoch": 11.185383244206774, |
| "grad_norm": 1.4908625416865107e-05, |
| "learning_rate": 2.4199027857242734e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3880832, |
| "step": 6275 |
| }, |
| { |
| "epoch": 11.194295900178252, |
| "grad_norm": 1.1908256055903621e-05, |
| "learning_rate": 2.41601600086505e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3884800, |
| "step": 6280 |
| }, |
| { |
| "epoch": 11.203208556149733, |
| "grad_norm": 1.2285045158932917e-05, |
| "learning_rate": 2.4121294192253764e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3887264, |
| "step": 6285 |
| }, |
| { |
| "epoch": 11.212121212121213, |
| "grad_norm": 1.382422215101542e-05, |
| "learning_rate": 2.4082430502097747e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3890304, |
| "step": 6290 |
| }, |
| { |
| "epoch": 11.221033868092691, |
| "grad_norm": 1.405794591846643e-05, |
| "learning_rate": 2.4043569032222526e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3893248, |
| "step": 6295 |
| }, |
| { |
| "epoch": 11.229946524064172, |
| "grad_norm": 1.7820608263718896e-05, |
| "learning_rate": 2.4004709876662795e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3896544, |
| "step": 6300 |
| }, |
| { |
| "epoch": 11.23885918003565, |
| "grad_norm": 1.839667856984306e-05, |
| "learning_rate": 2.396585312944767e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3899968, |
| "step": 6305 |
| }, |
| { |
| "epoch": 11.24777183600713, |
| "grad_norm": 3.20358740282245e-05, |
| "learning_rate": 2.3926998884600404e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3902912, |
| "step": 6310 |
| }, |
| { |
| "epoch": 11.256684491978609, |
| "grad_norm": 8.05861345725134e-05, |
| "learning_rate": 2.3888147236138245e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3905664, |
| "step": 6315 |
| }, |
| { |
| "epoch": 11.26559714795009, |
| "grad_norm": 1.0621994988468941e-05, |
| "learning_rate": 2.3849298278072118e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3909248, |
| "step": 6320 |
| }, |
| { |
| "epoch": 11.27450980392157, |
| "grad_norm": 1.65787641890347e-05, |
| "learning_rate": 2.3810452104406444e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3912128, |
| "step": 6325 |
| }, |
| { |
| "epoch": 11.283422459893048, |
| "grad_norm": 2.989449058077298e-05, |
| "learning_rate": 2.3771608809138926e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3914848, |
| "step": 6330 |
| }, |
| { |
| "epoch": 11.292335115864528, |
| "grad_norm": 7.710716454312205e-05, |
| "learning_rate": 2.3732768486260283e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3918528, |
| "step": 6335 |
| }, |
| { |
| "epoch": 11.301247771836007, |
| "grad_norm": 3.157812534482218e-05, |
| "learning_rate": 2.3693931229754036e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3921536, |
| "step": 6340 |
| }, |
| { |
| "epoch": 11.310160427807487, |
| "grad_norm": 3.4598073398228735e-05, |
| "learning_rate": 2.365509713359632e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3924352, |
| "step": 6345 |
| }, |
| { |
| "epoch": 11.319073083778965, |
| "grad_norm": 1.363915089314105e-05, |
| "learning_rate": 2.3616266291755582e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3926944, |
| "step": 6350 |
| }, |
| { |
| "epoch": 11.327985739750446, |
| "grad_norm": 9.564583706378471e-06, |
| "learning_rate": 2.3577438798192427e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3930528, |
| "step": 6355 |
| }, |
| { |
| "epoch": 11.336898395721924, |
| "grad_norm": 2.2686510419589467e-05, |
| "learning_rate": 2.3538614746859338e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3934144, |
| "step": 6360 |
| }, |
| { |
| "epoch": 11.345811051693405, |
| "grad_norm": 1.1965239536948502e-05, |
| "learning_rate": 2.349979423170047e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3937856, |
| "step": 6365 |
| }, |
| { |
| "epoch": 11.354723707664885, |
| "grad_norm": 1.0834000022441614e-05, |
| "learning_rate": 2.346097734665143e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3940704, |
| "step": 6370 |
| }, |
| { |
| "epoch": 11.363636363636363, |
| "grad_norm": 1.23635754789575e-05, |
| "learning_rate": 2.342216418563904e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3943008, |
| "step": 6375 |
| }, |
| { |
| "epoch": 11.372549019607844, |
| "grad_norm": 1.816690382838715e-05, |
| "learning_rate": 2.3383354842581106e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3945664, |
| "step": 6380 |
| }, |
| { |
| "epoch": 11.381461675579322, |
| "grad_norm": 1.2368334864731878e-05, |
| "learning_rate": 2.3344549411386203e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3948256, |
| "step": 6385 |
| }, |
| { |
| "epoch": 11.390374331550802, |
| "grad_norm": 2.607596616144292e-05, |
| "learning_rate": 2.330574798595342e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3951264, |
| "step": 6390 |
| }, |
| { |
| "epoch": 11.39928698752228, |
| "grad_norm": 1.1210274351469707e-05, |
| "learning_rate": 2.3266950660172183e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3954592, |
| "step": 6395 |
| }, |
| { |
| "epoch": 11.408199643493761, |
| "grad_norm": 1.4848555110802408e-05, |
| "learning_rate": 2.3228157527921966e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3958112, |
| "step": 6400 |
| }, |
| { |
| "epoch": 11.417112299465241, |
| "grad_norm": 1.3798643522022758e-05, |
| "learning_rate": 2.3189368683072134e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3960896, |
| "step": 6405 |
| }, |
| { |
| "epoch": 11.42602495543672, |
| "grad_norm": 8.388559763261583e-06, |
| "learning_rate": 2.3150584219481644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3963232, |
| "step": 6410 |
| }, |
| { |
| "epoch": 11.4349376114082, |
| "grad_norm": 1.9333376258146018e-05, |
| "learning_rate": 2.3111804230998863e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3966752, |
| "step": 6415 |
| }, |
| { |
| "epoch": 11.443850267379679, |
| "grad_norm": 9.065933409146965e-05, |
| "learning_rate": 2.3073028811461335e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3970080, |
| "step": 6420 |
| }, |
| { |
| "epoch": 11.452762923351159, |
| "grad_norm": 1.0522752745600883e-05, |
| "learning_rate": 2.303425805469554e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3972928, |
| "step": 6425 |
| }, |
| { |
| "epoch": 11.461675579322637, |
| "grad_norm": 1.237884225702146e-05, |
| "learning_rate": 2.2995492054516672e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3976288, |
| "step": 6430 |
| }, |
| { |
| "epoch": 11.470588235294118, |
| "grad_norm": 1.0875227417272981e-05, |
| "learning_rate": 2.2956730904728436e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3979424, |
| "step": 6435 |
| }, |
| { |
| "epoch": 11.479500891265598, |
| "grad_norm": 0.00011285066284472123, |
| "learning_rate": 2.2917974699122775e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3982592, |
| "step": 6440 |
| }, |
| { |
| "epoch": 11.488413547237077, |
| "grad_norm": 3.4363692975603044e-05, |
| "learning_rate": 2.287922353147969e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3985504, |
| "step": 6445 |
| }, |
| { |
| "epoch": 11.497326203208557, |
| "grad_norm": 1.3565711924456991e-05, |
| "learning_rate": 2.2840477495566976e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3988992, |
| "step": 6450 |
| }, |
| { |
| "epoch": 11.506238859180035, |
| "grad_norm": 9.274257536162622e-06, |
| "learning_rate": 2.2801736685140012e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3992416, |
| "step": 6455 |
| }, |
| { |
| "epoch": 11.515151515151516, |
| "grad_norm": 1.225191317644203e-05, |
| "learning_rate": 2.276300119394153e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3994688, |
| "step": 6460 |
| }, |
| { |
| "epoch": 11.524064171122994, |
| "grad_norm": 1.2006966244371142e-05, |
| "learning_rate": 2.272427111570141e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 3997632, |
| "step": 6465 |
| }, |
| { |
| "epoch": 11.532976827094474, |
| "grad_norm": 1.2051388694089837e-05, |
| "learning_rate": 2.2685546544136422e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4000512, |
| "step": 6470 |
| }, |
| { |
| "epoch": 11.541889483065953, |
| "grad_norm": 0.00017697972361929715, |
| "learning_rate": 2.2646827572950008e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4003296, |
| "step": 6475 |
| }, |
| { |
| "epoch": 11.550802139037433, |
| "grad_norm": 9.811637937673368e-06, |
| "learning_rate": 2.2608114295832053e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4006752, |
| "step": 6480 |
| }, |
| { |
| "epoch": 11.559714795008913, |
| "grad_norm": 9.908816537063103e-06, |
| "learning_rate": 2.256940680645868e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4009984, |
| "step": 6485 |
| }, |
| { |
| "epoch": 11.568627450980392, |
| "grad_norm": 1.0891917554545216e-05, |
| "learning_rate": 2.253070519849199e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4012960, |
| "step": 6490 |
| }, |
| { |
| "epoch": 11.577540106951872, |
| "grad_norm": 1.98553352674935e-05, |
| "learning_rate": 2.2492009565579876e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4016640, |
| "step": 6495 |
| }, |
| { |
| "epoch": 11.58645276292335, |
| "grad_norm": 8.628303476143628e-06, |
| "learning_rate": 2.2453320001355753e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4019648, |
| "step": 6500 |
| }, |
| { |
| "epoch": 11.595365418894831, |
| "grad_norm": 9.807788956095465e-06, |
| "learning_rate": 2.2414636599438345e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4021888, |
| "step": 6505 |
| }, |
| { |
| "epoch": 11.60427807486631, |
| "grad_norm": 1.3811519238515757e-05, |
| "learning_rate": 2.237595945343149e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4024480, |
| "step": 6510 |
| }, |
| { |
| "epoch": 11.61319073083779, |
| "grad_norm": 9.580502592143603e-06, |
| "learning_rate": 2.2337288656923874e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4027552, |
| "step": 6515 |
| }, |
| { |
| "epoch": 11.622103386809268, |
| "grad_norm": 1.2243661331012845e-05, |
| "learning_rate": 2.22986243034888e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4030720, |
| "step": 6520 |
| }, |
| { |
| "epoch": 11.631016042780749, |
| "grad_norm": 1.1290548172837589e-05, |
| "learning_rate": 2.2259966486684034e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4033248, |
| "step": 6525 |
| }, |
| { |
| "epoch": 11.639928698752229, |
| "grad_norm": 1.4833147361059673e-05, |
| "learning_rate": 2.222131530005146e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4036704, |
| "step": 6530 |
| }, |
| { |
| "epoch": 11.648841354723707, |
| "grad_norm": 2.9466349587892182e-05, |
| "learning_rate": 2.2182670837116975e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4040288, |
| "step": 6535 |
| }, |
| { |
| "epoch": 11.657754010695188, |
| "grad_norm": 1.0144281077373307e-05, |
| "learning_rate": 2.2144033191390168e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4043392, |
| "step": 6540 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 8.521773452230264e-06, |
| "learning_rate": 2.2105402456364146e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4046752, |
| "step": 6545 |
| }, |
| { |
| "epoch": 11.675579322638146, |
| "grad_norm": 1.819172211980913e-05, |
| "learning_rate": 2.2066778725515283e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4049600, |
| "step": 6550 |
| }, |
| { |
| "epoch": 11.684491978609625, |
| "grad_norm": 1.8302695025340654e-05, |
| "learning_rate": 2.202816209230303e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4052416, |
| "step": 6555 |
| }, |
| { |
| "epoch": 11.693404634581105, |
| "grad_norm": 1.0395393474027514e-05, |
| "learning_rate": 2.1989552650169655e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4055616, |
| "step": 6560 |
| }, |
| { |
| "epoch": 11.702317290552585, |
| "grad_norm": 1.530360896140337e-05, |
| "learning_rate": 2.1950950492540003e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4059072, |
| "step": 6565 |
| }, |
| { |
| "epoch": 11.711229946524064, |
| "grad_norm": 1.1224491572647821e-05, |
| "learning_rate": 2.1912355712821316e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4061728, |
| "step": 6570 |
| }, |
| { |
| "epoch": 11.720142602495544, |
| "grad_norm": 1.2782220437657088e-05, |
| "learning_rate": 2.187376840440297e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4064832, |
| "step": 6575 |
| }, |
| { |
| "epoch": 11.729055258467023, |
| "grad_norm": 9.216395483235829e-06, |
| "learning_rate": 2.1835188660656267e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4068160, |
| "step": 6580 |
| }, |
| { |
| "epoch": 11.737967914438503, |
| "grad_norm": 1.4968473806220572e-05, |
| "learning_rate": 2.179661657493422e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4071392, |
| "step": 6585 |
| }, |
| { |
| "epoch": 11.746880570409981, |
| "grad_norm": 1.3059238881396595e-05, |
| "learning_rate": 2.1758052240571285e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4075456, |
| "step": 6590 |
| }, |
| { |
| "epoch": 11.755793226381462, |
| "grad_norm": 2.845875314960722e-05, |
| "learning_rate": 2.1719495750883172e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4078976, |
| "step": 6595 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 1.1186496521986555e-05, |
| "learning_rate": 2.1680947199166624e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4082400, |
| "step": 6600 |
| }, |
| { |
| "epoch": 11.77361853832442, |
| "grad_norm": 2.289231815666426e-05, |
| "learning_rate": 2.1642406678699153e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4085536, |
| "step": 6605 |
| }, |
| { |
| "epoch": 11.7825311942959, |
| "grad_norm": 1.3052140275249258e-05, |
| "learning_rate": 2.1603874282738836e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4088192, |
| "step": 6610 |
| }, |
| { |
| "epoch": 11.79144385026738, |
| "grad_norm": 1.2246360711287707e-05, |
| "learning_rate": 2.156535010452413e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4091104, |
| "step": 6615 |
| }, |
| { |
| "epoch": 11.80035650623886, |
| "grad_norm": 1.011964377539698e-05, |
| "learning_rate": 2.152683423727355e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4093728, |
| "step": 6620 |
| }, |
| { |
| "epoch": 11.809269162210338, |
| "grad_norm": 4.648279718821868e-05, |
| "learning_rate": 2.148832677418556e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4096640, |
| "step": 6625 |
| }, |
| { |
| "epoch": 11.818181818181818, |
| "grad_norm": 8.568744306103326e-06, |
| "learning_rate": 2.1449827808438233e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4098944, |
| "step": 6630 |
| }, |
| { |
| "epoch": 11.827094474153299, |
| "grad_norm": 1.164458353741793e-05, |
| "learning_rate": 2.1411337433189123e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4101728, |
| "step": 6635 |
| }, |
| { |
| "epoch": 11.836007130124777, |
| "grad_norm": 0.0002633916446939111, |
| "learning_rate": 2.1372855741574954e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4104864, |
| "step": 6640 |
| }, |
| { |
| "epoch": 11.844919786096257, |
| "grad_norm": 1.1167186130478512e-05, |
| "learning_rate": 2.133438282671149e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4108416, |
| "step": 6645 |
| }, |
| { |
| "epoch": 11.853832442067736, |
| "grad_norm": 8.543814146833029e-06, |
| "learning_rate": 2.1295918781693232e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4111712, |
| "step": 6650 |
| }, |
| { |
| "epoch": 11.862745098039216, |
| "grad_norm": 1.109227287088288e-05, |
| "learning_rate": 2.12574636995932e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4115040, |
| "step": 6655 |
| }, |
| { |
| "epoch": 11.871657754010695, |
| "grad_norm": 9.523738299321849e-06, |
| "learning_rate": 2.121901767346276e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4118880, |
| "step": 6660 |
| }, |
| { |
| "epoch": 11.880570409982175, |
| "grad_norm": 1.0091476724483073e-05, |
| "learning_rate": 2.1180580796331324e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4121696, |
| "step": 6665 |
| }, |
| { |
| "epoch": 11.889483065953653, |
| "grad_norm": 9.299220255343243e-05, |
| "learning_rate": 2.114215316120622e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4125152, |
| "step": 6670 |
| }, |
| { |
| "epoch": 11.898395721925134, |
| "grad_norm": 8.668677764944732e-06, |
| "learning_rate": 2.1103734861072368e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4127680, |
| "step": 6675 |
| }, |
| { |
| "epoch": 11.907308377896614, |
| "grad_norm": 1.0266234312439337e-05, |
| "learning_rate": 2.106532598889212e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4131104, |
| "step": 6680 |
| }, |
| { |
| "epoch": 11.916221033868093, |
| "grad_norm": 1.0019150977313984e-05, |
| "learning_rate": 2.1026926637605008e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4133792, |
| "step": 6685 |
| }, |
| { |
| "epoch": 11.925133689839573, |
| "grad_norm": 9.016303920361679e-06, |
| "learning_rate": 2.098853690012752e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4137472, |
| "step": 6690 |
| }, |
| { |
| "epoch": 11.934046345811051, |
| "grad_norm": 1.0192444278800394e-05, |
| "learning_rate": 2.095015686935289e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4140288, |
| "step": 6695 |
| }, |
| { |
| "epoch": 11.942959001782532, |
| "grad_norm": 1.0827254300238565e-05, |
| "learning_rate": 2.0911786638150872e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4142848, |
| "step": 6700 |
| }, |
| { |
| "epoch": 11.95187165775401, |
| "grad_norm": 8.758339390624315e-06, |
| "learning_rate": 2.0873426299367502e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4145664, |
| "step": 6705 |
| }, |
| { |
| "epoch": 11.96078431372549, |
| "grad_norm": 1.169169172499096e-05, |
| "learning_rate": 2.0835075945824858e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4148352, |
| "step": 6710 |
| }, |
| { |
| "epoch": 11.969696969696969, |
| "grad_norm": 9.24233336263569e-06, |
| "learning_rate": 2.0796735670320888e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4151584, |
| "step": 6715 |
| }, |
| { |
| "epoch": 11.97860962566845, |
| "grad_norm": 1.0173745067731943e-05, |
| "learning_rate": 2.0758405565629135e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4154592, |
| "step": 6720 |
| }, |
| { |
| "epoch": 11.98752228163993, |
| "grad_norm": 1.4830648069619201e-05, |
| "learning_rate": 2.0720085724498526e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4158752, |
| "step": 6725 |
| }, |
| { |
| "epoch": 11.996434937611408, |
| "grad_norm": 2.1176798327360302e-05, |
| "learning_rate": 2.0681776239653177e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4162048, |
| "step": 6730 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.25058555603027344, |
| "eval_runtime": 4.5876, |
| "eval_samples_per_second": 54.277, |
| "eval_steps_per_second": 13.733, |
| "num_input_tokens_seen": 4163160, |
| "step": 6732 |
| }, |
| { |
| "epoch": 12.005347593582888, |
| "grad_norm": 5.25148534507025e-05, |
| "learning_rate": 2.0643477203792126e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4165048, |
| "step": 6735 |
| }, |
| { |
| "epoch": 12.014260249554367, |
| "grad_norm": 1.4447410649154335e-05, |
| "learning_rate": 2.060518870958913e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4168184, |
| "step": 6740 |
| }, |
| { |
| "epoch": 12.023172905525847, |
| "grad_norm": 8.24018661660375e-06, |
| "learning_rate": 2.056691084969244e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4171864, |
| "step": 6745 |
| }, |
| { |
| "epoch": 12.032085561497325, |
| "grad_norm": 0.00023547218006569892, |
| "learning_rate": 2.052864371672457e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4174392, |
| "step": 6750 |
| }, |
| { |
| "epoch": 12.040998217468806, |
| "grad_norm": 1.2732047252939083e-05, |
| "learning_rate": 2.0490387403282077e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4177816, |
| "step": 6755 |
| }, |
| { |
| "epoch": 12.049910873440286, |
| "grad_norm": 9.282196515414398e-06, |
| "learning_rate": 2.045214200193535e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4180536, |
| "step": 6760 |
| }, |
| { |
| "epoch": 12.058823529411764, |
| "grad_norm": 1.1917635674763005e-05, |
| "learning_rate": 2.0413907605228372e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4183960, |
| "step": 6765 |
| }, |
| { |
| "epoch": 12.067736185383245, |
| "grad_norm": 1.1140574315504637e-05, |
| "learning_rate": 2.037568430567848e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4186936, |
| "step": 6770 |
| }, |
| { |
| "epoch": 12.076648841354723, |
| "grad_norm": 8.070183866948355e-06, |
| "learning_rate": 2.033747219577618e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4190360, |
| "step": 6775 |
| }, |
| { |
| "epoch": 12.085561497326204, |
| "grad_norm": 9.62817375693703e-06, |
| "learning_rate": 2.0299271367984873e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4193336, |
| "step": 6780 |
| }, |
| { |
| "epoch": 12.094474153297682, |
| "grad_norm": 5.264949140837416e-05, |
| "learning_rate": 2.0261081914740688e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4196312, |
| "step": 6785 |
| }, |
| { |
| "epoch": 12.103386809269162, |
| "grad_norm": 8.345319656655192e-06, |
| "learning_rate": 2.022290392845223e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4198808, |
| "step": 6790 |
| }, |
| { |
| "epoch": 12.112299465240643, |
| "grad_norm": 1.0408018169982824e-05, |
| "learning_rate": 2.018473750150035e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4201880, |
| "step": 6795 |
| }, |
| { |
| "epoch": 12.121212121212121, |
| "grad_norm": 8.212978173105512e-06, |
| "learning_rate": 2.0146582726237916e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4204376, |
| "step": 6800 |
| }, |
| { |
| "epoch": 12.130124777183601, |
| "grad_norm": 0.00011563662701519206, |
| "learning_rate": 2.010843969498961e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4207800, |
| "step": 6805 |
| }, |
| { |
| "epoch": 12.13903743315508, |
| "grad_norm": 8.69808627612656e-06, |
| "learning_rate": 2.0070308500051716e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4210520, |
| "step": 6810 |
| }, |
| { |
| "epoch": 12.14795008912656, |
| "grad_norm": 7.624462341482285e-06, |
| "learning_rate": 2.0032189233691834e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4213944, |
| "step": 6815 |
| }, |
| { |
| "epoch": 12.156862745098039, |
| "grad_norm": 3.42975981766358e-05, |
| "learning_rate": 1.999408198814876e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4216792, |
| "step": 6820 |
| }, |
| { |
| "epoch": 12.165775401069519, |
| "grad_norm": 2.3967681045178324e-05, |
| "learning_rate": 1.995598685563214e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4219896, |
| "step": 6825 |
| }, |
| { |
| "epoch": 12.174688057040997, |
| "grad_norm": 0.00024852409842424095, |
| "learning_rate": 1.9917903928322356e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4222264, |
| "step": 6830 |
| }, |
| { |
| "epoch": 12.183600713012478, |
| "grad_norm": 1.4906848264217842e-05, |
| "learning_rate": 1.9879833298370238e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4225656, |
| "step": 6835 |
| }, |
| { |
| "epoch": 12.192513368983958, |
| "grad_norm": 9.082517863134854e-06, |
| "learning_rate": 1.9841775057896855e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4229624, |
| "step": 6840 |
| }, |
| { |
| "epoch": 12.201426024955436, |
| "grad_norm": 8.315850209328346e-06, |
| "learning_rate": 1.9803729298993297e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4231992, |
| "step": 6845 |
| }, |
| { |
| "epoch": 12.210338680926917, |
| "grad_norm": 1.1928617823286913e-05, |
| "learning_rate": 1.9765696113720463e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4235416, |
| "step": 6850 |
| }, |
| { |
| "epoch": 12.219251336898395, |
| "grad_norm": 7.387813639070373e-06, |
| "learning_rate": 1.9727675594108834e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4238968, |
| "step": 6855 |
| }, |
| { |
| "epoch": 12.228163992869876, |
| "grad_norm": 1.2016014807159081e-05, |
| "learning_rate": 1.968966783215822e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4242200, |
| "step": 6860 |
| }, |
| { |
| "epoch": 12.237076648841354, |
| "grad_norm": 0.00013246844173409045, |
| "learning_rate": 1.965167291983757e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4244888, |
| "step": 6865 |
| }, |
| { |
| "epoch": 12.245989304812834, |
| "grad_norm": 6.707129796268418e-05, |
| "learning_rate": 1.961369094908474e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4247640, |
| "step": 6870 |
| }, |
| { |
| "epoch": 12.254901960784313, |
| "grad_norm": 1.8930944861494936e-05, |
| "learning_rate": 1.957572201180627e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4250680, |
| "step": 6875 |
| }, |
| { |
| "epoch": 12.263814616755793, |
| "grad_norm": 8.173775313480292e-06, |
| "learning_rate": 1.953776619987718e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4253976, |
| "step": 6880 |
| }, |
| { |
| "epoch": 12.272727272727273, |
| "grad_norm": 1.1115719644294586e-05, |
| "learning_rate": 1.949982360514071e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4257080, |
| "step": 6885 |
| }, |
| { |
| "epoch": 12.281639928698752, |
| "grad_norm": 2.634565498738084e-05, |
| "learning_rate": 1.946189431940812e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4260920, |
| "step": 6890 |
| }, |
| { |
| "epoch": 12.290552584670232, |
| "grad_norm": 1.156184862338705e-05, |
| "learning_rate": 1.9423978434458458e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4263800, |
| "step": 6895 |
| }, |
| { |
| "epoch": 12.29946524064171, |
| "grad_norm": 7.503647339035524e-06, |
| "learning_rate": 1.9386076042038372e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4267160, |
| "step": 6900 |
| }, |
| { |
| "epoch": 12.308377896613191, |
| "grad_norm": 1.15874208859168e-05, |
| "learning_rate": 1.934818723386183e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4270552, |
| "step": 6905 |
| }, |
| { |
| "epoch": 12.31729055258467, |
| "grad_norm": 8.49560365168145e-06, |
| "learning_rate": 1.9310312101609964e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4273112, |
| "step": 6910 |
| }, |
| { |
| "epoch": 12.32620320855615, |
| "grad_norm": 1.1358582014509011e-05, |
| "learning_rate": 1.927245073693078e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4276408, |
| "step": 6915 |
| }, |
| { |
| "epoch": 12.33511586452763, |
| "grad_norm": 5.10916106577497e-05, |
| "learning_rate": 1.9234603231438995e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4279192, |
| "step": 6920 |
| }, |
| { |
| "epoch": 12.344028520499108, |
| "grad_norm": 3.123953865724616e-05, |
| "learning_rate": 1.919676967671578e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4283096, |
| "step": 6925 |
| }, |
| { |
| "epoch": 12.352941176470589, |
| "grad_norm": 1.0834906788659282e-05, |
| "learning_rate": 1.9158950164308543e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4286008, |
| "step": 6930 |
| }, |
| { |
| "epoch": 12.361853832442067, |
| "grad_norm": 1.2189595508971252e-05, |
| "learning_rate": 1.912114478573071e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4289144, |
| "step": 6935 |
| }, |
| { |
| "epoch": 12.370766488413548, |
| "grad_norm": 7.232151983771473e-05, |
| "learning_rate": 1.9083353632461533e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4292120, |
| "step": 6940 |
| }, |
| { |
| "epoch": 12.379679144385026, |
| "grad_norm": 0.0005177415441721678, |
| "learning_rate": 1.9045576795945826e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4295192, |
| "step": 6945 |
| }, |
| { |
| "epoch": 12.388591800356506, |
| "grad_norm": 9.710914309835061e-05, |
| "learning_rate": 1.9007814367593755e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4298808, |
| "step": 6950 |
| }, |
| { |
| "epoch": 12.397504456327987, |
| "grad_norm": 1.163170236395672e-05, |
| "learning_rate": 1.8970066438780628e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4302136, |
| "step": 6955 |
| }, |
| { |
| "epoch": 12.406417112299465, |
| "grad_norm": 1.068214260158129e-05, |
| "learning_rate": 1.8932333100846654e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4305144, |
| "step": 6960 |
| }, |
| { |
| "epoch": 12.415329768270945, |
| "grad_norm": 1.1760239431168884e-05, |
| "learning_rate": 1.8894614445096758e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4308824, |
| "step": 6965 |
| }, |
| { |
| "epoch": 12.424242424242424, |
| "grad_norm": 8.910944416129496e-06, |
| "learning_rate": 1.8856910562800342e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4311960, |
| "step": 6970 |
| }, |
| { |
| "epoch": 12.433155080213904, |
| "grad_norm": 9.133290404861327e-06, |
| "learning_rate": 1.881922154519103e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4314488, |
| "step": 6975 |
| }, |
| { |
| "epoch": 12.442067736185383, |
| "grad_norm": 0.00017555024533066899, |
| "learning_rate": 1.8781547483466503e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4317336, |
| "step": 6980 |
| }, |
| { |
| "epoch": 12.450980392156863, |
| "grad_norm": 2.1916104742558673e-05, |
| "learning_rate": 1.874388846878823e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4320792, |
| "step": 6985 |
| }, |
| { |
| "epoch": 12.459893048128341, |
| "grad_norm": 8.021089342946652e-06, |
| "learning_rate": 1.8706244592281298e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4323544, |
| "step": 6990 |
| }, |
| { |
| "epoch": 12.468805704099822, |
| "grad_norm": 1.0469730113982223e-05, |
| "learning_rate": 1.8668615945034128e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4326264, |
| "step": 6995 |
| }, |
| { |
| "epoch": 12.477718360071302, |
| "grad_norm": 1.1207467650820035e-05, |
| "learning_rate": 1.863100261809834e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4329656, |
| "step": 7000 |
| }, |
| { |
| "epoch": 12.48663101604278, |
| "grad_norm": 1.2151757800893392e-05, |
| "learning_rate": 1.8593404702488437e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4332760, |
| "step": 7005 |
| }, |
| { |
| "epoch": 12.49554367201426, |
| "grad_norm": 9.998749192163814e-06, |
| "learning_rate": 1.855582228918165e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4335992, |
| "step": 7010 |
| }, |
| { |
| "epoch": 12.50445632798574, |
| "grad_norm": 7.701738468313124e-06, |
| "learning_rate": 1.8518255469117697e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4339160, |
| "step": 7015 |
| }, |
| { |
| "epoch": 12.51336898395722, |
| "grad_norm": 1.6190710084629245e-05, |
| "learning_rate": 1.8480704333198565e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4342136, |
| "step": 7020 |
| }, |
| { |
| "epoch": 12.522281639928698, |
| "grad_norm": 7.62514173402451e-05, |
| "learning_rate": 1.8443168972288272e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4345368, |
| "step": 7025 |
| }, |
| { |
| "epoch": 12.531194295900178, |
| "grad_norm": 8.554538908356335e-06, |
| "learning_rate": 1.84056494772127e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4348440, |
| "step": 7030 |
| }, |
| { |
| "epoch": 12.540106951871659, |
| "grad_norm": 8.511431951774284e-05, |
| "learning_rate": 1.8368145938759322e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4351064, |
| "step": 7035 |
| }, |
| { |
| "epoch": 12.549019607843137, |
| "grad_norm": 1.336482455371879e-05, |
| "learning_rate": 1.8330658447676986e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4354392, |
| "step": 7040 |
| }, |
| { |
| "epoch": 12.557932263814617, |
| "grad_norm": 8.30404314910993e-06, |
| "learning_rate": 1.829318709467573e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4357592, |
| "step": 7045 |
| }, |
| { |
| "epoch": 12.566844919786096, |
| "grad_norm": 8.932283890317194e-06, |
| "learning_rate": 1.8255731970426522e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4361464, |
| "step": 7050 |
| }, |
| { |
| "epoch": 12.575757575757576, |
| "grad_norm": 1.1647942301351577e-05, |
| "learning_rate": 1.8218293165561072e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4365144, |
| "step": 7055 |
| }, |
| { |
| "epoch": 12.584670231729055, |
| "grad_norm": 9.729025805427227e-06, |
| "learning_rate": 1.818087077067162e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4368504, |
| "step": 7060 |
| }, |
| { |
| "epoch": 12.593582887700535, |
| "grad_norm": 9.038580174092203e-05, |
| "learning_rate": 1.8143464876310673e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4371832, |
| "step": 7065 |
| }, |
| { |
| "epoch": 12.602495543672013, |
| "grad_norm": 1.3633638445753604e-05, |
| "learning_rate": 1.810607557299081e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4375576, |
| "step": 7070 |
| }, |
| { |
| "epoch": 12.611408199643494, |
| "grad_norm": 2.659759411471896e-05, |
| "learning_rate": 1.8068702951184475e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4378552, |
| "step": 7075 |
| }, |
| { |
| "epoch": 12.620320855614974, |
| "grad_norm": 8.272366540040821e-05, |
| "learning_rate": 1.8031347101323748e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4382424, |
| "step": 7080 |
| }, |
| { |
| "epoch": 12.629233511586452, |
| "grad_norm": 6.152382411528379e-05, |
| "learning_rate": 1.7994008113800102e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4386456, |
| "step": 7085 |
| }, |
| { |
| "epoch": 12.638146167557933, |
| "grad_norm": 1.3592002687801141e-05, |
| "learning_rate": 1.795668607896426e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4388824, |
| "step": 7090 |
| }, |
| { |
| "epoch": 12.647058823529411, |
| "grad_norm": 9.91694560070755e-06, |
| "learning_rate": 1.7919381087125868e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4392312, |
| "step": 7095 |
| }, |
| { |
| "epoch": 12.655971479500892, |
| "grad_norm": 8.959758815763053e-06, |
| "learning_rate": 1.7882093228553355e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4395928, |
| "step": 7100 |
| }, |
| { |
| "epoch": 12.66488413547237, |
| "grad_norm": 1.1833170901809353e-05, |
| "learning_rate": 1.78448225934737e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4398840, |
| "step": 7105 |
| }, |
| { |
| "epoch": 12.67379679144385, |
| "grad_norm": 7.207169801404234e-06, |
| "learning_rate": 1.7807569272072194e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4402360, |
| "step": 7110 |
| }, |
| { |
| "epoch": 12.68270944741533, |
| "grad_norm": 9.115967259276658e-06, |
| "learning_rate": 1.7770333354492225e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4405240, |
| "step": 7115 |
| }, |
| { |
| "epoch": 12.691622103386809, |
| "grad_norm": 8.961714229371864e-06, |
| "learning_rate": 1.7733114930835104e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4408696, |
| "step": 7120 |
| }, |
| { |
| "epoch": 12.70053475935829, |
| "grad_norm": 9.721734386403114e-06, |
| "learning_rate": 1.7695914091159765e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4411288, |
| "step": 7125 |
| }, |
| { |
| "epoch": 12.709447415329768, |
| "grad_norm": 2.042876985797193e-05, |
| "learning_rate": 1.765873092548263e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4413784, |
| "step": 7130 |
| }, |
| { |
| "epoch": 12.718360071301248, |
| "grad_norm": 9.558229066897184e-06, |
| "learning_rate": 1.762156552377734e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4416856, |
| "step": 7135 |
| }, |
| { |
| "epoch": 12.727272727272727, |
| "grad_norm": 0.00018919534340966493, |
| "learning_rate": 1.7584417975974534e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4420152, |
| "step": 7140 |
| }, |
| { |
| "epoch": 12.736185383244207, |
| "grad_norm": 7.95549203758128e-06, |
| "learning_rate": 1.7547288371961675e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4423064, |
| "step": 7145 |
| }, |
| { |
| "epoch": 12.745098039215687, |
| "grad_norm": 9.42739916354185e-06, |
| "learning_rate": 1.7510176801582818e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4426104, |
| "step": 7150 |
| }, |
| { |
| "epoch": 12.754010695187166, |
| "grad_norm": 3.628047488746233e-05, |
| "learning_rate": 1.7473083354638344e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4429912, |
| "step": 7155 |
| }, |
| { |
| "epoch": 12.762923351158646, |
| "grad_norm": 2.2449634343502112e-05, |
| "learning_rate": 1.7436008120884794e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4433016, |
| "step": 7160 |
| }, |
| { |
| "epoch": 12.771836007130124, |
| "grad_norm": 8.338784937222954e-06, |
| "learning_rate": 1.739895119003465e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4435768, |
| "step": 7165 |
| }, |
| { |
| "epoch": 12.780748663101605, |
| "grad_norm": 7.26561802366632e-06, |
| "learning_rate": 1.7361912651756098e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4438872, |
| "step": 7170 |
| }, |
| { |
| "epoch": 12.789661319073083, |
| "grad_norm": 7.64839023759123e-06, |
| "learning_rate": 1.7324892595672805e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4442104, |
| "step": 7175 |
| }, |
| { |
| "epoch": 12.798573975044564, |
| "grad_norm": 1.171323219750775e-05, |
| "learning_rate": 1.728789111136375e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4445208, |
| "step": 7180 |
| }, |
| { |
| "epoch": 12.807486631016042, |
| "grad_norm": 1.1664715202641673e-05, |
| "learning_rate": 1.7250908288362944e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4448184, |
| "step": 7185 |
| }, |
| { |
| "epoch": 12.816399286987522, |
| "grad_norm": 1.2254623470653314e-05, |
| "learning_rate": 1.7213944216159242e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4450808, |
| "step": 7190 |
| }, |
| { |
| "epoch": 12.825311942959003, |
| "grad_norm": 1.8988508600159548e-05, |
| "learning_rate": 1.7176998984196146e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4453432, |
| "step": 7195 |
| }, |
| { |
| "epoch": 12.834224598930481, |
| "grad_norm": 2.6997422537533566e-05, |
| "learning_rate": 1.7140072681871554e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4456216, |
| "step": 7200 |
| }, |
| { |
| "epoch": 12.843137254901961, |
| "grad_norm": 2.3871938537922688e-05, |
| "learning_rate": 1.7103165398537553e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4458808, |
| "step": 7205 |
| }, |
| { |
| "epoch": 12.85204991087344, |
| "grad_norm": 1.0611884135869332e-05, |
| "learning_rate": 1.7066277223500245e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4461848, |
| "step": 7210 |
| }, |
| { |
| "epoch": 12.86096256684492, |
| "grad_norm": 5.608840729109943e-05, |
| "learning_rate": 1.7029408246019447e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4464824, |
| "step": 7215 |
| }, |
| { |
| "epoch": 12.869875222816399, |
| "grad_norm": 3.4594344469951466e-05, |
| "learning_rate": 1.699255855530856e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4468248, |
| "step": 7220 |
| }, |
| { |
| "epoch": 12.878787878787879, |
| "grad_norm": 7.11159236743697e-06, |
| "learning_rate": 1.6955728240534305e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4470744, |
| "step": 7225 |
| }, |
| { |
| "epoch": 12.887700534759357, |
| "grad_norm": 9.122089068114292e-06, |
| "learning_rate": 1.6918917390816497e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4474264, |
| "step": 7230 |
| }, |
| { |
| "epoch": 12.896613190730838, |
| "grad_norm": 1.050134414981585e-05, |
| "learning_rate": 1.688212609522788e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4477272, |
| "step": 7235 |
| }, |
| { |
| "epoch": 12.905525846702318, |
| "grad_norm": 9.323571248387452e-06, |
| "learning_rate": 1.684535444279387e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4479832, |
| "step": 7240 |
| }, |
| { |
| "epoch": 12.914438502673796, |
| "grad_norm": 1.227039047080325e-05, |
| "learning_rate": 1.6808602522492357e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4482296, |
| "step": 7245 |
| }, |
| { |
| "epoch": 12.923351158645277, |
| "grad_norm": 1.0895531886490062e-05, |
| "learning_rate": 1.6771870423253472e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4484856, |
| "step": 7250 |
| }, |
| { |
| "epoch": 12.932263814616755, |
| "grad_norm": 7.429500783473486e-06, |
| "learning_rate": 1.673515823395939e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4487896, |
| "step": 7255 |
| }, |
| { |
| "epoch": 12.941176470588236, |
| "grad_norm": 8.657786565891001e-06, |
| "learning_rate": 1.6698466043444123e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4491576, |
| "step": 7260 |
| }, |
| { |
| "epoch": 12.950089126559714, |
| "grad_norm": 1.9946444808738306e-05, |
| "learning_rate": 1.6661793940493263e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4494904, |
| "step": 7265 |
| }, |
| { |
| "epoch": 12.959001782531194, |
| "grad_norm": 7.4231547841918655e-06, |
| "learning_rate": 1.6625142013843825e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4498456, |
| "step": 7270 |
| }, |
| { |
| "epoch": 12.967914438502675, |
| "grad_norm": 6.513627340609673e-06, |
| "learning_rate": 1.658851035218399e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4501720, |
| "step": 7275 |
| }, |
| { |
| "epoch": 12.976827094474153, |
| "grad_norm": 1.3617377589980606e-05, |
| "learning_rate": 1.6551899044152887e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4504600, |
| "step": 7280 |
| }, |
| { |
| "epoch": 12.985739750445633, |
| "grad_norm": 9.484362635703292e-06, |
| "learning_rate": 1.651530817834043e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4507704, |
| "step": 7285 |
| }, |
| { |
| "epoch": 12.994652406417112, |
| "grad_norm": 7.737330633972306e-06, |
| "learning_rate": 1.647873784328703e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4510360, |
| "step": 7290 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.25558963418006897, |
| "eval_runtime": 4.5868, |
| "eval_samples_per_second": 54.286, |
| "eval_steps_per_second": 13.735, |
| "num_input_tokens_seen": 4511312, |
| "step": 7293 |
| }, |
| { |
| "epoch": 13.003565062388592, |
| "grad_norm": 7.779332008794881e-06, |
| "learning_rate": 1.644218812748343e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4512624, |
| "step": 7295 |
| }, |
| { |
| "epoch": 13.01247771836007, |
| "grad_norm": 9.883010534394998e-06, |
| "learning_rate": 1.6405659119370512e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4515984, |
| "step": 7300 |
| }, |
| { |
| "epoch": 13.02139037433155, |
| "grad_norm": 7.949201972223818e-06, |
| "learning_rate": 1.6369150907339005e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4519216, |
| "step": 7305 |
| }, |
| { |
| "epoch": 13.030303030303031, |
| "grad_norm": 1.067373978003161e-05, |
| "learning_rate": 1.6332663579729352e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4522000, |
| "step": 7310 |
| }, |
| { |
| "epoch": 13.03921568627451, |
| "grad_norm": 7.869979526731186e-06, |
| "learning_rate": 1.6296197224831435e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4524912, |
| "step": 7315 |
| }, |
| { |
| "epoch": 13.04812834224599, |
| "grad_norm": 7.198002549557714e-06, |
| "learning_rate": 1.6259751930884397e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4528560, |
| "step": 7320 |
| }, |
| { |
| "epoch": 13.057040998217468, |
| "grad_norm": 1.3213011698098853e-05, |
| "learning_rate": 1.622332778607642e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4530928, |
| "step": 7325 |
| }, |
| { |
| "epoch": 13.065953654188949, |
| "grad_norm": 6.953484444238711e-06, |
| "learning_rate": 1.618692487854452e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4534448, |
| "step": 7330 |
| }, |
| { |
| "epoch": 13.074866310160427, |
| "grad_norm": 9.440144822292496e-06, |
| "learning_rate": 1.615054329637431e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4537392, |
| "step": 7335 |
| }, |
| { |
| "epoch": 13.083778966131907, |
| "grad_norm": 7.443520644301316e-06, |
| "learning_rate": 1.6114183127599807e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4540048, |
| "step": 7340 |
| }, |
| { |
| "epoch": 13.092691622103386, |
| "grad_norm": 1.597183472767938e-05, |
| "learning_rate": 1.6077844460203206e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4543472, |
| "step": 7345 |
| }, |
| { |
| "epoch": 13.101604278074866, |
| "grad_norm": 9.358037459605839e-06, |
| "learning_rate": 1.6041527382114692e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4546544, |
| "step": 7350 |
| }, |
| { |
| "epoch": 13.110516934046347, |
| "grad_norm": 9.500805390416645e-06, |
| "learning_rate": 1.600523198121218e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4549744, |
| "step": 7355 |
| }, |
| { |
| "epoch": 13.119429590017825, |
| "grad_norm": 2.4797285732347518e-05, |
| "learning_rate": 1.5968958345321178e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4552880, |
| "step": 7360 |
| }, |
| { |
| "epoch": 13.128342245989305, |
| "grad_norm": 9.394189874001313e-06, |
| "learning_rate": 1.593270656221448e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4556112, |
| "step": 7365 |
| }, |
| { |
| "epoch": 13.137254901960784, |
| "grad_norm": 8.06232765171444e-06, |
| "learning_rate": 1.5896476719612023e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4559792, |
| "step": 7370 |
| }, |
| { |
| "epoch": 13.146167557932264, |
| "grad_norm": 7.0690566644771025e-06, |
| "learning_rate": 1.586026890518066e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4563376, |
| "step": 7375 |
| }, |
| { |
| "epoch": 13.155080213903743, |
| "grad_norm": 8.308110409416258e-06, |
| "learning_rate": 1.582408320653393e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4566704, |
| "step": 7380 |
| }, |
| { |
| "epoch": 13.163992869875223, |
| "grad_norm": 3.808155088336207e-05, |
| "learning_rate": 1.578791971123185e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4569712, |
| "step": 7385 |
| }, |
| { |
| "epoch": 13.172905525846703, |
| "grad_norm": 9.400876479048748e-06, |
| "learning_rate": 1.5751778506780748e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4573808, |
| "step": 7390 |
| }, |
| { |
| "epoch": 13.181818181818182, |
| "grad_norm": 7.752068086119834e-06, |
| "learning_rate": 1.5715659680632973e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4577680, |
| "step": 7395 |
| }, |
| { |
| "epoch": 13.190730837789662, |
| "grad_norm": 7.223582997539779e-06, |
| "learning_rate": 1.567956332018674e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4581488, |
| "step": 7400 |
| }, |
| { |
| "epoch": 13.19964349376114, |
| "grad_norm": 2.2543108570971526e-05, |
| "learning_rate": 1.564348951278591e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4584720, |
| "step": 7405 |
| }, |
| { |
| "epoch": 13.20855614973262, |
| "grad_norm": 8.04398314357968e-06, |
| "learning_rate": 1.560743834571975e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4587792, |
| "step": 7410 |
| }, |
| { |
| "epoch": 13.2174688057041, |
| "grad_norm": 1.0653552635631058e-05, |
| "learning_rate": 1.5571409906222765e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4590672, |
| "step": 7415 |
| }, |
| { |
| "epoch": 13.22638146167558, |
| "grad_norm": 2.178288923460059e-05, |
| "learning_rate": 1.5535404281474457e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4594064, |
| "step": 7420 |
| }, |
| { |
| "epoch": 13.235294117647058, |
| "grad_norm": 6.663993644906441e-06, |
| "learning_rate": 1.549942155859913e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4598352, |
| "step": 7425 |
| }, |
| { |
| "epoch": 13.244206773618538, |
| "grad_norm": 1.463762964704074e-05, |
| "learning_rate": 1.546346182466566e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4601968, |
| "step": 7430 |
| }, |
| { |
| "epoch": 13.253119429590019, |
| "grad_norm": 1.085971416614484e-05, |
| "learning_rate": 1.5427525166687288e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4605264, |
| "step": 7435 |
| }, |
| { |
| "epoch": 13.262032085561497, |
| "grad_norm": 7.035836915747495e-06, |
| "learning_rate": 1.5391611671621443e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4608272, |
| "step": 7440 |
| }, |
| { |
| "epoch": 13.270944741532977, |
| "grad_norm": 8.502263881382532e-06, |
| "learning_rate": 1.535572142636948e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4611664, |
| "step": 7445 |
| }, |
| { |
| "epoch": 13.279857397504456, |
| "grad_norm": 0.0001319638395216316, |
| "learning_rate": 1.531985451777652e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4614224, |
| "step": 7450 |
| }, |
| { |
| "epoch": 13.288770053475936, |
| "grad_norm": 1.2678749953920487e-05, |
| "learning_rate": 1.5284011032631197e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4617424, |
| "step": 7455 |
| }, |
| { |
| "epoch": 13.297682709447415, |
| "grad_norm": 8.696750228409655e-06, |
| "learning_rate": 1.5248191057665462e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4619920, |
| "step": 7460 |
| }, |
| { |
| "epoch": 13.306595365418895, |
| "grad_norm": 1.0347956958867144e-05, |
| "learning_rate": 1.5212394679554403e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4623184, |
| "step": 7465 |
| }, |
| { |
| "epoch": 13.315508021390375, |
| "grad_norm": 6.585772098333109e-06, |
| "learning_rate": 1.517662198491599e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4625552, |
| "step": 7470 |
| }, |
| { |
| "epoch": 13.324420677361854, |
| "grad_norm": 3.188840128132142e-05, |
| "learning_rate": 1.5140873060310872e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4629200, |
| "step": 7475 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 1.206223987537669e-05, |
| "learning_rate": 1.5105147992242222e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4632592, |
| "step": 7480 |
| }, |
| { |
| "epoch": 13.342245989304812, |
| "grad_norm": 6.815862889197888e-06, |
| "learning_rate": 1.5069446867155446e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4636432, |
| "step": 7485 |
| }, |
| { |
| "epoch": 13.351158645276293, |
| "grad_norm": 8.916998922359198e-06, |
| "learning_rate": 1.5033769771438039e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4639664, |
| "step": 7490 |
| }, |
| { |
| "epoch": 13.360071301247771, |
| "grad_norm": 7.717449989286251e-06, |
| "learning_rate": 1.4998116791419342e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4642832, |
| "step": 7495 |
| }, |
| { |
| "epoch": 13.368983957219251, |
| "grad_norm": 7.5525867941905744e-06, |
| "learning_rate": 1.4962488013370329e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4646096, |
| "step": 7500 |
| }, |
| { |
| "epoch": 13.37789661319073, |
| "grad_norm": 7.96997392171761e-06, |
| "learning_rate": 1.492688352350344e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4648560, |
| "step": 7505 |
| }, |
| { |
| "epoch": 13.38680926916221, |
| "grad_norm": 7.938811904750764e-06, |
| "learning_rate": 1.4891303407972324e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4651920, |
| "step": 7510 |
| }, |
| { |
| "epoch": 13.39572192513369, |
| "grad_norm": 1.78286645677872e-05, |
| "learning_rate": 1.4855747752871657e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4654896, |
| "step": 7515 |
| }, |
| { |
| "epoch": 13.404634581105169, |
| "grad_norm": 6.955673597985879e-06, |
| "learning_rate": 1.4820216644236925e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4657872, |
| "step": 7520 |
| }, |
| { |
| "epoch": 13.41354723707665, |
| "grad_norm": 9.080586096388288e-06, |
| "learning_rate": 1.4784710168044213e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4660784, |
| "step": 7525 |
| }, |
| { |
| "epoch": 13.422459893048128, |
| "grad_norm": 2.6790774427354336e-05, |
| "learning_rate": 1.4749228410210017e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4664592, |
| "step": 7530 |
| }, |
| { |
| "epoch": 13.431372549019608, |
| "grad_norm": 6.748609393980587e-06, |
| "learning_rate": 1.4713771456590996e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4667664, |
| "step": 7535 |
| }, |
| { |
| "epoch": 13.440285204991087, |
| "grad_norm": 0.00010700252460082993, |
| "learning_rate": 1.4678339392983822e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4670032, |
| "step": 7540 |
| }, |
| { |
| "epoch": 13.449197860962567, |
| "grad_norm": 1.4548013496096246e-05, |
| "learning_rate": 1.4642932305124918e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4673648, |
| "step": 7545 |
| }, |
| { |
| "epoch": 13.458110516934047, |
| "grad_norm": 8.775423339102417e-06, |
| "learning_rate": 1.4607550278690262e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4676272, |
| "step": 7550 |
| }, |
| { |
| "epoch": 13.467023172905526, |
| "grad_norm": 9.47409171203617e-06, |
| "learning_rate": 1.4572193399295228e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4679664, |
| "step": 7555 |
| }, |
| { |
| "epoch": 13.475935828877006, |
| "grad_norm": 5.426706593425479e-06, |
| "learning_rate": 1.453686175249429e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4683376, |
| "step": 7560 |
| }, |
| { |
| "epoch": 13.484848484848484, |
| "grad_norm": 7.583116530440748e-05, |
| "learning_rate": 1.4501555423780899e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4686512, |
| "step": 7565 |
| }, |
| { |
| "epoch": 13.493761140819965, |
| "grad_norm": 3.9882790588308126e-05, |
| "learning_rate": 1.446627449858726e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4689680, |
| "step": 7570 |
| }, |
| { |
| "epoch": 13.502673796791443, |
| "grad_norm": 8.056603837758303e-05, |
| "learning_rate": 1.4431019062284057e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4693136, |
| "step": 7575 |
| }, |
| { |
| "epoch": 13.511586452762923, |
| "grad_norm": 1.796651486074552e-05, |
| "learning_rate": 1.4395789200180344e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4696752, |
| "step": 7580 |
| }, |
| { |
| "epoch": 13.520499108734402, |
| "grad_norm": 6.788226073695114e-06, |
| "learning_rate": 1.4360584997523252e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4699632, |
| "step": 7585 |
| }, |
| { |
| "epoch": 13.529411764705882, |
| "grad_norm": 8.783808880252764e-06, |
| "learning_rate": 1.4325406539497854e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4703152, |
| "step": 7590 |
| }, |
| { |
| "epoch": 13.538324420677363, |
| "grad_norm": 7.359179107879754e-06, |
| "learning_rate": 1.4290253911226919e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4706288, |
| "step": 7595 |
| }, |
| { |
| "epoch": 13.547237076648841, |
| "grad_norm": 5.950364084128523e-06, |
| "learning_rate": 1.425512719777071e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4709296, |
| "step": 7600 |
| }, |
| { |
| "epoch": 13.556149732620321, |
| "grad_norm": 8.201064702006988e-06, |
| "learning_rate": 1.4220026484126798e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4712272, |
| "step": 7605 |
| }, |
| { |
| "epoch": 13.5650623885918, |
| "grad_norm": 5.314827649272047e-05, |
| "learning_rate": 1.4184951855229805e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4715248, |
| "step": 7610 |
| }, |
| { |
| "epoch": 13.57397504456328, |
| "grad_norm": 3.716797073138878e-05, |
| "learning_rate": 1.414990339595127e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4717552, |
| "step": 7615 |
| }, |
| { |
| "epoch": 13.582887700534759, |
| "grad_norm": 7.371178980974946e-06, |
| "learning_rate": 1.411488119109941e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4721488, |
| "step": 7620 |
| }, |
| { |
| "epoch": 13.591800356506239, |
| "grad_norm": 8.358735613001045e-06, |
| "learning_rate": 1.4079885325418868e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4724560, |
| "step": 7625 |
| }, |
| { |
| "epoch": 13.60071301247772, |
| "grad_norm": 1.5506338968407363e-05, |
| "learning_rate": 1.4044915883590626e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4727408, |
| "step": 7630 |
| }, |
| { |
| "epoch": 13.609625668449198, |
| "grad_norm": 2.4985572963487357e-05, |
| "learning_rate": 1.4009972950231653e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4730672, |
| "step": 7635 |
| }, |
| { |
| "epoch": 13.618538324420678, |
| "grad_norm": 7.919730705907568e-06, |
| "learning_rate": 1.3975056609894819e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4734000, |
| "step": 7640 |
| }, |
| { |
| "epoch": 13.627450980392156, |
| "grad_norm": 7.953521162562538e-06, |
| "learning_rate": 1.3940166947068644e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4737136, |
| "step": 7645 |
| }, |
| { |
| "epoch": 13.636363636363637, |
| "grad_norm": 1.991149656532798e-05, |
| "learning_rate": 1.3905304046177065e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4739888, |
| "step": 7650 |
| }, |
| { |
| "epoch": 13.645276292335115, |
| "grad_norm": 1.0619149179547094e-05, |
| "learning_rate": 1.3870467991579284e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4742864, |
| "step": 7655 |
| }, |
| { |
| "epoch": 13.654188948306595, |
| "grad_norm": 6.993436727498192e-06, |
| "learning_rate": 1.3835658867569568e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4745808, |
| "step": 7660 |
| }, |
| { |
| "epoch": 13.663101604278076, |
| "grad_norm": 0.0001521021913504228, |
| "learning_rate": 1.3800876758376963e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4748592, |
| "step": 7665 |
| }, |
| { |
| "epoch": 13.672014260249554, |
| "grad_norm": 0.00011914921196876094, |
| "learning_rate": 1.3766121748165194e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4751184, |
| "step": 7670 |
| }, |
| { |
| "epoch": 13.680926916221035, |
| "grad_norm": 1.2205597158754244e-05, |
| "learning_rate": 1.3731393921032376e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4754288, |
| "step": 7675 |
| }, |
| { |
| "epoch": 13.689839572192513, |
| "grad_norm": 5.635377419821452e-06, |
| "learning_rate": 1.3696693361010871e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4757424, |
| "step": 7680 |
| }, |
| { |
| "epoch": 13.698752228163993, |
| "grad_norm": 1.6696209058864042e-05, |
| "learning_rate": 1.3662020152067061e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4761232, |
| "step": 7685 |
| }, |
| { |
| "epoch": 13.707664884135472, |
| "grad_norm": 8.318459549627732e-06, |
| "learning_rate": 1.362737437810114e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4764368, |
| "step": 7690 |
| }, |
| { |
| "epoch": 13.716577540106952, |
| "grad_norm": 4.31857151852455e-05, |
| "learning_rate": 1.3592756122946926e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4767472, |
| "step": 7695 |
| }, |
| { |
| "epoch": 13.72549019607843, |
| "grad_norm": 5.811690243717749e-06, |
| "learning_rate": 1.3558165470371623e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4770256, |
| "step": 7700 |
| }, |
| { |
| "epoch": 13.73440285204991, |
| "grad_norm": 1.465106015530182e-05, |
| "learning_rate": 1.3523602504075666e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4773296, |
| "step": 7705 |
| }, |
| { |
| "epoch": 13.743315508021391, |
| "grad_norm": 7.546026154159335e-06, |
| "learning_rate": 1.348906730769251e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4776400, |
| "step": 7710 |
| }, |
| { |
| "epoch": 13.75222816399287, |
| "grad_norm": 6.846257292636437e-06, |
| "learning_rate": 1.3454559964788355e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4780176, |
| "step": 7715 |
| }, |
| { |
| "epoch": 13.76114081996435, |
| "grad_norm": 6.956892320886254e-06, |
| "learning_rate": 1.3420080558862092e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4783536, |
| "step": 7720 |
| }, |
| { |
| "epoch": 13.770053475935828, |
| "grad_norm": 6.646995643677656e-06, |
| "learning_rate": 1.3385629173344927e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4786832, |
| "step": 7725 |
| }, |
| { |
| "epoch": 13.778966131907309, |
| "grad_norm": 8.099759725155309e-06, |
| "learning_rate": 1.335120589160031e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4789904, |
| "step": 7730 |
| }, |
| { |
| "epoch": 13.787878787878787, |
| "grad_norm": 8.979537597042508e-06, |
| "learning_rate": 1.3316810796923693e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4792848, |
| "step": 7735 |
| }, |
| { |
| "epoch": 13.796791443850267, |
| "grad_norm": 1.531767520646099e-05, |
| "learning_rate": 1.328244397254228e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4795888, |
| "step": 7740 |
| }, |
| { |
| "epoch": 13.805704099821746, |
| "grad_norm": 6.935872079338878e-06, |
| "learning_rate": 1.3248105501614897e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4798704, |
| "step": 7745 |
| }, |
| { |
| "epoch": 13.814616755793226, |
| "grad_norm": 7.608053238072898e-06, |
| "learning_rate": 1.3213795467231788e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4801744, |
| "step": 7750 |
| }, |
| { |
| "epoch": 13.823529411764707, |
| "grad_norm": 6.829433004895691e-06, |
| "learning_rate": 1.3179513952414332e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4804592, |
| "step": 7755 |
| }, |
| { |
| "epoch": 13.832442067736185, |
| "grad_norm": 1.5230633835017215e-05, |
| "learning_rate": 1.3145261040114944e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4807248, |
| "step": 7760 |
| }, |
| { |
| "epoch": 13.841354723707665, |
| "grad_norm": 6.265021966100903e-06, |
| "learning_rate": 1.3111036813216792e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4809776, |
| "step": 7765 |
| }, |
| { |
| "epoch": 13.850267379679144, |
| "grad_norm": 6.747121460648486e-06, |
| "learning_rate": 1.3076841354533658e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4813104, |
| "step": 7770 |
| }, |
| { |
| "epoch": 13.859180035650624, |
| "grad_norm": 7.226227353385184e-06, |
| "learning_rate": 1.3042674746809707e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4815984, |
| "step": 7775 |
| }, |
| { |
| "epoch": 13.868092691622103, |
| "grad_norm": 7.566853128082585e-06, |
| "learning_rate": 1.300853707271929e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4819312, |
| "step": 7780 |
| }, |
| { |
| "epoch": 13.877005347593583, |
| "grad_norm": 9.527244401397184e-06, |
| "learning_rate": 1.2974428414866752e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4822000, |
| "step": 7785 |
| }, |
| { |
| "epoch": 13.885918003565063, |
| "grad_norm": 7.2643560997676104e-06, |
| "learning_rate": 1.2940348855786208e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4825296, |
| "step": 7790 |
| }, |
| { |
| "epoch": 13.894830659536542, |
| "grad_norm": 6.629866220464464e-06, |
| "learning_rate": 1.2906298477941378e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4828848, |
| "step": 7795 |
| }, |
| { |
| "epoch": 13.903743315508022, |
| "grad_norm": 3.560426921467297e-05, |
| "learning_rate": 1.287227736372538e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4831696, |
| "step": 7800 |
| }, |
| { |
| "epoch": 13.9126559714795, |
| "grad_norm": 1.4213070244295523e-05, |
| "learning_rate": 1.2838285595460478e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4835152, |
| "step": 7805 |
| }, |
| { |
| "epoch": 13.92156862745098, |
| "grad_norm": 7.702679795329459e-06, |
| "learning_rate": 1.2804323255397996e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4837712, |
| "step": 7810 |
| }, |
| { |
| "epoch": 13.93048128342246, |
| "grad_norm": 7.759183972666506e-06, |
| "learning_rate": 1.2770390425717982e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4841264, |
| "step": 7815 |
| }, |
| { |
| "epoch": 13.93939393939394, |
| "grad_norm": 8.99410770216491e-06, |
| "learning_rate": 1.2736487188529112e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4844272, |
| "step": 7820 |
| }, |
| { |
| "epoch": 13.94830659536542, |
| "grad_norm": 1.2729369700537063e-05, |
| "learning_rate": 1.2702613625868459e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4847056, |
| "step": 7825 |
| }, |
| { |
| "epoch": 13.957219251336898, |
| "grad_norm": 7.109924354153918e-06, |
| "learning_rate": 1.2668769819701259e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4849232, |
| "step": 7830 |
| }, |
| { |
| "epoch": 13.966131907308379, |
| "grad_norm": 6.8033637035114225e-06, |
| "learning_rate": 1.2634955851920789e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4852272, |
| "step": 7835 |
| }, |
| { |
| "epoch": 13.975044563279857, |
| "grad_norm": 6.467252205766272e-06, |
| "learning_rate": 1.2601171804348085e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4855472, |
| "step": 7840 |
| }, |
| { |
| "epoch": 13.983957219251337, |
| "grad_norm": 6.207174010341987e-06, |
| "learning_rate": 1.2567417758731815e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4858064, |
| "step": 7845 |
| }, |
| { |
| "epoch": 13.992869875222816, |
| "grad_norm": 1.287949999095872e-05, |
| "learning_rate": 1.2533693796748041e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4860080, |
| "step": 7850 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.2638137936592102, |
| "eval_runtime": 4.5897, |
| "eval_samples_per_second": 54.252, |
| "eval_steps_per_second": 13.726, |
| "num_input_tokens_seen": 4861864, |
| "step": 7854 |
| }, |
| { |
| "epoch": 14.001782531194296, |
| "grad_norm": 7.107454621291254e-06, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4862344, |
| "step": 7855 |
| }, |
| { |
| "epoch": 14.010695187165775, |
| "grad_norm": 0.00010611310426611453, |
| "learning_rate": 1.2466336450017981e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4865608, |
| "step": 7860 |
| }, |
| { |
| "epoch": 14.019607843137255, |
| "grad_norm": 1.0504892998142168e-05, |
| "learning_rate": 1.243270322825908e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4868296, |
| "step": 7865 |
| }, |
| { |
| "epoch": 14.028520499108735, |
| "grad_norm": 1.3590751223091502e-05, |
| "learning_rate": 1.2399100416106964e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4871432, |
| "step": 7870 |
| }, |
| { |
| "epoch": 14.037433155080214, |
| "grad_norm": 7.295515842997702e-06, |
| "learning_rate": 1.236552809487177e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4874472, |
| "step": 7875 |
| }, |
| { |
| "epoch": 14.046345811051694, |
| "grad_norm": 9.071957720152568e-06, |
| "learning_rate": 1.2331986345789806e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4877736, |
| "step": 7880 |
| }, |
| { |
| "epoch": 14.055258467023172, |
| "grad_norm": 1.2070072443748359e-05, |
| "learning_rate": 1.2298475250023439e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4881064, |
| "step": 7885 |
| }, |
| { |
| "epoch": 14.064171122994653, |
| "grad_norm": 5.530808721232461e-06, |
| "learning_rate": 1.2264994888660846e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4883816, |
| "step": 7890 |
| }, |
| { |
| "epoch": 14.073083778966131, |
| "grad_norm": 6.749080512236105e-06, |
| "learning_rate": 1.2231545342715847e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4886920, |
| "step": 7895 |
| }, |
| { |
| "epoch": 14.081996434937611, |
| "grad_norm": 6.1865684983786196e-06, |
| "learning_rate": 1.2198126693127693e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4890120, |
| "step": 7900 |
| }, |
| { |
| "epoch": 14.090909090909092, |
| "grad_norm": 7.120606824173592e-06, |
| "learning_rate": 1.2164739020760854e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4893000, |
| "step": 7905 |
| }, |
| { |
| "epoch": 14.09982174688057, |
| "grad_norm": 6.07286438025767e-06, |
| "learning_rate": 1.2131382406404864e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4896904, |
| "step": 7910 |
| }, |
| { |
| "epoch": 14.10873440285205, |
| "grad_norm": 6.876835414004745e-06, |
| "learning_rate": 1.2098056930774116e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4900328, |
| "step": 7915 |
| }, |
| { |
| "epoch": 14.117647058823529, |
| "grad_norm": 0.00021053240925539285, |
| "learning_rate": 1.2064762674507607e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4903592, |
| "step": 7920 |
| }, |
| { |
| "epoch": 14.12655971479501, |
| "grad_norm": 6.274164206843125e-06, |
| "learning_rate": 1.2031499718168859e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4906312, |
| "step": 7925 |
| }, |
| { |
| "epoch": 14.135472370766488, |
| "grad_norm": 8.332051038451027e-06, |
| "learning_rate": 1.1998268142245598e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4909352, |
| "step": 7930 |
| }, |
| { |
| "epoch": 14.144385026737968, |
| "grad_norm": 1.3166509233997203e-05, |
| "learning_rate": 1.1965068027149643e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4911688, |
| "step": 7935 |
| }, |
| { |
| "epoch": 14.153297682709447, |
| "grad_norm": 6.029657015460543e-05, |
| "learning_rate": 1.1931899453216697e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4914376, |
| "step": 7940 |
| }, |
| { |
| "epoch": 14.162210338680927, |
| "grad_norm": 9.119195965467952e-06, |
| "learning_rate": 1.189876250070611e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4917352, |
| "step": 7945 |
| }, |
| { |
| "epoch": 14.171122994652407, |
| "grad_norm": 2.1003546862630174e-05, |
| "learning_rate": 1.1865657249800738e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4921096, |
| "step": 7950 |
| }, |
| { |
| "epoch": 14.180035650623886, |
| "grad_norm": 6.048298928362783e-06, |
| "learning_rate": 1.1832583780606726e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4924040, |
| "step": 7955 |
| }, |
| { |
| "epoch": 14.188948306595366, |
| "grad_norm": 6.6788943513529375e-06, |
| "learning_rate": 1.1799542173153314e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4927208, |
| "step": 7960 |
| }, |
| { |
| "epoch": 14.197860962566844, |
| "grad_norm": 6.807384579587961e-06, |
| "learning_rate": 1.176653250739265e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4929928, |
| "step": 7965 |
| }, |
| { |
| "epoch": 14.206773618538325, |
| "grad_norm": 6.744728580088122e-06, |
| "learning_rate": 1.173355486319957e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4932328, |
| "step": 7970 |
| }, |
| { |
| "epoch": 14.215686274509803, |
| "grad_norm": 9.249639333575033e-06, |
| "learning_rate": 1.1700609320371448e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4935272, |
| "step": 7975 |
| }, |
| { |
| "epoch": 14.224598930481283, |
| "grad_norm": 7.17133025318617e-06, |
| "learning_rate": 1.1667695958627974e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4938504, |
| "step": 7980 |
| }, |
| { |
| "epoch": 14.233511586452764, |
| "grad_norm": 6.4170390032813884e-06, |
| "learning_rate": 1.1634814857610968e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4942472, |
| "step": 7985 |
| }, |
| { |
| "epoch": 14.242424242424242, |
| "grad_norm": 8.718321623746306e-06, |
| "learning_rate": 1.1601966096884198e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4945800, |
| "step": 7990 |
| }, |
| { |
| "epoch": 14.251336898395722, |
| "grad_norm": 6.421875241358066e-06, |
| "learning_rate": 1.1569149755933147e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4948840, |
| "step": 7995 |
| }, |
| { |
| "epoch": 14.260249554367201, |
| "grad_norm": 1.2632759535335936e-05, |
| "learning_rate": 1.1536365914164882e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4952520, |
| "step": 8000 |
| }, |
| { |
| "epoch": 14.269162210338681, |
| "grad_norm": 7.619096777489176e-06, |
| "learning_rate": 1.1503614650907821e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4955880, |
| "step": 8005 |
| }, |
| { |
| "epoch": 14.27807486631016, |
| "grad_norm": 1.3213113561505452e-05, |
| "learning_rate": 1.1470896045411525e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4958760, |
| "step": 8010 |
| }, |
| { |
| "epoch": 14.28698752228164, |
| "grad_norm": 7.3548239925003145e-06, |
| "learning_rate": 1.1438210176846592e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4962088, |
| "step": 8015 |
| }, |
| { |
| "epoch": 14.29590017825312, |
| "grad_norm": 7.781805834383704e-06, |
| "learning_rate": 1.1405557124304337e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4965416, |
| "step": 8020 |
| }, |
| { |
| "epoch": 14.304812834224599, |
| "grad_norm": 5.628372491628397e-06, |
| "learning_rate": 1.137293696679671e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4968168, |
| "step": 8025 |
| }, |
| { |
| "epoch": 14.313725490196079, |
| "grad_norm": 4.754801011586096e-06, |
| "learning_rate": 1.134034978325606e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4972200, |
| "step": 8030 |
| }, |
| { |
| "epoch": 14.322638146167558, |
| "grad_norm": 2.6338497264077887e-05, |
| "learning_rate": 1.1307795652534923e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4974696, |
| "step": 8035 |
| }, |
| { |
| "epoch": 14.331550802139038, |
| "grad_norm": 6.248131830943748e-05, |
| "learning_rate": 1.1275274653405885e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4977576, |
| "step": 8040 |
| }, |
| { |
| "epoch": 14.340463458110516, |
| "grad_norm": 6.767934792151209e-06, |
| "learning_rate": 1.1242786864561344e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4980712, |
| "step": 8045 |
| }, |
| { |
| "epoch": 14.349376114081997, |
| "grad_norm": 2.475587280059699e-05, |
| "learning_rate": 1.121033236461335e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4983976, |
| "step": 8050 |
| }, |
| { |
| "epoch": 14.358288770053475, |
| "grad_norm": 7.764555448375177e-06, |
| "learning_rate": 1.1177911232093403e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4987304, |
| "step": 8055 |
| }, |
| { |
| "epoch": 14.367201426024955, |
| "grad_norm": 1.8427512259222567e-05, |
| "learning_rate": 1.1145523545452235e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4990504, |
| "step": 8060 |
| }, |
| { |
| "epoch": 14.376114081996436, |
| "grad_norm": 6.077263151382795e-06, |
| "learning_rate": 1.1113169383059682e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4993032, |
| "step": 8065 |
| }, |
| { |
| "epoch": 14.385026737967914, |
| "grad_norm": 7.569628451165045e-06, |
| "learning_rate": 1.1080848823204445e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4995976, |
| "step": 8070 |
| }, |
| { |
| "epoch": 14.393939393939394, |
| "grad_norm": 8.515498848282732e-06, |
| "learning_rate": 1.1048561944093914e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 4999624, |
| "step": 8075 |
| }, |
| { |
| "epoch": 14.402852049910873, |
| "grad_norm": 7.329627806029748e-06, |
| "learning_rate": 1.1016308823853996e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5001928, |
| "step": 8080 |
| }, |
| { |
| "epoch": 14.411764705882353, |
| "grad_norm": 0.00010971013398375362, |
| "learning_rate": 1.0984089540528878e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5005832, |
| "step": 8085 |
| }, |
| { |
| "epoch": 14.420677361853832, |
| "grad_norm": 5.8523328334558755e-06, |
| "learning_rate": 1.0951904172080896e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5008712, |
| "step": 8090 |
| }, |
| { |
| "epoch": 14.429590017825312, |
| "grad_norm": 5.586658062384231e-06, |
| "learning_rate": 1.0919752796390328e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5011720, |
| "step": 8095 |
| }, |
| { |
| "epoch": 14.43850267379679, |
| "grad_norm": 6.4028436099761166e-06, |
| "learning_rate": 1.0887635491255158e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5015240, |
| "step": 8100 |
| }, |
| { |
| "epoch": 14.44741532976827, |
| "grad_norm": 7.400541562674334e-06, |
| "learning_rate": 1.085555233439099e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5019080, |
| "step": 8105 |
| }, |
| { |
| "epoch": 14.456327985739751, |
| "grad_norm": 7.24855044609285e-06, |
| "learning_rate": 1.0823503403430734e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5023304, |
| "step": 8110 |
| }, |
| { |
| "epoch": 14.46524064171123, |
| "grad_norm": 5.581604000326479e-06, |
| "learning_rate": 1.0791488775924522e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5026728, |
| "step": 8115 |
| }, |
| { |
| "epoch": 14.47415329768271, |
| "grad_norm": 2.331694668100681e-05, |
| "learning_rate": 1.0759508529339479e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5029800, |
| "step": 8120 |
| }, |
| { |
| "epoch": 14.483065953654188, |
| "grad_norm": 1.1446799362602178e-05, |
| "learning_rate": 1.072756274105951e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5032776, |
| "step": 8125 |
| }, |
| { |
| "epoch": 14.491978609625669, |
| "grad_norm": 6.0211009440536145e-06, |
| "learning_rate": 1.0695651488385166e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5036264, |
| "step": 8130 |
| }, |
| { |
| "epoch": 14.500891265597147, |
| "grad_norm": 5.3520207075052895e-06, |
| "learning_rate": 1.0663774848533425e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5038824, |
| "step": 8135 |
| }, |
| { |
| "epoch": 14.509803921568627, |
| "grad_norm": 1.092964976123767e-05, |
| "learning_rate": 1.0631932898637503e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5041928, |
| "step": 8140 |
| }, |
| { |
| "epoch": 14.518716577540108, |
| "grad_norm": 0.00015407356841024011, |
| "learning_rate": 1.0600125715746695e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5045000, |
| "step": 8145 |
| }, |
| { |
| "epoch": 14.527629233511586, |
| "grad_norm": 5.3865824156673625e-05, |
| "learning_rate": 1.0568353376826134e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5047848, |
| "step": 8150 |
| }, |
| { |
| "epoch": 14.536541889483066, |
| "grad_norm": 5.537413926504087e-06, |
| "learning_rate": 1.0536615958756669e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5051400, |
| "step": 8155 |
| }, |
| { |
| "epoch": 14.545454545454545, |
| "grad_norm": 6.4448718148923945e-06, |
| "learning_rate": 1.050491353833464e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5054184, |
| "step": 8160 |
| }, |
| { |
| "epoch": 14.554367201426025, |
| "grad_norm": 5.720929948438425e-06, |
| "learning_rate": 1.0473246192271704e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5057064, |
| "step": 8165 |
| }, |
| { |
| "epoch": 14.563279857397504, |
| "grad_norm": 5.878225692868e-06, |
| "learning_rate": 1.0441613997194654e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5060232, |
| "step": 8170 |
| }, |
| { |
| "epoch": 14.572192513368984, |
| "grad_norm": 7.7972963481443e-06, |
| "learning_rate": 1.0410017029645203e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5062856, |
| "step": 8175 |
| }, |
| { |
| "epoch": 14.581105169340464, |
| "grad_norm": 0.0001732358941808343, |
| "learning_rate": 1.0378455366079843e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5065960, |
| "step": 8180 |
| }, |
| { |
| "epoch": 14.590017825311943, |
| "grad_norm": 1.0606602700136136e-05, |
| "learning_rate": 1.0346929082869641e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5068808, |
| "step": 8185 |
| }, |
| { |
| "epoch": 14.598930481283423, |
| "grad_norm": 9.733703336678445e-06, |
| "learning_rate": 1.0315438256300025e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5071848, |
| "step": 8190 |
| }, |
| { |
| "epoch": 14.607843137254902, |
| "grad_norm": 4.52782041975297e-05, |
| "learning_rate": 1.0283982962570682e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5075464, |
| "step": 8195 |
| }, |
| { |
| "epoch": 14.616755793226382, |
| "grad_norm": 6.3549205151502974e-06, |
| "learning_rate": 1.0252563277795254e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5077864, |
| "step": 8200 |
| }, |
| { |
| "epoch": 14.62566844919786, |
| "grad_norm": 5.957157554803416e-06, |
| "learning_rate": 1.0221179278001264e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5080264, |
| "step": 8205 |
| }, |
| { |
| "epoch": 14.63458110516934, |
| "grad_norm": 8.766522114456166e-06, |
| "learning_rate": 1.0189831039129876e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5083816, |
| "step": 8210 |
| }, |
| { |
| "epoch": 14.643493761140821, |
| "grad_norm": 7.778281542414334e-06, |
| "learning_rate": 1.0158518637035704e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5086984, |
| "step": 8215 |
| }, |
| { |
| "epoch": 14.6524064171123, |
| "grad_norm": 2.461150143062696e-05, |
| "learning_rate": 1.0127242147486668e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5089672, |
| "step": 8220 |
| }, |
| { |
| "epoch": 14.66131907308378, |
| "grad_norm": 1.6552379747736268e-05, |
| "learning_rate": 1.0096001646163777e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5092872, |
| "step": 8225 |
| }, |
| { |
| "epoch": 14.670231729055258, |
| "grad_norm": 4.991354580852203e-05, |
| "learning_rate": 1.0064797208660967e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5095752, |
| "step": 8230 |
| }, |
| { |
| "epoch": 14.679144385026738, |
| "grad_norm": 7.476746759493835e-06, |
| "learning_rate": 1.003362891048491e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5098728, |
| "step": 8235 |
| }, |
| { |
| "epoch": 14.688057040998217, |
| "grad_norm": 5.878478077647742e-06, |
| "learning_rate": 1.0002496827054805e-05, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5101992, |
| "step": 8240 |
| }, |
| { |
| "epoch": 14.696969696969697, |
| "grad_norm": 3.9587441278854385e-05, |
| "learning_rate": 9.971401033702249e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5105288, |
| "step": 8245 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 7.687859579164069e-06, |
| "learning_rate": 9.94034160567102e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5108776, |
| "step": 8250 |
| }, |
| { |
| "epoch": 14.714795008912656, |
| "grad_norm": 5.535346190299606e-06, |
| "learning_rate": 9.909318618116892e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5112168, |
| "step": 8255 |
| }, |
| { |
| "epoch": 14.723707664884136, |
| "grad_norm": 1.7241862224182114e-05, |
| "learning_rate": 9.87833214610748e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5115464, |
| "step": 8260 |
| }, |
| { |
| "epoch": 14.732620320855615, |
| "grad_norm": 6.819671853008913e-06, |
| "learning_rate": 9.847382264622016e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5118216, |
| "step": 8265 |
| }, |
| { |
| "epoch": 14.741532976827095, |
| "grad_norm": 5.388214503909694e-06, |
| "learning_rate": 9.81646904855121e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5120552, |
| "step": 8270 |
| }, |
| { |
| "epoch": 14.750445632798574, |
| "grad_norm": 1.2260394214536063e-05, |
| "learning_rate": 9.785592572697058e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5124008, |
| "step": 8275 |
| }, |
| { |
| "epoch": 14.759358288770054, |
| "grad_norm": 5.739399512094678e-06, |
| "learning_rate": 9.754752911772616e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5127624, |
| "step": 8280 |
| }, |
| { |
| "epoch": 14.768270944741532, |
| "grad_norm": 6.3006323216541205e-06, |
| "learning_rate": 9.723950140401922e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5130504, |
| "step": 8285 |
| }, |
| { |
| "epoch": 14.777183600713013, |
| "grad_norm": 5.688974397344282e-06, |
| "learning_rate": 9.693184333119681e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5133288, |
| "step": 8290 |
| }, |
| { |
| "epoch": 14.786096256684491, |
| "grad_norm": 5.98324504608172e-06, |
| "learning_rate": 9.662455564371203e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5136648, |
| "step": 8295 |
| }, |
| { |
| "epoch": 14.795008912655971, |
| "grad_norm": 5.788949692941969e-06, |
| "learning_rate": 9.631763908512164e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5139336, |
| "step": 8300 |
| }, |
| { |
| "epoch": 14.803921568627452, |
| "grad_norm": 5.5504601732536685e-06, |
| "learning_rate": 9.601109439808412e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5141832, |
| "step": 8305 |
| }, |
| { |
| "epoch": 14.81283422459893, |
| "grad_norm": 5.692533250112319e-06, |
| "learning_rate": 9.57049223243584e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5144712, |
| "step": 8310 |
| }, |
| { |
| "epoch": 14.82174688057041, |
| "grad_norm": 6.114484222052852e-06, |
| "learning_rate": 9.53991236048017e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5148232, |
| "step": 8315 |
| }, |
| { |
| "epoch": 14.830659536541889, |
| "grad_norm": 7.69770394981606e-06, |
| "learning_rate": 9.509369897936779e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5151080, |
| "step": 8320 |
| }, |
| { |
| "epoch": 14.83957219251337, |
| "grad_norm": 1.0251422281726263e-05, |
| "learning_rate": 9.478864918710534e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5153864, |
| "step": 8325 |
| }, |
| { |
| "epoch": 14.848484848484848, |
| "grad_norm": 2.408368163742125e-05, |
| "learning_rate": 9.448397496615574e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5157736, |
| "step": 8330 |
| }, |
| { |
| "epoch": 14.857397504456328, |
| "grad_norm": 6.29090072834515e-06, |
| "learning_rate": 9.417967705375186e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5160360, |
| "step": 8335 |
| }, |
| { |
| "epoch": 14.866310160427808, |
| "grad_norm": 8.154268471116666e-06, |
| "learning_rate": 9.387575618621597e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5163656, |
| "step": 8340 |
| }, |
| { |
| "epoch": 14.875222816399287, |
| "grad_norm": 8.839137080940418e-06, |
| "learning_rate": 9.357221309895786e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5167336, |
| "step": 8345 |
| }, |
| { |
| "epoch": 14.884135472370767, |
| "grad_norm": 7.0688442974642385e-06, |
| "learning_rate": 9.326904852647344e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5170408, |
| "step": 8350 |
| }, |
| { |
| "epoch": 14.893048128342246, |
| "grad_norm": 9.05954402696807e-06, |
| "learning_rate": 9.29662632023423e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5173192, |
| "step": 8355 |
| }, |
| { |
| "epoch": 14.901960784313726, |
| "grad_norm": 5.333501576387789e-06, |
| "learning_rate": 9.266385785922672e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5177160, |
| "step": 8360 |
| }, |
| { |
| "epoch": 14.910873440285204, |
| "grad_norm": 6.870981906104134e-06, |
| "learning_rate": 9.236183322886945e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5181064, |
| "step": 8365 |
| }, |
| { |
| "epoch": 14.919786096256685, |
| "grad_norm": 5.239366601017537e-06, |
| "learning_rate": 9.206019004209171e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5184264, |
| "step": 8370 |
| }, |
| { |
| "epoch": 14.928698752228165, |
| "grad_norm": 1.6661275367368944e-05, |
| "learning_rate": 9.175892902879232e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5187048, |
| "step": 8375 |
| }, |
| { |
| "epoch": 14.937611408199643, |
| "grad_norm": 8.331326171173714e-06, |
| "learning_rate": 9.145805091794473e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5189928, |
| "step": 8380 |
| }, |
| { |
| "epoch": 14.946524064171124, |
| "grad_norm": 8.945467925514095e-06, |
| "learning_rate": 9.115755643759621e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5192392, |
| "step": 8385 |
| }, |
| { |
| "epoch": 14.955436720142602, |
| "grad_norm": 1.2952526049048174e-05, |
| "learning_rate": 9.085744631486573e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5195720, |
| "step": 8390 |
| }, |
| { |
| "epoch": 14.964349376114082, |
| "grad_norm": 5.643107670039171e-06, |
| "learning_rate": 9.0557721275942e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5198536, |
| "step": 8395 |
| }, |
| { |
| "epoch": 14.973262032085561, |
| "grad_norm": 6.707574357278645e-06, |
| "learning_rate": 9.025838204608215e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5201288, |
| "step": 8400 |
| }, |
| { |
| "epoch": 14.982174688057041, |
| "grad_norm": 1.741484084050171e-05, |
| "learning_rate": 8.995942934960964e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5204520, |
| "step": 8405 |
| }, |
| { |
| "epoch": 14.99108734402852, |
| "grad_norm": 6.384299922501668e-06, |
| "learning_rate": 8.966086390991266e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5207688, |
| "step": 8410 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 6.7942432906420436e-06, |
| "learning_rate": 8.936268644944246e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5210208, |
| "step": 8415 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.26385584473609924, |
| "eval_runtime": 4.5846, |
| "eval_samples_per_second": 54.313, |
| "eval_steps_per_second": 13.742, |
| "num_input_tokens_seen": 5210208, |
| "step": 8415 |
| }, |
| { |
| "epoch": 15.00891265597148, |
| "grad_norm": 7.896741408330854e-06, |
| "learning_rate": 8.906489768971113e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5213600, |
| "step": 8420 |
| }, |
| { |
| "epoch": 15.017825311942959, |
| "grad_norm": 4.688414264819585e-05, |
| "learning_rate": 8.876749835129053e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5216704, |
| "step": 8425 |
| }, |
| { |
| "epoch": 15.026737967914439, |
| "grad_norm": 5.828882876812713e-06, |
| "learning_rate": 8.847048915381011e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5219456, |
| "step": 8430 |
| }, |
| { |
| "epoch": 15.035650623885918, |
| "grad_norm": 5.714399321732344e-06, |
| "learning_rate": 8.817387081595532e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5223008, |
| "step": 8435 |
| }, |
| { |
| "epoch": 15.044563279857398, |
| "grad_norm": 0.00012872931256424636, |
| "learning_rate": 8.787764405546584e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5226496, |
| "step": 8440 |
| }, |
| { |
| "epoch": 15.053475935828876, |
| "grad_norm": 8.361628715647385e-06, |
| "learning_rate": 8.758180958913362e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5229440, |
| "step": 8445 |
| }, |
| { |
| "epoch": 15.062388591800357, |
| "grad_norm": 5.478733783093048e-06, |
| "learning_rate": 8.728636813280163e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5232704, |
| "step": 8450 |
| }, |
| { |
| "epoch": 15.071301247771837, |
| "grad_norm": 1.3515757927962113e-05, |
| "learning_rate": 8.699132040136186e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5235168, |
| "step": 8455 |
| }, |
| { |
| "epoch": 15.080213903743315, |
| "grad_norm": 4.47141701442888e-06, |
| "learning_rate": 8.669666710875318e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5238656, |
| "step": 8460 |
| }, |
| { |
| "epoch": 15.089126559714796, |
| "grad_norm": 1.1040413482987788e-05, |
| "learning_rate": 8.640240896796074e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5242240, |
| "step": 8465 |
| }, |
| { |
| "epoch": 15.098039215686274, |
| "grad_norm": 8.942615750129335e-06, |
| "learning_rate": 8.61085466910128e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5245568, |
| "step": 8470 |
| }, |
| { |
| "epoch": 15.106951871657754, |
| "grad_norm": 1.3127410966262687e-05, |
| "learning_rate": 8.581508098898011e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5248384, |
| "step": 8475 |
| }, |
| { |
| "epoch": 15.115864527629233, |
| "grad_norm": 5.364198386814678e-06, |
| "learning_rate": 8.552201257197389e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5251744, |
| "step": 8480 |
| }, |
| { |
| "epoch": 15.124777183600713, |
| "grad_norm": 4.796311259269714e-06, |
| "learning_rate": 8.522934214914372e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5254592, |
| "step": 8485 |
| }, |
| { |
| "epoch": 15.133689839572192, |
| "grad_norm": 7.995293344720267e-06, |
| "learning_rate": 8.493707042867633e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5257312, |
| "step": 8490 |
| }, |
| { |
| "epoch": 15.142602495543672, |
| "grad_norm": 1.153885659732623e-05, |
| "learning_rate": 8.464519811779367e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5260320, |
| "step": 8495 |
| }, |
| { |
| "epoch": 15.151515151515152, |
| "grad_norm": 5.2959899221605156e-06, |
| "learning_rate": 8.43537259227513e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5263744, |
| "step": 8500 |
| }, |
| { |
| "epoch": 15.16042780748663, |
| "grad_norm": 5.650913590216078e-05, |
| "learning_rate": 8.406265454883649e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5267328, |
| "step": 8505 |
| }, |
| { |
| "epoch": 15.169340463458111, |
| "grad_norm": 8.47045703267213e-06, |
| "learning_rate": 8.37719847003666e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5270688, |
| "step": 8510 |
| }, |
| { |
| "epoch": 15.17825311942959, |
| "grad_norm": 6.704518909828039e-06, |
| "learning_rate": 8.348171708068747e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5273728, |
| "step": 8515 |
| }, |
| { |
| "epoch": 15.18716577540107, |
| "grad_norm": 5.129308192408644e-06, |
| "learning_rate": 8.31918523921717e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5276288, |
| "step": 8520 |
| }, |
| { |
| "epoch": 15.196078431372548, |
| "grad_norm": 4.8411061470687855e-06, |
| "learning_rate": 8.29023913362168e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5279136, |
| "step": 8525 |
| }, |
| { |
| "epoch": 15.204991087344029, |
| "grad_norm": 8.145854735630564e-06, |
| "learning_rate": 8.261333461324372e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5281376, |
| "step": 8530 |
| }, |
| { |
| "epoch": 15.213903743315509, |
| "grad_norm": 9.624214726500213e-06, |
| "learning_rate": 8.23246829226948e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5283584, |
| "step": 8535 |
| }, |
| { |
| "epoch": 15.222816399286987, |
| "grad_norm": 0.00012442510342225432, |
| "learning_rate": 8.203643696303255e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5286976, |
| "step": 8540 |
| }, |
| { |
| "epoch": 15.231729055258468, |
| "grad_norm": 4.386363798403181e-06, |
| "learning_rate": 8.174859743173765e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5290208, |
| "step": 8545 |
| }, |
| { |
| "epoch": 15.240641711229946, |
| "grad_norm": 9.525587302050553e-06, |
| "learning_rate": 8.146116502530709e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5293280, |
| "step": 8550 |
| }, |
| { |
| "epoch": 15.249554367201426, |
| "grad_norm": 7.593696409458062e-06, |
| "learning_rate": 8.117414043925322e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5296288, |
| "step": 8555 |
| }, |
| { |
| "epoch": 15.258467023172905, |
| "grad_norm": 1.2609904842975084e-05, |
| "learning_rate": 8.08875243681011e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5299488, |
| "step": 8560 |
| }, |
| { |
| "epoch": 15.267379679144385, |
| "grad_norm": 1.8711343727773055e-05, |
| "learning_rate": 8.06013175053875e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5302880, |
| "step": 8565 |
| }, |
| { |
| "epoch": 15.276292335115864, |
| "grad_norm": 5.2512627917167265e-06, |
| "learning_rate": 8.031552054365903e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5305792, |
| "step": 8570 |
| }, |
| { |
| "epoch": 15.285204991087344, |
| "grad_norm": 5.599639735009987e-06, |
| "learning_rate": 8.003013417447034e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5308384, |
| "step": 8575 |
| }, |
| { |
| "epoch": 15.294117647058824, |
| "grad_norm": 2.4499842766090296e-05, |
| "learning_rate": 7.974515908838259e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5310816, |
| "step": 8580 |
| }, |
| { |
| "epoch": 15.303030303030303, |
| "grad_norm": 5.107368451717775e-06, |
| "learning_rate": 7.94605959749618e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5313440, |
| "step": 8585 |
| }, |
| { |
| "epoch": 15.311942959001783, |
| "grad_norm": 4.877272203884786e-06, |
| "learning_rate": 7.917644552277708e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5316416, |
| "step": 8590 |
| }, |
| { |
| "epoch": 15.320855614973262, |
| "grad_norm": 6.525916433020029e-06, |
| "learning_rate": 7.889270841939908e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5318752, |
| "step": 8595 |
| }, |
| { |
| "epoch": 15.329768270944742, |
| "grad_norm": 6.321250566543313e-06, |
| "learning_rate": 7.860938535139805e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5322112, |
| "step": 8600 |
| }, |
| { |
| "epoch": 15.33868092691622, |
| "grad_norm": 5.054580469732173e-06, |
| "learning_rate": 7.832647700434257e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5325120, |
| "step": 8605 |
| }, |
| { |
| "epoch": 15.3475935828877, |
| "grad_norm": 5.5520063142466825e-06, |
| "learning_rate": 7.804398406279764e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5328960, |
| "step": 8610 |
| }, |
| { |
| "epoch": 15.35650623885918, |
| "grad_norm": 5.534681804419961e-06, |
| "learning_rate": 7.776190721032312e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5332064, |
| "step": 8615 |
| }, |
| { |
| "epoch": 15.36541889483066, |
| "grad_norm": 6.235063210624503e-06, |
| "learning_rate": 7.748024712947205e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5336000, |
| "step": 8620 |
| }, |
| { |
| "epoch": 15.37433155080214, |
| "grad_norm": 6.73488329994143e-06, |
| "learning_rate": 7.719900450178882e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5338944, |
| "step": 8625 |
| }, |
| { |
| "epoch": 15.383244206773618, |
| "grad_norm": 7.579846715088934e-05, |
| "learning_rate": 7.691818000780796e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5342176, |
| "step": 8630 |
| }, |
| { |
| "epoch": 15.392156862745098, |
| "grad_norm": 9.930758096743375e-06, |
| "learning_rate": 7.663777432705207e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5345376, |
| "step": 8635 |
| }, |
| { |
| "epoch": 15.401069518716577, |
| "grad_norm": 5.044625140726566e-06, |
| "learning_rate": 7.635778813803018e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5348544, |
| "step": 8640 |
| }, |
| { |
| "epoch": 15.409982174688057, |
| "grad_norm": 5.212654741626466e-06, |
| "learning_rate": 7.607822211823673e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5351904, |
| "step": 8645 |
| }, |
| { |
| "epoch": 15.418894830659536, |
| "grad_norm": 5.073397915111855e-06, |
| "learning_rate": 7.579907694414892e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5354560, |
| "step": 8650 |
| }, |
| { |
| "epoch": 15.427807486631016, |
| "grad_norm": 5.894227797398344e-06, |
| "learning_rate": 7.552035329122592e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5358368, |
| "step": 8655 |
| }, |
| { |
| "epoch": 15.436720142602496, |
| "grad_norm": 7.546342658315552e-06, |
| "learning_rate": 7.524205183390698e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5361856, |
| "step": 8660 |
| }, |
| { |
| "epoch": 15.445632798573975, |
| "grad_norm": 5.361746843846049e-06, |
| "learning_rate": 7.49641732456094e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5364704, |
| "step": 8665 |
| }, |
| { |
| "epoch": 15.454545454545455, |
| "grad_norm": 4.600000920618186e-06, |
| "learning_rate": 7.46867181987276e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5367200, |
| "step": 8670 |
| }, |
| { |
| "epoch": 15.463458110516934, |
| "grad_norm": 5.97817688685609e-06, |
| "learning_rate": 7.4409687364631e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5369920, |
| "step": 8675 |
| }, |
| { |
| "epoch": 15.472370766488414, |
| "grad_norm": 5.486062946147285e-06, |
| "learning_rate": 7.413308141366254e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5373728, |
| "step": 8680 |
| }, |
| { |
| "epoch": 15.481283422459892, |
| "grad_norm": 6.220386694621993e-06, |
| "learning_rate": 7.385690101513715e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5376736, |
| "step": 8685 |
| }, |
| { |
| "epoch": 15.490196078431373, |
| "grad_norm": 1.1094513865828048e-05, |
| "learning_rate": 7.358114683733977e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5379616, |
| "step": 8690 |
| }, |
| { |
| "epoch": 15.499108734402853, |
| "grad_norm": 6.627035872952547e-06, |
| "learning_rate": 7.330581954752427e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5383104, |
| "step": 8695 |
| }, |
| { |
| "epoch": 15.508021390374331, |
| "grad_norm": 3.4417731512803584e-05, |
| "learning_rate": 7.303091981191141e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5386688, |
| "step": 8700 |
| }, |
| { |
| "epoch": 15.516934046345812, |
| "grad_norm": 4.375489425001433e-06, |
| "learning_rate": 7.275644829568748e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5389760, |
| "step": 8705 |
| }, |
| { |
| "epoch": 15.52584670231729, |
| "grad_norm": 5.563930699281627e-06, |
| "learning_rate": 7.248240566300257e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5392928, |
| "step": 8710 |
| }, |
| { |
| "epoch": 15.53475935828877, |
| "grad_norm": 9.789599062060006e-06, |
| "learning_rate": 7.220879257696883e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5395104, |
| "step": 8715 |
| }, |
| { |
| "epoch": 15.543672014260249, |
| "grad_norm": 5.930739462201018e-06, |
| "learning_rate": 7.1935609699659236e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5398336, |
| "step": 8720 |
| }, |
| { |
| "epoch": 15.55258467023173, |
| "grad_norm": 8.74727174959844e-06, |
| "learning_rate": 7.166285769210568e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5401696, |
| "step": 8725 |
| }, |
| { |
| "epoch": 15.56149732620321, |
| "grad_norm": 4.707947937276913e-06, |
| "learning_rate": 7.139053721429728e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5404736, |
| "step": 8730 |
| }, |
| { |
| "epoch": 15.570409982174688, |
| "grad_norm": 7.5294760790711734e-06, |
| "learning_rate": 7.111864892517944e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5407904, |
| "step": 8735 |
| }, |
| { |
| "epoch": 15.579322638146168, |
| "grad_norm": 1.3512942132365424e-05, |
| "learning_rate": 7.0847193482651234e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5411296, |
| "step": 8740 |
| }, |
| { |
| "epoch": 15.588235294117647, |
| "grad_norm": 4.419291144586168e-06, |
| "learning_rate": 7.057617154356469e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5414592, |
| "step": 8745 |
| }, |
| { |
| "epoch": 15.597147950089127, |
| "grad_norm": 5.332386990630766e-06, |
| "learning_rate": 7.030558376372284e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5417504, |
| "step": 8750 |
| }, |
| { |
| "epoch": 15.606060606060606, |
| "grad_norm": 1.6384390619350597e-05, |
| "learning_rate": 7.0035430797877974e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5420256, |
| "step": 8755 |
| }, |
| { |
| "epoch": 15.614973262032086, |
| "grad_norm": 7.927361366455443e-06, |
| "learning_rate": 6.976571329973044e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5423264, |
| "step": 8760 |
| }, |
| { |
| "epoch": 15.623885918003564, |
| "grad_norm": 4.236151653458364e-05, |
| "learning_rate": 6.949643192192678e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5426528, |
| "step": 8765 |
| }, |
| { |
| "epoch": 15.632798573975045, |
| "grad_norm": 5.545013664232101e-06, |
| "learning_rate": 6.922758731605833e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5429600, |
| "step": 8770 |
| }, |
| { |
| "epoch": 15.641711229946525, |
| "grad_norm": 7.829821697669104e-05, |
| "learning_rate": 6.8959180132659475e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5432704, |
| "step": 8775 |
| }, |
| { |
| "epoch": 15.650623885918003, |
| "grad_norm": 2.0672809114330448e-05, |
| "learning_rate": 6.869121102120607e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5435968, |
| "step": 8780 |
| }, |
| { |
| "epoch": 15.659536541889484, |
| "grad_norm": 6.136841875559185e-06, |
| "learning_rate": 6.842368063011406e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5438496, |
| "step": 8785 |
| }, |
| { |
| "epoch": 15.668449197860962, |
| "grad_norm": 6.172531357151456e-06, |
| "learning_rate": 6.815658960673782e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5441536, |
| "step": 8790 |
| }, |
| { |
| "epoch": 15.677361853832442, |
| "grad_norm": 8.231077117670793e-06, |
| "learning_rate": 6.7889938597368505e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5444672, |
| "step": 8795 |
| }, |
| { |
| "epoch": 15.686274509803921, |
| "grad_norm": 7.102705694705946e-06, |
| "learning_rate": 6.762372824723265e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5447712, |
| "step": 8800 |
| }, |
| { |
| "epoch": 15.695187165775401, |
| "grad_norm": 5.1239135245850775e-06, |
| "learning_rate": 6.735795920049026e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5450720, |
| "step": 8805 |
| }, |
| { |
| "epoch": 15.70409982174688, |
| "grad_norm": 6.167297215142753e-06, |
| "learning_rate": 6.709263210023375e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5453760, |
| "step": 8810 |
| }, |
| { |
| "epoch": 15.71301247771836, |
| "grad_norm": 6.101850885897875e-06, |
| "learning_rate": 6.682774758848618e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5456256, |
| "step": 8815 |
| }, |
| { |
| "epoch": 15.72192513368984, |
| "grad_norm": 0.00011210032243980095, |
| "learning_rate": 6.656330630619925e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5459488, |
| "step": 8820 |
| }, |
| { |
| "epoch": 15.730837789661319, |
| "grad_norm": 5.509222319233231e-06, |
| "learning_rate": 6.629930889325278e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5462848, |
| "step": 8825 |
| }, |
| { |
| "epoch": 15.739750445632799, |
| "grad_norm": 5.822642378916498e-06, |
| "learning_rate": 6.603575598845196e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5465760, |
| "step": 8830 |
| }, |
| { |
| "epoch": 15.748663101604278, |
| "grad_norm": 5.75665217183996e-06, |
| "learning_rate": 6.577264822952675e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5469440, |
| "step": 8835 |
| }, |
| { |
| "epoch": 15.757575757575758, |
| "grad_norm": 3.2189564080908895e-05, |
| "learning_rate": 6.550998625312987e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5472064, |
| "step": 8840 |
| }, |
| { |
| "epoch": 15.766488413547236, |
| "grad_norm": 4.856089435634203e-05, |
| "learning_rate": 6.524777069483526e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5474784, |
| "step": 8845 |
| }, |
| { |
| "epoch": 15.775401069518717, |
| "grad_norm": 4.835144864046015e-06, |
| "learning_rate": 6.498600218913678e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5478048, |
| "step": 8850 |
| }, |
| { |
| "epoch": 15.784313725490197, |
| "grad_norm": 1.2231737855472602e-05, |
| "learning_rate": 6.472468136944648e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5481056, |
| "step": 8855 |
| }, |
| { |
| "epoch": 15.793226381461675, |
| "grad_norm": 5.054258053860394e-06, |
| "learning_rate": 6.446380886809314e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5484320, |
| "step": 8860 |
| }, |
| { |
| "epoch": 15.802139037433156, |
| "grad_norm": 5.58068131795153e-06, |
| "learning_rate": 6.420338531632078e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5487424, |
| "step": 8865 |
| }, |
| { |
| "epoch": 15.811051693404634, |
| "grad_norm": 6.2250314840639476e-06, |
| "learning_rate": 6.394341134428691e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5491456, |
| "step": 8870 |
| }, |
| { |
| "epoch": 15.819964349376114, |
| "grad_norm": 0.00017160887364298105, |
| "learning_rate": 6.368388758106133e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5493920, |
| "step": 8875 |
| }, |
| { |
| "epoch": 15.828877005347593, |
| "grad_norm": 4.926492692902684e-06, |
| "learning_rate": 6.342481465462441e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5496992, |
| "step": 8880 |
| }, |
| { |
| "epoch": 15.837789661319073, |
| "grad_norm": 6.23631831331295e-06, |
| "learning_rate": 6.316619319186562e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5500800, |
| "step": 8885 |
| }, |
| { |
| "epoch": 15.846702317290553, |
| "grad_norm": 7.705433745286427e-06, |
| "learning_rate": 6.290802381858202e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5504064, |
| "step": 8890 |
| }, |
| { |
| "epoch": 15.855614973262032, |
| "grad_norm": 2.909653449023608e-05, |
| "learning_rate": 6.265030715947659e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5507264, |
| "step": 8895 |
| }, |
| { |
| "epoch": 15.864527629233512, |
| "grad_norm": 5.82176107855048e-06, |
| "learning_rate": 6.2393043838157055e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5510176, |
| "step": 8900 |
| }, |
| { |
| "epoch": 15.87344028520499, |
| "grad_norm": 9.333507478004321e-06, |
| "learning_rate": 6.213623447713413e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5512672, |
| "step": 8905 |
| }, |
| { |
| "epoch": 15.882352941176471, |
| "grad_norm": 6.4683308664825745e-06, |
| "learning_rate": 6.1879879697819806e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5515936, |
| "step": 8910 |
| }, |
| { |
| "epoch": 15.89126559714795, |
| "grad_norm": 1.6260813936241902e-05, |
| "learning_rate": 6.162398012052664e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5518784, |
| "step": 8915 |
| }, |
| { |
| "epoch": 15.90017825311943, |
| "grad_norm": 2.4525292246835306e-05, |
| "learning_rate": 6.136853636446518e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5521888, |
| "step": 8920 |
| }, |
| { |
| "epoch": 15.909090909090908, |
| "grad_norm": 0.0001769509253790602, |
| "learning_rate": 6.11135490477433e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5524960, |
| "step": 8925 |
| }, |
| { |
| "epoch": 15.918003565062389, |
| "grad_norm": 5.651103492709808e-06, |
| "learning_rate": 6.085901878736442e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5528128, |
| "step": 8930 |
| }, |
| { |
| "epoch": 15.926916221033869, |
| "grad_norm": 5.721294201066485e-06, |
| "learning_rate": 6.060494619922575e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5530720, |
| "step": 8935 |
| }, |
| { |
| "epoch": 15.935828877005347, |
| "grad_norm": 4.7713056119391695e-06, |
| "learning_rate": 6.035133189811729e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5533888, |
| "step": 8940 |
| }, |
| { |
| "epoch": 15.944741532976828, |
| "grad_norm": 4.651044037018437e-06, |
| "learning_rate": 6.009817649772007e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5536768, |
| "step": 8945 |
| }, |
| { |
| "epoch": 15.953654188948306, |
| "grad_norm": 1.05441022242303e-05, |
| "learning_rate": 5.9845480610604635e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5539968, |
| "step": 8950 |
| }, |
| { |
| "epoch": 15.962566844919786, |
| "grad_norm": 1.0455855772306677e-05, |
| "learning_rate": 5.959324484822973e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5542752, |
| "step": 8955 |
| }, |
| { |
| "epoch": 15.971479500891265, |
| "grad_norm": 3.935343102057232e-06, |
| "learning_rate": 5.9341469820940495e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5545600, |
| "step": 8960 |
| }, |
| { |
| "epoch": 15.980392156862745, |
| "grad_norm": 7.889810149208643e-06, |
| "learning_rate": 5.909015613796745e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5549280, |
| "step": 8965 |
| }, |
| { |
| "epoch": 15.989304812834224, |
| "grad_norm": 3.854881015286082e-06, |
| "learning_rate": 5.883930440742466e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5553088, |
| "step": 8970 |
| }, |
| { |
| "epoch": 15.998217468805704, |
| "grad_norm": 9.84231064649066e-06, |
| "learning_rate": 5.858891523630844e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5555680, |
| "step": 8975 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.2678108513355255, |
| "eval_runtime": 4.5876, |
| "eval_samples_per_second": 54.277, |
| "eval_steps_per_second": 13.733, |
| "num_input_tokens_seen": 5555776, |
| "step": 8976 |
| }, |
| { |
| "epoch": 16.007130124777184, |
| "grad_norm": 9.22131039260421e-06, |
| "learning_rate": 5.833898923049586e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5558240, |
| "step": 8980 |
| }, |
| { |
| "epoch": 16.016042780748663, |
| "grad_norm": 5.392693765315926e-06, |
| "learning_rate": 5.8089526994743014e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5560992, |
| "step": 8985 |
| }, |
| { |
| "epoch": 16.02495543672014, |
| "grad_norm": 1.1318848919472657e-05, |
| "learning_rate": 5.784052913268412e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5563872, |
| "step": 8990 |
| }, |
| { |
| "epoch": 16.033868092691623, |
| "grad_norm": 9.131179467658512e-06, |
| "learning_rate": 5.759199624682962e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5566400, |
| "step": 8995 |
| }, |
| { |
| "epoch": 16.0427807486631, |
| "grad_norm": 4.842033831664594e-06, |
| "learning_rate": 5.734392893856458e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5569024, |
| "step": 9000 |
| }, |
| { |
| "epoch": 16.05169340463458, |
| "grad_norm": 6.703641702188179e-06, |
| "learning_rate": 5.709632780814797e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5571648, |
| "step": 9005 |
| }, |
| { |
| "epoch": 16.060606060606062, |
| "grad_norm": 3.561503763194196e-05, |
| "learning_rate": 5.684919345471029e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5574656, |
| "step": 9010 |
| }, |
| { |
| "epoch": 16.06951871657754, |
| "grad_norm": 4.695339157478884e-06, |
| "learning_rate": 5.660252647625278e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5577536, |
| "step": 9015 |
| }, |
| { |
| "epoch": 16.07843137254902, |
| "grad_norm": 1.17061272248975e-05, |
| "learning_rate": 5.635632746964581e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5579968, |
| "step": 9020 |
| }, |
| { |
| "epoch": 16.087344028520498, |
| "grad_norm": 7.923229532025289e-06, |
| "learning_rate": 5.611059703062713e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5583008, |
| "step": 9025 |
| }, |
| { |
| "epoch": 16.09625668449198, |
| "grad_norm": 5.213600161368959e-06, |
| "learning_rate": 5.5865335753800875e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5586080, |
| "step": 9030 |
| }, |
| { |
| "epoch": 16.10516934046346, |
| "grad_norm": 8.186175364244264e-06, |
| "learning_rate": 5.562054423263591e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5589248, |
| "step": 9035 |
| }, |
| { |
| "epoch": 16.114081996434937, |
| "grad_norm": 5.150972810952226e-06, |
| "learning_rate": 5.537622305946436e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5591936, |
| "step": 9040 |
| }, |
| { |
| "epoch": 16.122994652406415, |
| "grad_norm": 1.3816433238389436e-05, |
| "learning_rate": 5.513237282548034e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5594304, |
| "step": 9045 |
| }, |
| { |
| "epoch": 16.131907308377897, |
| "grad_norm": 1.9653147319331765e-05, |
| "learning_rate": 5.4888994120738164e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5597376, |
| "step": 9050 |
| }, |
| { |
| "epoch": 16.140819964349376, |
| "grad_norm": 6.136602678452618e-06, |
| "learning_rate": 5.464608753415146e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5600896, |
| "step": 9055 |
| }, |
| { |
| "epoch": 16.149732620320854, |
| "grad_norm": 5.412967766460497e-06, |
| "learning_rate": 5.440365365349126e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5603552, |
| "step": 9060 |
| }, |
| { |
| "epoch": 16.158645276292336, |
| "grad_norm": 5.781025720352773e-06, |
| "learning_rate": 5.416169306538485e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5607264, |
| "step": 9065 |
| }, |
| { |
| "epoch": 16.167557932263815, |
| "grad_norm": 9.013300768856425e-06, |
| "learning_rate": 5.392020635531433e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5609984, |
| "step": 9070 |
| }, |
| { |
| "epoch": 16.176470588235293, |
| "grad_norm": 4.711997462436557e-06, |
| "learning_rate": 5.367919410761493e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5613472, |
| "step": 9075 |
| }, |
| { |
| "epoch": 16.185383244206772, |
| "grad_norm": 4.303492823964916e-05, |
| "learning_rate": 5.343865690547401e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5616608, |
| "step": 9080 |
| }, |
| { |
| "epoch": 16.194295900178254, |
| "grad_norm": 5.112724466016516e-05, |
| "learning_rate": 5.319859533092933e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5619104, |
| "step": 9085 |
| }, |
| { |
| "epoch": 16.203208556149733, |
| "grad_norm": 4.8787801461003255e-06, |
| "learning_rate": 5.295900996486782e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5622208, |
| "step": 9090 |
| }, |
| { |
| "epoch": 16.21212121212121, |
| "grad_norm": 1.4346080206451006e-05, |
| "learning_rate": 5.271990138702418e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5625056, |
| "step": 9095 |
| }, |
| { |
| "epoch": 16.221033868092693, |
| "grad_norm": 3.947042569052428e-05, |
| "learning_rate": 5.248127017597909e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5628608, |
| "step": 9100 |
| }, |
| { |
| "epoch": 16.22994652406417, |
| "grad_norm": 6.575951829290716e-06, |
| "learning_rate": 5.2243116909158475e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5631776, |
| "step": 9105 |
| }, |
| { |
| "epoch": 16.23885918003565, |
| "grad_norm": 5.081686140329111e-06, |
| "learning_rate": 5.200544216283168e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5634720, |
| "step": 9110 |
| }, |
| { |
| "epoch": 16.24777183600713, |
| "grad_norm": 7.938112503325101e-06, |
| "learning_rate": 5.17682465121099e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5638048, |
| "step": 9115 |
| }, |
| { |
| "epoch": 16.25668449197861, |
| "grad_norm": 6.122967079136288e-06, |
| "learning_rate": 5.153153053094551e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5641600, |
| "step": 9120 |
| }, |
| { |
| "epoch": 16.26559714795009, |
| "grad_norm": 1.0540902621869463e-05, |
| "learning_rate": 5.129529479212969e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5644832, |
| "step": 9125 |
| }, |
| { |
| "epoch": 16.274509803921568, |
| "grad_norm": 4.833938874071464e-06, |
| "learning_rate": 5.105953986729195e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5648192, |
| "step": 9130 |
| }, |
| { |
| "epoch": 16.28342245989305, |
| "grad_norm": 4.714757778856438e-06, |
| "learning_rate": 5.082426632689827e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5650912, |
| "step": 9135 |
| }, |
| { |
| "epoch": 16.292335115864528, |
| "grad_norm": 6.666777608188568e-06, |
| "learning_rate": 5.058947474024958e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5654016, |
| "step": 9140 |
| }, |
| { |
| "epoch": 16.301247771836007, |
| "grad_norm": 4.947042270941893e-06, |
| "learning_rate": 5.0355165675480845e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5657376, |
| "step": 9145 |
| }, |
| { |
| "epoch": 16.310160427807485, |
| "grad_norm": 3.160488631692715e-05, |
| "learning_rate": 5.01213396995594e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5660608, |
| "step": 9150 |
| }, |
| { |
| "epoch": 16.319073083778967, |
| "grad_norm": 4.728987278213026e-06, |
| "learning_rate": 4.988799737828362e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5663072, |
| "step": 9155 |
| }, |
| { |
| "epoch": 16.327985739750446, |
| "grad_norm": 4.394975348986918e-06, |
| "learning_rate": 4.96551392762816e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5666304, |
| "step": 9160 |
| }, |
| { |
| "epoch": 16.336898395721924, |
| "grad_norm": 5.376489752961788e-06, |
| "learning_rate": 4.94227659570096e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5668960, |
| "step": 9165 |
| }, |
| { |
| "epoch": 16.345811051693406, |
| "grad_norm": 5.486904228746425e-06, |
| "learning_rate": 4.9190877982750935e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5672032, |
| "step": 9170 |
| }, |
| { |
| "epoch": 16.354723707664885, |
| "grad_norm": 2.8458198357839137e-05, |
| "learning_rate": 4.8959475914614554e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5674912, |
| "step": 9175 |
| }, |
| { |
| "epoch": 16.363636363636363, |
| "grad_norm": 5.422547928901622e-06, |
| "learning_rate": 4.872856031253362e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5677984, |
| "step": 9180 |
| }, |
| { |
| "epoch": 16.372549019607842, |
| "grad_norm": 8.959475962910801e-06, |
| "learning_rate": 4.849813173526416e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5680320, |
| "step": 9185 |
| }, |
| { |
| "epoch": 16.381461675579324, |
| "grad_norm": 6.08643813393428e-06, |
| "learning_rate": 4.826819074038361e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5682688, |
| "step": 9190 |
| }, |
| { |
| "epoch": 16.390374331550802, |
| "grad_norm": 8.104920198093168e-06, |
| "learning_rate": 4.803873788428972e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5685664, |
| "step": 9195 |
| }, |
| { |
| "epoch": 16.39928698752228, |
| "grad_norm": 2.9293587431311607e-05, |
| "learning_rate": 4.780977372219916e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5688768, |
| "step": 9200 |
| }, |
| { |
| "epoch": 16.40819964349376, |
| "grad_norm": 7.23411494618631e-06, |
| "learning_rate": 4.758129880814574e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5692256, |
| "step": 9205 |
| }, |
| { |
| "epoch": 16.41711229946524, |
| "grad_norm": 4.782838459505001e-06, |
| "learning_rate": 4.735331369497992e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5695904, |
| "step": 9210 |
| }, |
| { |
| "epoch": 16.42602495543672, |
| "grad_norm": 5.022124696552055e-06, |
| "learning_rate": 4.712581893436646e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5698720, |
| "step": 9215 |
| }, |
| { |
| "epoch": 16.4349376114082, |
| "grad_norm": 4.0708955566515215e-06, |
| "learning_rate": 4.689881507678393e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5702208, |
| "step": 9220 |
| }, |
| { |
| "epoch": 16.44385026737968, |
| "grad_norm": 7.042349807306891e-06, |
| "learning_rate": 4.667230267152295e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5705344, |
| "step": 9225 |
| }, |
| { |
| "epoch": 16.45276292335116, |
| "grad_norm": 6.274646239035064e-06, |
| "learning_rate": 4.644628226668485e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5708960, |
| "step": 9230 |
| }, |
| { |
| "epoch": 16.461675579322637, |
| "grad_norm": 7.114686741260812e-05, |
| "learning_rate": 4.622075440918058e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5712128, |
| "step": 9235 |
| }, |
| { |
| "epoch": 16.470588235294116, |
| "grad_norm": 4.347176400187891e-06, |
| "learning_rate": 4.599571964472921e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5715552, |
| "step": 9240 |
| }, |
| { |
| "epoch": 16.479500891265598, |
| "grad_norm": 8.624338079243898e-06, |
| "learning_rate": 4.577117851785665e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5719488, |
| "step": 9245 |
| }, |
| { |
| "epoch": 16.488413547237077, |
| "grad_norm": 3.135059159831144e-05, |
| "learning_rate": 4.554713157189439e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5722560, |
| "step": 9250 |
| }, |
| { |
| "epoch": 16.497326203208555, |
| "grad_norm": 5.457145107357064e-06, |
| "learning_rate": 4.5323579348977966e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5726336, |
| "step": 9255 |
| }, |
| { |
| "epoch": 16.506238859180037, |
| "grad_norm": 5.291251000016928e-06, |
| "learning_rate": 4.510052239004597e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5729632, |
| "step": 9260 |
| }, |
| { |
| "epoch": 16.515151515151516, |
| "grad_norm": 6.33938861938077e-06, |
| "learning_rate": 4.487796123483856e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5732384, |
| "step": 9265 |
| }, |
| { |
| "epoch": 16.524064171122994, |
| "grad_norm": 1.3511777979147155e-05, |
| "learning_rate": 4.46558964218961e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5735328, |
| "step": 9270 |
| }, |
| { |
| "epoch": 16.532976827094473, |
| "grad_norm": 8.118163350445684e-06, |
| "learning_rate": 4.443432848855811e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5738016, |
| "step": 9275 |
| }, |
| { |
| "epoch": 16.541889483065955, |
| "grad_norm": 6.011026471242076e-06, |
| "learning_rate": 4.421325797096146e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5740896, |
| "step": 9280 |
| }, |
| { |
| "epoch": 16.550802139037433, |
| "grad_norm": 4.317985258239787e-06, |
| "learning_rate": 4.399268540403975e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5743936, |
| "step": 9285 |
| }, |
| { |
| "epoch": 16.55971479500891, |
| "grad_norm": 4.823016752197873e-06, |
| "learning_rate": 4.377261132152155e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5746464, |
| "step": 9290 |
| }, |
| { |
| "epoch": 16.568627450980394, |
| "grad_norm": 4.637930487660924e-06, |
| "learning_rate": 4.355303625592899e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5749376, |
| "step": 9295 |
| }, |
| { |
| "epoch": 16.577540106951872, |
| "grad_norm": 5.199408860789845e-06, |
| "learning_rate": 4.333396073857724e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5752480, |
| "step": 9300 |
| }, |
| { |
| "epoch": 16.58645276292335, |
| "grad_norm": 1.6364750990760513e-05, |
| "learning_rate": 4.311538529957213e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5755360, |
| "step": 9305 |
| }, |
| { |
| "epoch": 16.59536541889483, |
| "grad_norm": 5.023352514399448e-06, |
| "learning_rate": 4.289731046780973e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5758752, |
| "step": 9310 |
| }, |
| { |
| "epoch": 16.60427807486631, |
| "grad_norm": 5.1138937124051154e-05, |
| "learning_rate": 4.267973677097481e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5762144, |
| "step": 9315 |
| }, |
| { |
| "epoch": 16.61319073083779, |
| "grad_norm": 6.966163255128777e-06, |
| "learning_rate": 4.246266473553931e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5765760, |
| "step": 9320 |
| }, |
| { |
| "epoch": 16.62210338680927, |
| "grad_norm": 8.861073001753539e-05, |
| "learning_rate": 4.22460948867614e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5768704, |
| "step": 9325 |
| }, |
| { |
| "epoch": 16.63101604278075, |
| "grad_norm": 1.665574382059276e-05, |
| "learning_rate": 4.203002774868414e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5771808, |
| "step": 9330 |
| }, |
| { |
| "epoch": 16.63992869875223, |
| "grad_norm": 5.504481123352889e-06, |
| "learning_rate": 4.18144638441341e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5774912, |
| "step": 9335 |
| }, |
| { |
| "epoch": 16.648841354723707, |
| "grad_norm": 5.119895831739996e-06, |
| "learning_rate": 4.159940369472015e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5777792, |
| "step": 9340 |
| }, |
| { |
| "epoch": 16.657754010695186, |
| "grad_norm": 5.188813702261541e-06, |
| "learning_rate": 4.138484782083219e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5780256, |
| "step": 9345 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 5.0006860874418635e-06, |
| "learning_rate": 4.11707967416399e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5784032, |
| "step": 9350 |
| }, |
| { |
| "epoch": 16.675579322638146, |
| "grad_norm": 1.523861828900408e-05, |
| "learning_rate": 4.095725097509157e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5788064, |
| "step": 9355 |
| }, |
| { |
| "epoch": 16.684491978609625, |
| "grad_norm": 4.191660536889685e-06, |
| "learning_rate": 4.0744211037912706e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5791584, |
| "step": 9360 |
| }, |
| { |
| "epoch": 16.693404634581107, |
| "grad_norm": 6.965089141885983e-06, |
| "learning_rate": 4.0531677445604846e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5795008, |
| "step": 9365 |
| }, |
| { |
| "epoch": 16.702317290552585, |
| "grad_norm": 4.43164481112035e-06, |
| "learning_rate": 4.031965071244423e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5798400, |
| "step": 9370 |
| }, |
| { |
| "epoch": 16.711229946524064, |
| "grad_norm": 4.396553777041845e-06, |
| "learning_rate": 4.010813135148073e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5801312, |
| "step": 9375 |
| }, |
| { |
| "epoch": 16.720142602495542, |
| "grad_norm": 9.282194696424995e-06, |
| "learning_rate": 3.9897119874536536e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5803936, |
| "step": 9380 |
| }, |
| { |
| "epoch": 16.729055258467024, |
| "grad_norm": 5.5876425903989e-06, |
| "learning_rate": 3.968661679220468e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5807232, |
| "step": 9385 |
| }, |
| { |
| "epoch": 16.737967914438503, |
| "grad_norm": 1.001564487523865e-05, |
| "learning_rate": 3.9476622613848356e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5810624, |
| "step": 9390 |
| }, |
| { |
| "epoch": 16.74688057040998, |
| "grad_norm": 1.018101738736732e-05, |
| "learning_rate": 3.9267137847599e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5813184, |
| "step": 9395 |
| }, |
| { |
| "epoch": 16.75579322638146, |
| "grad_norm": 4.13356701756129e-06, |
| "learning_rate": 3.905816300035559e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5815904, |
| "step": 9400 |
| }, |
| { |
| "epoch": 16.764705882352942, |
| "grad_norm": 5.294611128192628e-06, |
| "learning_rate": 3.884969857778325e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5819744, |
| "step": 9405 |
| }, |
| { |
| "epoch": 16.77361853832442, |
| "grad_norm": 6.165806553326547e-06, |
| "learning_rate": 3.864174508431187e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5822688, |
| "step": 9410 |
| }, |
| { |
| "epoch": 16.7825311942959, |
| "grad_norm": 1.4151906725601293e-05, |
| "learning_rate": 3.843430302313511e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5826144, |
| "step": 9415 |
| }, |
| { |
| "epoch": 16.79144385026738, |
| "grad_norm": 6.0542051869560964e-06, |
| "learning_rate": 3.822737289620909e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5830048, |
| "step": 9420 |
| }, |
| { |
| "epoch": 16.80035650623886, |
| "grad_norm": 9.024288374348544e-06, |
| "learning_rate": 3.8020955204251223e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5833760, |
| "step": 9425 |
| }, |
| { |
| "epoch": 16.809269162210338, |
| "grad_norm": 3.84986788048991e-06, |
| "learning_rate": 3.781505044673894e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5837440, |
| "step": 9430 |
| }, |
| { |
| "epoch": 16.818181818181817, |
| "grad_norm": 5.007711934013059e-06, |
| "learning_rate": 3.760965912190839e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5840480, |
| "step": 9435 |
| }, |
| { |
| "epoch": 16.8270944741533, |
| "grad_norm": 5.706263891624985e-06, |
| "learning_rate": 3.740478172675346e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5843520, |
| "step": 9440 |
| }, |
| { |
| "epoch": 16.836007130124777, |
| "grad_norm": 8.089661605481524e-06, |
| "learning_rate": 3.720041875702451e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5846464, |
| "step": 9445 |
| }, |
| { |
| "epoch": 16.844919786096256, |
| "grad_norm": 8.192110726668034e-06, |
| "learning_rate": 3.699657070722698e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5849088, |
| "step": 9450 |
| }, |
| { |
| "epoch": 16.853832442067738, |
| "grad_norm": 4.462923243409023e-06, |
| "learning_rate": 3.6793238070620517e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5852128, |
| "step": 9455 |
| }, |
| { |
| "epoch": 16.862745098039216, |
| "grad_norm": 1.4291997104010079e-05, |
| "learning_rate": 3.659042133921736e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5855488, |
| "step": 9460 |
| }, |
| { |
| "epoch": 16.871657754010695, |
| "grad_norm": 5.222723757469794e-06, |
| "learning_rate": 3.6388121003781613e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5858816, |
| "step": 9465 |
| }, |
| { |
| "epoch": 16.880570409982173, |
| "grad_norm": 5.399089786806144e-06, |
| "learning_rate": 3.6186337553827747e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5861856, |
| "step": 9470 |
| }, |
| { |
| "epoch": 16.889483065953655, |
| "grad_norm": 4.9572722673474345e-06, |
| "learning_rate": 3.5985071477619397e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5864928, |
| "step": 9475 |
| }, |
| { |
| "epoch": 16.898395721925134, |
| "grad_norm": 4.7666785576439e-06, |
| "learning_rate": 3.57843232621686e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5868128, |
| "step": 9480 |
| }, |
| { |
| "epoch": 16.907308377896612, |
| "grad_norm": 5.753831374022411e-06, |
| "learning_rate": 3.55840933932339e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5870624, |
| "step": 9485 |
| }, |
| { |
| "epoch": 16.916221033868094, |
| "grad_norm": 7.89822661317885e-06, |
| "learning_rate": 3.5384382355319877e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5873504, |
| "step": 9490 |
| }, |
| { |
| "epoch": 16.925133689839573, |
| "grad_norm": 1.078835157386493e-05, |
| "learning_rate": 3.5185190631675635e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5876448, |
| "step": 9495 |
| }, |
| { |
| "epoch": 16.93404634581105, |
| "grad_norm": 1.1036178875656333e-05, |
| "learning_rate": 3.498651870429345e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5879392, |
| "step": 9500 |
| }, |
| { |
| "epoch": 16.94295900178253, |
| "grad_norm": 4.848511252930621e-06, |
| "learning_rate": 3.478836705390809e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5882944, |
| "step": 9505 |
| }, |
| { |
| "epoch": 16.951871657754012, |
| "grad_norm": 4.952781637257431e-06, |
| "learning_rate": 3.4590736159995253e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5886592, |
| "step": 9510 |
| }, |
| { |
| "epoch": 16.96078431372549, |
| "grad_norm": 6.254635991354007e-06, |
| "learning_rate": 3.4393626500770574e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5890144, |
| "step": 9515 |
| }, |
| { |
| "epoch": 16.96969696969697, |
| "grad_norm": 4.650720711651957e-06, |
| "learning_rate": 3.4197038553188484e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5892928, |
| "step": 9520 |
| }, |
| { |
| "epoch": 16.97860962566845, |
| "grad_norm": 5.847327429364668e-06, |
| "learning_rate": 3.400097279294087e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5895104, |
| "step": 9525 |
| }, |
| { |
| "epoch": 16.98752228163993, |
| "grad_norm": 5.981655249343021e-06, |
| "learning_rate": 3.3805429694456185e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5898560, |
| "step": 9530 |
| }, |
| { |
| "epoch": 16.996434937611408, |
| "grad_norm": 5.227420388109749e-06, |
| "learning_rate": 3.3610409730898155e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5901152, |
| "step": 9535 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.2681449055671692, |
| "eval_runtime": 4.5873, |
| "eval_samples_per_second": 54.28, |
| "eval_steps_per_second": 13.734, |
| "num_input_tokens_seen": 5902048, |
| "step": 9537 |
| }, |
| { |
| "epoch": 17.005347593582886, |
| "grad_norm": 4.638813152268995e-06, |
| "learning_rate": 3.341591337416461e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5904032, |
| "step": 9540 |
| }, |
| { |
| "epoch": 17.01426024955437, |
| "grad_norm": 2.263891656184569e-05, |
| "learning_rate": 3.3221941094886493e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5906880, |
| "step": 9545 |
| }, |
| { |
| "epoch": 17.023172905525847, |
| "grad_norm": 1.2489145774452481e-05, |
| "learning_rate": 3.3028493362426387e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5909344, |
| "step": 9550 |
| }, |
| { |
| "epoch": 17.032085561497325, |
| "grad_norm": 4.581550456350669e-06, |
| "learning_rate": 3.2835570644877854e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5912064, |
| "step": 9555 |
| }, |
| { |
| "epoch": 17.040998217468804, |
| "grad_norm": 5.454068741528317e-06, |
| "learning_rate": 3.2643173409063977e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5915808, |
| "step": 9560 |
| }, |
| { |
| "epoch": 17.049910873440286, |
| "grad_norm": 2.5720153644215316e-05, |
| "learning_rate": 3.2451302120536155e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5918656, |
| "step": 9565 |
| }, |
| { |
| "epoch": 17.058823529411764, |
| "grad_norm": 7.229024049593136e-06, |
| "learning_rate": 3.2259957243573474e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5921344, |
| "step": 9570 |
| }, |
| { |
| "epoch": 17.067736185383243, |
| "grad_norm": 4.824169081985019e-06, |
| "learning_rate": 3.206913924118085e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5924544, |
| "step": 9575 |
| }, |
| { |
| "epoch": 17.076648841354725, |
| "grad_norm": 4.6479017328238115e-06, |
| "learning_rate": 3.1878848575088576e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5927200, |
| "step": 9580 |
| }, |
| { |
| "epoch": 17.085561497326204, |
| "grad_norm": 4.2676165321609005e-06, |
| "learning_rate": 3.168908570575085e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5930848, |
| "step": 9585 |
| }, |
| { |
| "epoch": 17.094474153297682, |
| "grad_norm": 1.122180765378289e-05, |
| "learning_rate": 3.149985109234463e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5934048, |
| "step": 9590 |
| }, |
| { |
| "epoch": 17.10338680926916, |
| "grad_norm": 6.161967576190364e-06, |
| "learning_rate": 3.131114519276876e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5936864, |
| "step": 9595 |
| }, |
| { |
| "epoch": 17.112299465240643, |
| "grad_norm": 2.578197563707363e-05, |
| "learning_rate": 3.112296846364271e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5939936, |
| "step": 9600 |
| }, |
| { |
| "epoch": 17.12121212121212, |
| "grad_norm": 5.605054411716992e-06, |
| "learning_rate": 3.0935321360305468e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5943968, |
| "step": 9605 |
| }, |
| { |
| "epoch": 17.1301247771836, |
| "grad_norm": 5.6164799389080144e-06, |
| "learning_rate": 3.074820433681455e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5947104, |
| "step": 9610 |
| }, |
| { |
| "epoch": 17.13903743315508, |
| "grad_norm": 6.087178917368874e-06, |
| "learning_rate": 3.0561617845944633e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5950656, |
| "step": 9615 |
| }, |
| { |
| "epoch": 17.14795008912656, |
| "grad_norm": 8.51512868393911e-06, |
| "learning_rate": 3.037556233918684e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5954144, |
| "step": 9620 |
| }, |
| { |
| "epoch": 17.15686274509804, |
| "grad_norm": 6.7033947743766475e-06, |
| "learning_rate": 3.0190038266747184e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5957856, |
| "step": 9625 |
| }, |
| { |
| "epoch": 17.165775401069517, |
| "grad_norm": 6.372693405864993e-06, |
| "learning_rate": 3.0005046077546147e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5960384, |
| "step": 9630 |
| }, |
| { |
| "epoch": 17.174688057041, |
| "grad_norm": 8.521442396158818e-06, |
| "learning_rate": 2.9820586219216908e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5962976, |
| "step": 9635 |
| }, |
| { |
| "epoch": 17.183600713012478, |
| "grad_norm": 5.297032657836098e-06, |
| "learning_rate": 2.9636659138104513e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5965856, |
| "step": 9640 |
| }, |
| { |
| "epoch": 17.192513368983956, |
| "grad_norm": 0.00017949608445633203, |
| "learning_rate": 2.9453265279264954e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5968832, |
| "step": 9645 |
| }, |
| { |
| "epoch": 17.20142602495544, |
| "grad_norm": 6.363478860293981e-06, |
| "learning_rate": 2.9270405086464e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5971616, |
| "step": 9650 |
| }, |
| { |
| "epoch": 17.210338680926917, |
| "grad_norm": 0.0001252765068784356, |
| "learning_rate": 2.908807900217583e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5975296, |
| "step": 9655 |
| }, |
| { |
| "epoch": 17.219251336898395, |
| "grad_norm": 1.5092450666998047e-05, |
| "learning_rate": 2.8906287467582616e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5978560, |
| "step": 9660 |
| }, |
| { |
| "epoch": 17.228163992869874, |
| "grad_norm": 5.846444764756598e-06, |
| "learning_rate": 2.87250309225727e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5981792, |
| "step": 9665 |
| }, |
| { |
| "epoch": 17.237076648841356, |
| "grad_norm": 3.902230218955083e-06, |
| "learning_rate": 2.8544309805740023e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5984352, |
| "step": 9670 |
| }, |
| { |
| "epoch": 17.245989304812834, |
| "grad_norm": 3.8089124245743733e-06, |
| "learning_rate": 2.8364124554383057e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5987424, |
| "step": 9675 |
| }, |
| { |
| "epoch": 17.254901960784313, |
| "grad_norm": 4.704589628090616e-06, |
| "learning_rate": 2.8184475604503324e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5990720, |
| "step": 9680 |
| }, |
| { |
| "epoch": 17.263814616755795, |
| "grad_norm": 1.3694937479158398e-05, |
| "learning_rate": 2.8005363390804896e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5993792, |
| "step": 9685 |
| }, |
| { |
| "epoch": 17.272727272727273, |
| "grad_norm": 3.3423213608330116e-05, |
| "learning_rate": 2.782678834669297e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 5997344, |
| "step": 9690 |
| }, |
| { |
| "epoch": 17.281639928698752, |
| "grad_norm": 4.3190329961362295e-06, |
| "learning_rate": 2.7648750904272964e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6000096, |
| "step": 9695 |
| }, |
| { |
| "epoch": 17.29055258467023, |
| "grad_norm": 3.917830326827243e-06, |
| "learning_rate": 2.747125149434948e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6003168, |
| "step": 9700 |
| }, |
| { |
| "epoch": 17.299465240641712, |
| "grad_norm": 2.409498847555369e-05, |
| "learning_rate": 2.7294290546425044e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6006944, |
| "step": 9705 |
| }, |
| { |
| "epoch": 17.30837789661319, |
| "grad_norm": 4.727301984530641e-06, |
| "learning_rate": 2.7117868488699517e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6009984, |
| "step": 9710 |
| }, |
| { |
| "epoch": 17.31729055258467, |
| "grad_norm": 4.537017048278358e-06, |
| "learning_rate": 2.6941985748068418e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6013344, |
| "step": 9715 |
| }, |
| { |
| "epoch": 17.32620320855615, |
| "grad_norm": 5.146333023731131e-06, |
| "learning_rate": 2.6766642750122666e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6016992, |
| "step": 9720 |
| }, |
| { |
| "epoch": 17.33511586452763, |
| "grad_norm": 2.907273665186949e-05, |
| "learning_rate": 2.659183991914696e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6020416, |
| "step": 9725 |
| }, |
| { |
| "epoch": 17.34402852049911, |
| "grad_norm": 3.90044351661345e-06, |
| "learning_rate": 2.641757767811881e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6023712, |
| "step": 9730 |
| }, |
| { |
| "epoch": 17.352941176470587, |
| "grad_norm": 7.279854798980523e-06, |
| "learning_rate": 2.624385644870783e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6026656, |
| "step": 9735 |
| }, |
| { |
| "epoch": 17.36185383244207, |
| "grad_norm": 1.2712700481642969e-05, |
| "learning_rate": 2.607067665127441e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6029568, |
| "step": 9740 |
| }, |
| { |
| "epoch": 17.370766488413548, |
| "grad_norm": 5.397015684138751e-06, |
| "learning_rate": 2.5898038704868818e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6033312, |
| "step": 9745 |
| }, |
| { |
| "epoch": 17.379679144385026, |
| "grad_norm": 5.237438472249778e-06, |
| "learning_rate": 2.5725943027230333e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6036128, |
| "step": 9750 |
| }, |
| { |
| "epoch": 17.388591800356505, |
| "grad_norm": 9.407116885995492e-05, |
| "learning_rate": 2.555439003478591e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6039040, |
| "step": 9755 |
| }, |
| { |
| "epoch": 17.397504456327987, |
| "grad_norm": 6.8480708250717726e-06, |
| "learning_rate": 2.538338014264938e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6042432, |
| "step": 9760 |
| }, |
| { |
| "epoch": 17.406417112299465, |
| "grad_norm": 4.484073087951401e-06, |
| "learning_rate": 2.521291376462051e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6046080, |
| "step": 9765 |
| }, |
| { |
| "epoch": 17.415329768270944, |
| "grad_norm": 4.551226084004156e-06, |
| "learning_rate": 2.5042991313183745e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6048352, |
| "step": 9770 |
| }, |
| { |
| "epoch": 17.424242424242426, |
| "grad_norm": 8.205198355426546e-06, |
| "learning_rate": 2.4873613199507514e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6050976, |
| "step": 9775 |
| }, |
| { |
| "epoch": 17.433155080213904, |
| "grad_norm": 5.023833182349335e-06, |
| "learning_rate": 2.470477983344299e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6054784, |
| "step": 9780 |
| }, |
| { |
| "epoch": 17.442067736185383, |
| "grad_norm": 4.796798293682514e-06, |
| "learning_rate": 2.4536491623523284e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6057440, |
| "step": 9785 |
| }, |
| { |
| "epoch": 17.45098039215686, |
| "grad_norm": 4.666772383643547e-06, |
| "learning_rate": 2.436874897696234e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6060768, |
| "step": 9790 |
| }, |
| { |
| "epoch": 17.459893048128343, |
| "grad_norm": 4.6128952817525715e-06, |
| "learning_rate": 2.42015522996539e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6064096, |
| "step": 9795 |
| }, |
| { |
| "epoch": 17.46880570409982, |
| "grad_norm": 4.935674041917082e-06, |
| "learning_rate": 2.403490199617073e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6067072, |
| "step": 9800 |
| }, |
| { |
| "epoch": 17.4777183600713, |
| "grad_norm": 6.165453669382259e-05, |
| "learning_rate": 2.3868798469763307e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6070016, |
| "step": 9805 |
| }, |
| { |
| "epoch": 17.486631016042782, |
| "grad_norm": 6.289596512942808e-06, |
| "learning_rate": 2.370324212235936e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6073664, |
| "step": 9810 |
| }, |
| { |
| "epoch": 17.49554367201426, |
| "grad_norm": 4.492193511396181e-06, |
| "learning_rate": 2.35382333545624e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6076896, |
| "step": 9815 |
| }, |
| { |
| "epoch": 17.50445632798574, |
| "grad_norm": 1.5470133803319186e-05, |
| "learning_rate": 2.3373772565650874e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6080000, |
| "step": 9820 |
| }, |
| { |
| "epoch": 17.513368983957218, |
| "grad_norm": 1.5124073797778692e-05, |
| "learning_rate": 2.3209860153577402e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6082784, |
| "step": 9825 |
| }, |
| { |
| "epoch": 17.5222816399287, |
| "grad_norm": 3.944990112358937e-06, |
| "learning_rate": 2.304649651496754e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6085440, |
| "step": 9830 |
| }, |
| { |
| "epoch": 17.53119429590018, |
| "grad_norm": 5.5363611863867845e-06, |
| "learning_rate": 2.2883682045119063e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6088672, |
| "step": 9835 |
| }, |
| { |
| "epoch": 17.540106951871657, |
| "grad_norm": 5.04318541061366e-06, |
| "learning_rate": 2.272141713800094e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6091680, |
| "step": 9840 |
| }, |
| { |
| "epoch": 17.54901960784314, |
| "grad_norm": 4.3036125134676695e-06, |
| "learning_rate": 2.255970218625217e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6094336, |
| "step": 9845 |
| }, |
| { |
| "epoch": 17.557932263814617, |
| "grad_norm": 1.092425827664556e-05, |
| "learning_rate": 2.2398537581181155e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6096704, |
| "step": 9850 |
| }, |
| { |
| "epoch": 17.566844919786096, |
| "grad_norm": 4.398175860842457e-06, |
| "learning_rate": 2.2237923712764535e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6099136, |
| "step": 9855 |
| }, |
| { |
| "epoch": 17.575757575757574, |
| "grad_norm": 0.00013597174256574363, |
| "learning_rate": 2.2077860969646285e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6102016, |
| "step": 9860 |
| }, |
| { |
| "epoch": 17.584670231729056, |
| "grad_norm": 9.326985491497908e-06, |
| "learning_rate": 2.191834973913684e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6105440, |
| "step": 9865 |
| }, |
| { |
| "epoch": 17.593582887700535, |
| "grad_norm": 3.933530479116598e-06, |
| "learning_rate": 2.1759390407212117e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6108768, |
| "step": 9870 |
| }, |
| { |
| "epoch": 17.602495543672013, |
| "grad_norm": 7.7144486567704e-06, |
| "learning_rate": 2.1600983358512574e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6111840, |
| "step": 9875 |
| }, |
| { |
| "epoch": 17.611408199643495, |
| "grad_norm": 4.5524284359999e-06, |
| "learning_rate": 2.144312897634232e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6114048, |
| "step": 9880 |
| }, |
| { |
| "epoch": 17.620320855614974, |
| "grad_norm": 7.663302312721498e-06, |
| "learning_rate": 2.1285827642668065e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6117440, |
| "step": 9885 |
| }, |
| { |
| "epoch": 17.629233511586452, |
| "grad_norm": 4.616276783053763e-06, |
| "learning_rate": 2.1129079738118423e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6120736, |
| "step": 9890 |
| }, |
| { |
| "epoch": 17.63814616755793, |
| "grad_norm": 8.89071452547796e-06, |
| "learning_rate": 2.09728856419826e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6123392, |
| "step": 9895 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 4.064894710609224e-06, |
| "learning_rate": 2.0817245732210057e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6126848, |
| "step": 9900 |
| }, |
| { |
| "epoch": 17.65597147950089, |
| "grad_norm": 5.309177595336223e-06, |
| "learning_rate": 2.0662160385409108e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6130048, |
| "step": 9905 |
| }, |
| { |
| "epoch": 17.66488413547237, |
| "grad_norm": 5.220045750320423e-06, |
| "learning_rate": 2.050762997684605e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6133216, |
| "step": 9910 |
| }, |
| { |
| "epoch": 17.67379679144385, |
| "grad_norm": 4.825315954803955e-06, |
| "learning_rate": 2.0353654880444635e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6136480, |
| "step": 9915 |
| }, |
| { |
| "epoch": 17.68270944741533, |
| "grad_norm": 8.878021617420018e-06, |
| "learning_rate": 2.0200235468784636e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6139136, |
| "step": 9920 |
| }, |
| { |
| "epoch": 17.69162210338681, |
| "grad_norm": 1.6448260794277303e-05, |
| "learning_rate": 2.0047372113101344e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6142112, |
| "step": 9925 |
| }, |
| { |
| "epoch": 17.700534759358288, |
| "grad_norm": 3.952519364247564e-06, |
| "learning_rate": 1.9895065183284683e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6145632, |
| "step": 9930 |
| }, |
| { |
| "epoch": 17.70944741532977, |
| "grad_norm": 5.336608865036396e-06, |
| "learning_rate": 1.9743315047877853e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6149152, |
| "step": 9935 |
| }, |
| { |
| "epoch": 17.718360071301248, |
| "grad_norm": 8.08969070931198e-06, |
| "learning_rate": 1.9592122074077012e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6152512, |
| "step": 9940 |
| }, |
| { |
| "epoch": 17.727272727272727, |
| "grad_norm": 1.4897877917974256e-05, |
| "learning_rate": 1.9441486627729987e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6155232, |
| "step": 9945 |
| }, |
| { |
| "epoch": 17.736185383244205, |
| "grad_norm": 5.695238087355392e-06, |
| "learning_rate": 1.929140907333557e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6158752, |
| "step": 9950 |
| }, |
| { |
| "epoch": 17.745098039215687, |
| "grad_norm": 5.3530861805484165e-06, |
| "learning_rate": 1.914188977404269e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6161696, |
| "step": 9955 |
| }, |
| { |
| "epoch": 17.754010695187166, |
| "grad_norm": 3.5964576454716735e-06, |
| "learning_rate": 1.899292909164932e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6165440, |
| "step": 9960 |
| }, |
| { |
| "epoch": 17.762923351158644, |
| "grad_norm": 2.9325330615392886e-05, |
| "learning_rate": 1.884452738660178e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6168128, |
| "step": 9965 |
| }, |
| { |
| "epoch": 17.771836007130126, |
| "grad_norm": 4.651951712730806e-06, |
| "learning_rate": 1.8696685017993849e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6171456, |
| "step": 9970 |
| }, |
| { |
| "epoch": 17.780748663101605, |
| "grad_norm": 9.019388016895391e-06, |
| "learning_rate": 1.8549402343565698e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6174880, |
| "step": 9975 |
| }, |
| { |
| "epoch": 17.789661319073083, |
| "grad_norm": 1.0646539521985687e-05, |
| "learning_rate": 1.8402679719703442e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6177984, |
| "step": 9980 |
| }, |
| { |
| "epoch": 17.79857397504456, |
| "grad_norm": 3.674474555737106e-06, |
| "learning_rate": 1.825651750143767e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6180928, |
| "step": 9985 |
| }, |
| { |
| "epoch": 17.807486631016044, |
| "grad_norm": 2.7525751647772267e-05, |
| "learning_rate": 1.8110916042443332e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6183712, |
| "step": 9990 |
| }, |
| { |
| "epoch": 17.816399286987522, |
| "grad_norm": 5.357886038837023e-06, |
| "learning_rate": 1.7965875695038215e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6186976, |
| "step": 9995 |
| }, |
| { |
| "epoch": 17.825311942959, |
| "grad_norm": 2.7742196834878996e-05, |
| "learning_rate": 1.782139681018244e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6190880, |
| "step": 10000 |
| }, |
| { |
| "epoch": 17.834224598930483, |
| "grad_norm": 5.267481810733443e-06, |
| "learning_rate": 1.767747973747752e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6193824, |
| "step": 10005 |
| }, |
| { |
| "epoch": 17.84313725490196, |
| "grad_norm": 1.7488835510448553e-05, |
| "learning_rate": 1.7534124825165505e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6197056, |
| "step": 10010 |
| }, |
| { |
| "epoch": 17.85204991087344, |
| "grad_norm": 4.473508852242958e-06, |
| "learning_rate": 1.7391332420128193e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6199776, |
| "step": 10015 |
| }, |
| { |
| "epoch": 17.86096256684492, |
| "grad_norm": 3.7598254039039603e-06, |
| "learning_rate": 1.7249102867886392e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6203392, |
| "step": 10020 |
| }, |
| { |
| "epoch": 17.8698752228164, |
| "grad_norm": 9.932436114468146e-06, |
| "learning_rate": 1.7107436512598661e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6206656, |
| "step": 10025 |
| }, |
| { |
| "epoch": 17.87878787878788, |
| "grad_norm": 5.878880983800627e-06, |
| "learning_rate": 1.6966333697061049e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6209664, |
| "step": 10030 |
| }, |
| { |
| "epoch": 17.887700534759357, |
| "grad_norm": 4.809881829714868e-06, |
| "learning_rate": 1.6825794762705765e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6212320, |
| "step": 10035 |
| }, |
| { |
| "epoch": 17.89661319073084, |
| "grad_norm": 4.702781552623492e-06, |
| "learning_rate": 1.6685820049600703e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6215136, |
| "step": 10040 |
| }, |
| { |
| "epoch": 17.905525846702318, |
| "grad_norm": 4.891365733783459e-06, |
| "learning_rate": 1.6546409896448457e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6218816, |
| "step": 10045 |
| }, |
| { |
| "epoch": 17.914438502673796, |
| "grad_norm": 5.0554826884763315e-06, |
| "learning_rate": 1.6407564640585572e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6221440, |
| "step": 10050 |
| }, |
| { |
| "epoch": 17.923351158645275, |
| "grad_norm": 4.003755293524591e-06, |
| "learning_rate": 1.6269284617981607e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6224576, |
| "step": 10055 |
| }, |
| { |
| "epoch": 17.932263814616757, |
| "grad_norm": 5.1156189329049084e-06, |
| "learning_rate": 1.6131570163238436e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6227744, |
| "step": 10060 |
| }, |
| { |
| "epoch": 17.941176470588236, |
| "grad_norm": 4.8709184738982e-06, |
| "learning_rate": 1.5994421609589388e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6231136, |
| "step": 10065 |
| }, |
| { |
| "epoch": 17.950089126559714, |
| "grad_norm": 5.6951562328322325e-06, |
| "learning_rate": 1.5857839288898558e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6234048, |
| "step": 10070 |
| }, |
| { |
| "epoch": 17.959001782531196, |
| "grad_norm": 5.639751179842278e-06, |
| "learning_rate": 1.5721823531659712e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6237120, |
| "step": 10075 |
| }, |
| { |
| "epoch": 17.967914438502675, |
| "grad_norm": 8.619826257927343e-05, |
| "learning_rate": 1.558637466699589e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6240256, |
| "step": 10080 |
| }, |
| { |
| "epoch": 17.976827094474153, |
| "grad_norm": 5.539429821510566e-06, |
| "learning_rate": 1.5451493022658332e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6243488, |
| "step": 10085 |
| }, |
| { |
| "epoch": 17.98573975044563, |
| "grad_norm": 5.7802731134870555e-06, |
| "learning_rate": 1.5317178925025571e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6247040, |
| "step": 10090 |
| }, |
| { |
| "epoch": 17.994652406417114, |
| "grad_norm": 4.963373612554278e-06, |
| "learning_rate": 1.5183432699103134e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6250880, |
| "step": 10095 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.2708573043346405, |
| "eval_runtime": 4.5899, |
| "eval_samples_per_second": 54.249, |
| "eval_steps_per_second": 13.726, |
| "num_input_tokens_seen": 6252128, |
| "step": 10098 |
| }, |
| { |
| "epoch": 18.003565062388592, |
| "grad_norm": 5.739175321650691e-06, |
| "learning_rate": 1.5050254668522168e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6253536, |
| "step": 10100 |
| }, |
| { |
| "epoch": 18.01247771836007, |
| "grad_norm": 4.788083060702775e-06, |
| "learning_rate": 1.4917645155539062e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6256864, |
| "step": 10105 |
| }, |
| { |
| "epoch": 18.02139037433155, |
| "grad_norm": 3.880075837514596e-06, |
| "learning_rate": 1.4785604481034638e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6259360, |
| "step": 10110 |
| }, |
| { |
| "epoch": 18.03030303030303, |
| "grad_norm": 4.699511464423267e-06, |
| "learning_rate": 1.465413296451304e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6261920, |
| "step": 10115 |
| }, |
| { |
| "epoch": 18.03921568627451, |
| "grad_norm": 4.53999336968991e-06, |
| "learning_rate": 1.4523230924101433e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6265280, |
| "step": 10120 |
| }, |
| { |
| "epoch": 18.048128342245988, |
| "grad_norm": 4.677222477766918e-06, |
| "learning_rate": 1.4392898676548777e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6267936, |
| "step": 10125 |
| }, |
| { |
| "epoch": 18.05704099821747, |
| "grad_norm": 5.001421413908247e-06, |
| "learning_rate": 1.4263136537225442e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6271552, |
| "step": 10130 |
| }, |
| { |
| "epoch": 18.06595365418895, |
| "grad_norm": 4.5485739974537864e-06, |
| "learning_rate": 1.4133944820122258e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6274272, |
| "step": 10135 |
| }, |
| { |
| "epoch": 18.074866310160427, |
| "grad_norm": 8.964995686255861e-06, |
| "learning_rate": 1.4005323837849721e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6277056, |
| "step": 10140 |
| }, |
| { |
| "epoch": 18.083778966131906, |
| "grad_norm": 5.683996278094128e-06, |
| "learning_rate": 1.38772739016374e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6279488, |
| "step": 10145 |
| }, |
| { |
| "epoch": 18.092691622103388, |
| "grad_norm": 3.7518773297051666e-06, |
| "learning_rate": 1.3749795321332887e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6282944, |
| "step": 10150 |
| }, |
| { |
| "epoch": 18.101604278074866, |
| "grad_norm": 4.472956788958982e-06, |
| "learning_rate": 1.3622888405401462e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6286464, |
| "step": 10155 |
| }, |
| { |
| "epoch": 18.110516934046345, |
| "grad_norm": 6.11408313488937e-06, |
| "learning_rate": 1.3496553460925042e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6289344, |
| "step": 10160 |
| }, |
| { |
| "epoch": 18.119429590017827, |
| "grad_norm": 7.724917850282509e-06, |
| "learning_rate": 1.3370790793601373e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6292160, |
| "step": 10165 |
| }, |
| { |
| "epoch": 18.128342245989305, |
| "grad_norm": 4.958530553267337e-06, |
| "learning_rate": 1.3245600707743749e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6294976, |
| "step": 10170 |
| }, |
| { |
| "epoch": 18.137254901960784, |
| "grad_norm": 4.44446959591005e-06, |
| "learning_rate": 1.3120983506279689e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6298848, |
| "step": 10175 |
| }, |
| { |
| "epoch": 18.146167557932262, |
| "grad_norm": 4.498652288020821e-06, |
| "learning_rate": 1.2996939490750564e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6301568, |
| "step": 10180 |
| }, |
| { |
| "epoch": 18.155080213903744, |
| "grad_norm": 1.0262559044349473e-05, |
| "learning_rate": 1.2873468961310892e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6303936, |
| "step": 10185 |
| }, |
| { |
| "epoch": 18.163992869875223, |
| "grad_norm": 4.685638941737125e-06, |
| "learning_rate": 1.2750572216727318e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6307040, |
| "step": 10190 |
| }, |
| { |
| "epoch": 18.1729055258467, |
| "grad_norm": 5.877223884453997e-06, |
| "learning_rate": 1.2628249554378135e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6310240, |
| "step": 10195 |
| }, |
| { |
| "epoch": 18.181818181818183, |
| "grad_norm": 4.965536390955094e-06, |
| "learning_rate": 1.2506501270252712e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6313664, |
| "step": 10200 |
| }, |
| { |
| "epoch": 18.190730837789662, |
| "grad_norm": 7.754924808978103e-06, |
| "learning_rate": 1.238532765895023e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6316768, |
| "step": 10205 |
| }, |
| { |
| "epoch": 18.19964349376114, |
| "grad_norm": 4.4843045543530025e-06, |
| "learning_rate": 1.2264729013679588e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6319264, |
| "step": 10210 |
| }, |
| { |
| "epoch": 18.20855614973262, |
| "grad_norm": 5.228135705692694e-06, |
| "learning_rate": 1.2144705626258217e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6321984, |
| "step": 10215 |
| }, |
| { |
| "epoch": 18.2174688057041, |
| "grad_norm": 7.139366971387062e-06, |
| "learning_rate": 1.202525778711172e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6325952, |
| "step": 10220 |
| }, |
| { |
| "epoch": 18.22638146167558, |
| "grad_norm": 4.869689291808754e-06, |
| "learning_rate": 1.1906385785272923e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6328896, |
| "step": 10225 |
| }, |
| { |
| "epoch": 18.235294117647058, |
| "grad_norm": 4.0212094063463155e-06, |
| "learning_rate": 1.1788089908381372e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6332256, |
| "step": 10230 |
| }, |
| { |
| "epoch": 18.24420677361854, |
| "grad_norm": 7.100913990143454e-06, |
| "learning_rate": 1.167037044268246e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6335104, |
| "step": 10235 |
| }, |
| { |
| "epoch": 18.25311942959002, |
| "grad_norm": 6.65205589029938e-05, |
| "learning_rate": 1.1553227673026801e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6338592, |
| "step": 10240 |
| }, |
| { |
| "epoch": 18.262032085561497, |
| "grad_norm": 2.6237210477120243e-05, |
| "learning_rate": 1.1436661882869626e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6341856, |
| "step": 10245 |
| }, |
| { |
| "epoch": 18.270944741532976, |
| "grad_norm": 7.856396223360207e-06, |
| "learning_rate": 1.1320673354270034e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6345280, |
| "step": 10250 |
| }, |
| { |
| "epoch": 18.279857397504458, |
| "grad_norm": 3.6295623431215063e-06, |
| "learning_rate": 1.1205262367890101e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6349344, |
| "step": 10255 |
| }, |
| { |
| "epoch": 18.288770053475936, |
| "grad_norm": 4.837954747927142e-06, |
| "learning_rate": 1.1090429202994746e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6353152, |
| "step": 10260 |
| }, |
| { |
| "epoch": 18.297682709447415, |
| "grad_norm": 8.854965926730074e-06, |
| "learning_rate": 1.097617413745039e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6355840, |
| "step": 10265 |
| }, |
| { |
| "epoch": 18.306595365418893, |
| "grad_norm": 5.455266000353731e-05, |
| "learning_rate": 1.0862497447724802e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6359008, |
| "step": 10270 |
| }, |
| { |
| "epoch": 18.315508021390375, |
| "grad_norm": 1.102537407859927e-05, |
| "learning_rate": 1.0749399408886141e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6361760, |
| "step": 10275 |
| }, |
| { |
| "epoch": 18.324420677361854, |
| "grad_norm": 4.134295977564761e-06, |
| "learning_rate": 1.063688029460233e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6364512, |
| "step": 10280 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 8.992203220259398e-06, |
| "learning_rate": 1.0524940377140635e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6367104, |
| "step": 10285 |
| }, |
| { |
| "epoch": 18.342245989304814, |
| "grad_norm": 7.417367669404484e-06, |
| "learning_rate": 1.0413579927366635e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6370400, |
| "step": 10290 |
| }, |
| { |
| "epoch": 18.351158645276293, |
| "grad_norm": 2.561887595220469e-05, |
| "learning_rate": 1.030279921474378e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6373536, |
| "step": 10295 |
| }, |
| { |
| "epoch": 18.36007130124777, |
| "grad_norm": 2.4329236111952923e-05, |
| "learning_rate": 1.0192598507332785e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6377280, |
| "step": 10300 |
| }, |
| { |
| "epoch": 18.36898395721925, |
| "grad_norm": 6.050835509086028e-06, |
| "learning_rate": 1.0082978071790815e-06, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6380640, |
| "step": 10305 |
| }, |
| { |
| "epoch": 18.37789661319073, |
| "grad_norm": 1.3629637578560505e-05, |
| "learning_rate": 9.973938173370972e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6383616, |
| "step": 10310 |
| }, |
| { |
| "epoch": 18.38680926916221, |
| "grad_norm": 8.69268988026306e-05, |
| "learning_rate": 9.865479075921642e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6386432, |
| "step": 10315 |
| }, |
| { |
| "epoch": 18.39572192513369, |
| "grad_norm": 4.355636974651134e-06, |
| "learning_rate": 9.757601041885694e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6389824, |
| "step": 10320 |
| }, |
| { |
| "epoch": 18.40463458110517, |
| "grad_norm": 9.561532351654023e-06, |
| "learning_rate": 9.650304332300159e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6392352, |
| "step": 10325 |
| }, |
| { |
| "epoch": 18.41354723707665, |
| "grad_norm": 3.7180716390139423e-06, |
| "learning_rate": 9.54358920679524e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6395936, |
| "step": 10330 |
| }, |
| { |
| "epoch": 18.422459893048128, |
| "grad_norm": 4.346656169218477e-06, |
| "learning_rate": 9.437455923593963e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6399136, |
| "step": 10335 |
| }, |
| { |
| "epoch": 18.431372549019606, |
| "grad_norm": 0.0001780799648258835, |
| "learning_rate": 9.331904739511399e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6402592, |
| "step": 10340 |
| }, |
| { |
| "epoch": 18.44028520499109, |
| "grad_norm": 6.550861144205555e-06, |
| "learning_rate": 9.226935909954104e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6405728, |
| "step": 10345 |
| }, |
| { |
| "epoch": 18.449197860962567, |
| "grad_norm": 4.070516297360882e-06, |
| "learning_rate": 9.12254968891954e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6409664, |
| "step": 10350 |
| }, |
| { |
| "epoch": 18.458110516934045, |
| "grad_norm": 5.320921445672866e-06, |
| "learning_rate": 9.018746328995298e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6412160, |
| "step": 10355 |
| }, |
| { |
| "epoch": 18.467023172905527, |
| "grad_norm": 3.3889532460307237e-06, |
| "learning_rate": 8.915526081358649e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6415072, |
| "step": 10360 |
| }, |
| { |
| "epoch": 18.475935828877006, |
| "grad_norm": 4.983634426025674e-06, |
| "learning_rate": 8.812889195775942e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6417952, |
| "step": 10365 |
| }, |
| { |
| "epoch": 18.484848484848484, |
| "grad_norm": 7.026436833257321e-06, |
| "learning_rate": 8.710835920601818e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6420960, |
| "step": 10370 |
| }, |
| { |
| "epoch": 18.493761140819963, |
| "grad_norm": 5.714975486625917e-06, |
| "learning_rate": 8.609366502778854e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6424352, |
| "step": 10375 |
| }, |
| { |
| "epoch": 18.502673796791445, |
| "grad_norm": 2.397276330157183e-05, |
| "learning_rate": 8.508481187836759e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6427776, |
| "step": 10380 |
| }, |
| { |
| "epoch": 18.511586452762923, |
| "grad_norm": 5.075999524706276e-06, |
| "learning_rate": 8.408180219891897e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6431296, |
| "step": 10385 |
| }, |
| { |
| "epoch": 18.520499108734402, |
| "grad_norm": 5.217380930844229e-06, |
| "learning_rate": 8.308463841646713e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6434464, |
| "step": 10390 |
| }, |
| { |
| "epoch": 18.529411764705884, |
| "grad_norm": 4.922795142192626e-06, |
| "learning_rate": 8.209332294388972e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6438016, |
| "step": 10395 |
| }, |
| { |
| "epoch": 18.538324420677363, |
| "grad_norm": 4.330814590502996e-06, |
| "learning_rate": 8.110785817991379e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6440608, |
| "step": 10400 |
| }, |
| { |
| "epoch": 18.54723707664884, |
| "grad_norm": 7.737668056506664e-06, |
| "learning_rate": 8.012824650910938e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6443552, |
| "step": 10405 |
| }, |
| { |
| "epoch": 18.55614973262032, |
| "grad_norm": 5.734651949751424e-06, |
| "learning_rate": 7.915449030188316e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6445952, |
| "step": 10410 |
| }, |
| { |
| "epoch": 18.5650623885918, |
| "grad_norm": 6.203586963238195e-06, |
| "learning_rate": 7.818659191447363e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6449536, |
| "step": 10415 |
| }, |
| { |
| "epoch": 18.57397504456328, |
| "grad_norm": 3.782967723964248e-06, |
| "learning_rate": 7.722455368894376e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6452960, |
| "step": 10420 |
| }, |
| { |
| "epoch": 18.58288770053476, |
| "grad_norm": 3.587515038816491e-06, |
| "learning_rate": 7.626837795317781e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6456384, |
| "step": 10425 |
| }, |
| { |
| "epoch": 18.59180035650624, |
| "grad_norm": 4.201141564408317e-06, |
| "learning_rate": 7.531806702087307e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6458944, |
| "step": 10430 |
| }, |
| { |
| "epoch": 18.60071301247772, |
| "grad_norm": 4.2174779082415625e-06, |
| "learning_rate": 7.437362319153651e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6461920, |
| "step": 10435 |
| }, |
| { |
| "epoch": 18.609625668449198, |
| "grad_norm": 4.0159065974876285e-06, |
| "learning_rate": 7.343504875047813e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6465536, |
| "step": 10440 |
| }, |
| { |
| "epoch": 18.618538324420676, |
| "grad_norm": 4.9373779802408535e-06, |
| "learning_rate": 7.250234596880456e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6469152, |
| "step": 10445 |
| }, |
| { |
| "epoch": 18.627450980392158, |
| "grad_norm": 4.6428826863120776e-06, |
| "learning_rate": 7.157551710341576e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6472896, |
| "step": 10450 |
| }, |
| { |
| "epoch": 18.636363636363637, |
| "grad_norm": 5.256363238004269e-06, |
| "learning_rate": 7.065456439699775e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6475136, |
| "step": 10455 |
| }, |
| { |
| "epoch": 18.645276292335115, |
| "grad_norm": 4.301908120396547e-06, |
| "learning_rate": 6.973949007801711e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6478368, |
| "step": 10460 |
| }, |
| { |
| "epoch": 18.654188948306594, |
| "grad_norm": 4.6390055103984196e-06, |
| "learning_rate": 6.883029636071819e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6481216, |
| "step": 10465 |
| }, |
| { |
| "epoch": 18.663101604278076, |
| "grad_norm": 1.0161958925891668e-05, |
| "learning_rate": 6.792698544511366e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6484416, |
| "step": 10470 |
| }, |
| { |
| "epoch": 18.672014260249554, |
| "grad_norm": 4.047020865982631e-06, |
| "learning_rate": 6.702955951698231e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6487360, |
| "step": 10475 |
| }, |
| { |
| "epoch": 18.680926916221033, |
| "grad_norm": 3.812608383668703e-06, |
| "learning_rate": 6.613802074786319e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6490208, |
| "step": 10480 |
| }, |
| { |
| "epoch": 18.689839572192515, |
| "grad_norm": 3.885810201609274e-06, |
| "learning_rate": 6.525237129504868e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6493376, |
| "step": 10485 |
| }, |
| { |
| "epoch": 18.698752228163993, |
| "grad_norm": 7.146949883463094e-06, |
| "learning_rate": 6.437261330158207e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6496064, |
| "step": 10490 |
| }, |
| { |
| "epoch": 18.707664884135472, |
| "grad_norm": 1.5320661987061612e-05, |
| "learning_rate": 6.349874889624962e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6499616, |
| "step": 10495 |
| }, |
| { |
| "epoch": 18.71657754010695, |
| "grad_norm": 4.966541837347904e-06, |
| "learning_rate": 6.263078019357716e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6502656, |
| "step": 10500 |
| }, |
| { |
| "epoch": 18.725490196078432, |
| "grad_norm": 8.445246749033686e-06, |
| "learning_rate": 6.176870929382489e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6505344, |
| "step": 10505 |
| }, |
| { |
| "epoch": 18.73440285204991, |
| "grad_norm": 3.86704778065905e-05, |
| "learning_rate": 6.091253828298088e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6508160, |
| "step": 10510 |
| }, |
| { |
| "epoch": 18.74331550802139, |
| "grad_norm": 3.769595195990405e-06, |
| "learning_rate": 6.006226923275738e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6511616, |
| "step": 10515 |
| }, |
| { |
| "epoch": 18.75222816399287, |
| "grad_norm": 8.503994467901066e-05, |
| "learning_rate": 5.921790420058582e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6514240, |
| "step": 10520 |
| }, |
| { |
| "epoch": 18.76114081996435, |
| "grad_norm": 4.197690032015089e-06, |
| "learning_rate": 5.837944522961075e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6517056, |
| "step": 10525 |
| }, |
| { |
| "epoch": 18.77005347593583, |
| "grad_norm": 3.576196377252927e-06, |
| "learning_rate": 5.754689434868677e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6520032, |
| "step": 10530 |
| }, |
| { |
| "epoch": 18.778966131907307, |
| "grad_norm": 7.368656497419579e-06, |
| "learning_rate": 5.672025357237071e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6522784, |
| "step": 10535 |
| }, |
| { |
| "epoch": 18.78787878787879, |
| "grad_norm": 3.6832218484050827e-06, |
| "learning_rate": 5.589952490091948e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6525632, |
| "step": 10540 |
| }, |
| { |
| "epoch": 18.796791443850267, |
| "grad_norm": 4.035174697492039e-06, |
| "learning_rate": 5.508471032028478e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6528832, |
| "step": 10545 |
| }, |
| { |
| "epoch": 18.805704099821746, |
| "grad_norm": 5.856224561284762e-06, |
| "learning_rate": 5.427581180210639e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6532416, |
| "step": 10550 |
| }, |
| { |
| "epoch": 18.814616755793228, |
| "grad_norm": 6.858808774268255e-05, |
| "learning_rate": 5.347283130371e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6535136, |
| "step": 10555 |
| }, |
| { |
| "epoch": 18.823529411764707, |
| "grad_norm": 4.181023086857749e-06, |
| "learning_rate": 5.267577076810026e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6538528, |
| "step": 10560 |
| }, |
| { |
| "epoch": 18.832442067736185, |
| "grad_norm": 4.6028135329834186e-06, |
| "learning_rate": 5.188463212395744e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6540992, |
| "step": 10565 |
| }, |
| { |
| "epoch": 18.841354723707664, |
| "grad_norm": 6.4640212258382235e-06, |
| "learning_rate": 5.1099417285633e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6543744, |
| "step": 10570 |
| }, |
| { |
| "epoch": 18.850267379679146, |
| "grad_norm": 4.091177288501058e-06, |
| "learning_rate": 5.032012815314291e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6546336, |
| "step": 10575 |
| }, |
| { |
| "epoch": 18.859180035650624, |
| "grad_norm": 2.7967815185547806e-05, |
| "learning_rate": 4.954676661216546e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6548864, |
| "step": 10580 |
| }, |
| { |
| "epoch": 18.868092691622103, |
| "grad_norm": 9.529613453196362e-05, |
| "learning_rate": 4.877933453403593e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6551808, |
| "step": 10585 |
| }, |
| { |
| "epoch": 18.87700534759358, |
| "grad_norm": 3.412982914596796e-05, |
| "learning_rate": 4.801783377574088e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6554496, |
| "step": 10590 |
| }, |
| { |
| "epoch": 18.885918003565063, |
| "grad_norm": 4.093836196261691e-06, |
| "learning_rate": 4.726226617991547e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6557312, |
| "step": 10595 |
| }, |
| { |
| "epoch": 18.89483065953654, |
| "grad_norm": 3.870322416332783e-06, |
| "learning_rate": 4.651263357483754e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6560768, |
| "step": 10600 |
| }, |
| { |
| "epoch": 18.90374331550802, |
| "grad_norm": 4.343833097664174e-06, |
| "learning_rate": 4.5768937774424146e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6563712, |
| "step": 10605 |
| }, |
| { |
| "epoch": 18.912655971479502, |
| "grad_norm": 4.011643795820419e-06, |
| "learning_rate": 4.5031180578226637e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6567328, |
| "step": 10610 |
| }, |
| { |
| "epoch": 18.92156862745098, |
| "grad_norm": 4.949677077092929e-06, |
| "learning_rate": 4.4299363771427015e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6570976, |
| "step": 10615 |
| }, |
| { |
| "epoch": 18.93048128342246, |
| "grad_norm": 7.535657459811773e-06, |
| "learning_rate": 4.357348912483211e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6574656, |
| "step": 10620 |
| }, |
| { |
| "epoch": 18.939393939393938, |
| "grad_norm": 6.099482106947107e-06, |
| "learning_rate": 4.2853558394871096e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6578048, |
| "step": 10625 |
| }, |
| { |
| "epoch": 18.94830659536542, |
| "grad_norm": 8.545129276171792e-06, |
| "learning_rate": 4.2139573323589643e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6580576, |
| "step": 10630 |
| }, |
| { |
| "epoch": 18.9572192513369, |
| "grad_norm": 3.974701485276455e-06, |
| "learning_rate": 4.1431535638647436e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6584416, |
| "step": 10635 |
| }, |
| { |
| "epoch": 18.966131907308377, |
| "grad_norm": 5.085218617750797e-06, |
| "learning_rate": 4.072944705331178e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6587488, |
| "step": 10640 |
| }, |
| { |
| "epoch": 18.97504456327986, |
| "grad_norm": 8.860148227540776e-06, |
| "learning_rate": 4.003330926645649e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6590208, |
| "step": 10645 |
| }, |
| { |
| "epoch": 18.983957219251337, |
| "grad_norm": 4.768772669194732e-06, |
| "learning_rate": 3.9343123962553853e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6593568, |
| "step": 10650 |
| }, |
| { |
| "epoch": 18.992869875222816, |
| "grad_norm": 4.617687864083564e-06, |
| "learning_rate": 3.865889281167406e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6596896, |
| "step": 10655 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.27009719610214233, |
| "eval_runtime": 4.5878, |
| "eval_samples_per_second": 54.274, |
| "eval_steps_per_second": 13.732, |
| "num_input_tokens_seen": 6598768, |
| "step": 10659 |
| }, |
| { |
| "epoch": 19.001782531194294, |
| "grad_norm": 3.65996152140724e-06, |
| "learning_rate": 3.7980617469479953e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6599440, |
| "step": 10660 |
| }, |
| { |
| "epoch": 19.010695187165776, |
| "grad_norm": 3.970195393776521e-06, |
| "learning_rate": 3.730829957722171e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6602032, |
| "step": 10665 |
| }, |
| { |
| "epoch": 19.019607843137255, |
| "grad_norm": 4.120309768040897e-06, |
| "learning_rate": 3.6641940761735217e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6605616, |
| "step": 10670 |
| }, |
| { |
| "epoch": 19.028520499108733, |
| "grad_norm": 6.569275683432352e-06, |
| "learning_rate": 3.598154263543596e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6608720, |
| "step": 10675 |
| }, |
| { |
| "epoch": 19.037433155080215, |
| "grad_norm": 5.234503532847157e-06, |
| "learning_rate": 3.532710679631679e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6611664, |
| "step": 10680 |
| }, |
| { |
| "epoch": 19.046345811051694, |
| "grad_norm": 4.1348962440679315e-06, |
| "learning_rate": 3.467863482794348e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6614928, |
| "step": 10685 |
| }, |
| { |
| "epoch": 19.055258467023172, |
| "grad_norm": 4.554530278255697e-06, |
| "learning_rate": 3.4036128299449466e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6617328, |
| "step": 10690 |
| }, |
| { |
| "epoch": 19.06417112299465, |
| "grad_norm": 3.7013876408309443e-06, |
| "learning_rate": 3.3399588765535284e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6619984, |
| "step": 10695 |
| }, |
| { |
| "epoch": 19.073083778966133, |
| "grad_norm": 3.5750003917200956e-06, |
| "learning_rate": 3.276901776646135e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6623408, |
| "step": 10700 |
| }, |
| { |
| "epoch": 19.08199643493761, |
| "grad_norm": 3.6158824059384642e-06, |
| "learning_rate": 3.2144416828046307e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6626192, |
| "step": 10705 |
| }, |
| { |
| "epoch": 19.09090909090909, |
| "grad_norm": 5.441886969492771e-05, |
| "learning_rate": 3.1525787461663405e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6630096, |
| "step": 10710 |
| }, |
| { |
| "epoch": 19.099821746880572, |
| "grad_norm": 3.259347522543976e-06, |
| "learning_rate": 3.091313116423522e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6633360, |
| "step": 10715 |
| }, |
| { |
| "epoch": 19.10873440285205, |
| "grad_norm": 6.232783562154509e-06, |
| "learning_rate": 3.0306449418231464e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6636656, |
| "step": 10720 |
| }, |
| { |
| "epoch": 19.11764705882353, |
| "grad_norm": 4.463233381102327e-06, |
| "learning_rate": 2.9705743691665345e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6639184, |
| "step": 10725 |
| }, |
| { |
| "epoch": 19.126559714795007, |
| "grad_norm": 4.211536725051701e-05, |
| "learning_rate": 2.9111015438088583e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6642256, |
| "step": 10730 |
| }, |
| { |
| "epoch": 19.13547237076649, |
| "grad_norm": 4.517605702858418e-06, |
| "learning_rate": 2.852226609659059e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6645680, |
| "step": 10735 |
| }, |
| { |
| "epoch": 19.144385026737968, |
| "grad_norm": 3.99427972297417e-06, |
| "learning_rate": 2.793949709179178e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6648784, |
| "step": 10740 |
| }, |
| { |
| "epoch": 19.153297682709447, |
| "grad_norm": 4.534751042228891e-06, |
| "learning_rate": 2.7362709833842757e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6651728, |
| "step": 10745 |
| }, |
| { |
| "epoch": 19.16221033868093, |
| "grad_norm": 4.9152927203977015e-06, |
| "learning_rate": 2.679190571841933e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6654864, |
| "step": 10750 |
| }, |
| { |
| "epoch": 19.171122994652407, |
| "grad_norm": 4.616060323314741e-06, |
| "learning_rate": 2.62270861267197e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6657776, |
| "step": 10755 |
| }, |
| { |
| "epoch": 19.180035650623886, |
| "grad_norm": 8.252305269706994e-05, |
| "learning_rate": 2.566825242546117e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6661264, |
| "step": 10760 |
| }, |
| { |
| "epoch": 19.188948306595364, |
| "grad_norm": 4.335048288339749e-06, |
| "learning_rate": 2.511540596687678e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6665584, |
| "step": 10765 |
| }, |
| { |
| "epoch": 19.197860962566846, |
| "grad_norm": 7.008193279034458e-06, |
| "learning_rate": 2.456854808871201e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6669200, |
| "step": 10770 |
| }, |
| { |
| "epoch": 19.206773618538325, |
| "grad_norm": 4.312935288908193e-06, |
| "learning_rate": 2.4027680114221405e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6672624, |
| "step": 10775 |
| }, |
| { |
| "epoch": 19.215686274509803, |
| "grad_norm": 4.447610626812093e-05, |
| "learning_rate": 2.3492803352165303e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6675632, |
| "step": 10780 |
| }, |
| { |
| "epoch": 19.224598930481285, |
| "grad_norm": 3.66940571439045e-06, |
| "learning_rate": 2.2963919096807285e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6678352, |
| "step": 10785 |
| }, |
| { |
| "epoch": 19.233511586452764, |
| "grad_norm": 1.1107338650617748e-05, |
| "learning_rate": 2.244102862791031e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6681296, |
| "step": 10790 |
| }, |
| { |
| "epoch": 19.242424242424242, |
| "grad_norm": 4.122299287701026e-06, |
| "learning_rate": 2.1924133210734222e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6685200, |
| "step": 10795 |
| }, |
| { |
| "epoch": 19.25133689839572, |
| "grad_norm": 4.148144398641307e-06, |
| "learning_rate": 2.141323409603241e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6688304, |
| "step": 10800 |
| }, |
| { |
| "epoch": 19.260249554367203, |
| "grad_norm": 5.2304230848676525e-06, |
| "learning_rate": 2.0908332520047645e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6691024, |
| "step": 10805 |
| }, |
| { |
| "epoch": 19.26916221033868, |
| "grad_norm": 4.854452981817303e-06, |
| "learning_rate": 2.0409429704512096e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6693936, |
| "step": 10810 |
| }, |
| { |
| "epoch": 19.27807486631016, |
| "grad_norm": 1.8323980839340948e-05, |
| "learning_rate": 1.9916526856641193e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6697520, |
| "step": 10815 |
| }, |
| { |
| "epoch": 19.28698752228164, |
| "grad_norm": 5.771956693934044e-06, |
| "learning_rate": 1.9429625169131716e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6700880, |
| "step": 10820 |
| }, |
| { |
| "epoch": 19.29590017825312, |
| "grad_norm": 1.2314047125983052e-05, |
| "learning_rate": 1.8948725820160662e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6704432, |
| "step": 10825 |
| }, |
| { |
| "epoch": 19.3048128342246, |
| "grad_norm": 4.320734660723247e-06, |
| "learning_rate": 1.847382997337943e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6707568, |
| "step": 10830 |
| }, |
| { |
| "epoch": 19.313725490196077, |
| "grad_norm": 4.215276476315921e-06, |
| "learning_rate": 1.8004938777913537e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6710736, |
| "step": 10835 |
| }, |
| { |
| "epoch": 19.32263814616756, |
| "grad_norm": 6.504408247565152e-06, |
| "learning_rate": 1.754205336835818e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6714224, |
| "step": 10840 |
| }, |
| { |
| "epoch": 19.331550802139038, |
| "grad_norm": 1.6524629245395772e-05, |
| "learning_rate": 1.7085174864776287e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6717680, |
| "step": 10845 |
| }, |
| { |
| "epoch": 19.340463458110516, |
| "grad_norm": 3.6317417198006297e-06, |
| "learning_rate": 1.6634304372695474e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6720304, |
| "step": 10850 |
| }, |
| { |
| "epoch": 19.349376114081995, |
| "grad_norm": 1.5814599464647472e-05, |
| "learning_rate": 1.6189442983105817e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6723728, |
| "step": 10855 |
| }, |
| { |
| "epoch": 19.358288770053477, |
| "grad_norm": 5.51480798094417e-06, |
| "learning_rate": 1.5750591772456802e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6726320, |
| "step": 10860 |
| }, |
| { |
| "epoch": 19.367201426024955, |
| "grad_norm": 9.519346349406987e-06, |
| "learning_rate": 1.5317751802654823e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6729680, |
| "step": 10865 |
| }, |
| { |
| "epoch": 19.376114081996434, |
| "grad_norm": 5.400910595199093e-06, |
| "learning_rate": 1.489092412106069e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6732080, |
| "step": 10870 |
| }, |
| { |
| "epoch": 19.385026737967916, |
| "grad_norm": 8.067914677667432e-06, |
| "learning_rate": 1.447010976048685e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6734768, |
| "step": 10875 |
| }, |
| { |
| "epoch": 19.393939393939394, |
| "grad_norm": 6.035778824298177e-06, |
| "learning_rate": 1.4055309739195167e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6737168, |
| "step": 10880 |
| }, |
| { |
| "epoch": 19.402852049910873, |
| "grad_norm": 4.06550316256471e-06, |
| "learning_rate": 1.3646525060894422e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6739824, |
| "step": 10885 |
| }, |
| { |
| "epoch": 19.41176470588235, |
| "grad_norm": 4.2528931771812495e-06, |
| "learning_rate": 1.324375671473782e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6742832, |
| "step": 10890 |
| }, |
| { |
| "epoch": 19.420677361853834, |
| "grad_norm": 3.7699369386245962e-06, |
| "learning_rate": 1.2847005675320767e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6746192, |
| "step": 10895 |
| }, |
| { |
| "epoch": 19.429590017825312, |
| "grad_norm": 1.7903794287121855e-05, |
| "learning_rate": 1.2456272902677534e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6750064, |
| "step": 10900 |
| }, |
| { |
| "epoch": 19.43850267379679, |
| "grad_norm": 3.696606427183724e-06, |
| "learning_rate": 1.207155934228099e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6753968, |
| "step": 10905 |
| }, |
| { |
| "epoch": 19.447415329768273, |
| "grad_norm": 4.29191641160287e-06, |
| "learning_rate": 1.16928659250376e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6757200, |
| "step": 10910 |
| }, |
| { |
| "epoch": 19.45632798573975, |
| "grad_norm": 9.008747292682528e-05, |
| "learning_rate": 1.1320193567288529e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6760784, |
| "step": 10915 |
| }, |
| { |
| "epoch": 19.46524064171123, |
| "grad_norm": 7.780551095493138e-05, |
| "learning_rate": 1.0953543170803826e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6763760, |
| "step": 10920 |
| }, |
| { |
| "epoch": 19.474153297682708, |
| "grad_norm": 6.355798177537508e-06, |
| "learning_rate": 1.0592915622782418e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6766768, |
| "step": 10925 |
| }, |
| { |
| "epoch": 19.48306595365419, |
| "grad_norm": 8.294658073282335e-06, |
| "learning_rate": 1.0238311795850163e-07, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6769616, |
| "step": 10930 |
| }, |
| { |
| "epoch": 19.49197860962567, |
| "grad_norm": 4.181761596555589e-06, |
| "learning_rate": 9.889732548056252e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6773968, |
| "step": 10935 |
| }, |
| { |
| "epoch": 19.500891265597147, |
| "grad_norm": 4.156318937020842e-06, |
| "learning_rate": 9.547178722872364e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6776752, |
| "step": 10940 |
| }, |
| { |
| "epoch": 19.509803921568626, |
| "grad_norm": 4.000894023192814e-06, |
| "learning_rate": 9.210651149190175e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6779632, |
| "step": 10945 |
| }, |
| { |
| "epoch": 19.518716577540108, |
| "grad_norm": 4.35921583630261e-06, |
| "learning_rate": 8.880150641319418e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6782800, |
| "step": 10950 |
| }, |
| { |
| "epoch": 19.527629233511586, |
| "grad_norm": 4.094060841453029e-06, |
| "learning_rate": 8.555677998985657e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6785648, |
| "step": 10955 |
| }, |
| { |
| "epoch": 19.536541889483065, |
| "grad_norm": 1.1936950613744557e-05, |
| "learning_rate": 8.23723400732862e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6788944, |
| "step": 10960 |
| }, |
| { |
| "epoch": 19.545454545454547, |
| "grad_norm": 5.246789442026056e-06, |
| "learning_rate": 7.924819436900821e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6792048, |
| "step": 10965 |
| }, |
| { |
| "epoch": 19.554367201426025, |
| "grad_norm": 3.7531422094616573e-06, |
| "learning_rate": 7.618435043664218e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6794896, |
| "step": 10970 |
| }, |
| { |
| "epoch": 19.563279857397504, |
| "grad_norm": 7.082822321535787e-06, |
| "learning_rate": 7.318081568990221e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6797936, |
| "step": 10975 |
| }, |
| { |
| "epoch": 19.572192513368982, |
| "grad_norm": 7.775455742375925e-06, |
| "learning_rate": 7.023759739656078e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6801328, |
| "step": 10980 |
| }, |
| { |
| "epoch": 19.581105169340464, |
| "grad_norm": 1.4494855349767022e-05, |
| "learning_rate": 6.735470267844879e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6805328, |
| "step": 10985 |
| }, |
| { |
| "epoch": 19.590017825311943, |
| "grad_norm": 0.0002640245365910232, |
| "learning_rate": 6.453213851142226e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6808176, |
| "step": 10990 |
| }, |
| { |
| "epoch": 19.59893048128342, |
| "grad_norm": 4.6502209443133324e-06, |
| "learning_rate": 6.176991172535673e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6811632, |
| "step": 10995 |
| }, |
| { |
| "epoch": 19.607843137254903, |
| "grad_norm": 2.5176357667078264e-05, |
| "learning_rate": 5.906802900412789e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6814864, |
| "step": 11000 |
| }, |
| { |
| "epoch": 19.616755793226382, |
| "grad_norm": 5.3292624215828255e-06, |
| "learning_rate": 5.642649688559487e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6817936, |
| "step": 11005 |
| }, |
| { |
| "epoch": 19.62566844919786, |
| "grad_norm": 6.279942317632958e-05, |
| "learning_rate": 5.384532176157808e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6820976, |
| "step": 11010 |
| }, |
| { |
| "epoch": 19.63458110516934, |
| "grad_norm": 3.641805233201012e-05, |
| "learning_rate": 5.132450987785364e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6824368, |
| "step": 11015 |
| }, |
| { |
| "epoch": 19.64349376114082, |
| "grad_norm": 4.100953447050415e-06, |
| "learning_rate": 4.8864067334136735e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6828240, |
| "step": 11020 |
| }, |
| { |
| "epoch": 19.6524064171123, |
| "grad_norm": 4.846000138059026e-06, |
| "learning_rate": 4.6464000084059376e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6831376, |
| "step": 11025 |
| }, |
| { |
| "epoch": 19.661319073083778, |
| "grad_norm": 3.7461518331838306e-06, |
| "learning_rate": 4.412431393516492e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6834064, |
| "step": 11030 |
| }, |
| { |
| "epoch": 19.67023172905526, |
| "grad_norm": 6.311793185886927e-06, |
| "learning_rate": 4.184501454888856e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6837136, |
| "step": 11035 |
| }, |
| { |
| "epoch": 19.67914438502674, |
| "grad_norm": 0.00011546228779479861, |
| "learning_rate": 3.9626107440543515e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6840048, |
| "step": 11040 |
| }, |
| { |
| "epoch": 19.688057040998217, |
| "grad_norm": 1.7715008652885444e-05, |
| "learning_rate": 3.746759797931265e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6843568, |
| "step": 11045 |
| }, |
| { |
| "epoch": 19.696969696969695, |
| "grad_norm": 4.317462298786268e-06, |
| "learning_rate": 3.536949138822909e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6847312, |
| "step": 11050 |
| }, |
| { |
| "epoch": 19.705882352941178, |
| "grad_norm": 4.139226803090423e-06, |
| "learning_rate": 3.333179274417064e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6849904, |
| "step": 11055 |
| }, |
| { |
| "epoch": 19.714795008912656, |
| "grad_norm": 8.816688932711259e-06, |
| "learning_rate": 3.135450697783482e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6853104, |
| "step": 11060 |
| }, |
| { |
| "epoch": 19.723707664884135, |
| "grad_norm": 6.18420881437487e-06, |
| "learning_rate": 2.943763887374995e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6856208, |
| "step": 11065 |
| }, |
| { |
| "epoch": 19.732620320855617, |
| "grad_norm": 4.002175501227612e-06, |
| "learning_rate": 2.7581193070233546e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6859440, |
| "step": 11070 |
| }, |
| { |
| "epoch": 19.741532976827095, |
| "grad_norm": 6.689347355859354e-05, |
| "learning_rate": 2.5785174059408947e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6862096, |
| "step": 11075 |
| }, |
| { |
| "epoch": 19.750445632798574, |
| "grad_norm": 7.351686235779198e-06, |
| "learning_rate": 2.4049586187174787e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6864848, |
| "step": 11080 |
| }, |
| { |
| "epoch": 19.759358288770052, |
| "grad_norm": 8.2574997577467e-06, |
| "learning_rate": 2.237443365320502e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6868336, |
| "step": 11085 |
| }, |
| { |
| "epoch": 19.768270944741534, |
| "grad_norm": 5.573724592977669e-06, |
| "learning_rate": 2.0759720510937773e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6870896, |
| "step": 11090 |
| }, |
| { |
| "epoch": 19.777183600713013, |
| "grad_norm": 3.6006058508064598e-06, |
| "learning_rate": 1.9205450667558743e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6873936, |
| "step": 11095 |
| }, |
| { |
| "epoch": 19.78609625668449, |
| "grad_norm": 1.860854354163166e-05, |
| "learning_rate": 1.7711627883998382e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6876784, |
| "step": 11100 |
| }, |
| { |
| "epoch": 19.795008912655973, |
| "grad_norm": 8.149945642799139e-06, |
| "learning_rate": 1.627825577492359e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6879568, |
| "step": 11105 |
| }, |
| { |
| "epoch": 19.80392156862745, |
| "grad_norm": 4.631604952010093e-06, |
| "learning_rate": 1.4905337808721053e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6882256, |
| "step": 11110 |
| }, |
| { |
| "epoch": 19.81283422459893, |
| "grad_norm": 7.1401186687580775e-06, |
| "learning_rate": 1.3592877307500029e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6885168, |
| "step": 11115 |
| }, |
| { |
| "epoch": 19.82174688057041, |
| "grad_norm": 4.165116024523741e-06, |
| "learning_rate": 1.2340877447072907e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6887856, |
| "step": 11120 |
| }, |
| { |
| "epoch": 19.83065953654189, |
| "grad_norm": 1.1358249139448162e-05, |
| "learning_rate": 1.114934125695799e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6891024, |
| "step": 11125 |
| }, |
| { |
| "epoch": 19.83957219251337, |
| "grad_norm": 5.638150923914509e-06, |
| "learning_rate": 1.001827162036284e-08, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6893904, |
| "step": 11130 |
| }, |
| { |
| "epoch": 19.848484848484848, |
| "grad_norm": 4.670748694479698e-06, |
| "learning_rate": 8.947671274184277e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6896336, |
| "step": 11135 |
| }, |
| { |
| "epoch": 19.85739750445633, |
| "grad_norm": 3.5210650821682066e-05, |
| "learning_rate": 7.937542808997278e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6899120, |
| "step": 11140 |
| }, |
| { |
| "epoch": 19.86631016042781, |
| "grad_norm": 7.780026862747036e-06, |
| "learning_rate": 6.987888669052201e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6901808, |
| "step": 11145 |
| }, |
| { |
| "epoch": 19.875222816399287, |
| "grad_norm": 4.677411652664887e-06, |
| "learning_rate": 6.098711152266456e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6905200, |
| "step": 11150 |
| }, |
| { |
| "epoch": 19.884135472370765, |
| "grad_norm": 7.62782474339474e-06, |
| "learning_rate": 5.270012410216185e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6908272, |
| "step": 11155 |
| }, |
| { |
| "epoch": 19.893048128342247, |
| "grad_norm": 4.890515356237302e-06, |
| "learning_rate": 4.50179444814458e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6911760, |
| "step": 11160 |
| }, |
| { |
| "epoch": 19.901960784313726, |
| "grad_norm": 2.7669217161019333e-05, |
| "learning_rate": 3.794059124934135e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6915568, |
| "step": 11165 |
| }, |
| { |
| "epoch": 19.910873440285204, |
| "grad_norm": 1.4286442819866352e-05, |
| "learning_rate": 3.146808153123293e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6918832, |
| "step": 11170 |
| }, |
| { |
| "epoch": 19.919786096256683, |
| "grad_norm": 6.247079727472737e-05, |
| "learning_rate": 2.560043098895348e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6922288, |
| "step": 11175 |
| }, |
| { |
| "epoch": 19.928698752228165, |
| "grad_norm": 4.766968231706414e-06, |
| "learning_rate": 2.0337653820645673e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6925232, |
| "step": 11180 |
| }, |
| { |
| "epoch": 19.937611408199643, |
| "grad_norm": 4.658260877477005e-06, |
| "learning_rate": 1.5679762760900663e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6927792, |
| "step": 11185 |
| }, |
| { |
| "epoch": 19.946524064171122, |
| "grad_norm": 3.767223006434506e-06, |
| "learning_rate": 1.162676908059157e-09, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6931024, |
| "step": 11190 |
| }, |
| { |
| "epoch": 19.955436720142604, |
| "grad_norm": 6.662194209638983e-06, |
| "learning_rate": 8.178682586928998e-10, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6933616, |
| "step": 11195 |
| }, |
| { |
| "epoch": 19.964349376114082, |
| "grad_norm": 6.126639163994696e-06, |
| "learning_rate": 5.335511623377753e-10, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6936592, |
| "step": 11200 |
| }, |
| { |
| "epoch": 19.97326203208556, |
| "grad_norm": 4.0635059121996164e-06, |
| "learning_rate": 3.0972630696846084e-10, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6939024, |
| "step": 11205 |
| }, |
| { |
| "epoch": 19.98217468805704, |
| "grad_norm": 5.989404144202126e-06, |
| "learning_rate": 1.463942341850544e-10, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6942032, |
| "step": 11210 |
| }, |
| { |
| "epoch": 19.99108734402852, |
| "grad_norm": 3.8322787077049725e-06, |
| "learning_rate": 4.35553392047483e-11, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6944464, |
| "step": 11215 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.420008397370111e-06, |
| "learning_rate": 1.2098708757068978e-12, |
| "loss": 0.0, |
| "num_input_tokens_seen": 6947288, |
| "step": 11220 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.2706851363182068, |
| "eval_runtime": 4.5892, |
| "eval_samples_per_second": 54.258, |
| "eval_steps_per_second": 13.728, |
| "num_input_tokens_seen": 6947288, |
| "step": 11220 |
| }, |
| { |
| "epoch": 20.0, |
| "num_input_tokens_seen": 6947288, |
| "step": 11220, |
| "total_flos": 3.134451584829358e+17, |
| "train_loss": 0.01365312689491912, |
| "train_runtime": 2907.3969, |
| "train_samples_per_second": 15.416, |
| "train_steps_per_second": 3.859 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 11220, |
| "num_input_tokens_seen": 6947288, |
| "num_train_epochs": 20, |
| "save_steps": 561, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.134451584829358e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|