{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 32115, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015569048731122528, "grad_norm": 3.283604621887207, "learning_rate": 1.2453300124533003e-08, "loss": 1.2194, "step": 5 }, { "epoch": 0.00031138097462245055, "grad_norm": 3.4059951305389404, "learning_rate": 2.8019925280199255e-08, "loss": 1.0645, "step": 10 }, { "epoch": 0.00046707146193367583, "grad_norm": 2.7085843086242676, "learning_rate": 4.3586550435865507e-08, "loss": 1.0979, "step": 15 }, { "epoch": 0.0006227619492449011, "grad_norm": 4.10581111907959, "learning_rate": 5.9153175591531765e-08, "loss": 1.1532, "step": 20 }, { "epoch": 0.0007784524365561264, "grad_norm": 2.6791064739227295, "learning_rate": 7.471980074719802e-08, "loss": 1.0446, "step": 25 }, { "epoch": 0.0009341429238673517, "grad_norm": 4.303100109100342, "learning_rate": 9.028642590286426e-08, "loss": 1.0621, "step": 30 }, { "epoch": 0.001089833411178577, "grad_norm": 2.8380274772644043, "learning_rate": 1.0585305105853052e-07, "loss": 1.1005, "step": 35 }, { "epoch": 0.0012455238984898022, "grad_norm": 3.83164119720459, "learning_rate": 1.2141967621419677e-07, "loss": 1.0321, "step": 40 }, { "epoch": 0.0014012143858010276, "grad_norm": 4.503605842590332, "learning_rate": 1.36986301369863e-07, "loss": 1.1556, "step": 45 }, { "epoch": 0.0015569048731122529, "grad_norm": 4.462259769439697, "learning_rate": 1.5255292652552928e-07, "loss": 1.1444, "step": 50 }, { "epoch": 0.001712595360423478, "grad_norm": 3.9841365814208984, "learning_rate": 1.6811955168119555e-07, "loss": 1.1535, "step": 55 }, { "epoch": 0.0018682858477347033, "grad_norm": 4.321094512939453, "learning_rate": 1.836861768368618e-07, "loss": 1.1788, "step": 60 }, { "epoch": 0.0020239763350459287, "grad_norm": 3.9590845108032227, "learning_rate": 1.9925280199252805e-07, "loss": 1.0349, "step": 65 }, { "epoch": 0.002179666822357154, "grad_norm": 3.7782015800476074, "learning_rate": 2.148194271481943e-07, "loss": 1.0884, "step": 70 }, { "epoch": 0.002335357309668379, "grad_norm": 3.2927517890930176, "learning_rate": 2.3038605230386054e-07, "loss": 1.1591, "step": 75 }, { "epoch": 0.0024910477969796044, "grad_norm": 3.2807750701904297, "learning_rate": 2.459526774595268e-07, "loss": 1.2204, "step": 80 }, { "epoch": 0.0026467382842908296, "grad_norm": 3.374964952468872, "learning_rate": 2.61519302615193e-07, "loss": 1.1733, "step": 85 }, { "epoch": 0.0028024287716020553, "grad_norm": 3.245229482650757, "learning_rate": 2.7708592777085927e-07, "loss": 1.122, "step": 90 }, { "epoch": 0.0029581192589132805, "grad_norm": 4.808199405670166, "learning_rate": 2.9265255292652557e-07, "loss": 1.1701, "step": 95 }, { "epoch": 0.0031138097462245057, "grad_norm": 3.0665745735168457, "learning_rate": 3.082191780821918e-07, "loss": 1.175, "step": 100 }, { "epoch": 0.003269500233535731, "grad_norm": 3.5556416511535645, "learning_rate": 3.2378580323785806e-07, "loss": 1.1011, "step": 105 }, { "epoch": 0.003425190720846956, "grad_norm": 2.885706663131714, "learning_rate": 3.393524283935243e-07, "loss": 1.0874, "step": 110 }, { "epoch": 0.0035808812081581814, "grad_norm": 3.3416695594787598, "learning_rate": 3.549190535491906e-07, "loss": 1.1232, "step": 115 }, { "epoch": 0.0037365716954694066, "grad_norm": 2.832444906234741, "learning_rate": 3.7048567870485685e-07, "loss": 0.9916, "step": 120 }, { "epoch": 0.0038922621827806323, "grad_norm": 3.001929759979248, "learning_rate": 3.860523038605231e-07, "loss": 1.0261, "step": 125 }, { "epoch": 0.0040479526700918575, "grad_norm": 2.647461175918579, "learning_rate": 4.0161892901618934e-07, "loss": 1.0472, "step": 130 }, { "epoch": 0.004203643157403083, "grad_norm": 2.6099634170532227, "learning_rate": 4.171855541718556e-07, "loss": 1.1173, "step": 135 }, { "epoch": 0.004359333644714308, "grad_norm": 2.7095370292663574, "learning_rate": 4.3275217932752183e-07, "loss": 1.1043, "step": 140 }, { "epoch": 0.004515024132025533, "grad_norm": 2.474191665649414, "learning_rate": 4.483188044831881e-07, "loss": 1.0957, "step": 145 }, { "epoch": 0.004670714619336758, "grad_norm": 2.6040124893188477, "learning_rate": 4.638854296388543e-07, "loss": 1.0737, "step": 150 }, { "epoch": 0.004826405106647984, "grad_norm": 2.5907247066497803, "learning_rate": 4.794520547945206e-07, "loss": 1.0252, "step": 155 }, { "epoch": 0.004982095593959209, "grad_norm": 2.646566152572632, "learning_rate": 4.950186799501868e-07, "loss": 1.0734, "step": 160 }, { "epoch": 0.005137786081270434, "grad_norm": 2.7865023612976074, "learning_rate": 5.105853051058532e-07, "loss": 1.0045, "step": 165 }, { "epoch": 0.005293476568581659, "grad_norm": 2.5880703926086426, "learning_rate": 5.261519302615194e-07, "loss": 1.0471, "step": 170 }, { "epoch": 0.005449167055892885, "grad_norm": 2.801464319229126, "learning_rate": 5.417185554171857e-07, "loss": 1.1276, "step": 175 }, { "epoch": 0.005604857543204111, "grad_norm": 2.687166452407837, "learning_rate": 5.572851805728518e-07, "loss": 1.0475, "step": 180 }, { "epoch": 0.005760548030515336, "grad_norm": 2.3103690147399902, "learning_rate": 5.728518057285181e-07, "loss": 1.0397, "step": 185 }, { "epoch": 0.005916238517826561, "grad_norm": 2.5599775314331055, "learning_rate": 5.884184308841843e-07, "loss": 1.0388, "step": 190 }, { "epoch": 0.006071929005137786, "grad_norm": 2.857927083969116, "learning_rate": 6.039850560398506e-07, "loss": 1.0452, "step": 195 }, { "epoch": 0.0062276194924490115, "grad_norm": 3.0142204761505127, "learning_rate": 6.195516811955168e-07, "loss": 1.0695, "step": 200 }, { "epoch": 0.006383309979760237, "grad_norm": 2.9811131954193115, "learning_rate": 6.351183063511831e-07, "loss": 1.0797, "step": 205 }, { "epoch": 0.006539000467071462, "grad_norm": 2.2012641429901123, "learning_rate": 6.506849315068493e-07, "loss": 0.9687, "step": 210 }, { "epoch": 0.006694690954382687, "grad_norm": 2.357551097869873, "learning_rate": 6.662515566625156e-07, "loss": 1.0747, "step": 215 }, { "epoch": 0.006850381441693912, "grad_norm": 2.6318929195404053, "learning_rate": 6.818181818181818e-07, "loss": 1.1324, "step": 220 }, { "epoch": 0.007006071929005138, "grad_norm": 2.229192018508911, "learning_rate": 6.973848069738481e-07, "loss": 1.0048, "step": 225 }, { "epoch": 0.007161762416316363, "grad_norm": 1.7933167219161987, "learning_rate": 7.129514321295143e-07, "loss": 1.0294, "step": 230 }, { "epoch": 0.007317452903627588, "grad_norm": 2.7218077182769775, "learning_rate": 7.285180572851806e-07, "loss": 1.0391, "step": 235 }, { "epoch": 0.007473143390938813, "grad_norm": 2.2177767753601074, "learning_rate": 7.440846824408469e-07, "loss": 1.0734, "step": 240 }, { "epoch": 0.007628833878250039, "grad_norm": 2.5591771602630615, "learning_rate": 7.596513075965131e-07, "loss": 0.9977, "step": 245 }, { "epoch": 0.0077845243655612646, "grad_norm": 2.2198832035064697, "learning_rate": 7.752179327521794e-07, "loss": 1.0364, "step": 250 }, { "epoch": 0.007940214852872489, "grad_norm": 2.004800319671631, "learning_rate": 7.907845579078456e-07, "loss": 1.0527, "step": 255 }, { "epoch": 0.008095905340183715, "grad_norm": 2.560798406600952, "learning_rate": 8.06351183063512e-07, "loss": 1.073, "step": 260 }, { "epoch": 0.00825159582749494, "grad_norm": 2.4387149810791016, "learning_rate": 8.219178082191781e-07, "loss": 1.0534, "step": 265 }, { "epoch": 0.008407286314806165, "grad_norm": 1.9729505777359009, "learning_rate": 8.374844333748445e-07, "loss": 1.0777, "step": 270 }, { "epoch": 0.00856297680211739, "grad_norm": 2.7391395568847656, "learning_rate": 8.530510585305106e-07, "loss": 0.9502, "step": 275 }, { "epoch": 0.008718667289428616, "grad_norm": 2.2698216438293457, "learning_rate": 8.68617683686177e-07, "loss": 0.9947, "step": 280 }, { "epoch": 0.008874357776739842, "grad_norm": 3.628788948059082, "learning_rate": 8.841843088418433e-07, "loss": 0.9264, "step": 285 }, { "epoch": 0.009030048264051066, "grad_norm": 2.3954381942749023, "learning_rate": 8.997509339975095e-07, "loss": 1.106, "step": 290 }, { "epoch": 0.009185738751362292, "grad_norm": 2.3566417694091797, "learning_rate": 9.153175591531758e-07, "loss": 0.9435, "step": 295 }, { "epoch": 0.009341429238673517, "grad_norm": 2.469677686691284, "learning_rate": 9.308841843088419e-07, "loss": 0.9869, "step": 300 }, { "epoch": 0.009497119725984743, "grad_norm": 2.0759243965148926, "learning_rate": 9.464508094645082e-07, "loss": 0.9858, "step": 305 }, { "epoch": 0.009652810213295967, "grad_norm": 2.897146701812744, "learning_rate": 9.620174346201744e-07, "loss": 1.013, "step": 310 }, { "epoch": 0.009808500700607193, "grad_norm": 2.730409622192383, "learning_rate": 9.775840597758407e-07, "loss": 1.1182, "step": 315 }, { "epoch": 0.009964191187918418, "grad_norm": 3.0635416507720947, "learning_rate": 9.931506849315068e-07, "loss": 0.9582, "step": 320 }, { "epoch": 0.010119881675229644, "grad_norm": 2.616448163986206, "learning_rate": 1.0087173100871731e-06, "loss": 1.0114, "step": 325 }, { "epoch": 0.010275572162540868, "grad_norm": 4.41140079498291, "learning_rate": 1.0242839352428394e-06, "loss": 0.9678, "step": 330 }, { "epoch": 0.010431262649852094, "grad_norm": 2.7094991207122803, "learning_rate": 1.0398505603985057e-06, "loss": 1.0222, "step": 335 }, { "epoch": 0.010586953137163319, "grad_norm": 2.8222920894622803, "learning_rate": 1.055417185554172e-06, "loss": 0.9798, "step": 340 }, { "epoch": 0.010742643624474545, "grad_norm": 2.3013155460357666, "learning_rate": 1.070983810709838e-06, "loss": 1.0699, "step": 345 }, { "epoch": 0.01089833411178577, "grad_norm": 2.3985326290130615, "learning_rate": 1.0865504358655044e-06, "loss": 1.0278, "step": 350 }, { "epoch": 0.011054024599096995, "grad_norm": 2.4402284622192383, "learning_rate": 1.1021170610211707e-06, "loss": 1.0531, "step": 355 }, { "epoch": 0.011209715086408221, "grad_norm": 2.181032180786133, "learning_rate": 1.117683686176837e-06, "loss": 1.0181, "step": 360 }, { "epoch": 0.011365405573719446, "grad_norm": 2.075164318084717, "learning_rate": 1.133250311332503e-06, "loss": 0.9799, "step": 365 }, { "epoch": 0.011521096061030672, "grad_norm": 2.1921708583831787, "learning_rate": 1.1488169364881694e-06, "loss": 0.9795, "step": 370 }, { "epoch": 0.011676786548341896, "grad_norm": 2.3534677028656006, "learning_rate": 1.1643835616438357e-06, "loss": 1.0182, "step": 375 }, { "epoch": 0.011832477035653122, "grad_norm": 2.5446934700012207, "learning_rate": 1.179950186799502e-06, "loss": 1.0024, "step": 380 }, { "epoch": 0.011988167522964346, "grad_norm": 2.701848268508911, "learning_rate": 1.1955168119551683e-06, "loss": 0.9637, "step": 385 }, { "epoch": 0.012143858010275572, "grad_norm": 2.1075360774993896, "learning_rate": 1.2110834371108344e-06, "loss": 0.9434, "step": 390 }, { "epoch": 0.012299548497586797, "grad_norm": 2.04843807220459, "learning_rate": 1.2266500622665009e-06, "loss": 1.0201, "step": 395 }, { "epoch": 0.012455238984898023, "grad_norm": 2.8257617950439453, "learning_rate": 1.242216687422167e-06, "loss": 0.9418, "step": 400 }, { "epoch": 0.012610929472209247, "grad_norm": 3.158083438873291, "learning_rate": 1.2577833125778333e-06, "loss": 1.1533, "step": 405 }, { "epoch": 0.012766619959520473, "grad_norm": 2.3794965744018555, "learning_rate": 1.2733499377334995e-06, "loss": 0.8762, "step": 410 }, { "epoch": 0.012922310446831698, "grad_norm": 2.077437162399292, "learning_rate": 1.2889165628891656e-06, "loss": 0.9685, "step": 415 }, { "epoch": 0.013078000934142924, "grad_norm": 2.3617327213287354, "learning_rate": 1.304483188044832e-06, "loss": 0.9773, "step": 420 }, { "epoch": 0.01323369142145415, "grad_norm": 2.621269941329956, "learning_rate": 1.3200498132004982e-06, "loss": 0.9972, "step": 425 }, { "epoch": 0.013389381908765374, "grad_norm": 2.595856189727783, "learning_rate": 1.3356164383561645e-06, "loss": 0.9563, "step": 430 }, { "epoch": 0.0135450723960766, "grad_norm": 2.9191460609436035, "learning_rate": 1.3511830635118308e-06, "loss": 0.9584, "step": 435 }, { "epoch": 0.013700762883387825, "grad_norm": 3.1975576877593994, "learning_rate": 1.366749688667497e-06, "loss": 0.9581, "step": 440 }, { "epoch": 0.01385645337069905, "grad_norm": 1.9367632865905762, "learning_rate": 1.3823163138231632e-06, "loss": 0.9046, "step": 445 }, { "epoch": 0.014012143858010275, "grad_norm": 2.0651473999023438, "learning_rate": 1.3978829389788295e-06, "loss": 1.0074, "step": 450 }, { "epoch": 0.014167834345321501, "grad_norm": 2.1583235263824463, "learning_rate": 1.4134495641344958e-06, "loss": 0.9797, "step": 455 }, { "epoch": 0.014323524832632726, "grad_norm": 2.4611079692840576, "learning_rate": 1.429016189290162e-06, "loss": 1.0284, "step": 460 }, { "epoch": 0.014479215319943952, "grad_norm": 2.190833568572998, "learning_rate": 1.4445828144458282e-06, "loss": 0.9479, "step": 465 }, { "epoch": 0.014634905807255176, "grad_norm": 2.708170175552368, "learning_rate": 1.4601494396014945e-06, "loss": 1.022, "step": 470 }, { "epoch": 0.014790596294566402, "grad_norm": 2.3228135108947754, "learning_rate": 1.4757160647571608e-06, "loss": 1.0291, "step": 475 }, { "epoch": 0.014946286781877626, "grad_norm": 2.344114065170288, "learning_rate": 1.491282689912827e-06, "loss": 0.9374, "step": 480 }, { "epoch": 0.015101977269188853, "grad_norm": 3.221118211746216, "learning_rate": 1.5068493150684932e-06, "loss": 0.9851, "step": 485 }, { "epoch": 0.015257667756500079, "grad_norm": 2.579148292541504, "learning_rate": 1.5224159402241595e-06, "loss": 1.0808, "step": 490 }, { "epoch": 0.015413358243811303, "grad_norm": 2.2942442893981934, "learning_rate": 1.5379825653798258e-06, "loss": 0.9948, "step": 495 }, { "epoch": 0.015569048731122529, "grad_norm": 1.8858500719070435, "learning_rate": 1.553549190535492e-06, "loss": 0.9601, "step": 500 }, { "epoch": 0.015724739218433755, "grad_norm": 2.68804669380188, "learning_rate": 1.5691158156911582e-06, "loss": 0.9604, "step": 505 }, { "epoch": 0.015880429705744978, "grad_norm": 2.1350057125091553, "learning_rate": 1.5846824408468245e-06, "loss": 0.9676, "step": 510 }, { "epoch": 0.016036120193056204, "grad_norm": 2.2392661571502686, "learning_rate": 1.6002490660024908e-06, "loss": 1.0107, "step": 515 }, { "epoch": 0.01619181068036743, "grad_norm": 2.6814496517181396, "learning_rate": 1.615815691158157e-06, "loss": 1.0588, "step": 520 }, { "epoch": 0.016347501167678656, "grad_norm": 2.2715704441070557, "learning_rate": 1.6313823163138233e-06, "loss": 0.9343, "step": 525 }, { "epoch": 0.01650319165498988, "grad_norm": 2.3646368980407715, "learning_rate": 1.6469489414694894e-06, "loss": 0.9355, "step": 530 }, { "epoch": 0.016658882142301105, "grad_norm": 2.642336845397949, "learning_rate": 1.6625155666251557e-06, "loss": 0.9297, "step": 535 }, { "epoch": 0.01681457262961233, "grad_norm": 2.205068588256836, "learning_rate": 1.678082191780822e-06, "loss": 0.9239, "step": 540 }, { "epoch": 0.016970263116923557, "grad_norm": 2.6118686199188232, "learning_rate": 1.6936488169364883e-06, "loss": 0.9825, "step": 545 }, { "epoch": 0.01712595360423478, "grad_norm": 2.2325520515441895, "learning_rate": 1.7092154420921544e-06, "loss": 0.8468, "step": 550 }, { "epoch": 0.017281644091546006, "grad_norm": 1.9894431829452515, "learning_rate": 1.7247820672478207e-06, "loss": 0.9439, "step": 555 }, { "epoch": 0.017437334578857232, "grad_norm": 2.178140640258789, "learning_rate": 1.740348692403487e-06, "loss": 1.0311, "step": 560 }, { "epoch": 0.017593025066168458, "grad_norm": 3.766580104827881, "learning_rate": 1.7559153175591533e-06, "loss": 0.9089, "step": 565 }, { "epoch": 0.017748715553479684, "grad_norm": 2.5182740688323975, "learning_rate": 1.7714819427148194e-06, "loss": 0.9702, "step": 570 }, { "epoch": 0.017904406040790907, "grad_norm": 2.2478623390197754, "learning_rate": 1.7870485678704857e-06, "loss": 0.9808, "step": 575 }, { "epoch": 0.018060096528102133, "grad_norm": 2.048468828201294, "learning_rate": 1.802615193026152e-06, "loss": 0.9977, "step": 580 }, { "epoch": 0.01821578701541336, "grad_norm": 2.1747870445251465, "learning_rate": 1.8181818181818183e-06, "loss": 0.8788, "step": 585 }, { "epoch": 0.018371477502724585, "grad_norm": 2.0923125743865967, "learning_rate": 1.8337484433374846e-06, "loss": 0.8647, "step": 590 }, { "epoch": 0.018527167990035807, "grad_norm": 2.090158700942993, "learning_rate": 1.8493150684931507e-06, "loss": 0.9238, "step": 595 }, { "epoch": 0.018682858477347034, "grad_norm": 3.557389974594116, "learning_rate": 1.864881693648817e-06, "loss": 0.9723, "step": 600 }, { "epoch": 0.01883854896465826, "grad_norm": 2.2674388885498047, "learning_rate": 1.8804483188044833e-06, "loss": 0.9901, "step": 605 }, { "epoch": 0.018994239451969486, "grad_norm": 2.463621139526367, "learning_rate": 1.8960149439601498e-06, "loss": 0.9704, "step": 610 }, { "epoch": 0.01914992993928071, "grad_norm": 3.833758592605591, "learning_rate": 1.9115815691158157e-06, "loss": 0.8934, "step": 615 }, { "epoch": 0.019305620426591934, "grad_norm": 2.201092481613159, "learning_rate": 1.927148194271482e-06, "loss": 0.9479, "step": 620 }, { "epoch": 0.01946131091390316, "grad_norm": 3.019127368927002, "learning_rate": 1.9427148194271483e-06, "loss": 1.0137, "step": 625 }, { "epoch": 0.019617001401214387, "grad_norm": 2.5428009033203125, "learning_rate": 1.9582814445828146e-06, "loss": 0.8899, "step": 630 }, { "epoch": 0.019772691888525613, "grad_norm": 2.4588804244995117, "learning_rate": 1.973848069738481e-06, "loss": 0.9258, "step": 635 }, { "epoch": 0.019928382375836835, "grad_norm": 2.1884708404541016, "learning_rate": 1.989414694894147e-06, "loss": 0.9843, "step": 640 }, { "epoch": 0.02008407286314806, "grad_norm": 3.0021109580993652, "learning_rate": 2.0049813200498134e-06, "loss": 0.9438, "step": 645 }, { "epoch": 0.020239763350459287, "grad_norm": 2.393141746520996, "learning_rate": 2.0205479452054797e-06, "loss": 0.978, "step": 650 }, { "epoch": 0.020395453837770514, "grad_norm": 2.831987142562866, "learning_rate": 2.036114570361146e-06, "loss": 1.0542, "step": 655 }, { "epoch": 0.020551144325081736, "grad_norm": 2.3142971992492676, "learning_rate": 2.051681195516812e-06, "loss": 0.9912, "step": 660 }, { "epoch": 0.020706834812392962, "grad_norm": 2.6849210262298584, "learning_rate": 2.0672478206724782e-06, "loss": 0.9174, "step": 665 }, { "epoch": 0.02086252529970419, "grad_norm": 2.0941145420074463, "learning_rate": 2.0828144458281445e-06, "loss": 0.9372, "step": 670 }, { "epoch": 0.021018215787015414, "grad_norm": 2.576441526412964, "learning_rate": 2.098381070983811e-06, "loss": 0.9235, "step": 675 }, { "epoch": 0.021173906274326637, "grad_norm": 2.085787296295166, "learning_rate": 2.113947696139477e-06, "loss": 0.9641, "step": 680 }, { "epoch": 0.021329596761637863, "grad_norm": 2.472637176513672, "learning_rate": 2.1295143212951434e-06, "loss": 0.9987, "step": 685 }, { "epoch": 0.02148528724894909, "grad_norm": 2.2167108058929443, "learning_rate": 2.1450809464508097e-06, "loss": 0.8898, "step": 690 }, { "epoch": 0.021640977736260315, "grad_norm": 2.15908145904541, "learning_rate": 2.160647571606476e-06, "loss": 0.8408, "step": 695 }, { "epoch": 0.02179666822357154, "grad_norm": 2.4335649013519287, "learning_rate": 2.1762141967621423e-06, "loss": 0.8898, "step": 700 }, { "epoch": 0.021952358710882764, "grad_norm": 2.3152613639831543, "learning_rate": 2.191780821917808e-06, "loss": 0.9728, "step": 705 }, { "epoch": 0.02210804919819399, "grad_norm": 2.475432872772217, "learning_rate": 2.2073474470734745e-06, "loss": 0.9664, "step": 710 }, { "epoch": 0.022263739685505216, "grad_norm": 2.1535749435424805, "learning_rate": 2.2229140722291408e-06, "loss": 0.971, "step": 715 }, { "epoch": 0.022419430172816442, "grad_norm": 2.631324052810669, "learning_rate": 2.238480697384807e-06, "loss": 1.0325, "step": 720 }, { "epoch": 0.022575120660127665, "grad_norm": 2.421797752380371, "learning_rate": 2.2540473225404734e-06, "loss": 0.901, "step": 725 }, { "epoch": 0.02273081114743889, "grad_norm": 2.4073374271392822, "learning_rate": 2.2696139476961397e-06, "loss": 0.8805, "step": 730 }, { "epoch": 0.022886501634750117, "grad_norm": 2.450468063354492, "learning_rate": 2.285180572851806e-06, "loss": 0.9267, "step": 735 }, { "epoch": 0.023042192122061343, "grad_norm": 2.5459694862365723, "learning_rate": 2.3007471980074723e-06, "loss": 0.876, "step": 740 }, { "epoch": 0.023197882609372566, "grad_norm": 2.592050552368164, "learning_rate": 2.3163138231631386e-06, "loss": 0.8809, "step": 745 }, { "epoch": 0.023353573096683792, "grad_norm": 2.1723744869232178, "learning_rate": 2.3318804483188044e-06, "loss": 0.8983, "step": 750 }, { "epoch": 0.023509263583995018, "grad_norm": 2.71817946434021, "learning_rate": 2.3474470734744707e-06, "loss": 0.9643, "step": 755 }, { "epoch": 0.023664954071306244, "grad_norm": 1.8171418905258179, "learning_rate": 2.363013698630137e-06, "loss": 1.0699, "step": 760 }, { "epoch": 0.02382064455861747, "grad_norm": 2.27527117729187, "learning_rate": 2.3785803237858033e-06, "loss": 0.8568, "step": 765 }, { "epoch": 0.023976335045928693, "grad_norm": 2.8911936283111572, "learning_rate": 2.3941469489414696e-06, "loss": 0.9286, "step": 770 }, { "epoch": 0.02413202553323992, "grad_norm": 2.5493175983428955, "learning_rate": 2.409713574097136e-06, "loss": 0.8346, "step": 775 }, { "epoch": 0.024287716020551145, "grad_norm": 2.1085166931152344, "learning_rate": 2.4252801992528022e-06, "loss": 1.0091, "step": 780 }, { "epoch": 0.02444340650786237, "grad_norm": 2.2246227264404297, "learning_rate": 2.4408468244084685e-06, "loss": 0.9265, "step": 785 }, { "epoch": 0.024599096995173594, "grad_norm": 2.2094104290008545, "learning_rate": 2.456413449564135e-06, "loss": 0.9806, "step": 790 }, { "epoch": 0.02475478748248482, "grad_norm": 2.1035478115081787, "learning_rate": 2.4719800747198007e-06, "loss": 0.9858, "step": 795 }, { "epoch": 0.024910477969796046, "grad_norm": 2.4528605937957764, "learning_rate": 2.487546699875467e-06, "loss": 0.9128, "step": 800 }, { "epoch": 0.025066168457107272, "grad_norm": 2.341224431991577, "learning_rate": 2.5031133250311333e-06, "loss": 0.9251, "step": 805 }, { "epoch": 0.025221858944418495, "grad_norm": 2.1983251571655273, "learning_rate": 2.5186799501867996e-06, "loss": 0.9646, "step": 810 }, { "epoch": 0.02537754943172972, "grad_norm": 2.393798351287842, "learning_rate": 2.534246575342466e-06, "loss": 0.9636, "step": 815 }, { "epoch": 0.025533239919040947, "grad_norm": 2.1212761402130127, "learning_rate": 2.5498132004981326e-06, "loss": 0.8768, "step": 820 }, { "epoch": 0.025688930406352173, "grad_norm": 2.279715061187744, "learning_rate": 2.565379825653799e-06, "loss": 0.9005, "step": 825 }, { "epoch": 0.025844620893663395, "grad_norm": 2.695458173751831, "learning_rate": 2.5809464508094644e-06, "loss": 0.9681, "step": 830 }, { "epoch": 0.02600031138097462, "grad_norm": 2.512413501739502, "learning_rate": 2.5965130759651307e-06, "loss": 0.8809, "step": 835 }, { "epoch": 0.026156001868285848, "grad_norm": 3.185964584350586, "learning_rate": 2.612079701120797e-06, "loss": 0.9679, "step": 840 }, { "epoch": 0.026311692355597074, "grad_norm": 2.3978590965270996, "learning_rate": 2.6276463262764633e-06, "loss": 0.9263, "step": 845 }, { "epoch": 0.0264673828429083, "grad_norm": 2.466953754425049, "learning_rate": 2.6432129514321296e-06, "loss": 0.9501, "step": 850 }, { "epoch": 0.026623073330219522, "grad_norm": 2.5943169593811035, "learning_rate": 2.658779576587796e-06, "loss": 1.025, "step": 855 }, { "epoch": 0.02677876381753075, "grad_norm": 2.332108736038208, "learning_rate": 2.6743462017434626e-06, "loss": 1.038, "step": 860 }, { "epoch": 0.026934454304841975, "grad_norm": 2.3616631031036377, "learning_rate": 2.689912826899129e-06, "loss": 0.9382, "step": 865 }, { "epoch": 0.0270901447921532, "grad_norm": 2.736388921737671, "learning_rate": 2.705479452054795e-06, "loss": 0.9207, "step": 870 }, { "epoch": 0.027245835279464423, "grad_norm": 2.2055838108062744, "learning_rate": 2.7210460772104606e-06, "loss": 0.9668, "step": 875 }, { "epoch": 0.02740152576677565, "grad_norm": 2.994877815246582, "learning_rate": 2.736612702366127e-06, "loss": 0.98, "step": 880 }, { "epoch": 0.027557216254086876, "grad_norm": 2.135636806488037, "learning_rate": 2.7521793275217932e-06, "loss": 0.8978, "step": 885 }, { "epoch": 0.0277129067413981, "grad_norm": 2.199552297592163, "learning_rate": 2.7677459526774595e-06, "loss": 0.8786, "step": 890 }, { "epoch": 0.027868597228709324, "grad_norm": 2.3891475200653076, "learning_rate": 2.7833125778331262e-06, "loss": 0.9443, "step": 895 }, { "epoch": 0.02802428771602055, "grad_norm": 2.646768569946289, "learning_rate": 2.7988792029887925e-06, "loss": 0.9028, "step": 900 }, { "epoch": 0.028179978203331776, "grad_norm": 2.4538040161132812, "learning_rate": 2.814445828144459e-06, "loss": 0.983, "step": 905 }, { "epoch": 0.028335668690643002, "grad_norm": 2.589569330215454, "learning_rate": 2.830012453300125e-06, "loss": 0.8873, "step": 910 }, { "epoch": 0.02849135917795423, "grad_norm": 2.8329381942749023, "learning_rate": 2.8455790784557914e-06, "loss": 0.9979, "step": 915 }, { "epoch": 0.02864704966526545, "grad_norm": 2.4603025913238525, "learning_rate": 2.861145703611457e-06, "loss": 0.9702, "step": 920 }, { "epoch": 0.028802740152576677, "grad_norm": 2.0460257530212402, "learning_rate": 2.876712328767123e-06, "loss": 0.8911, "step": 925 }, { "epoch": 0.028958430639887903, "grad_norm": 2.9115917682647705, "learning_rate": 2.8922789539227895e-06, "loss": 0.9975, "step": 930 }, { "epoch": 0.02911412112719913, "grad_norm": 2.5836968421936035, "learning_rate": 2.907845579078456e-06, "loss": 0.9188, "step": 935 }, { "epoch": 0.029269811614510352, "grad_norm": 2.514178514480591, "learning_rate": 2.9234122042341225e-06, "loss": 0.9905, "step": 940 }, { "epoch": 0.029425502101821578, "grad_norm": 2.091116428375244, "learning_rate": 2.938978829389789e-06, "loss": 0.9271, "step": 945 }, { "epoch": 0.029581192589132804, "grad_norm": 1.8732845783233643, "learning_rate": 2.954545454545455e-06, "loss": 0.984, "step": 950 }, { "epoch": 0.02973688307644403, "grad_norm": 2.411909818649292, "learning_rate": 2.9701120797011214e-06, "loss": 0.9352, "step": 955 }, { "epoch": 0.029892573563755253, "grad_norm": 2.297410249710083, "learning_rate": 2.9856787048567877e-06, "loss": 0.9735, "step": 960 }, { "epoch": 0.03004826405106648, "grad_norm": 2.1982038021087646, "learning_rate": 3.001245330012453e-06, "loss": 0.9121, "step": 965 }, { "epoch": 0.030203954538377705, "grad_norm": 2.0340113639831543, "learning_rate": 3.0168119551681194e-06, "loss": 0.8618, "step": 970 }, { "epoch": 0.03035964502568893, "grad_norm": 2.3781206607818604, "learning_rate": 3.032378580323786e-06, "loss": 0.9568, "step": 975 }, { "epoch": 0.030515335513000157, "grad_norm": 2.550605297088623, "learning_rate": 3.0479452054794525e-06, "loss": 0.9022, "step": 980 }, { "epoch": 0.03067102600031138, "grad_norm": 2.1174941062927246, "learning_rate": 3.0635118306351188e-06, "loss": 0.952, "step": 985 }, { "epoch": 0.030826716487622606, "grad_norm": 2.7364578247070312, "learning_rate": 3.079078455790785e-06, "loss": 0.8949, "step": 990 }, { "epoch": 0.030982406974933832, "grad_norm": 2.3315956592559814, "learning_rate": 3.0946450809464514e-06, "loss": 0.9519, "step": 995 }, { "epoch": 0.031138097462245058, "grad_norm": 2.2858662605285645, "learning_rate": 3.1102117061021177e-06, "loss": 0.916, "step": 1000 }, { "epoch": 0.031293787949556284, "grad_norm": 2.2380332946777344, "learning_rate": 3.125778331257783e-06, "loss": 0.9088, "step": 1005 }, { "epoch": 0.03144947843686751, "grad_norm": 2.7578177452087402, "learning_rate": 3.1413449564134494e-06, "loss": 0.9603, "step": 1010 }, { "epoch": 0.03160516892417873, "grad_norm": 2.598353624343872, "learning_rate": 3.156911581569116e-06, "loss": 0.8806, "step": 1015 }, { "epoch": 0.031760859411489956, "grad_norm": 2.029369592666626, "learning_rate": 3.1724782067247824e-06, "loss": 0.9001, "step": 1020 }, { "epoch": 0.03191654989880118, "grad_norm": 2.175008773803711, "learning_rate": 3.1880448318804487e-06, "loss": 0.8806, "step": 1025 }, { "epoch": 0.03207224038611241, "grad_norm": 2.2649056911468506, "learning_rate": 3.203611457036115e-06, "loss": 0.892, "step": 1030 }, { "epoch": 0.032227930873423634, "grad_norm": 2.4121391773223877, "learning_rate": 3.2191780821917813e-06, "loss": 0.9662, "step": 1035 }, { "epoch": 0.03238362136073486, "grad_norm": 2.6578874588012695, "learning_rate": 3.2347447073474476e-06, "loss": 0.9616, "step": 1040 }, { "epoch": 0.032539311848046086, "grad_norm": 2.4220187664031982, "learning_rate": 3.250311332503114e-06, "loss": 0.8747, "step": 1045 }, { "epoch": 0.03269500233535731, "grad_norm": 2.857973575592041, "learning_rate": 3.2658779576587794e-06, "loss": 0.926, "step": 1050 }, { "epoch": 0.03285069282266854, "grad_norm": 2.326024055480957, "learning_rate": 3.281444582814446e-06, "loss": 0.9422, "step": 1055 }, { "epoch": 0.03300638330997976, "grad_norm": 1.9475518465042114, "learning_rate": 3.2970112079701124e-06, "loss": 0.9379, "step": 1060 }, { "epoch": 0.033162073797290983, "grad_norm": 1.969853162765503, "learning_rate": 3.3125778331257787e-06, "loss": 0.8755, "step": 1065 }, { "epoch": 0.03331776428460221, "grad_norm": 2.2439334392547607, "learning_rate": 3.328144458281445e-06, "loss": 0.8369, "step": 1070 }, { "epoch": 0.033473454771913436, "grad_norm": 2.1126739978790283, "learning_rate": 3.3437110834371113e-06, "loss": 0.9281, "step": 1075 }, { "epoch": 0.03362914525922466, "grad_norm": 2.3052613735198975, "learning_rate": 3.3592777085927776e-06, "loss": 0.9731, "step": 1080 }, { "epoch": 0.03378483574653589, "grad_norm": 3.2367234230041504, "learning_rate": 3.374844333748444e-06, "loss": 0.8871, "step": 1085 }, { "epoch": 0.033940526233847114, "grad_norm": 3.706087350845337, "learning_rate": 3.39041095890411e-06, "loss": 0.9873, "step": 1090 }, { "epoch": 0.03409621672115834, "grad_norm": 2.1765525341033936, "learning_rate": 3.405977584059776e-06, "loss": 0.8869, "step": 1095 }, { "epoch": 0.03425190720846956, "grad_norm": 2.5183911323547363, "learning_rate": 3.4215442092154423e-06, "loss": 1.042, "step": 1100 }, { "epoch": 0.034407597695780785, "grad_norm": 2.79896879196167, "learning_rate": 3.4371108343711086e-06, "loss": 0.8948, "step": 1105 }, { "epoch": 0.03456328818309201, "grad_norm": 2.085315704345703, "learning_rate": 3.452677459526775e-06, "loss": 0.9627, "step": 1110 }, { "epoch": 0.03471897867040324, "grad_norm": 2.258185863494873, "learning_rate": 3.4682440846824412e-06, "loss": 0.8531, "step": 1115 }, { "epoch": 0.034874669157714464, "grad_norm": 2.4445066452026367, "learning_rate": 3.4838107098381075e-06, "loss": 0.9013, "step": 1120 }, { "epoch": 0.03503035964502569, "grad_norm": 2.5348050594329834, "learning_rate": 3.499377334993774e-06, "loss": 0.9146, "step": 1125 }, { "epoch": 0.035186050132336916, "grad_norm": 2.207733392715454, "learning_rate": 3.51494396014944e-06, "loss": 0.9106, "step": 1130 }, { "epoch": 0.03534174061964814, "grad_norm": 2.6120240688323975, "learning_rate": 3.5305105853051064e-06, "loss": 1.028, "step": 1135 }, { "epoch": 0.03549743110695937, "grad_norm": 2.1063060760498047, "learning_rate": 3.5460772104607723e-06, "loss": 0.8961, "step": 1140 }, { "epoch": 0.03565312159427059, "grad_norm": 1.9105236530303955, "learning_rate": 3.5616438356164386e-06, "loss": 0.9341, "step": 1145 }, { "epoch": 0.03580881208158181, "grad_norm": 2.303560733795166, "learning_rate": 3.577210460772105e-06, "loss": 0.9178, "step": 1150 }, { "epoch": 0.03596450256889304, "grad_norm": 2.218660831451416, "learning_rate": 3.592777085927771e-06, "loss": 0.9421, "step": 1155 }, { "epoch": 0.036120193056204265, "grad_norm": 3.021721363067627, "learning_rate": 3.6083437110834375e-06, "loss": 0.9098, "step": 1160 }, { "epoch": 0.03627588354351549, "grad_norm": 2.5375850200653076, "learning_rate": 3.623910336239104e-06, "loss": 0.9111, "step": 1165 }, { "epoch": 0.03643157403082672, "grad_norm": 2.1779801845550537, "learning_rate": 3.63947696139477e-06, "loss": 0.9966, "step": 1170 }, { "epoch": 0.036587264518137944, "grad_norm": 1.9874790906906128, "learning_rate": 3.6550435865504364e-06, "loss": 0.9422, "step": 1175 }, { "epoch": 0.03674295500544917, "grad_norm": 2.5515897274017334, "learning_rate": 3.6706102117061027e-06, "loss": 0.9237, "step": 1180 }, { "epoch": 0.036898645492760396, "grad_norm": 2.384657144546509, "learning_rate": 3.6861768368617686e-06, "loss": 0.9328, "step": 1185 }, { "epoch": 0.037054335980071615, "grad_norm": 2.004432439804077, "learning_rate": 3.701743462017435e-06, "loss": 0.8816, "step": 1190 }, { "epoch": 0.03721002646738284, "grad_norm": 2.1727659702301025, "learning_rate": 3.717310087173101e-06, "loss": 0.9124, "step": 1195 }, { "epoch": 0.03736571695469407, "grad_norm": 2.312882423400879, "learning_rate": 3.7328767123287675e-06, "loss": 0.8765, "step": 1200 }, { "epoch": 0.03752140744200529, "grad_norm": 2.3897042274475098, "learning_rate": 3.7484433374844338e-06, "loss": 0.8761, "step": 1205 }, { "epoch": 0.03767709792931652, "grad_norm": 2.1145455837249756, "learning_rate": 3.7640099626401e-06, "loss": 0.9391, "step": 1210 }, { "epoch": 0.037832788416627745, "grad_norm": 3.5479962825775146, "learning_rate": 3.7795765877957664e-06, "loss": 0.9714, "step": 1215 }, { "epoch": 0.03798847890393897, "grad_norm": 2.596888780593872, "learning_rate": 3.7951432129514327e-06, "loss": 0.9276, "step": 1220 }, { "epoch": 0.0381441693912502, "grad_norm": 2.725003242492676, "learning_rate": 3.810709838107099e-06, "loss": 0.8886, "step": 1225 }, { "epoch": 0.03829985987856142, "grad_norm": 2.5117547512054443, "learning_rate": 3.826276463262765e-06, "loss": 0.9435, "step": 1230 }, { "epoch": 0.03845555036587264, "grad_norm": 2.043947696685791, "learning_rate": 3.841843088418431e-06, "loss": 0.893, "step": 1235 }, { "epoch": 0.03861124085318387, "grad_norm": 2.2579524517059326, "learning_rate": 3.8574097135740974e-06, "loss": 0.805, "step": 1240 }, { "epoch": 0.038766931340495095, "grad_norm": 2.3138530254364014, "learning_rate": 3.872976338729763e-06, "loss": 0.8375, "step": 1245 }, { "epoch": 0.03892262182780632, "grad_norm": 1.9301809072494507, "learning_rate": 3.88854296388543e-06, "loss": 0.9256, "step": 1250 }, { "epoch": 0.03907831231511755, "grad_norm": 2.4733502864837646, "learning_rate": 3.904109589041096e-06, "loss": 0.9516, "step": 1255 }, { "epoch": 0.03923400280242877, "grad_norm": 2.107326030731201, "learning_rate": 3.919676214196763e-06, "loss": 0.8685, "step": 1260 }, { "epoch": 0.03938969328974, "grad_norm": 1.9983704090118408, "learning_rate": 3.9352428393524285e-06, "loss": 0.9032, "step": 1265 }, { "epoch": 0.039545383777051225, "grad_norm": 2.3073363304138184, "learning_rate": 3.950809464508095e-06, "loss": 0.9229, "step": 1270 }, { "epoch": 0.039701074264362445, "grad_norm": 2.0081968307495117, "learning_rate": 3.966376089663761e-06, "loss": 0.8904, "step": 1275 }, { "epoch": 0.03985676475167367, "grad_norm": 2.47432279586792, "learning_rate": 3.981942714819427e-06, "loss": 0.9125, "step": 1280 }, { "epoch": 0.0400124552389849, "grad_norm": 3.0090091228485107, "learning_rate": 3.997509339975094e-06, "loss": 0.9837, "step": 1285 }, { "epoch": 0.04016814572629612, "grad_norm": 2.17997670173645, "learning_rate": 4.0130759651307596e-06, "loss": 0.8557, "step": 1290 }, { "epoch": 0.04032383621360735, "grad_norm": 2.0967936515808105, "learning_rate": 4.028642590286426e-06, "loss": 0.8505, "step": 1295 }, { "epoch": 0.040479526700918575, "grad_norm": 1.9301856756210327, "learning_rate": 4.044209215442092e-06, "loss": 0.8314, "step": 1300 }, { "epoch": 0.0406352171882298, "grad_norm": 2.9195048809051514, "learning_rate": 4.059775840597759e-06, "loss": 0.9474, "step": 1305 }, { "epoch": 0.04079090767554103, "grad_norm": 2.3655097484588623, "learning_rate": 4.075342465753426e-06, "loss": 0.9221, "step": 1310 }, { "epoch": 0.040946598162852246, "grad_norm": 3.0159451961517334, "learning_rate": 4.0909090909090915e-06, "loss": 0.9125, "step": 1315 }, { "epoch": 0.04110228865016347, "grad_norm": 2.176354169845581, "learning_rate": 4.106475716064757e-06, "loss": 0.9244, "step": 1320 }, { "epoch": 0.0412579791374747, "grad_norm": 2.2410032749176025, "learning_rate": 4.122042341220423e-06, "loss": 0.851, "step": 1325 }, { "epoch": 0.041413669624785925, "grad_norm": 2.1565494537353516, "learning_rate": 4.13760896637609e-06, "loss": 0.9815, "step": 1330 }, { "epoch": 0.04156936011209715, "grad_norm": 2.374141216278076, "learning_rate": 4.153175591531756e-06, "loss": 0.9524, "step": 1335 }, { "epoch": 0.04172505059940838, "grad_norm": 3.5631723403930664, "learning_rate": 4.1687422166874225e-06, "loss": 1.0519, "step": 1340 }, { "epoch": 0.0418807410867196, "grad_norm": 1.9813369512557983, "learning_rate": 4.184308841843088e-06, "loss": 0.8968, "step": 1345 }, { "epoch": 0.04203643157403083, "grad_norm": 2.6984362602233887, "learning_rate": 4.199875466998755e-06, "loss": 0.9164, "step": 1350 }, { "epoch": 0.042192122061342055, "grad_norm": 3.0645453929901123, "learning_rate": 4.215442092154422e-06, "loss": 0.9581, "step": 1355 }, { "epoch": 0.042347812548653274, "grad_norm": 2.262195587158203, "learning_rate": 4.231008717310088e-06, "loss": 0.9191, "step": 1360 }, { "epoch": 0.0425035030359645, "grad_norm": 2.4457004070281982, "learning_rate": 4.246575342465754e-06, "loss": 0.8987, "step": 1365 }, { "epoch": 0.042659193523275726, "grad_norm": 2.097477436065674, "learning_rate": 4.2621419676214195e-06, "loss": 0.9122, "step": 1370 }, { "epoch": 0.04281488401058695, "grad_norm": 2.0758450031280518, "learning_rate": 4.277708592777086e-06, "loss": 0.9844, "step": 1375 }, { "epoch": 0.04297057449789818, "grad_norm": 2.254836320877075, "learning_rate": 4.293275217932752e-06, "loss": 0.9234, "step": 1380 }, { "epoch": 0.043126264985209405, "grad_norm": 1.965528130531311, "learning_rate": 4.308841843088419e-06, "loss": 0.9481, "step": 1385 }, { "epoch": 0.04328195547252063, "grad_norm": 2.207357168197632, "learning_rate": 4.3244084682440855e-06, "loss": 0.9616, "step": 1390 }, { "epoch": 0.04343764595983186, "grad_norm": 2.5827274322509766, "learning_rate": 4.339975093399751e-06, "loss": 0.9098, "step": 1395 }, { "epoch": 0.04359333644714308, "grad_norm": 2.357165813446045, "learning_rate": 4.355541718555418e-06, "loss": 0.854, "step": 1400 }, { "epoch": 0.0437490269344543, "grad_norm": 2.2668285369873047, "learning_rate": 4.371108343711084e-06, "loss": 0.8681, "step": 1405 }, { "epoch": 0.04390471742176553, "grad_norm": 2.051858425140381, "learning_rate": 4.38667496886675e-06, "loss": 0.8718, "step": 1410 }, { "epoch": 0.044060407909076754, "grad_norm": 3.0079305171966553, "learning_rate": 4.402241594022416e-06, "loss": 0.9315, "step": 1415 }, { "epoch": 0.04421609839638798, "grad_norm": 2.3519785404205322, "learning_rate": 4.4178082191780825e-06, "loss": 0.9248, "step": 1420 }, { "epoch": 0.044371788883699206, "grad_norm": 2.352184534072876, "learning_rate": 4.433374844333748e-06, "loss": 0.9377, "step": 1425 }, { "epoch": 0.04452747937101043, "grad_norm": 2.6390204429626465, "learning_rate": 4.448941469489415e-06, "loss": 0.8793, "step": 1430 }, { "epoch": 0.04468316985832166, "grad_norm": 2.208390712738037, "learning_rate": 4.464508094645082e-06, "loss": 0.9044, "step": 1435 }, { "epoch": 0.044838860345632885, "grad_norm": 2.1757047176361084, "learning_rate": 4.480074719800748e-06, "loss": 0.8823, "step": 1440 }, { "epoch": 0.044994550832944104, "grad_norm": 2.163665294647217, "learning_rate": 4.495641344956414e-06, "loss": 0.863, "step": 1445 }, { "epoch": 0.04515024132025533, "grad_norm": 2.2020318508148193, "learning_rate": 4.51120797011208e-06, "loss": 0.9687, "step": 1450 }, { "epoch": 0.045305931807566556, "grad_norm": 2.5220577716827393, "learning_rate": 4.526774595267746e-06, "loss": 0.9147, "step": 1455 }, { "epoch": 0.04546162229487778, "grad_norm": 2.5335886478424072, "learning_rate": 4.542341220423412e-06, "loss": 0.8388, "step": 1460 }, { "epoch": 0.04561731278218901, "grad_norm": 2.1710214614868164, "learning_rate": 4.557907845579079e-06, "loss": 0.9302, "step": 1465 }, { "epoch": 0.045773003269500234, "grad_norm": 2.3980259895324707, "learning_rate": 4.5734744707347454e-06, "loss": 0.8737, "step": 1470 }, { "epoch": 0.04592869375681146, "grad_norm": 2.133552312850952, "learning_rate": 4.589041095890411e-06, "loss": 0.8027, "step": 1475 }, { "epoch": 0.046084384244122686, "grad_norm": 2.0390169620513916, "learning_rate": 4.604607721046078e-06, "loss": 0.8726, "step": 1480 }, { "epoch": 0.04624007473143391, "grad_norm": 2.8825535774230957, "learning_rate": 4.620174346201744e-06, "loss": 0.873, "step": 1485 }, { "epoch": 0.04639576521874513, "grad_norm": 2.0524001121520996, "learning_rate": 4.635740971357411e-06, "loss": 0.8892, "step": 1490 }, { "epoch": 0.04655145570605636, "grad_norm": 2.326815128326416, "learning_rate": 4.6513075965130765e-06, "loss": 0.9346, "step": 1495 }, { "epoch": 0.046707146193367584, "grad_norm": 2.3672306537628174, "learning_rate": 4.666874221668742e-06, "loss": 0.9578, "step": 1500 }, { "epoch": 0.04686283668067881, "grad_norm": 3.295598268508911, "learning_rate": 4.682440846824409e-06, "loss": 1.0054, "step": 1505 }, { "epoch": 0.047018527167990036, "grad_norm": 2.5439209938049316, "learning_rate": 4.698007471980075e-06, "loss": 0.9396, "step": 1510 }, { "epoch": 0.04717421765530126, "grad_norm": 2.366034746170044, "learning_rate": 4.713574097135742e-06, "loss": 0.8983, "step": 1515 }, { "epoch": 0.04732990814261249, "grad_norm": 2.3537790775299072, "learning_rate": 4.729140722291408e-06, "loss": 0.9188, "step": 1520 }, { "epoch": 0.047485598629923714, "grad_norm": 2.102351427078247, "learning_rate": 4.744707347447074e-06, "loss": 0.9898, "step": 1525 }, { "epoch": 0.04764128911723494, "grad_norm": 2.5454773902893066, "learning_rate": 4.76027397260274e-06, "loss": 0.8461, "step": 1530 }, { "epoch": 0.04779697960454616, "grad_norm": 2.2087244987487793, "learning_rate": 4.775840597758407e-06, "loss": 0.9497, "step": 1535 }, { "epoch": 0.047952670091857386, "grad_norm": 2.158704996109009, "learning_rate": 4.791407222914073e-06, "loss": 0.9371, "step": 1540 }, { "epoch": 0.04810836057916861, "grad_norm": 2.249128818511963, "learning_rate": 4.806973848069739e-06, "loss": 0.8751, "step": 1545 }, { "epoch": 0.04826405106647984, "grad_norm": 2.3382530212402344, "learning_rate": 4.822540473225405e-06, "loss": 0.9122, "step": 1550 }, { "epoch": 0.048419741553791064, "grad_norm": 2.411682367324829, "learning_rate": 4.838107098381071e-06, "loss": 0.8851, "step": 1555 }, { "epoch": 0.04857543204110229, "grad_norm": 2.618482828140259, "learning_rate": 4.853673723536738e-06, "loss": 0.7894, "step": 1560 }, { "epoch": 0.048731122528413516, "grad_norm": 1.9321472644805908, "learning_rate": 4.869240348692404e-06, "loss": 0.9096, "step": 1565 }, { "epoch": 0.04888681301572474, "grad_norm": 2.503302574157715, "learning_rate": 4.8848069738480706e-06, "loss": 0.8936, "step": 1570 }, { "epoch": 0.04904250350303596, "grad_norm": 2.0446598529815674, "learning_rate": 4.9003735990037364e-06, "loss": 0.9437, "step": 1575 }, { "epoch": 0.04919819399034719, "grad_norm": 2.311964988708496, "learning_rate": 4.915940224159403e-06, "loss": 0.9211, "step": 1580 }, { "epoch": 0.04935388447765841, "grad_norm": 2.0622713565826416, "learning_rate": 4.931506849315069e-06, "loss": 0.8341, "step": 1585 }, { "epoch": 0.04950957496496964, "grad_norm": 2.372236967086792, "learning_rate": 4.947073474470735e-06, "loss": 1.0006, "step": 1590 }, { "epoch": 0.049665265452280866, "grad_norm": 2.601405620574951, "learning_rate": 4.962640099626402e-06, "loss": 0.9115, "step": 1595 }, { "epoch": 0.04982095593959209, "grad_norm": 2.137932538986206, "learning_rate": 4.9782067247820675e-06, "loss": 0.8173, "step": 1600 }, { "epoch": 0.04997664642690332, "grad_norm": 2.199646234512329, "learning_rate": 4.993773349937734e-06, "loss": 0.9404, "step": 1605 }, { "epoch": 0.050132336914214544, "grad_norm": 2.1440088748931885, "learning_rate": 4.9995083418007805e-06, "loss": 0.9023, "step": 1610 }, { "epoch": 0.05028802740152577, "grad_norm": 2.1103782653808594, "learning_rate": 4.998688911468747e-06, "loss": 0.9369, "step": 1615 }, { "epoch": 0.05044371788883699, "grad_norm": 2.474794387817383, "learning_rate": 4.997869481136714e-06, "loss": 0.9449, "step": 1620 }, { "epoch": 0.050599408376148215, "grad_norm": 2.221768856048584, "learning_rate": 4.997050050804681e-06, "loss": 0.826, "step": 1625 }, { "epoch": 0.05075509886345944, "grad_norm": 2.259697198867798, "learning_rate": 4.996230620472648e-06, "loss": 0.8787, "step": 1630 }, { "epoch": 0.05091078935077067, "grad_norm": 2.6989617347717285, "learning_rate": 4.995411190140615e-06, "loss": 0.9334, "step": 1635 }, { "epoch": 0.051066479838081894, "grad_norm": 2.5472984313964844, "learning_rate": 4.994591759808582e-06, "loss": 0.8928, "step": 1640 }, { "epoch": 0.05122217032539312, "grad_norm": 2.359304428100586, "learning_rate": 4.993772329476549e-06, "loss": 0.8257, "step": 1645 }, { "epoch": 0.051377860812704346, "grad_norm": 1.8511576652526855, "learning_rate": 4.9929528991445155e-06, "loss": 0.8823, "step": 1650 }, { "epoch": 0.05153355130001557, "grad_norm": 2.347883939743042, "learning_rate": 4.9921334688124815e-06, "loss": 0.857, "step": 1655 }, { "epoch": 0.05168924178732679, "grad_norm": 1.9635664224624634, "learning_rate": 4.991314038480448e-06, "loss": 0.8491, "step": 1660 }, { "epoch": 0.05184493227463802, "grad_norm": 2.402343988418579, "learning_rate": 4.990494608148416e-06, "loss": 0.9335, "step": 1665 }, { "epoch": 0.05200062276194924, "grad_norm": 2.2133383750915527, "learning_rate": 4.989675177816383e-06, "loss": 0.9118, "step": 1670 }, { "epoch": 0.05215631324926047, "grad_norm": 2.182804822921753, "learning_rate": 4.988855747484349e-06, "loss": 0.8375, "step": 1675 }, { "epoch": 0.052312003736571695, "grad_norm": 2.439403772354126, "learning_rate": 4.988036317152316e-06, "loss": 1.0069, "step": 1680 }, { "epoch": 0.05246769422388292, "grad_norm": 2.4655115604400635, "learning_rate": 4.987216886820283e-06, "loss": 0.9342, "step": 1685 }, { "epoch": 0.05262338471119415, "grad_norm": 2.1733579635620117, "learning_rate": 4.9863974564882504e-06, "loss": 0.86, "step": 1690 }, { "epoch": 0.052779075198505374, "grad_norm": 2.3587894439697266, "learning_rate": 4.9855780261562165e-06, "loss": 0.9184, "step": 1695 }, { "epoch": 0.0529347656858166, "grad_norm": 2.2983412742614746, "learning_rate": 4.984758595824183e-06, "loss": 0.8656, "step": 1700 }, { "epoch": 0.05309045617312782, "grad_norm": 2.1146183013916016, "learning_rate": 4.98393916549215e-06, "loss": 0.9345, "step": 1705 }, { "epoch": 0.053246146660439045, "grad_norm": 2.368804931640625, "learning_rate": 4.983119735160117e-06, "loss": 0.8286, "step": 1710 }, { "epoch": 0.05340183714775027, "grad_norm": 2.2653415203094482, "learning_rate": 4.982300304828084e-06, "loss": 0.9073, "step": 1715 }, { "epoch": 0.0535575276350615, "grad_norm": 2.487579345703125, "learning_rate": 4.981480874496051e-06, "loss": 0.9202, "step": 1720 }, { "epoch": 0.05371321812237272, "grad_norm": 2.3126909732818604, "learning_rate": 4.980661444164018e-06, "loss": 0.9399, "step": 1725 }, { "epoch": 0.05386890860968395, "grad_norm": 2.9263880252838135, "learning_rate": 4.9798420138319846e-06, "loss": 0.94, "step": 1730 }, { "epoch": 0.054024599096995175, "grad_norm": 2.545346975326538, "learning_rate": 4.979022583499951e-06, "loss": 0.9294, "step": 1735 }, { "epoch": 0.0541802895843064, "grad_norm": 2.130864381790161, "learning_rate": 4.978203153167918e-06, "loss": 0.9388, "step": 1740 }, { "epoch": 0.05433598007161763, "grad_norm": 2.2144992351531982, "learning_rate": 4.977383722835885e-06, "loss": 0.9022, "step": 1745 }, { "epoch": 0.05449167055892885, "grad_norm": 2.227083921432495, "learning_rate": 4.976564292503851e-06, "loss": 0.8138, "step": 1750 }, { "epoch": 0.05464736104624007, "grad_norm": 2.517611265182495, "learning_rate": 4.975744862171819e-06, "loss": 0.9217, "step": 1755 }, { "epoch": 0.0548030515335513, "grad_norm": 2.4257619380950928, "learning_rate": 4.974925431839786e-06, "loss": 0.9589, "step": 1760 }, { "epoch": 0.054958742020862525, "grad_norm": 2.191863775253296, "learning_rate": 4.974106001507753e-06, "loss": 0.8902, "step": 1765 }, { "epoch": 0.05511443250817375, "grad_norm": 2.1323604583740234, "learning_rate": 4.973286571175719e-06, "loss": 0.8661, "step": 1770 }, { "epoch": 0.05527012299548498, "grad_norm": 2.4769585132598877, "learning_rate": 4.9724671408436855e-06, "loss": 0.8993, "step": 1775 }, { "epoch": 0.0554258134827962, "grad_norm": 3.0821971893310547, "learning_rate": 4.971647710511653e-06, "loss": 0.9284, "step": 1780 }, { "epoch": 0.05558150397010743, "grad_norm": 2.7145309448242188, "learning_rate": 4.970828280179619e-06, "loss": 0.8723, "step": 1785 }, { "epoch": 0.05573719445741865, "grad_norm": 2.4385578632354736, "learning_rate": 4.970008849847586e-06, "loss": 0.9299, "step": 1790 }, { "epoch": 0.055892884944729875, "grad_norm": 2.7145490646362305, "learning_rate": 4.969189419515553e-06, "loss": 0.865, "step": 1795 }, { "epoch": 0.0560485754320411, "grad_norm": 2.2025020122528076, "learning_rate": 4.96836998918352e-06, "loss": 0.8218, "step": 1800 }, { "epoch": 0.05620426591935233, "grad_norm": 2.2455432415008545, "learning_rate": 4.967550558851487e-06, "loss": 0.9777, "step": 1805 }, { "epoch": 0.05635995640666355, "grad_norm": 2.1078431606292725, "learning_rate": 4.966731128519454e-06, "loss": 0.8712, "step": 1810 }, { "epoch": 0.05651564689397478, "grad_norm": 2.7334935665130615, "learning_rate": 4.9659116981874205e-06, "loss": 0.9197, "step": 1815 }, { "epoch": 0.056671337381286005, "grad_norm": 2.1328139305114746, "learning_rate": 4.965092267855387e-06, "loss": 0.8895, "step": 1820 }, { "epoch": 0.05682702786859723, "grad_norm": 2.368079423904419, "learning_rate": 4.964272837523354e-06, "loss": 0.9516, "step": 1825 }, { "epoch": 0.05698271835590846, "grad_norm": 2.2522201538085938, "learning_rate": 4.963453407191321e-06, "loss": 0.8422, "step": 1830 }, { "epoch": 0.057138408843219676, "grad_norm": 2.4040050506591797, "learning_rate": 4.962633976859288e-06, "loss": 0.9673, "step": 1835 }, { "epoch": 0.0572940993305309, "grad_norm": 2.7338144779205322, "learning_rate": 4.961814546527255e-06, "loss": 0.8701, "step": 1840 }, { "epoch": 0.05744978981784213, "grad_norm": 2.084688186645508, "learning_rate": 4.960995116195222e-06, "loss": 0.9328, "step": 1845 }, { "epoch": 0.057605480305153355, "grad_norm": 2.1220483779907227, "learning_rate": 4.9601756858631886e-06, "loss": 0.8886, "step": 1850 }, { "epoch": 0.05776117079246458, "grad_norm": 2.254163980484009, "learning_rate": 4.9593562555311554e-06, "loss": 0.8908, "step": 1855 }, { "epoch": 0.05791686127977581, "grad_norm": 2.3993842601776123, "learning_rate": 4.9585368251991215e-06, "loss": 0.8942, "step": 1860 }, { "epoch": 0.05807255176708703, "grad_norm": 2.114431619644165, "learning_rate": 4.957717394867088e-06, "loss": 0.9595, "step": 1865 }, { "epoch": 0.05822824225439826, "grad_norm": 2.0515594482421875, "learning_rate": 4.956897964535056e-06, "loss": 0.8665, "step": 1870 }, { "epoch": 0.058383932741709485, "grad_norm": 2.6313159465789795, "learning_rate": 4.956078534203023e-06, "loss": 0.9221, "step": 1875 }, { "epoch": 0.058539623229020704, "grad_norm": 2.202457904815674, "learning_rate": 4.955259103870989e-06, "loss": 0.8644, "step": 1880 }, { "epoch": 0.05869531371633193, "grad_norm": 2.1401278972625732, "learning_rate": 4.954439673538956e-06, "loss": 0.9193, "step": 1885 }, { "epoch": 0.058851004203643156, "grad_norm": 2.376149892807007, "learning_rate": 4.953620243206923e-06, "loss": 0.8263, "step": 1890 }, { "epoch": 0.05900669469095438, "grad_norm": 2.2251763343811035, "learning_rate": 4.95280081287489e-06, "loss": 0.9368, "step": 1895 }, { "epoch": 0.05916238517826561, "grad_norm": 2.3688836097717285, "learning_rate": 4.951981382542856e-06, "loss": 0.9045, "step": 1900 }, { "epoch": 0.059318075665576835, "grad_norm": 2.1788928508758545, "learning_rate": 4.951161952210823e-06, "loss": 0.9014, "step": 1905 }, { "epoch": 0.05947376615288806, "grad_norm": 3.035266637802124, "learning_rate": 4.95034252187879e-06, "loss": 0.8876, "step": 1910 }, { "epoch": 0.05962945664019929, "grad_norm": 2.439943313598633, "learning_rate": 4.949523091546757e-06, "loss": 0.9654, "step": 1915 }, { "epoch": 0.059785147127510506, "grad_norm": 3.0346474647521973, "learning_rate": 4.948703661214724e-06, "loss": 0.9376, "step": 1920 }, { "epoch": 0.05994083761482173, "grad_norm": 2.4337692260742188, "learning_rate": 4.947884230882691e-06, "loss": 0.8914, "step": 1925 }, { "epoch": 0.06009652810213296, "grad_norm": 1.9191803932189941, "learning_rate": 4.947064800550658e-06, "loss": 0.9307, "step": 1930 }, { "epoch": 0.060252218589444184, "grad_norm": 2.3228490352630615, "learning_rate": 4.9462453702186245e-06, "loss": 0.9035, "step": 1935 }, { "epoch": 0.06040790907675541, "grad_norm": 2.0228307247161865, "learning_rate": 4.945425939886591e-06, "loss": 0.8791, "step": 1940 }, { "epoch": 0.060563599564066636, "grad_norm": 2.5893118381500244, "learning_rate": 4.944606509554558e-06, "loss": 0.8975, "step": 1945 }, { "epoch": 0.06071929005137786, "grad_norm": 2.1577749252319336, "learning_rate": 4.943787079222525e-06, "loss": 0.8868, "step": 1950 }, { "epoch": 0.06087498053868909, "grad_norm": 3.1680068969726562, "learning_rate": 4.942967648890491e-06, "loss": 0.9191, "step": 1955 }, { "epoch": 0.061030671026000315, "grad_norm": 2.310541868209839, "learning_rate": 4.942148218558459e-06, "loss": 0.8046, "step": 1960 }, { "epoch": 0.061186361513311534, "grad_norm": 2.5318684577941895, "learning_rate": 4.941328788226426e-06, "loss": 0.9182, "step": 1965 }, { "epoch": 0.06134205200062276, "grad_norm": 2.857430934906006, "learning_rate": 4.940509357894393e-06, "loss": 0.8928, "step": 1970 }, { "epoch": 0.061497742487933986, "grad_norm": 2.616393566131592, "learning_rate": 4.939689927562359e-06, "loss": 0.8534, "step": 1975 }, { "epoch": 0.06165343297524521, "grad_norm": 2.711148977279663, "learning_rate": 4.9388704972303255e-06, "loss": 0.9247, "step": 1980 }, { "epoch": 0.06180912346255644, "grad_norm": 2.4535999298095703, "learning_rate": 4.938051066898293e-06, "loss": 0.9633, "step": 1985 }, { "epoch": 0.061964813949867664, "grad_norm": 2.937878131866455, "learning_rate": 4.93723163656626e-06, "loss": 0.8827, "step": 1990 }, { "epoch": 0.06212050443717889, "grad_norm": 2.2287607192993164, "learning_rate": 4.936412206234226e-06, "loss": 0.8571, "step": 1995 }, { "epoch": 0.062276194924490116, "grad_norm": 2.347536087036133, "learning_rate": 4.935592775902193e-06, "loss": 0.8963, "step": 2000 }, { "epoch": 0.062431885411801336, "grad_norm": 2.3477139472961426, "learning_rate": 4.93477334557016e-06, "loss": 0.9409, "step": 2005 }, { "epoch": 0.06258757589911257, "grad_norm": 2.0706024169921875, "learning_rate": 4.933953915238127e-06, "loss": 0.8816, "step": 2010 }, { "epoch": 0.06274326638642379, "grad_norm": 2.215817928314209, "learning_rate": 4.9331344849060936e-06, "loss": 0.8853, "step": 2015 }, { "epoch": 0.06289895687373502, "grad_norm": 2.4023361206054688, "learning_rate": 4.9323150545740604e-06, "loss": 0.8686, "step": 2020 }, { "epoch": 0.06305464736104624, "grad_norm": 2.334470510482788, "learning_rate": 4.931495624242027e-06, "loss": 0.7938, "step": 2025 }, { "epoch": 0.06321033784835746, "grad_norm": 2.8142075538635254, "learning_rate": 4.930676193909994e-06, "loss": 0.8616, "step": 2030 }, { "epoch": 0.06336602833566869, "grad_norm": 3.403531789779663, "learning_rate": 4.929856763577961e-06, "loss": 0.899, "step": 2035 }, { "epoch": 0.06352171882297991, "grad_norm": 1.9395052194595337, "learning_rate": 4.929037333245928e-06, "loss": 0.8258, "step": 2040 }, { "epoch": 0.06367740931029114, "grad_norm": 2.1391830444335938, "learning_rate": 4.928217902913895e-06, "loss": 0.9736, "step": 2045 }, { "epoch": 0.06383309979760236, "grad_norm": 2.3220291137695312, "learning_rate": 4.927398472581862e-06, "loss": 0.9269, "step": 2050 }, { "epoch": 0.0639887902849136, "grad_norm": 2.448593854904175, "learning_rate": 4.9265790422498285e-06, "loss": 0.9607, "step": 2055 }, { "epoch": 0.06414448077222482, "grad_norm": 2.120216131210327, "learning_rate": 4.925759611917795e-06, "loss": 0.8595, "step": 2060 }, { "epoch": 0.06430017125953605, "grad_norm": 2.1533780097961426, "learning_rate": 4.924940181585762e-06, "loss": 0.7682, "step": 2065 }, { "epoch": 0.06445586174684727, "grad_norm": 2.692492723464966, "learning_rate": 4.924120751253728e-06, "loss": 0.9656, "step": 2070 }, { "epoch": 0.06461155223415849, "grad_norm": 1.8222336769104004, "learning_rate": 4.923301320921696e-06, "loss": 0.9172, "step": 2075 }, { "epoch": 0.06476724272146972, "grad_norm": 3.3382632732391357, "learning_rate": 4.922481890589663e-06, "loss": 0.8917, "step": 2080 }, { "epoch": 0.06492293320878094, "grad_norm": 2.3720011711120605, "learning_rate": 4.921662460257629e-06, "loss": 0.8216, "step": 2085 }, { "epoch": 0.06507862369609217, "grad_norm": 2.291804552078247, "learning_rate": 4.920843029925596e-06, "loss": 0.9389, "step": 2090 }, { "epoch": 0.06523431418340339, "grad_norm": 1.9511089324951172, "learning_rate": 4.920023599593563e-06, "loss": 0.9563, "step": 2095 }, { "epoch": 0.06539000467071462, "grad_norm": 2.0883944034576416, "learning_rate": 4.91920416926153e-06, "loss": 0.9125, "step": 2100 }, { "epoch": 0.06554569515802584, "grad_norm": 2.5902583599090576, "learning_rate": 4.918384738929496e-06, "loss": 0.9109, "step": 2105 }, { "epoch": 0.06570138564533708, "grad_norm": 2.603945016860962, "learning_rate": 4.917565308597463e-06, "loss": 0.9403, "step": 2110 }, { "epoch": 0.0658570761326483, "grad_norm": 2.220754861831665, "learning_rate": 4.91674587826543e-06, "loss": 0.8782, "step": 2115 }, { "epoch": 0.06601276661995951, "grad_norm": 2.232011079788208, "learning_rate": 4.915926447933397e-06, "loss": 0.7964, "step": 2120 }, { "epoch": 0.06616845710727075, "grad_norm": 2.130406618118286, "learning_rate": 4.915107017601364e-06, "loss": 0.8782, "step": 2125 }, { "epoch": 0.06632414759458197, "grad_norm": 2.531994342803955, "learning_rate": 4.914287587269331e-06, "loss": 0.982, "step": 2130 }, { "epoch": 0.0664798380818932, "grad_norm": 2.14705228805542, "learning_rate": 4.913468156937298e-06, "loss": 0.8607, "step": 2135 }, { "epoch": 0.06663552856920442, "grad_norm": 2.194648265838623, "learning_rate": 4.9126487266052645e-06, "loss": 0.9087, "step": 2140 }, { "epoch": 0.06679121905651565, "grad_norm": 2.644744396209717, "learning_rate": 4.911829296273231e-06, "loss": 0.8963, "step": 2145 }, { "epoch": 0.06694690954382687, "grad_norm": 2.467186689376831, "learning_rate": 4.911009865941198e-06, "loss": 0.9508, "step": 2150 }, { "epoch": 0.0671026000311381, "grad_norm": 2.7660932540893555, "learning_rate": 4.910190435609165e-06, "loss": 0.9137, "step": 2155 }, { "epoch": 0.06725829051844932, "grad_norm": 2.5886118412017822, "learning_rate": 4.909371005277131e-06, "loss": 0.7802, "step": 2160 }, { "epoch": 0.06741398100576054, "grad_norm": 2.6871891021728516, "learning_rate": 4.908551574945099e-06, "loss": 0.8337, "step": 2165 }, { "epoch": 0.06756967149307178, "grad_norm": 2.8378801345825195, "learning_rate": 4.907732144613066e-06, "loss": 0.9514, "step": 2170 }, { "epoch": 0.067725361980383, "grad_norm": 2.7401950359344482, "learning_rate": 4.9069127142810326e-06, "loss": 0.858, "step": 2175 }, { "epoch": 0.06788105246769423, "grad_norm": 1.9982938766479492, "learning_rate": 4.9060932839489986e-06, "loss": 0.8592, "step": 2180 }, { "epoch": 0.06803674295500545, "grad_norm": 2.242845058441162, "learning_rate": 4.9052738536169654e-06, "loss": 0.9197, "step": 2185 }, { "epoch": 0.06819243344231668, "grad_norm": 2.2346301078796387, "learning_rate": 4.904454423284933e-06, "loss": 0.8822, "step": 2190 }, { "epoch": 0.0683481239296279, "grad_norm": 2.5108225345611572, "learning_rate": 4.9036349929529e-06, "loss": 0.8071, "step": 2195 }, { "epoch": 0.06850381441693912, "grad_norm": 2.3316900730133057, "learning_rate": 4.902815562620866e-06, "loss": 0.8133, "step": 2200 }, { "epoch": 0.06865950490425035, "grad_norm": 2.3788537979125977, "learning_rate": 4.901996132288833e-06, "loss": 0.8877, "step": 2205 }, { "epoch": 0.06881519539156157, "grad_norm": 2.174769401550293, "learning_rate": 4.9011767019568e-06, "loss": 0.9096, "step": 2210 }, { "epoch": 0.0689708858788728, "grad_norm": 2.348273515701294, "learning_rate": 4.900357271624767e-06, "loss": 0.7914, "step": 2215 }, { "epoch": 0.06912657636618402, "grad_norm": 2.580627202987671, "learning_rate": 4.8995378412927335e-06, "loss": 0.9401, "step": 2220 }, { "epoch": 0.06928226685349526, "grad_norm": 2.384519577026367, "learning_rate": 4.8987184109607e-06, "loss": 0.9271, "step": 2225 }, { "epoch": 0.06943795734080647, "grad_norm": 1.8668079376220703, "learning_rate": 4.897898980628667e-06, "loss": 0.884, "step": 2230 }, { "epoch": 0.06959364782811771, "grad_norm": 1.808905005455017, "learning_rate": 4.897079550296634e-06, "loss": 0.8713, "step": 2235 }, { "epoch": 0.06974933831542893, "grad_norm": 2.142491340637207, "learning_rate": 4.896260119964601e-06, "loss": 0.975, "step": 2240 }, { "epoch": 0.06990502880274015, "grad_norm": 2.170255184173584, "learning_rate": 4.895440689632568e-06, "loss": 0.8527, "step": 2245 }, { "epoch": 0.07006071929005138, "grad_norm": 2.618077516555786, "learning_rate": 4.894621259300535e-06, "loss": 0.8344, "step": 2250 }, { "epoch": 0.0702164097773626, "grad_norm": 2.1926701068878174, "learning_rate": 4.893801828968502e-06, "loss": 0.9283, "step": 2255 }, { "epoch": 0.07037210026467383, "grad_norm": 2.9993128776550293, "learning_rate": 4.8929823986364685e-06, "loss": 0.9033, "step": 2260 }, { "epoch": 0.07052779075198505, "grad_norm": 2.2325754165649414, "learning_rate": 4.892162968304435e-06, "loss": 0.8875, "step": 2265 }, { "epoch": 0.07068348123929628, "grad_norm": 2.4874467849731445, "learning_rate": 4.891343537972402e-06, "loss": 0.86, "step": 2270 }, { "epoch": 0.0708391717266075, "grad_norm": 2.1531291007995605, "learning_rate": 4.890524107640368e-06, "loss": 0.862, "step": 2275 }, { "epoch": 0.07099486221391874, "grad_norm": 2.024928331375122, "learning_rate": 4.889704677308336e-06, "loss": 0.9288, "step": 2280 }, { "epoch": 0.07115055270122995, "grad_norm": 2.248131513595581, "learning_rate": 4.888885246976303e-06, "loss": 0.9296, "step": 2285 }, { "epoch": 0.07130624318854117, "grad_norm": 1.9718462228775024, "learning_rate": 4.888065816644269e-06, "loss": 0.8172, "step": 2290 }, { "epoch": 0.07146193367585241, "grad_norm": 2.378169536590576, "learning_rate": 4.887246386312236e-06, "loss": 0.8269, "step": 2295 }, { "epoch": 0.07161762416316363, "grad_norm": 2.228590965270996, "learning_rate": 4.886426955980203e-06, "loss": 0.8509, "step": 2300 }, { "epoch": 0.07177331465047486, "grad_norm": 2.302687168121338, "learning_rate": 4.88560752564817e-06, "loss": 0.9417, "step": 2305 }, { "epoch": 0.07192900513778608, "grad_norm": 2.434288740158081, "learning_rate": 4.884788095316136e-06, "loss": 0.9197, "step": 2310 }, { "epoch": 0.07208469562509731, "grad_norm": 2.3677687644958496, "learning_rate": 4.883968664984103e-06, "loss": 0.9409, "step": 2315 }, { "epoch": 0.07224038611240853, "grad_norm": 2.2504208087921143, "learning_rate": 4.88314923465207e-06, "loss": 0.9, "step": 2320 }, { "epoch": 0.07239607659971976, "grad_norm": 2.2305681705474854, "learning_rate": 4.882329804320037e-06, "loss": 0.8836, "step": 2325 }, { "epoch": 0.07255176708703098, "grad_norm": 2.2383973598480225, "learning_rate": 4.881510373988004e-06, "loss": 0.8423, "step": 2330 }, { "epoch": 0.0727074575743422, "grad_norm": 2.131657123565674, "learning_rate": 4.880690943655971e-06, "loss": 0.9537, "step": 2335 }, { "epoch": 0.07286314806165343, "grad_norm": 1.9070625305175781, "learning_rate": 4.8798715133239376e-06, "loss": 0.8511, "step": 2340 }, { "epoch": 0.07301883854896465, "grad_norm": 3.493901014328003, "learning_rate": 4.879052082991904e-06, "loss": 0.8674, "step": 2345 }, { "epoch": 0.07317452903627589, "grad_norm": 2.112846851348877, "learning_rate": 4.878232652659871e-06, "loss": 0.8576, "step": 2350 }, { "epoch": 0.0733302195235871, "grad_norm": 2.3840949535369873, "learning_rate": 4.877413222327838e-06, "loss": 0.851, "step": 2355 }, { "epoch": 0.07348591001089834, "grad_norm": 2.1229186058044434, "learning_rate": 4.876593791995805e-06, "loss": 0.7727, "step": 2360 }, { "epoch": 0.07364160049820956, "grad_norm": 2.305921792984009, "learning_rate": 4.875774361663772e-06, "loss": 0.8983, "step": 2365 }, { "epoch": 0.07379729098552079, "grad_norm": 2.4479787349700928, "learning_rate": 4.874954931331739e-06, "loss": 0.9221, "step": 2370 }, { "epoch": 0.07395298147283201, "grad_norm": 1.8633660078048706, "learning_rate": 4.874135500999706e-06, "loss": 0.9188, "step": 2375 }, { "epoch": 0.07410867196014323, "grad_norm": 2.180283546447754, "learning_rate": 4.8733160706676725e-06, "loss": 0.7869, "step": 2380 }, { "epoch": 0.07426436244745446, "grad_norm": 2.4686195850372314, "learning_rate": 4.8724966403356385e-06, "loss": 0.8422, "step": 2385 }, { "epoch": 0.07442005293476568, "grad_norm": 2.514801502227783, "learning_rate": 4.871677210003606e-06, "loss": 0.9654, "step": 2390 }, { "epoch": 0.07457574342207691, "grad_norm": 2.2483584880828857, "learning_rate": 4.870857779671573e-06, "loss": 0.9382, "step": 2395 }, { "epoch": 0.07473143390938813, "grad_norm": 2.2669599056243896, "learning_rate": 4.87003834933954e-06, "loss": 0.8273, "step": 2400 }, { "epoch": 0.07488712439669937, "grad_norm": 2.0802273750305176, "learning_rate": 4.869218919007506e-06, "loss": 0.8123, "step": 2405 }, { "epoch": 0.07504281488401059, "grad_norm": 2.442228078842163, "learning_rate": 4.868399488675473e-06, "loss": 0.8848, "step": 2410 }, { "epoch": 0.0751985053713218, "grad_norm": 2.7122790813446045, "learning_rate": 4.86758005834344e-06, "loss": 0.8498, "step": 2415 }, { "epoch": 0.07535419585863304, "grad_norm": 1.8616360425949097, "learning_rate": 4.8667606280114075e-06, "loss": 0.8192, "step": 2420 }, { "epoch": 0.07550988634594426, "grad_norm": 2.2256622314453125, "learning_rate": 4.8659411976793735e-06, "loss": 0.9246, "step": 2425 }, { "epoch": 0.07566557683325549, "grad_norm": 2.5420517921447754, "learning_rate": 4.86512176734734e-06, "loss": 0.7901, "step": 2430 }, { "epoch": 0.07582126732056671, "grad_norm": 2.775843620300293, "learning_rate": 4.864302337015307e-06, "loss": 0.8083, "step": 2435 }, { "epoch": 0.07597695780787794, "grad_norm": 2.127397298812866, "learning_rate": 4.863482906683274e-06, "loss": 0.8068, "step": 2440 }, { "epoch": 0.07613264829518916, "grad_norm": 2.7463066577911377, "learning_rate": 4.862663476351241e-06, "loss": 0.8728, "step": 2445 }, { "epoch": 0.0762883387825004, "grad_norm": 2.279409170150757, "learning_rate": 4.861844046019208e-06, "loss": 0.8337, "step": 2450 }, { "epoch": 0.07644402926981161, "grad_norm": 2.3338515758514404, "learning_rate": 4.861024615687175e-06, "loss": 0.8717, "step": 2455 }, { "epoch": 0.07659971975712283, "grad_norm": 2.7174456119537354, "learning_rate": 4.8602051853551416e-06, "loss": 0.9489, "step": 2460 }, { "epoch": 0.07675541024443407, "grad_norm": 2.0545339584350586, "learning_rate": 4.8593857550231084e-06, "loss": 0.8543, "step": 2465 }, { "epoch": 0.07691110073174529, "grad_norm": 2.2194721698760986, "learning_rate": 4.858566324691075e-06, "loss": 0.8177, "step": 2470 }, { "epoch": 0.07706679121905652, "grad_norm": 2.3903660774230957, "learning_rate": 4.857746894359042e-06, "loss": 0.941, "step": 2475 }, { "epoch": 0.07722248170636774, "grad_norm": 2.1401259899139404, "learning_rate": 4.856927464027009e-06, "loss": 0.8402, "step": 2480 }, { "epoch": 0.07737817219367897, "grad_norm": 2.431553363800049, "learning_rate": 4.856108033694976e-06, "loss": 0.8572, "step": 2485 }, { "epoch": 0.07753386268099019, "grad_norm": 2.063371181488037, "learning_rate": 4.855288603362943e-06, "loss": 0.8908, "step": 2490 }, { "epoch": 0.07768955316830142, "grad_norm": 2.61110258102417, "learning_rate": 4.854469173030909e-06, "loss": 0.7858, "step": 2495 }, { "epoch": 0.07784524365561264, "grad_norm": 2.119704246520996, "learning_rate": 4.853649742698876e-06, "loss": 0.849, "step": 2500 }, { "epoch": 0.07800093414292386, "grad_norm": 2.671456813812256, "learning_rate": 4.852830312366843e-06, "loss": 0.9352, "step": 2505 }, { "epoch": 0.0781566246302351, "grad_norm": 2.3283932209014893, "learning_rate": 4.85201088203481e-06, "loss": 0.787, "step": 2510 }, { "epoch": 0.07831231511754631, "grad_norm": 2.1962053775787354, "learning_rate": 4.851191451702776e-06, "loss": 0.8527, "step": 2515 }, { "epoch": 0.07846800560485755, "grad_norm": 2.1821980476379395, "learning_rate": 4.850372021370743e-06, "loss": 0.9312, "step": 2520 }, { "epoch": 0.07862369609216877, "grad_norm": 2.033320426940918, "learning_rate": 4.84955259103871e-06, "loss": 0.8295, "step": 2525 }, { "epoch": 0.07877938657948, "grad_norm": 1.8527387380599976, "learning_rate": 4.848733160706678e-06, "loss": 0.8729, "step": 2530 }, { "epoch": 0.07893507706679122, "grad_norm": 2.207821846008301, "learning_rate": 4.847913730374644e-06, "loss": 0.7952, "step": 2535 }, { "epoch": 0.07909076755410245, "grad_norm": 1.8772039413452148, "learning_rate": 4.847094300042611e-06, "loss": 0.8462, "step": 2540 }, { "epoch": 0.07924645804141367, "grad_norm": 2.142897605895996, "learning_rate": 4.8462748697105775e-06, "loss": 0.9588, "step": 2545 }, { "epoch": 0.07940214852872489, "grad_norm": 2.296881914138794, "learning_rate": 4.845455439378544e-06, "loss": 0.8084, "step": 2550 }, { "epoch": 0.07955783901603612, "grad_norm": 2.5121116638183594, "learning_rate": 4.844636009046511e-06, "loss": 0.7918, "step": 2555 }, { "epoch": 0.07971352950334734, "grad_norm": 2.4558746814727783, "learning_rate": 4.843816578714478e-06, "loss": 0.8565, "step": 2560 }, { "epoch": 0.07986921999065857, "grad_norm": 2.719508647918701, "learning_rate": 4.842997148382445e-06, "loss": 0.9642, "step": 2565 }, { "epoch": 0.0800249104779698, "grad_norm": 2.4146344661712646, "learning_rate": 4.842177718050412e-06, "loss": 0.8894, "step": 2570 }, { "epoch": 0.08018060096528103, "grad_norm": 2.8121769428253174, "learning_rate": 4.841358287718379e-06, "loss": 0.9231, "step": 2575 }, { "epoch": 0.08033629145259225, "grad_norm": 2.2895348072052, "learning_rate": 4.840538857386346e-06, "loss": 0.8396, "step": 2580 }, { "epoch": 0.08049198193990348, "grad_norm": 2.317248582839966, "learning_rate": 4.8397194270543125e-06, "loss": 0.8137, "step": 2585 }, { "epoch": 0.0806476724272147, "grad_norm": 2.2919843196868896, "learning_rate": 4.8388999967222785e-06, "loss": 0.8469, "step": 2590 }, { "epoch": 0.08080336291452592, "grad_norm": 1.9546608924865723, "learning_rate": 4.838080566390246e-06, "loss": 0.8574, "step": 2595 }, { "epoch": 0.08095905340183715, "grad_norm": 2.288544178009033, "learning_rate": 4.837261136058213e-06, "loss": 0.8604, "step": 2600 }, { "epoch": 0.08111474388914837, "grad_norm": 2.226511240005493, "learning_rate": 4.83644170572618e-06, "loss": 0.9095, "step": 2605 }, { "epoch": 0.0812704343764596, "grad_norm": 2.36584210395813, "learning_rate": 4.835622275394146e-06, "loss": 0.8893, "step": 2610 }, { "epoch": 0.08142612486377082, "grad_norm": 2.194528579711914, "learning_rate": 4.834802845062113e-06, "loss": 0.9367, "step": 2615 }, { "epoch": 0.08158181535108205, "grad_norm": 2.125091314315796, "learning_rate": 4.8339834147300805e-06, "loss": 0.8458, "step": 2620 }, { "epoch": 0.08173750583839327, "grad_norm": 1.8183530569076538, "learning_rate": 4.833163984398047e-06, "loss": 0.8401, "step": 2625 }, { "epoch": 0.08189319632570449, "grad_norm": 1.924101710319519, "learning_rate": 4.8323445540660134e-06, "loss": 0.9138, "step": 2630 }, { "epoch": 0.08204888681301573, "grad_norm": 2.258578062057495, "learning_rate": 4.83152512373398e-06, "loss": 0.9309, "step": 2635 }, { "epoch": 0.08220457730032694, "grad_norm": 1.8431562185287476, "learning_rate": 4.830705693401947e-06, "loss": 0.8818, "step": 2640 }, { "epoch": 0.08236026778763818, "grad_norm": 2.6045100688934326, "learning_rate": 4.829886263069914e-06, "loss": 0.9386, "step": 2645 }, { "epoch": 0.0825159582749494, "grad_norm": 2.122636556625366, "learning_rate": 4.829066832737881e-06, "loss": 0.8778, "step": 2650 }, { "epoch": 0.08267164876226063, "grad_norm": 2.06453800201416, "learning_rate": 4.828247402405848e-06, "loss": 0.878, "step": 2655 }, { "epoch": 0.08282733924957185, "grad_norm": 1.7663154602050781, "learning_rate": 4.827427972073815e-06, "loss": 0.7401, "step": 2660 }, { "epoch": 0.08298302973688308, "grad_norm": 2.538478136062622, "learning_rate": 4.8266085417417815e-06, "loss": 0.8881, "step": 2665 }, { "epoch": 0.0831387202241943, "grad_norm": 2.231458902359009, "learning_rate": 4.825789111409748e-06, "loss": 0.8587, "step": 2670 }, { "epoch": 0.08329441071150552, "grad_norm": 2.0448694229125977, "learning_rate": 4.824969681077715e-06, "loss": 0.8373, "step": 2675 }, { "epoch": 0.08345010119881675, "grad_norm": 2.272719383239746, "learning_rate": 4.824150250745682e-06, "loss": 0.9122, "step": 2680 }, { "epoch": 0.08360579168612797, "grad_norm": 2.5782957077026367, "learning_rate": 4.823330820413649e-06, "loss": 0.8208, "step": 2685 }, { "epoch": 0.0837614821734392, "grad_norm": 2.410076141357422, "learning_rate": 4.822511390081616e-06, "loss": 0.8385, "step": 2690 }, { "epoch": 0.08391717266075042, "grad_norm": 2.324800968170166, "learning_rate": 4.821691959749583e-06, "loss": 0.8297, "step": 2695 }, { "epoch": 0.08407286314806166, "grad_norm": 2.0489702224731445, "learning_rate": 4.82087252941755e-06, "loss": 0.9091, "step": 2700 }, { "epoch": 0.08422855363537288, "grad_norm": 2.2529916763305664, "learning_rate": 4.820053099085516e-06, "loss": 0.8557, "step": 2705 }, { "epoch": 0.08438424412268411, "grad_norm": 2.300400495529175, "learning_rate": 4.819233668753483e-06, "loss": 0.924, "step": 2710 }, { "epoch": 0.08453993460999533, "grad_norm": 1.8716250658035278, "learning_rate": 4.81841423842145e-06, "loss": 0.8961, "step": 2715 }, { "epoch": 0.08469562509730655, "grad_norm": 2.085808277130127, "learning_rate": 4.817594808089416e-06, "loss": 0.8396, "step": 2720 }, { "epoch": 0.08485131558461778, "grad_norm": 2.851504325866699, "learning_rate": 4.816775377757383e-06, "loss": 0.96, "step": 2725 }, { "epoch": 0.085007006071929, "grad_norm": 1.923399567604065, "learning_rate": 4.81595594742535e-06, "loss": 0.8309, "step": 2730 }, { "epoch": 0.08516269655924023, "grad_norm": 2.2089130878448486, "learning_rate": 4.815136517093318e-06, "loss": 0.8598, "step": 2735 }, { "epoch": 0.08531838704655145, "grad_norm": 2.0901238918304443, "learning_rate": 4.814317086761284e-06, "loss": 0.9579, "step": 2740 }, { "epoch": 0.08547407753386269, "grad_norm": 2.7145302295684814, "learning_rate": 4.813497656429251e-06, "loss": 0.9185, "step": 2745 }, { "epoch": 0.0856297680211739, "grad_norm": 2.1019744873046875, "learning_rate": 4.8126782260972175e-06, "loss": 0.8818, "step": 2750 }, { "epoch": 0.08578545850848514, "grad_norm": 2.2672131061553955, "learning_rate": 4.811858795765184e-06, "loss": 0.9048, "step": 2755 }, { "epoch": 0.08594114899579636, "grad_norm": 2.4499874114990234, "learning_rate": 4.811039365433151e-06, "loss": 0.8578, "step": 2760 }, { "epoch": 0.08609683948310758, "grad_norm": 2.4043517112731934, "learning_rate": 4.810219935101118e-06, "loss": 0.9569, "step": 2765 }, { "epoch": 0.08625252997041881, "grad_norm": 3.0180745124816895, "learning_rate": 4.809400504769085e-06, "loss": 0.9098, "step": 2770 }, { "epoch": 0.08640822045773003, "grad_norm": 2.4666857719421387, "learning_rate": 4.808581074437052e-06, "loss": 0.9255, "step": 2775 }, { "epoch": 0.08656391094504126, "grad_norm": 2.524409770965576, "learning_rate": 4.807761644105019e-06, "loss": 0.923, "step": 2780 }, { "epoch": 0.08671960143235248, "grad_norm": 1.9897915124893188, "learning_rate": 4.8069422137729855e-06, "loss": 0.8067, "step": 2785 }, { "epoch": 0.08687529191966371, "grad_norm": 2.002516269683838, "learning_rate": 4.806122783440952e-06, "loss": 0.8177, "step": 2790 }, { "epoch": 0.08703098240697493, "grad_norm": 2.4342830181121826, "learning_rate": 4.8053033531089184e-06, "loss": 0.901, "step": 2795 }, { "epoch": 0.08718667289428617, "grad_norm": 2.552844762802124, "learning_rate": 4.804483922776886e-06, "loss": 0.811, "step": 2800 }, { "epoch": 0.08734236338159738, "grad_norm": 1.9612237215042114, "learning_rate": 4.803664492444853e-06, "loss": 0.8332, "step": 2805 }, { "epoch": 0.0874980538689086, "grad_norm": 2.2901113033294678, "learning_rate": 4.80284506211282e-06, "loss": 0.8923, "step": 2810 }, { "epoch": 0.08765374435621984, "grad_norm": 2.2769181728363037, "learning_rate": 4.802025631780786e-06, "loss": 0.8437, "step": 2815 }, { "epoch": 0.08780943484353106, "grad_norm": 2.7966012954711914, "learning_rate": 4.801206201448753e-06, "loss": 0.857, "step": 2820 }, { "epoch": 0.08796512533084229, "grad_norm": 2.2113261222839355, "learning_rate": 4.8003867711167205e-06, "loss": 0.8302, "step": 2825 }, { "epoch": 0.08812081581815351, "grad_norm": 2.444563627243042, "learning_rate": 4.799567340784687e-06, "loss": 0.8686, "step": 2830 }, { "epoch": 0.08827650630546474, "grad_norm": 2.179446220397949, "learning_rate": 4.798747910452653e-06, "loss": 0.9097, "step": 2835 }, { "epoch": 0.08843219679277596, "grad_norm": 2.93967866897583, "learning_rate": 4.79792848012062e-06, "loss": 0.918, "step": 2840 }, { "epoch": 0.0885878872800872, "grad_norm": 2.3560938835144043, "learning_rate": 4.797109049788587e-06, "loss": 0.8877, "step": 2845 }, { "epoch": 0.08874357776739841, "grad_norm": 1.919633388519287, "learning_rate": 4.796289619456555e-06, "loss": 0.8191, "step": 2850 }, { "epoch": 0.08889926825470963, "grad_norm": 2.2975945472717285, "learning_rate": 4.795470189124521e-06, "loss": 0.8152, "step": 2855 }, { "epoch": 0.08905495874202086, "grad_norm": 2.3447582721710205, "learning_rate": 4.794650758792488e-06, "loss": 0.9183, "step": 2860 }, { "epoch": 0.08921064922933208, "grad_norm": 2.6604580879211426, "learning_rate": 4.793831328460455e-06, "loss": 0.8664, "step": 2865 }, { "epoch": 0.08936633971664332, "grad_norm": 2.129537343978882, "learning_rate": 4.7930118981284215e-06, "loss": 0.8991, "step": 2870 }, { "epoch": 0.08952203020395454, "grad_norm": 2.0795562267303467, "learning_rate": 4.792192467796388e-06, "loss": 0.8458, "step": 2875 }, { "epoch": 0.08967772069126577, "grad_norm": 2.306544780731201, "learning_rate": 4.791373037464355e-06, "loss": 0.8383, "step": 2880 }, { "epoch": 0.08983341117857699, "grad_norm": 2.1950082778930664, "learning_rate": 4.790553607132322e-06, "loss": 0.9434, "step": 2885 }, { "epoch": 0.08998910166588821, "grad_norm": 2.153247117996216, "learning_rate": 4.789734176800289e-06, "loss": 0.9068, "step": 2890 }, { "epoch": 0.09014479215319944, "grad_norm": 2.235793352127075, "learning_rate": 4.788914746468256e-06, "loss": 0.9278, "step": 2895 }, { "epoch": 0.09030048264051066, "grad_norm": 2.40755295753479, "learning_rate": 4.788095316136223e-06, "loss": 0.8948, "step": 2900 }, { "epoch": 0.09045617312782189, "grad_norm": 2.3268966674804688, "learning_rate": 4.7872758858041896e-06, "loss": 0.8431, "step": 2905 }, { "epoch": 0.09061186361513311, "grad_norm": 2.118182420730591, "learning_rate": 4.786456455472156e-06, "loss": 0.8878, "step": 2910 }, { "epoch": 0.09076755410244434, "grad_norm": 2.2458441257476807, "learning_rate": 4.785637025140123e-06, "loss": 0.8644, "step": 2915 }, { "epoch": 0.09092324458975556, "grad_norm": 2.202080011367798, "learning_rate": 4.78481759480809e-06, "loss": 0.8572, "step": 2920 }, { "epoch": 0.0910789350770668, "grad_norm": 1.8395111560821533, "learning_rate": 4.783998164476056e-06, "loss": 0.8943, "step": 2925 }, { "epoch": 0.09123462556437802, "grad_norm": 2.0004093647003174, "learning_rate": 4.783178734144023e-06, "loss": 0.8487, "step": 2930 }, { "epoch": 0.09139031605168924, "grad_norm": 2.1894290447235107, "learning_rate": 4.78235930381199e-06, "loss": 0.8433, "step": 2935 }, { "epoch": 0.09154600653900047, "grad_norm": 2.2368757724761963, "learning_rate": 4.781539873479958e-06, "loss": 0.8998, "step": 2940 }, { "epoch": 0.09170169702631169, "grad_norm": 1.8473966121673584, "learning_rate": 4.780720443147924e-06, "loss": 0.888, "step": 2945 }, { "epoch": 0.09185738751362292, "grad_norm": 2.1047637462615967, "learning_rate": 4.7799010128158905e-06, "loss": 0.8755, "step": 2950 }, { "epoch": 0.09201307800093414, "grad_norm": 2.5407681465148926, "learning_rate": 4.779081582483857e-06, "loss": 0.7754, "step": 2955 }, { "epoch": 0.09216876848824537, "grad_norm": 2.437171220779419, "learning_rate": 4.778262152151824e-06, "loss": 0.854, "step": 2960 }, { "epoch": 0.09232445897555659, "grad_norm": 2.1068789958953857, "learning_rate": 4.777442721819791e-06, "loss": 0.8428, "step": 2965 }, { "epoch": 0.09248014946286783, "grad_norm": 2.366657257080078, "learning_rate": 4.776623291487758e-06, "loss": 0.9216, "step": 2970 }, { "epoch": 0.09263583995017904, "grad_norm": 2.2397899627685547, "learning_rate": 4.775803861155725e-06, "loss": 0.8845, "step": 2975 }, { "epoch": 0.09279153043749026, "grad_norm": 2.207642078399658, "learning_rate": 4.774984430823692e-06, "loss": 0.7768, "step": 2980 }, { "epoch": 0.0929472209248015, "grad_norm": 1.9467899799346924, "learning_rate": 4.774165000491659e-06, "loss": 0.8823, "step": 2985 }, { "epoch": 0.09310291141211272, "grad_norm": 2.1066701412200928, "learning_rate": 4.7733455701596255e-06, "loss": 0.9205, "step": 2990 }, { "epoch": 0.09325860189942395, "grad_norm": 2.219558000564575, "learning_rate": 4.772526139827592e-06, "loss": 0.9094, "step": 2995 }, { "epoch": 0.09341429238673517, "grad_norm": 2.803524971008301, "learning_rate": 4.771706709495558e-06, "loss": 0.8881, "step": 3000 }, { "epoch": 0.0935699828740464, "grad_norm": 2.865380048751831, "learning_rate": 4.770887279163526e-06, "loss": 0.8452, "step": 3005 }, { "epoch": 0.09372567336135762, "grad_norm": 2.021775007247925, "learning_rate": 4.770067848831493e-06, "loss": 0.8193, "step": 3010 }, { "epoch": 0.09388136384866885, "grad_norm": 2.3499984741210938, "learning_rate": 4.76924841849946e-06, "loss": 0.8408, "step": 3015 }, { "epoch": 0.09403705433598007, "grad_norm": 2.4631786346435547, "learning_rate": 4.768428988167426e-06, "loss": 0.8688, "step": 3020 }, { "epoch": 0.09419274482329129, "grad_norm": 1.8393235206604004, "learning_rate": 4.767609557835393e-06, "loss": 0.8262, "step": 3025 }, { "epoch": 0.09434843531060252, "grad_norm": 2.1849687099456787, "learning_rate": 4.7667901275033605e-06, "loss": 0.8412, "step": 3030 }, { "epoch": 0.09450412579791374, "grad_norm": 2.604147434234619, "learning_rate": 4.765970697171327e-06, "loss": 0.8987, "step": 3035 }, { "epoch": 0.09465981628522498, "grad_norm": 2.6841070652008057, "learning_rate": 4.765151266839293e-06, "loss": 0.8532, "step": 3040 }, { "epoch": 0.0948155067725362, "grad_norm": 2.047306537628174, "learning_rate": 4.76433183650726e-06, "loss": 0.8781, "step": 3045 }, { "epoch": 0.09497119725984743, "grad_norm": 2.6969785690307617, "learning_rate": 4.763512406175227e-06, "loss": 0.8461, "step": 3050 }, { "epoch": 0.09512688774715865, "grad_norm": 2.024817705154419, "learning_rate": 4.762692975843195e-06, "loss": 0.8694, "step": 3055 }, { "epoch": 0.09528257823446988, "grad_norm": 2.4530272483825684, "learning_rate": 4.761873545511161e-06, "loss": 0.8628, "step": 3060 }, { "epoch": 0.0954382687217811, "grad_norm": 2.189767837524414, "learning_rate": 4.761054115179128e-06, "loss": 0.9308, "step": 3065 }, { "epoch": 0.09559395920909232, "grad_norm": 2.758195400238037, "learning_rate": 4.7602346848470946e-06, "loss": 0.8632, "step": 3070 }, { "epoch": 0.09574964969640355, "grad_norm": 2.690711736679077, "learning_rate": 4.7594152545150614e-06, "loss": 0.8568, "step": 3075 }, { "epoch": 0.09590534018371477, "grad_norm": 2.37143611907959, "learning_rate": 4.758595824183028e-06, "loss": 0.8872, "step": 3080 }, { "epoch": 0.096061030671026, "grad_norm": 2.1954057216644287, "learning_rate": 4.757776393850995e-06, "loss": 0.8767, "step": 3085 }, { "epoch": 0.09621672115833722, "grad_norm": 2.1031782627105713, "learning_rate": 4.756956963518962e-06, "loss": 0.9423, "step": 3090 }, { "epoch": 0.09637241164564846, "grad_norm": 1.8185124397277832, "learning_rate": 4.756137533186929e-06, "loss": 0.8995, "step": 3095 }, { "epoch": 0.09652810213295968, "grad_norm": 1.9448232650756836, "learning_rate": 4.755318102854896e-06, "loss": 0.8406, "step": 3100 }, { "epoch": 0.0966837926202709, "grad_norm": 2.130747079849243, "learning_rate": 4.754498672522863e-06, "loss": 0.8587, "step": 3105 }, { "epoch": 0.09683948310758213, "grad_norm": 2.088467836380005, "learning_rate": 4.7536792421908295e-06, "loss": 0.7939, "step": 3110 }, { "epoch": 0.09699517359489335, "grad_norm": 1.9088728427886963, "learning_rate": 4.7528598118587955e-06, "loss": 0.8155, "step": 3115 }, { "epoch": 0.09715086408220458, "grad_norm": 2.2771968841552734, "learning_rate": 4.752040381526763e-06, "loss": 0.8829, "step": 3120 }, { "epoch": 0.0973065545695158, "grad_norm": 2.5376625061035156, "learning_rate": 4.75122095119473e-06, "loss": 0.8279, "step": 3125 }, { "epoch": 0.09746224505682703, "grad_norm": 2.420715808868408, "learning_rate": 4.750401520862697e-06, "loss": 0.835, "step": 3130 }, { "epoch": 0.09761793554413825, "grad_norm": 2.402330160140991, "learning_rate": 4.749582090530663e-06, "loss": 0.8795, "step": 3135 }, { "epoch": 0.09777362603144948, "grad_norm": 2.231996774673462, "learning_rate": 4.74876266019863e-06, "loss": 0.8641, "step": 3140 }, { "epoch": 0.0979293165187607, "grad_norm": 1.901033878326416, "learning_rate": 4.747943229866598e-06, "loss": 0.8525, "step": 3145 }, { "epoch": 0.09808500700607192, "grad_norm": 2.2670037746429443, "learning_rate": 4.747123799534564e-06, "loss": 0.8302, "step": 3150 }, { "epoch": 0.09824069749338316, "grad_norm": 2.5170223712921143, "learning_rate": 4.7463043692025305e-06, "loss": 0.8161, "step": 3155 }, { "epoch": 0.09839638798069437, "grad_norm": 2.39876651763916, "learning_rate": 4.745484938870497e-06, "loss": 0.9249, "step": 3160 }, { "epoch": 0.09855207846800561, "grad_norm": 2.36861252784729, "learning_rate": 4.744665508538464e-06, "loss": 0.8256, "step": 3165 }, { "epoch": 0.09870776895531683, "grad_norm": 2.173949718475342, "learning_rate": 4.743846078206431e-06, "loss": 0.9168, "step": 3170 }, { "epoch": 0.09886345944262806, "grad_norm": 2.4076523780822754, "learning_rate": 4.743026647874398e-06, "loss": 0.8683, "step": 3175 }, { "epoch": 0.09901914992993928, "grad_norm": 2.657632827758789, "learning_rate": 4.742207217542365e-06, "loss": 0.8631, "step": 3180 }, { "epoch": 0.09917484041725051, "grad_norm": 2.273003339767456, "learning_rate": 4.741387787210332e-06, "loss": 0.8572, "step": 3185 }, { "epoch": 0.09933053090456173, "grad_norm": 2.1488566398620605, "learning_rate": 4.740568356878299e-06, "loss": 0.8927, "step": 3190 }, { "epoch": 0.09948622139187295, "grad_norm": 2.2536935806274414, "learning_rate": 4.7397489265462655e-06, "loss": 0.9029, "step": 3195 }, { "epoch": 0.09964191187918418, "grad_norm": 2.218177556991577, "learning_rate": 4.738929496214232e-06, "loss": 0.8701, "step": 3200 }, { "epoch": 0.0997976023664954, "grad_norm": 2.5103065967559814, "learning_rate": 4.738110065882199e-06, "loss": 0.8112, "step": 3205 }, { "epoch": 0.09995329285380664, "grad_norm": 2.2798163890838623, "learning_rate": 4.737290635550166e-06, "loss": 0.906, "step": 3210 }, { "epoch": 0.10010898334111785, "grad_norm": 2.2425122261047363, "learning_rate": 4.736471205218133e-06, "loss": 0.9099, "step": 3215 }, { "epoch": 0.10026467382842909, "grad_norm": 2.23068904876709, "learning_rate": 4.7356517748861e-06, "loss": 0.8275, "step": 3220 }, { "epoch": 0.10042036431574031, "grad_norm": 2.1959640979766846, "learning_rate": 4.734832344554066e-06, "loss": 0.8395, "step": 3225 }, { "epoch": 0.10057605480305154, "grad_norm": 2.5536279678344727, "learning_rate": 4.7340129142220335e-06, "loss": 0.9028, "step": 3230 }, { "epoch": 0.10073174529036276, "grad_norm": 2.1707935333251953, "learning_rate": 4.73319348389e-06, "loss": 0.9094, "step": 3235 }, { "epoch": 0.10088743577767398, "grad_norm": 2.0428626537323, "learning_rate": 4.732374053557967e-06, "loss": 0.8817, "step": 3240 }, { "epoch": 0.10104312626498521, "grad_norm": 2.2366349697113037, "learning_rate": 4.731554623225933e-06, "loss": 0.8932, "step": 3245 }, { "epoch": 0.10119881675229643, "grad_norm": 2.0009095668792725, "learning_rate": 4.7307351928939e-06, "loss": 0.9234, "step": 3250 }, { "epoch": 0.10135450723960766, "grad_norm": 2.1777923107147217, "learning_rate": 4.729915762561867e-06, "loss": 0.9245, "step": 3255 }, { "epoch": 0.10151019772691888, "grad_norm": 2.168341636657715, "learning_rate": 4.729096332229835e-06, "loss": 0.8094, "step": 3260 }, { "epoch": 0.10166588821423012, "grad_norm": 2.3478944301605225, "learning_rate": 4.728276901897801e-06, "loss": 0.8304, "step": 3265 }, { "epoch": 0.10182157870154133, "grad_norm": 2.252976417541504, "learning_rate": 4.727457471565768e-06, "loss": 0.9178, "step": 3270 }, { "epoch": 0.10197726918885257, "grad_norm": 2.2784295082092285, "learning_rate": 4.7266380412337345e-06, "loss": 0.8303, "step": 3275 }, { "epoch": 0.10213295967616379, "grad_norm": 2.0072038173675537, "learning_rate": 4.725818610901701e-06, "loss": 0.8224, "step": 3280 }, { "epoch": 0.102288650163475, "grad_norm": 1.9865481853485107, "learning_rate": 4.724999180569668e-06, "loss": 0.8714, "step": 3285 }, { "epoch": 0.10244434065078624, "grad_norm": 1.8473429679870605, "learning_rate": 4.724179750237635e-06, "loss": 0.7858, "step": 3290 }, { "epoch": 0.10260003113809746, "grad_norm": 1.957177758216858, "learning_rate": 4.723360319905602e-06, "loss": 0.8788, "step": 3295 }, { "epoch": 0.10275572162540869, "grad_norm": 2.429990768432617, "learning_rate": 4.722540889573569e-06, "loss": 0.808, "step": 3300 }, { "epoch": 0.10291141211271991, "grad_norm": 2.4226162433624268, "learning_rate": 4.721721459241536e-06, "loss": 0.9282, "step": 3305 }, { "epoch": 0.10306710260003114, "grad_norm": 2.143907070159912, "learning_rate": 4.720902028909503e-06, "loss": 0.9172, "step": 3310 }, { "epoch": 0.10322279308734236, "grad_norm": 2.508225440979004, "learning_rate": 4.7200825985774695e-06, "loss": 0.879, "step": 3315 }, { "epoch": 0.10337848357465358, "grad_norm": 2.5535836219787598, "learning_rate": 4.719263168245436e-06, "loss": 0.904, "step": 3320 }, { "epoch": 0.10353417406196481, "grad_norm": 2.254995346069336, "learning_rate": 4.718443737913403e-06, "loss": 0.9014, "step": 3325 }, { "epoch": 0.10368986454927603, "grad_norm": 2.052208423614502, "learning_rate": 4.71762430758137e-06, "loss": 0.8903, "step": 3330 }, { "epoch": 0.10384555503658727, "grad_norm": 2.2314789295196533, "learning_rate": 4.716804877249337e-06, "loss": 0.8971, "step": 3335 }, { "epoch": 0.10400124552389849, "grad_norm": 2.595493793487549, "learning_rate": 4.715985446917303e-06, "loss": 0.9423, "step": 3340 }, { "epoch": 0.10415693601120972, "grad_norm": 2.53218412399292, "learning_rate": 4.715166016585271e-06, "loss": 0.8545, "step": 3345 }, { "epoch": 0.10431262649852094, "grad_norm": 1.9787436723709106, "learning_rate": 4.7143465862532376e-06, "loss": 0.9593, "step": 3350 }, { "epoch": 0.10446831698583217, "grad_norm": 1.9826921224594116, "learning_rate": 4.713527155921204e-06, "loss": 0.858, "step": 3355 }, { "epoch": 0.10462400747314339, "grad_norm": 2.279782772064209, "learning_rate": 4.7127077255891705e-06, "loss": 0.9439, "step": 3360 }, { "epoch": 0.10477969796045461, "grad_norm": 2.026075601577759, "learning_rate": 4.711888295257137e-06, "loss": 0.915, "step": 3365 }, { "epoch": 0.10493538844776584, "grad_norm": 2.155074119567871, "learning_rate": 4.711068864925105e-06, "loss": 0.8123, "step": 3370 }, { "epoch": 0.10509107893507706, "grad_norm": 2.4761505126953125, "learning_rate": 4.710249434593071e-06, "loss": 0.9117, "step": 3375 }, { "epoch": 0.1052467694223883, "grad_norm": 2.206829309463501, "learning_rate": 4.709430004261038e-06, "loss": 0.7888, "step": 3380 }, { "epoch": 0.10540245990969951, "grad_norm": 2.137953281402588, "learning_rate": 4.708610573929005e-06, "loss": 0.8992, "step": 3385 }, { "epoch": 0.10555815039701075, "grad_norm": 2.1726176738739014, "learning_rate": 4.707791143596972e-06, "loss": 0.923, "step": 3390 }, { "epoch": 0.10571384088432197, "grad_norm": 2.2403385639190674, "learning_rate": 4.7069717132649385e-06, "loss": 0.8455, "step": 3395 }, { "epoch": 0.1058695313716332, "grad_norm": 2.555903434753418, "learning_rate": 4.706152282932905e-06, "loss": 0.8808, "step": 3400 }, { "epoch": 0.10602522185894442, "grad_norm": 2.614018678665161, "learning_rate": 4.705332852600872e-06, "loss": 0.8779, "step": 3405 }, { "epoch": 0.10618091234625564, "grad_norm": 1.970125436782837, "learning_rate": 4.704513422268839e-06, "loss": 0.8054, "step": 3410 }, { "epoch": 0.10633660283356687, "grad_norm": 2.1303632259368896, "learning_rate": 4.703693991936806e-06, "loss": 0.8363, "step": 3415 }, { "epoch": 0.10649229332087809, "grad_norm": 2.2741832733154297, "learning_rate": 4.702874561604773e-06, "loss": 0.866, "step": 3420 }, { "epoch": 0.10664798380818932, "grad_norm": 2.5761759281158447, "learning_rate": 4.70205513127274e-06, "loss": 0.8021, "step": 3425 }, { "epoch": 0.10680367429550054, "grad_norm": 2.5322978496551514, "learning_rate": 4.701235700940706e-06, "loss": 0.8807, "step": 3430 }, { "epoch": 0.10695936478281177, "grad_norm": 1.7585734128952026, "learning_rate": 4.7004162706086735e-06, "loss": 0.8173, "step": 3435 }, { "epoch": 0.107115055270123, "grad_norm": 3.1936399936676025, "learning_rate": 4.69959684027664e-06, "loss": 0.8997, "step": 3440 }, { "epoch": 0.10727074575743423, "grad_norm": 2.1075022220611572, "learning_rate": 4.698777409944607e-06, "loss": 0.8533, "step": 3445 }, { "epoch": 0.10742643624474545, "grad_norm": 2.460193157196045, "learning_rate": 4.697957979612573e-06, "loss": 0.9331, "step": 3450 }, { "epoch": 0.10758212673205667, "grad_norm": 2.086595296859741, "learning_rate": 4.69713854928054e-06, "loss": 0.8362, "step": 3455 }, { "epoch": 0.1077378172193679, "grad_norm": 2.1151387691497803, "learning_rate": 4.696319118948508e-06, "loss": 0.8397, "step": 3460 }, { "epoch": 0.10789350770667912, "grad_norm": 2.2622005939483643, "learning_rate": 4.695499688616475e-06, "loss": 0.8915, "step": 3465 }, { "epoch": 0.10804919819399035, "grad_norm": 2.6798198223114014, "learning_rate": 4.694680258284441e-06, "loss": 0.9662, "step": 3470 }, { "epoch": 0.10820488868130157, "grad_norm": 2.0593080520629883, "learning_rate": 4.693860827952408e-06, "loss": 0.8775, "step": 3475 }, { "epoch": 0.1083605791686128, "grad_norm": 1.9952199459075928, "learning_rate": 4.6930413976203745e-06, "loss": 0.8142, "step": 3480 }, { "epoch": 0.10851626965592402, "grad_norm": 2.014030933380127, "learning_rate": 4.692221967288342e-06, "loss": 0.9195, "step": 3485 }, { "epoch": 0.10867196014323525, "grad_norm": 2.248638153076172, "learning_rate": 4.691402536956308e-06, "loss": 0.8558, "step": 3490 }, { "epoch": 0.10882765063054647, "grad_norm": 2.3415963649749756, "learning_rate": 4.690583106624275e-06, "loss": 0.8932, "step": 3495 }, { "epoch": 0.1089833411178577, "grad_norm": 2.3326432704925537, "learning_rate": 4.689763676292242e-06, "loss": 0.8677, "step": 3500 }, { "epoch": 0.10913903160516893, "grad_norm": 2.2048299312591553, "learning_rate": 4.688944245960209e-06, "loss": 0.8414, "step": 3505 }, { "epoch": 0.10929472209248015, "grad_norm": 2.192908763885498, "learning_rate": 4.688124815628176e-06, "loss": 0.8698, "step": 3510 }, { "epoch": 0.10945041257979138, "grad_norm": 2.158454179763794, "learning_rate": 4.6873053852961426e-06, "loss": 0.8219, "step": 3515 }, { "epoch": 0.1096061030671026, "grad_norm": 2.2089638710021973, "learning_rate": 4.6864859549641094e-06, "loss": 0.8792, "step": 3520 }, { "epoch": 0.10976179355441383, "grad_norm": 2.542088508605957, "learning_rate": 4.685666524632076e-06, "loss": 0.8346, "step": 3525 }, { "epoch": 0.10991748404172505, "grad_norm": 2.3030776977539062, "learning_rate": 4.684847094300043e-06, "loss": 0.822, "step": 3530 }, { "epoch": 0.11007317452903627, "grad_norm": 2.467353105545044, "learning_rate": 4.68402766396801e-06, "loss": 0.8077, "step": 3535 }, { "epoch": 0.1102288650163475, "grad_norm": 2.172564744949341, "learning_rate": 4.683208233635977e-06, "loss": 0.8941, "step": 3540 }, { "epoch": 0.11038455550365872, "grad_norm": 2.040215015411377, "learning_rate": 4.682388803303943e-06, "loss": 0.7711, "step": 3545 }, { "epoch": 0.11054024599096995, "grad_norm": 2.1941587924957275, "learning_rate": 4.681569372971911e-06, "loss": 0.816, "step": 3550 }, { "epoch": 0.11069593647828117, "grad_norm": 2.153172492980957, "learning_rate": 4.6807499426398775e-06, "loss": 0.8566, "step": 3555 }, { "epoch": 0.1108516269655924, "grad_norm": 2.052225351333618, "learning_rate": 4.679930512307844e-06, "loss": 0.9006, "step": 3560 }, { "epoch": 0.11100731745290363, "grad_norm": 2.797373056411743, "learning_rate": 4.67911108197581e-06, "loss": 0.8754, "step": 3565 }, { "epoch": 0.11116300794021486, "grad_norm": 1.8904728889465332, "learning_rate": 4.678291651643777e-06, "loss": 0.8705, "step": 3570 }, { "epoch": 0.11131869842752608, "grad_norm": 1.9686708450317383, "learning_rate": 4.677472221311745e-06, "loss": 0.767, "step": 3575 }, { "epoch": 0.1114743889148373, "grad_norm": 2.047800064086914, "learning_rate": 4.676652790979711e-06, "loss": 0.9088, "step": 3580 }, { "epoch": 0.11163007940214853, "grad_norm": 2.251868963241577, "learning_rate": 4.675833360647678e-06, "loss": 0.9529, "step": 3585 }, { "epoch": 0.11178576988945975, "grad_norm": 3.015119791030884, "learning_rate": 4.675013930315645e-06, "loss": 0.8796, "step": 3590 }, { "epoch": 0.11194146037677098, "grad_norm": 2.6674394607543945, "learning_rate": 4.674194499983612e-06, "loss": 0.9098, "step": 3595 }, { "epoch": 0.1120971508640822, "grad_norm": 2.2323198318481445, "learning_rate": 4.6733750696515785e-06, "loss": 0.8074, "step": 3600 }, { "epoch": 0.11225284135139343, "grad_norm": 2.470111131668091, "learning_rate": 4.672555639319545e-06, "loss": 0.9536, "step": 3605 }, { "epoch": 0.11240853183870465, "grad_norm": 1.9919108152389526, "learning_rate": 4.671736208987512e-06, "loss": 0.8872, "step": 3610 }, { "epoch": 0.11256422232601589, "grad_norm": 1.944953441619873, "learning_rate": 4.670916778655479e-06, "loss": 0.7991, "step": 3615 }, { "epoch": 0.1127199128133271, "grad_norm": 2.1290297508239746, "learning_rate": 4.670097348323446e-06, "loss": 0.9496, "step": 3620 }, { "epoch": 0.11287560330063832, "grad_norm": 1.7575253248214722, "learning_rate": 4.669277917991413e-06, "loss": 0.8763, "step": 3625 }, { "epoch": 0.11303129378794956, "grad_norm": 2.2881126403808594, "learning_rate": 4.66845848765938e-06, "loss": 0.866, "step": 3630 }, { "epoch": 0.11318698427526078, "grad_norm": 2.0636229515075684, "learning_rate": 4.667639057327347e-06, "loss": 0.8961, "step": 3635 }, { "epoch": 0.11334267476257201, "grad_norm": 2.704347610473633, "learning_rate": 4.6668196269953135e-06, "loss": 0.8188, "step": 3640 }, { "epoch": 0.11349836524988323, "grad_norm": 2.5057435035705566, "learning_rate": 4.66600019666328e-06, "loss": 0.9349, "step": 3645 }, { "epoch": 0.11365405573719446, "grad_norm": 2.484156847000122, "learning_rate": 4.665180766331247e-06, "loss": 0.8385, "step": 3650 }, { "epoch": 0.11380974622450568, "grad_norm": 2.011611223220825, "learning_rate": 4.664361335999213e-06, "loss": 0.8017, "step": 3655 }, { "epoch": 0.11396543671181691, "grad_norm": 2.557077407836914, "learning_rate": 4.66354190566718e-06, "loss": 0.9256, "step": 3660 }, { "epoch": 0.11412112719912813, "grad_norm": 2.248955011367798, "learning_rate": 4.662722475335148e-06, "loss": 0.8301, "step": 3665 }, { "epoch": 0.11427681768643935, "grad_norm": 2.3965530395507812, "learning_rate": 4.661903045003115e-06, "loss": 0.8341, "step": 3670 }, { "epoch": 0.11443250817375059, "grad_norm": 2.28814959526062, "learning_rate": 4.661083614671081e-06, "loss": 0.8992, "step": 3675 }, { "epoch": 0.1145881986610618, "grad_norm": 1.854034423828125, "learning_rate": 4.6602641843390476e-06, "loss": 0.8185, "step": 3680 }, { "epoch": 0.11474388914837304, "grad_norm": 2.7374770641326904, "learning_rate": 4.6594447540070144e-06, "loss": 0.9313, "step": 3685 }, { "epoch": 0.11489957963568426, "grad_norm": 2.028348445892334, "learning_rate": 4.658625323674982e-06, "loss": 0.9028, "step": 3690 }, { "epoch": 0.11505527012299549, "grad_norm": 1.9713722467422485, "learning_rate": 4.657805893342948e-06, "loss": 0.88, "step": 3695 }, { "epoch": 0.11521096061030671, "grad_norm": 1.914947509765625, "learning_rate": 4.656986463010915e-06, "loss": 0.794, "step": 3700 }, { "epoch": 0.11536665109761794, "grad_norm": 3.430576801300049, "learning_rate": 4.656167032678882e-06, "loss": 0.9207, "step": 3705 }, { "epoch": 0.11552234158492916, "grad_norm": 2.0096404552459717, "learning_rate": 4.655347602346849e-06, "loss": 0.8889, "step": 3710 }, { "epoch": 0.11567803207224038, "grad_norm": 2.3210415840148926, "learning_rate": 4.654528172014816e-06, "loss": 0.9495, "step": 3715 }, { "epoch": 0.11583372255955161, "grad_norm": 1.9776825904846191, "learning_rate": 4.6537087416827825e-06, "loss": 0.8444, "step": 3720 }, { "epoch": 0.11598941304686283, "grad_norm": 2.278287172317505, "learning_rate": 4.652889311350749e-06, "loss": 0.8728, "step": 3725 }, { "epoch": 0.11614510353417407, "grad_norm": 1.9319393634796143, "learning_rate": 4.652069881018716e-06, "loss": 0.9158, "step": 3730 }, { "epoch": 0.11630079402148528, "grad_norm": 2.3784966468811035, "learning_rate": 4.651250450686683e-06, "loss": 0.7974, "step": 3735 }, { "epoch": 0.11645648450879652, "grad_norm": 2.1778905391693115, "learning_rate": 4.65043102035465e-06, "loss": 0.8347, "step": 3740 }, { "epoch": 0.11661217499610774, "grad_norm": 2.4046361446380615, "learning_rate": 4.649611590022617e-06, "loss": 0.8129, "step": 3745 }, { "epoch": 0.11676786548341897, "grad_norm": 3.413358688354492, "learning_rate": 4.648792159690583e-06, "loss": 0.8799, "step": 3750 }, { "epoch": 0.11692355597073019, "grad_norm": 1.75823175907135, "learning_rate": 4.647972729358551e-06, "loss": 0.7628, "step": 3755 }, { "epoch": 0.11707924645804141, "grad_norm": 2.1865246295928955, "learning_rate": 4.6471532990265175e-06, "loss": 0.9065, "step": 3760 }, { "epoch": 0.11723493694535264, "grad_norm": 2.8807411193847656, "learning_rate": 4.646333868694484e-06, "loss": 0.8274, "step": 3765 }, { "epoch": 0.11739062743266386, "grad_norm": 2.1495778560638428, "learning_rate": 4.64551443836245e-06, "loss": 0.87, "step": 3770 }, { "epoch": 0.1175463179199751, "grad_norm": 2.3827507495880127, "learning_rate": 4.644695008030417e-06, "loss": 0.9128, "step": 3775 }, { "epoch": 0.11770200840728631, "grad_norm": 2.1190907955169678, "learning_rate": 4.643875577698385e-06, "loss": 0.9037, "step": 3780 }, { "epoch": 0.11785769889459755, "grad_norm": 1.7495076656341553, "learning_rate": 4.643056147366351e-06, "loss": 0.892, "step": 3785 }, { "epoch": 0.11801338938190876, "grad_norm": 1.677274465560913, "learning_rate": 4.642236717034318e-06, "loss": 0.8528, "step": 3790 }, { "epoch": 0.11816907986921998, "grad_norm": 1.9086309671401978, "learning_rate": 4.641417286702285e-06, "loss": 0.8658, "step": 3795 }, { "epoch": 0.11832477035653122, "grad_norm": 2.345289945602417, "learning_rate": 4.640597856370252e-06, "loss": 0.8876, "step": 3800 }, { "epoch": 0.11848046084384244, "grad_norm": 2.324392795562744, "learning_rate": 4.6397784260382185e-06, "loss": 0.9685, "step": 3805 }, { "epoch": 0.11863615133115367, "grad_norm": 2.2816579341888428, "learning_rate": 4.638958995706185e-06, "loss": 0.8043, "step": 3810 }, { "epoch": 0.11879184181846489, "grad_norm": 2.4708449840545654, "learning_rate": 4.638139565374152e-06, "loss": 0.889, "step": 3815 }, { "epoch": 0.11894753230577612, "grad_norm": 2.7176365852355957, "learning_rate": 4.637320135042119e-06, "loss": 0.9334, "step": 3820 }, { "epoch": 0.11910322279308734, "grad_norm": 2.590594530105591, "learning_rate": 4.636500704710086e-06, "loss": 0.8622, "step": 3825 }, { "epoch": 0.11925891328039857, "grad_norm": 2.2449448108673096, "learning_rate": 4.635681274378053e-06, "loss": 0.892, "step": 3830 }, { "epoch": 0.11941460376770979, "grad_norm": 2.4905905723571777, "learning_rate": 4.63486184404602e-06, "loss": 0.8399, "step": 3835 }, { "epoch": 0.11957029425502101, "grad_norm": 3.122713804244995, "learning_rate": 4.6340424137139865e-06, "loss": 0.7765, "step": 3840 }, { "epoch": 0.11972598474233224, "grad_norm": 1.9784164428710938, "learning_rate": 4.633222983381953e-06, "loss": 0.8047, "step": 3845 }, { "epoch": 0.11988167522964346, "grad_norm": 2.3411080837249756, "learning_rate": 4.63240355304992e-06, "loss": 0.8008, "step": 3850 }, { "epoch": 0.1200373657169547, "grad_norm": 2.183605432510376, "learning_rate": 4.631584122717887e-06, "loss": 0.8437, "step": 3855 }, { "epoch": 0.12019305620426592, "grad_norm": 2.6656131744384766, "learning_rate": 4.630764692385853e-06, "loss": 0.8047, "step": 3860 }, { "epoch": 0.12034874669157715, "grad_norm": 2.6699867248535156, "learning_rate": 4.62994526205382e-06, "loss": 0.9116, "step": 3865 }, { "epoch": 0.12050443717888837, "grad_norm": 2.072291612625122, "learning_rate": 4.629125831721788e-06, "loss": 0.8332, "step": 3870 }, { "epoch": 0.1206601276661996, "grad_norm": 2.5338809490203857, "learning_rate": 4.628306401389755e-06, "loss": 0.784, "step": 3875 }, { "epoch": 0.12081581815351082, "grad_norm": 2.042851686477661, "learning_rate": 4.627486971057721e-06, "loss": 0.8808, "step": 3880 }, { "epoch": 0.12097150864082204, "grad_norm": 1.9637888669967651, "learning_rate": 4.6266675407256875e-06, "loss": 0.8855, "step": 3885 }, { "epoch": 0.12112719912813327, "grad_norm": 2.405907392501831, "learning_rate": 4.625848110393654e-06, "loss": 0.8882, "step": 3890 }, { "epoch": 0.12128288961544449, "grad_norm": 2.16585373878479, "learning_rate": 4.625028680061622e-06, "loss": 0.8159, "step": 3895 }, { "epoch": 0.12143858010275572, "grad_norm": 2.2633631229400635, "learning_rate": 4.624209249729588e-06, "loss": 0.9077, "step": 3900 }, { "epoch": 0.12159427059006694, "grad_norm": 2.1901588439941406, "learning_rate": 4.623389819397555e-06, "loss": 0.8839, "step": 3905 }, { "epoch": 0.12174996107737818, "grad_norm": 2.1158604621887207, "learning_rate": 4.622570389065522e-06, "loss": 0.8045, "step": 3910 }, { "epoch": 0.1219056515646894, "grad_norm": 2.4573123455047607, "learning_rate": 4.621750958733489e-06, "loss": 0.8362, "step": 3915 }, { "epoch": 0.12206134205200063, "grad_norm": 2.7181851863861084, "learning_rate": 4.620931528401456e-06, "loss": 0.7719, "step": 3920 }, { "epoch": 0.12221703253931185, "grad_norm": 2.204954147338867, "learning_rate": 4.6201120980694225e-06, "loss": 0.9383, "step": 3925 }, { "epoch": 0.12237272302662307, "grad_norm": 2.0575613975524902, "learning_rate": 4.619292667737389e-06, "loss": 0.8479, "step": 3930 }, { "epoch": 0.1225284135139343, "grad_norm": 2.2555296421051025, "learning_rate": 4.618473237405356e-06, "loss": 0.8182, "step": 3935 }, { "epoch": 0.12268410400124552, "grad_norm": 2.0158140659332275, "learning_rate": 4.617653807073323e-06, "loss": 0.9012, "step": 3940 }, { "epoch": 0.12283979448855675, "grad_norm": 1.8306264877319336, "learning_rate": 4.61683437674129e-06, "loss": 0.8374, "step": 3945 }, { "epoch": 0.12299548497586797, "grad_norm": 2.1104464530944824, "learning_rate": 4.616014946409257e-06, "loss": 0.798, "step": 3950 }, { "epoch": 0.1231511754631792, "grad_norm": 2.046428680419922, "learning_rate": 4.615195516077223e-06, "loss": 0.8627, "step": 3955 }, { "epoch": 0.12330686595049042, "grad_norm": 2.1848905086517334, "learning_rate": 4.6143760857451906e-06, "loss": 0.8675, "step": 3960 }, { "epoch": 0.12346255643780166, "grad_norm": 2.1361913681030273, "learning_rate": 4.6135566554131574e-06, "loss": 0.8952, "step": 3965 }, { "epoch": 0.12361824692511288, "grad_norm": 2.0596628189086914, "learning_rate": 4.612737225081124e-06, "loss": 0.8825, "step": 3970 }, { "epoch": 0.1237739374124241, "grad_norm": 2.1045405864715576, "learning_rate": 4.61191779474909e-06, "loss": 0.8079, "step": 3975 }, { "epoch": 0.12392962789973533, "grad_norm": 2.043041706085205, "learning_rate": 4.611098364417057e-06, "loss": 0.8311, "step": 3980 }, { "epoch": 0.12408531838704655, "grad_norm": 2.179126739501953, "learning_rate": 4.610278934085025e-06, "loss": 0.8359, "step": 3985 }, { "epoch": 0.12424100887435778, "grad_norm": 1.8362549543380737, "learning_rate": 4.609459503752992e-06, "loss": 0.7822, "step": 3990 }, { "epoch": 0.124396699361669, "grad_norm": 2.01845383644104, "learning_rate": 4.608640073420958e-06, "loss": 0.9133, "step": 3995 }, { "epoch": 0.12455238984898023, "grad_norm": 2.025639057159424, "learning_rate": 4.607820643088925e-06, "loss": 0.7801, "step": 4000 }, { "epoch": 0.12470808033629145, "grad_norm": 2.0585601329803467, "learning_rate": 4.6070012127568915e-06, "loss": 0.8534, "step": 4005 }, { "epoch": 0.12486377082360267, "grad_norm": 2.956775665283203, "learning_rate": 4.606181782424858e-06, "loss": 0.867, "step": 4010 }, { "epoch": 0.1250194613109139, "grad_norm": 2.433889150619507, "learning_rate": 4.605362352092825e-06, "loss": 0.9787, "step": 4015 }, { "epoch": 0.12517515179822514, "grad_norm": 2.1401913166046143, "learning_rate": 4.604542921760792e-06, "loss": 0.8409, "step": 4020 }, { "epoch": 0.12533084228553634, "grad_norm": 2.2635068893432617, "learning_rate": 4.603723491428759e-06, "loss": 0.871, "step": 4025 }, { "epoch": 0.12548653277284758, "grad_norm": 1.9808313846588135, "learning_rate": 4.602904061096726e-06, "loss": 0.8469, "step": 4030 }, { "epoch": 0.1256422232601588, "grad_norm": 2.2486159801483154, "learning_rate": 4.602084630764693e-06, "loss": 0.9005, "step": 4035 }, { "epoch": 0.12579791374747004, "grad_norm": 2.3794145584106445, "learning_rate": 4.60126520043266e-06, "loss": 0.8672, "step": 4040 }, { "epoch": 0.12595360423478125, "grad_norm": 2.6143343448638916, "learning_rate": 4.6004457701006265e-06, "loss": 0.908, "step": 4045 }, { "epoch": 0.12610929472209248, "grad_norm": 2.551694631576538, "learning_rate": 4.599626339768593e-06, "loss": 0.8245, "step": 4050 }, { "epoch": 0.1262649852094037, "grad_norm": 2.163480281829834, "learning_rate": 4.59880690943656e-06, "loss": 0.8481, "step": 4055 }, { "epoch": 0.12642067569671492, "grad_norm": 1.8489880561828613, "learning_rate": 4.597987479104527e-06, "loss": 0.8342, "step": 4060 }, { "epoch": 0.12657636618402615, "grad_norm": 2.5915024280548096, "learning_rate": 4.597168048772494e-06, "loss": 0.9351, "step": 4065 }, { "epoch": 0.12673205667133738, "grad_norm": 2.13767409324646, "learning_rate": 4.596348618440461e-06, "loss": 0.789, "step": 4070 }, { "epoch": 0.12688774715864862, "grad_norm": 2.403836250305176, "learning_rate": 4.595529188108428e-06, "loss": 0.7854, "step": 4075 }, { "epoch": 0.12704343764595982, "grad_norm": 2.108083486557007, "learning_rate": 4.594709757776395e-06, "loss": 0.8103, "step": 4080 }, { "epoch": 0.12719912813327106, "grad_norm": 2.013078212738037, "learning_rate": 4.593890327444361e-06, "loss": 0.8909, "step": 4085 }, { "epoch": 0.1273548186205823, "grad_norm": 2.5774919986724854, "learning_rate": 4.5930708971123275e-06, "loss": 0.8961, "step": 4090 }, { "epoch": 0.12751050910789352, "grad_norm": 2.200855255126953, "learning_rate": 4.592251466780294e-06, "loss": 0.7988, "step": 4095 }, { "epoch": 0.12766619959520473, "grad_norm": 2.2371199131011963, "learning_rate": 4.591432036448262e-06, "loss": 0.8621, "step": 4100 }, { "epoch": 0.12782189008251596, "grad_norm": 2.1950950622558594, "learning_rate": 4.590612606116228e-06, "loss": 0.87, "step": 4105 }, { "epoch": 0.1279775805698272, "grad_norm": 2.518786668777466, "learning_rate": 4.589793175784195e-06, "loss": 0.8547, "step": 4110 }, { "epoch": 0.1281332710571384, "grad_norm": 2.275212287902832, "learning_rate": 4.588973745452162e-06, "loss": 0.8414, "step": 4115 }, { "epoch": 0.12828896154444963, "grad_norm": 2.602886438369751, "learning_rate": 4.588154315120129e-06, "loss": 0.9156, "step": 4120 }, { "epoch": 0.12844465203176086, "grad_norm": 2.0208511352539062, "learning_rate": 4.5873348847880956e-06, "loss": 0.8632, "step": 4125 }, { "epoch": 0.1286003425190721, "grad_norm": 2.1370012760162354, "learning_rate": 4.5865154544560624e-06, "loss": 0.8746, "step": 4130 }, { "epoch": 0.1287560330063833, "grad_norm": 2.0497384071350098, "learning_rate": 4.585696024124029e-06, "loss": 0.8846, "step": 4135 }, { "epoch": 0.12891172349369454, "grad_norm": 2.1125059127807617, "learning_rate": 4.584876593791996e-06, "loss": 0.8153, "step": 4140 }, { "epoch": 0.12906741398100577, "grad_norm": 2.462678909301758, "learning_rate": 4.584057163459963e-06, "loss": 0.8722, "step": 4145 }, { "epoch": 0.12922310446831697, "grad_norm": 2.3528361320495605, "learning_rate": 4.58323773312793e-06, "loss": 0.9318, "step": 4150 }, { "epoch": 0.1293787949556282, "grad_norm": 2.131845712661743, "learning_rate": 4.582418302795897e-06, "loss": 0.9306, "step": 4155 }, { "epoch": 0.12953448544293944, "grad_norm": 1.8907886743545532, "learning_rate": 4.581598872463864e-06, "loss": 0.9271, "step": 4160 }, { "epoch": 0.12969017593025067, "grad_norm": 2.3734374046325684, "learning_rate": 4.5807794421318305e-06, "loss": 0.8817, "step": 4165 }, { "epoch": 0.12984586641756188, "grad_norm": 1.909860372543335, "learning_rate": 4.579960011799797e-06, "loss": 0.8003, "step": 4170 }, { "epoch": 0.1300015569048731, "grad_norm": 1.9336742162704468, "learning_rate": 4.579140581467764e-06, "loss": 0.8404, "step": 4175 }, { "epoch": 0.13015724739218434, "grad_norm": 2.3083856105804443, "learning_rate": 4.57832115113573e-06, "loss": 0.8579, "step": 4180 }, { "epoch": 0.13031293787949555, "grad_norm": 2.2589473724365234, "learning_rate": 4.577501720803698e-06, "loss": 0.8336, "step": 4185 }, { "epoch": 0.13046862836680678, "grad_norm": 2.444685220718384, "learning_rate": 4.576682290471665e-06, "loss": 0.8717, "step": 4190 }, { "epoch": 0.13062431885411802, "grad_norm": 2.0843212604522705, "learning_rate": 4.575862860139632e-06, "loss": 0.8462, "step": 4195 }, { "epoch": 0.13078000934142925, "grad_norm": 2.1546194553375244, "learning_rate": 4.575043429807598e-06, "loss": 0.8729, "step": 4200 }, { "epoch": 0.13093569982874045, "grad_norm": 2.1853442192077637, "learning_rate": 4.574223999475565e-06, "loss": 0.8575, "step": 4205 }, { "epoch": 0.1310913903160517, "grad_norm": 2.2425384521484375, "learning_rate": 4.573404569143532e-06, "loss": 0.8375, "step": 4210 }, { "epoch": 0.13124708080336292, "grad_norm": 1.9977052211761475, "learning_rate": 4.572585138811498e-06, "loss": 0.9419, "step": 4215 }, { "epoch": 0.13140277129067415, "grad_norm": 1.8915103673934937, "learning_rate": 4.571765708479465e-06, "loss": 0.8598, "step": 4220 }, { "epoch": 0.13155846177798536, "grad_norm": 2.447366714477539, "learning_rate": 4.570946278147432e-06, "loss": 0.8415, "step": 4225 }, { "epoch": 0.1317141522652966, "grad_norm": 2.1358752250671387, "learning_rate": 4.570126847815399e-06, "loss": 0.8457, "step": 4230 }, { "epoch": 0.13186984275260782, "grad_norm": 2.3528637886047363, "learning_rate": 4.569307417483366e-06, "loss": 0.8225, "step": 4235 }, { "epoch": 0.13202553323991903, "grad_norm": 2.3759095668792725, "learning_rate": 4.568487987151333e-06, "loss": 0.8865, "step": 4240 }, { "epoch": 0.13218122372723026, "grad_norm": 2.1656835079193115, "learning_rate": 4.5676685568193e-06, "loss": 0.8222, "step": 4245 }, { "epoch": 0.1323369142145415, "grad_norm": 2.4366183280944824, "learning_rate": 4.5668491264872665e-06, "loss": 0.8393, "step": 4250 }, { "epoch": 0.13249260470185273, "grad_norm": 3.9923791885375977, "learning_rate": 4.566029696155233e-06, "loss": 0.883, "step": 4255 }, { "epoch": 0.13264829518916393, "grad_norm": 1.9005409479141235, "learning_rate": 4.5652102658232e-06, "loss": 0.8438, "step": 4260 }, { "epoch": 0.13280398567647517, "grad_norm": 2.14162540435791, "learning_rate": 4.564390835491167e-06, "loss": 0.9296, "step": 4265 }, { "epoch": 0.1329596761637864, "grad_norm": 2.2268738746643066, "learning_rate": 4.563571405159134e-06, "loss": 0.828, "step": 4270 }, { "epoch": 0.1331153666510976, "grad_norm": 2.1769227981567383, "learning_rate": 4.562751974827101e-06, "loss": 0.8821, "step": 4275 }, { "epoch": 0.13327105713840884, "grad_norm": 2.105335235595703, "learning_rate": 4.561932544495068e-06, "loss": 0.8304, "step": 4280 }, { "epoch": 0.13342674762572007, "grad_norm": 2.27117657661438, "learning_rate": 4.5611131141630345e-06, "loss": 0.8988, "step": 4285 }, { "epoch": 0.1335824381130313, "grad_norm": 2.6102664470672607, "learning_rate": 4.5602936838310006e-06, "loss": 0.9204, "step": 4290 }, { "epoch": 0.1337381286003425, "grad_norm": 1.9544968605041504, "learning_rate": 4.5594742534989674e-06, "loss": 0.8545, "step": 4295 }, { "epoch": 0.13389381908765374, "grad_norm": 2.1753594875335693, "learning_rate": 4.558654823166935e-06, "loss": 0.8629, "step": 4300 }, { "epoch": 0.13404950957496498, "grad_norm": 2.386185646057129, "learning_rate": 4.557835392834902e-06, "loss": 0.9028, "step": 4305 }, { "epoch": 0.1342052000622762, "grad_norm": 2.1420233249664307, "learning_rate": 4.557015962502868e-06, "loss": 0.8762, "step": 4310 }, { "epoch": 0.13436089054958741, "grad_norm": 1.7334283590316772, "learning_rate": 4.556196532170835e-06, "loss": 0.7651, "step": 4315 }, { "epoch": 0.13451658103689865, "grad_norm": 1.971728801727295, "learning_rate": 4.555377101838802e-06, "loss": 0.7466, "step": 4320 }, { "epoch": 0.13467227152420988, "grad_norm": 2.1749610900878906, "learning_rate": 4.5545576715067695e-06, "loss": 0.8758, "step": 4325 }, { "epoch": 0.13482796201152109, "grad_norm": 2.1956276893615723, "learning_rate": 4.5537382411747355e-06, "loss": 0.8572, "step": 4330 }, { "epoch": 0.13498365249883232, "grad_norm": 2.6517341136932373, "learning_rate": 4.552918810842702e-06, "loss": 0.8459, "step": 4335 }, { "epoch": 0.13513934298614355, "grad_norm": 2.5818681716918945, "learning_rate": 4.552099380510669e-06, "loss": 0.8485, "step": 4340 }, { "epoch": 0.13529503347345478, "grad_norm": 2.0533487796783447, "learning_rate": 4.551279950178636e-06, "loss": 0.8493, "step": 4345 }, { "epoch": 0.135450723960766, "grad_norm": 3.0232293605804443, "learning_rate": 4.550460519846603e-06, "loss": 0.8311, "step": 4350 }, { "epoch": 0.13560641444807722, "grad_norm": 2.41760516166687, "learning_rate": 4.54964108951457e-06, "loss": 0.8931, "step": 4355 }, { "epoch": 0.13576210493538846, "grad_norm": 2.0700197219848633, "learning_rate": 4.548821659182537e-06, "loss": 0.8906, "step": 4360 }, { "epoch": 0.13591779542269966, "grad_norm": 2.260308265686035, "learning_rate": 4.548002228850504e-06, "loss": 0.8296, "step": 4365 }, { "epoch": 0.1360734859100109, "grad_norm": 2.2027223110198975, "learning_rate": 4.5471827985184705e-06, "loss": 0.8673, "step": 4370 }, { "epoch": 0.13622917639732213, "grad_norm": 2.224111795425415, "learning_rate": 4.546363368186437e-06, "loss": 0.9022, "step": 4375 }, { "epoch": 0.13638486688463336, "grad_norm": 2.1013729572296143, "learning_rate": 4.545543937854404e-06, "loss": 0.856, "step": 4380 }, { "epoch": 0.13654055737194457, "grad_norm": 1.788394570350647, "learning_rate": 4.54472450752237e-06, "loss": 0.7713, "step": 4385 }, { "epoch": 0.1366962478592558, "grad_norm": 2.1374294757843018, "learning_rate": 4.543905077190338e-06, "loss": 0.8315, "step": 4390 }, { "epoch": 0.13685193834656703, "grad_norm": 1.9276926517486572, "learning_rate": 4.543085646858305e-06, "loss": 0.8917, "step": 4395 }, { "epoch": 0.13700762883387824, "grad_norm": 2.4182965755462646, "learning_rate": 4.542266216526272e-06, "loss": 0.8948, "step": 4400 }, { "epoch": 0.13716331932118947, "grad_norm": 1.9441696405410767, "learning_rate": 4.541446786194238e-06, "loss": 0.8238, "step": 4405 }, { "epoch": 0.1373190098085007, "grad_norm": 2.723292112350464, "learning_rate": 4.540627355862205e-06, "loss": 0.8427, "step": 4410 }, { "epoch": 0.13747470029581194, "grad_norm": 2.149484872817993, "learning_rate": 4.539807925530172e-06, "loss": 0.8387, "step": 4415 }, { "epoch": 0.13763039078312314, "grad_norm": 2.599616289138794, "learning_rate": 4.538988495198139e-06, "loss": 0.8758, "step": 4420 }, { "epoch": 0.13778608127043437, "grad_norm": 2.191044569015503, "learning_rate": 4.538169064866105e-06, "loss": 0.907, "step": 4425 }, { "epoch": 0.1379417717577456, "grad_norm": 2.070122003555298, "learning_rate": 4.537349634534072e-06, "loss": 0.8412, "step": 4430 }, { "epoch": 0.13809746224505684, "grad_norm": 2.3146209716796875, "learning_rate": 4.536530204202039e-06, "loss": 0.8716, "step": 4435 }, { "epoch": 0.13825315273236805, "grad_norm": 2.1735410690307617, "learning_rate": 4.535710773870006e-06, "loss": 0.9438, "step": 4440 }, { "epoch": 0.13840884321967928, "grad_norm": 2.658238410949707, "learning_rate": 4.534891343537973e-06, "loss": 0.7817, "step": 4445 }, { "epoch": 0.1385645337069905, "grad_norm": 2.111619710922241, "learning_rate": 4.5340719132059395e-06, "loss": 0.9226, "step": 4450 }, { "epoch": 0.13872022419430172, "grad_norm": 1.7361423969268799, "learning_rate": 4.533252482873906e-06, "loss": 0.8409, "step": 4455 }, { "epoch": 0.13887591468161295, "grad_norm": 2.1453256607055664, "learning_rate": 4.532433052541873e-06, "loss": 0.8905, "step": 4460 }, { "epoch": 0.13903160516892418, "grad_norm": 2.556804895401001, "learning_rate": 4.53161362220984e-06, "loss": 0.9122, "step": 4465 }, { "epoch": 0.13918729565623542, "grad_norm": 2.5452542304992676, "learning_rate": 4.530794191877807e-06, "loss": 0.9386, "step": 4470 }, { "epoch": 0.13934298614354662, "grad_norm": 2.5126354694366455, "learning_rate": 4.529974761545774e-06, "loss": 0.9825, "step": 4475 }, { "epoch": 0.13949867663085785, "grad_norm": 2.535878896713257, "learning_rate": 4.529155331213741e-06, "loss": 0.7781, "step": 4480 }, { "epoch": 0.1396543671181691, "grad_norm": 2.1758337020874023, "learning_rate": 4.528335900881708e-06, "loss": 0.8945, "step": 4485 }, { "epoch": 0.1398100576054803, "grad_norm": 2.4600319862365723, "learning_rate": 4.5275164705496745e-06, "loss": 0.9224, "step": 4490 }, { "epoch": 0.13996574809279153, "grad_norm": 2.0882904529571533, "learning_rate": 4.526697040217641e-06, "loss": 0.9093, "step": 4495 }, { "epoch": 0.14012143858010276, "grad_norm": 2.1675074100494385, "learning_rate": 4.525877609885607e-06, "loss": 0.7832, "step": 4500 }, { "epoch": 0.140277129067414, "grad_norm": 2.0292463302612305, "learning_rate": 4.525058179553575e-06, "loss": 0.8047, "step": 4505 }, { "epoch": 0.1404328195547252, "grad_norm": 2.157677173614502, "learning_rate": 4.524238749221542e-06, "loss": 0.8795, "step": 4510 }, { "epoch": 0.14058851004203643, "grad_norm": 2.3054113388061523, "learning_rate": 4.523419318889508e-06, "loss": 0.8188, "step": 4515 }, { "epoch": 0.14074420052934766, "grad_norm": 2.233888626098633, "learning_rate": 4.522599888557475e-06, "loss": 0.8748, "step": 4520 }, { "epoch": 0.1408998910166589, "grad_norm": 2.6045610904693604, "learning_rate": 4.521780458225442e-06, "loss": 0.8237, "step": 4525 }, { "epoch": 0.1410555815039701, "grad_norm": 1.6691460609436035, "learning_rate": 4.5209610278934095e-06, "loss": 0.843, "step": 4530 }, { "epoch": 0.14121127199128133, "grad_norm": 2.4022583961486816, "learning_rate": 4.5201415975613755e-06, "loss": 0.9602, "step": 4535 }, { "epoch": 0.14136696247859257, "grad_norm": 1.807610273361206, "learning_rate": 4.519322167229342e-06, "loss": 0.8811, "step": 4540 }, { "epoch": 0.14152265296590377, "grad_norm": 2.1473512649536133, "learning_rate": 4.518502736897309e-06, "loss": 0.8447, "step": 4545 }, { "epoch": 0.141678343453215, "grad_norm": 1.9328380823135376, "learning_rate": 4.517683306565276e-06, "loss": 0.8465, "step": 4550 }, { "epoch": 0.14183403394052624, "grad_norm": 2.241105079650879, "learning_rate": 4.516863876233243e-06, "loss": 0.8846, "step": 4555 }, { "epoch": 0.14198972442783747, "grad_norm": 2.042634963989258, "learning_rate": 4.51604444590121e-06, "loss": 0.8529, "step": 4560 }, { "epoch": 0.14214541491514868, "grad_norm": 2.0728039741516113, "learning_rate": 4.515225015569177e-06, "loss": 0.8598, "step": 4565 }, { "epoch": 0.1423011054024599, "grad_norm": 2.050291061401367, "learning_rate": 4.5144055852371436e-06, "loss": 0.9131, "step": 4570 }, { "epoch": 0.14245679588977114, "grad_norm": 2.1564223766326904, "learning_rate": 4.5135861549051104e-06, "loss": 0.8481, "step": 4575 }, { "epoch": 0.14261248637708235, "grad_norm": 2.0487616062164307, "learning_rate": 4.512766724573077e-06, "loss": 0.9174, "step": 4580 }, { "epoch": 0.14276817686439358, "grad_norm": 2.0254154205322266, "learning_rate": 4.511947294241044e-06, "loss": 0.7897, "step": 4585 }, { "epoch": 0.14292386735170481, "grad_norm": 2.3237738609313965, "learning_rate": 4.51112786390901e-06, "loss": 0.8102, "step": 4590 }, { "epoch": 0.14307955783901605, "grad_norm": 2.0158298015594482, "learning_rate": 4.510308433576978e-06, "loss": 0.798, "step": 4595 }, { "epoch": 0.14323524832632725, "grad_norm": 2.477461814880371, "learning_rate": 4.509489003244945e-06, "loss": 0.9588, "step": 4600 }, { "epoch": 0.14339093881363849, "grad_norm": 2.1082112789154053, "learning_rate": 4.508669572912912e-06, "loss": 0.9335, "step": 4605 }, { "epoch": 0.14354662930094972, "grad_norm": 1.9115163087844849, "learning_rate": 4.507850142580878e-06, "loss": 0.8436, "step": 4610 }, { "epoch": 0.14370231978826092, "grad_norm": 2.0811269283294678, "learning_rate": 4.5070307122488445e-06, "loss": 0.828, "step": 4615 }, { "epoch": 0.14385801027557216, "grad_norm": 2.515690326690674, "learning_rate": 4.506211281916812e-06, "loss": 0.8702, "step": 4620 }, { "epoch": 0.1440137007628834, "grad_norm": 2.4769771099090576, "learning_rate": 4.505391851584779e-06, "loss": 0.8135, "step": 4625 }, { "epoch": 0.14416939125019462, "grad_norm": 2.292839765548706, "learning_rate": 4.504572421252745e-06, "loss": 0.7738, "step": 4630 }, { "epoch": 0.14432508173750583, "grad_norm": 2.036343574523926, "learning_rate": 4.503752990920712e-06, "loss": 0.8827, "step": 4635 }, { "epoch": 0.14448077222481706, "grad_norm": 2.36145281791687, "learning_rate": 4.502933560588679e-06, "loss": 0.8391, "step": 4640 }, { "epoch": 0.1446364627121283, "grad_norm": 2.0553269386291504, "learning_rate": 4.502114130256646e-06, "loss": 0.8023, "step": 4645 }, { "epoch": 0.14479215319943953, "grad_norm": 1.888636827468872, "learning_rate": 4.501294699924613e-06, "loss": 0.8374, "step": 4650 }, { "epoch": 0.14494784368675073, "grad_norm": 2.280594825744629, "learning_rate": 4.5004752695925795e-06, "loss": 0.8446, "step": 4655 }, { "epoch": 0.14510353417406197, "grad_norm": 1.9203917980194092, "learning_rate": 4.499655839260546e-06, "loss": 0.8463, "step": 4660 }, { "epoch": 0.1452592246613732, "grad_norm": 2.3740460872650146, "learning_rate": 4.498836408928513e-06, "loss": 0.8104, "step": 4665 }, { "epoch": 0.1454149151486844, "grad_norm": 1.8796730041503906, "learning_rate": 4.49801697859648e-06, "loss": 0.8513, "step": 4670 }, { "epoch": 0.14557060563599564, "grad_norm": 2.7391936779022217, "learning_rate": 4.497197548264447e-06, "loss": 0.9763, "step": 4675 }, { "epoch": 0.14572629612330687, "grad_norm": 2.07395601272583, "learning_rate": 4.496378117932414e-06, "loss": 0.8223, "step": 4680 }, { "epoch": 0.1458819866106181, "grad_norm": 2.2799124717712402, "learning_rate": 4.495558687600381e-06, "loss": 0.9103, "step": 4685 }, { "epoch": 0.1460376770979293, "grad_norm": 2.2764933109283447, "learning_rate": 4.494739257268348e-06, "loss": 0.9046, "step": 4690 }, { "epoch": 0.14619336758524054, "grad_norm": 2.2932887077331543, "learning_rate": 4.4939198269363145e-06, "loss": 0.862, "step": 4695 }, { "epoch": 0.14634905807255177, "grad_norm": 2.166586399078369, "learning_rate": 4.493100396604281e-06, "loss": 0.8639, "step": 4700 }, { "epoch": 0.14650474855986298, "grad_norm": 1.8635563850402832, "learning_rate": 4.492280966272247e-06, "loss": 0.8539, "step": 4705 }, { "epoch": 0.1466604390471742, "grad_norm": 2.3455421924591064, "learning_rate": 4.491461535940215e-06, "loss": 0.807, "step": 4710 }, { "epoch": 0.14681612953448545, "grad_norm": 1.836118221282959, "learning_rate": 4.490642105608182e-06, "loss": 0.8531, "step": 4715 }, { "epoch": 0.14697182002179668, "grad_norm": 2.570453405380249, "learning_rate": 4.489822675276148e-06, "loss": 0.9981, "step": 4720 }, { "epoch": 0.14712751050910788, "grad_norm": 2.157541036605835, "learning_rate": 4.489003244944115e-06, "loss": 0.8358, "step": 4725 }, { "epoch": 0.14728320099641912, "grad_norm": 3.994688034057617, "learning_rate": 4.488183814612082e-06, "loss": 0.807, "step": 4730 }, { "epoch": 0.14743889148373035, "grad_norm": 2.4061644077301025, "learning_rate": 4.487364384280049e-06, "loss": 0.8281, "step": 4735 }, { "epoch": 0.14759458197104158, "grad_norm": 2.446106433868408, "learning_rate": 4.4865449539480154e-06, "loss": 0.8707, "step": 4740 }, { "epoch": 0.1477502724583528, "grad_norm": 2.304197311401367, "learning_rate": 4.485725523615982e-06, "loss": 0.7798, "step": 4745 }, { "epoch": 0.14790596294566402, "grad_norm": 2.157893657684326, "learning_rate": 4.484906093283949e-06, "loss": 0.8106, "step": 4750 }, { "epoch": 0.14806165343297525, "grad_norm": 1.9417530298233032, "learning_rate": 4.484086662951916e-06, "loss": 0.8567, "step": 4755 }, { "epoch": 0.14821734392028646, "grad_norm": 2.5346949100494385, "learning_rate": 4.483267232619883e-06, "loss": 0.9144, "step": 4760 }, { "epoch": 0.1483730344075977, "grad_norm": 1.983025074005127, "learning_rate": 4.48244780228785e-06, "loss": 0.8203, "step": 4765 }, { "epoch": 0.14852872489490893, "grad_norm": 2.296782970428467, "learning_rate": 4.481628371955817e-06, "loss": 0.8753, "step": 4770 }, { "epoch": 0.14868441538222016, "grad_norm": 2.398317813873291, "learning_rate": 4.4808089416237835e-06, "loss": 0.859, "step": 4775 }, { "epoch": 0.14884010586953136, "grad_norm": 1.9277740716934204, "learning_rate": 4.47998951129175e-06, "loss": 0.8275, "step": 4780 }, { "epoch": 0.1489957963568426, "grad_norm": 2.573026418685913, "learning_rate": 4.479170080959717e-06, "loss": 0.9112, "step": 4785 }, { "epoch": 0.14915148684415383, "grad_norm": 2.0555245876312256, "learning_rate": 4.478350650627684e-06, "loss": 0.9077, "step": 4790 }, { "epoch": 0.14930717733146504, "grad_norm": 1.9916950464248657, "learning_rate": 4.47753122029565e-06, "loss": 0.8424, "step": 4795 }, { "epoch": 0.14946286781877627, "grad_norm": 2.2625296115875244, "learning_rate": 4.476711789963618e-06, "loss": 0.8651, "step": 4800 }, { "epoch": 0.1496185583060875, "grad_norm": 2.242208957672119, "learning_rate": 4.475892359631585e-06, "loss": 0.8897, "step": 4805 }, { "epoch": 0.14977424879339873, "grad_norm": 2.145271062850952, "learning_rate": 4.475072929299552e-06, "loss": 0.8447, "step": 4810 }, { "epoch": 0.14992993928070994, "grad_norm": 1.9424023628234863, "learning_rate": 4.474253498967518e-06, "loss": 0.8183, "step": 4815 }, { "epoch": 0.15008562976802117, "grad_norm": 2.2540082931518555, "learning_rate": 4.4734340686354845e-06, "loss": 0.8765, "step": 4820 }, { "epoch": 0.1502413202553324, "grad_norm": 2.4948203563690186, "learning_rate": 4.472614638303452e-06, "loss": 0.7851, "step": 4825 }, { "epoch": 0.1503970107426436, "grad_norm": 2.2502810955047607, "learning_rate": 4.471795207971419e-06, "loss": 0.8505, "step": 4830 }, { "epoch": 0.15055270122995484, "grad_norm": 2.231275796890259, "learning_rate": 4.470975777639385e-06, "loss": 0.7137, "step": 4835 }, { "epoch": 0.15070839171726608, "grad_norm": 2.1585988998413086, "learning_rate": 4.470156347307352e-06, "loss": 0.7908, "step": 4840 }, { "epoch": 0.1508640822045773, "grad_norm": 1.834542989730835, "learning_rate": 4.469336916975319e-06, "loss": 0.8713, "step": 4845 }, { "epoch": 0.15101977269188852, "grad_norm": 2.2706878185272217, "learning_rate": 4.4685174866432866e-06, "loss": 0.8591, "step": 4850 }, { "epoch": 0.15117546317919975, "grad_norm": 1.864743709564209, "learning_rate": 4.467698056311253e-06, "loss": 0.8035, "step": 4855 }, { "epoch": 0.15133115366651098, "grad_norm": 2.240229606628418, "learning_rate": 4.4668786259792195e-06, "loss": 0.8116, "step": 4860 }, { "epoch": 0.15148684415382221, "grad_norm": 2.181086301803589, "learning_rate": 4.466059195647186e-06, "loss": 0.7646, "step": 4865 }, { "epoch": 0.15164253464113342, "grad_norm": 2.170588970184326, "learning_rate": 4.465239765315153e-06, "loss": 0.8283, "step": 4870 }, { "epoch": 0.15179822512844465, "grad_norm": 2.1651968955993652, "learning_rate": 4.46442033498312e-06, "loss": 0.8754, "step": 4875 }, { "epoch": 0.15195391561575589, "grad_norm": 2.0452964305877686, "learning_rate": 4.463600904651087e-06, "loss": 0.8354, "step": 4880 }, { "epoch": 0.1521096061030671, "grad_norm": 2.5599935054779053, "learning_rate": 4.462781474319054e-06, "loss": 0.8287, "step": 4885 }, { "epoch": 0.15226529659037832, "grad_norm": 1.9230740070343018, "learning_rate": 4.461962043987021e-06, "loss": 0.8066, "step": 4890 }, { "epoch": 0.15242098707768956, "grad_norm": 2.4606432914733887, "learning_rate": 4.4611426136549875e-06, "loss": 0.8324, "step": 4895 }, { "epoch": 0.1525766775650008, "grad_norm": 2.1260344982147217, "learning_rate": 4.460323183322954e-06, "loss": 0.8497, "step": 4900 }, { "epoch": 0.152732368052312, "grad_norm": 1.9954558610916138, "learning_rate": 4.459503752990921e-06, "loss": 0.8595, "step": 4905 }, { "epoch": 0.15288805853962323, "grad_norm": 2.0900826454162598, "learning_rate": 4.458684322658888e-06, "loss": 0.9281, "step": 4910 }, { "epoch": 0.15304374902693446, "grad_norm": 2.3312346935272217, "learning_rate": 4.457864892326855e-06, "loss": 0.7574, "step": 4915 }, { "epoch": 0.15319943951424567, "grad_norm": 2.241001844406128, "learning_rate": 4.457045461994822e-06, "loss": 0.9085, "step": 4920 }, { "epoch": 0.1533551300015569, "grad_norm": 1.9087680578231812, "learning_rate": 4.456226031662789e-06, "loss": 0.8199, "step": 4925 }, { "epoch": 0.15351082048886813, "grad_norm": 2.4003658294677734, "learning_rate": 4.455406601330755e-06, "loss": 0.8297, "step": 4930 }, { "epoch": 0.15366651097617937, "grad_norm": 2.410102605819702, "learning_rate": 4.454587170998722e-06, "loss": 0.8432, "step": 4935 }, { "epoch": 0.15382220146349057, "grad_norm": 2.024045944213867, "learning_rate": 4.453767740666689e-06, "loss": 0.9044, "step": 4940 }, { "epoch": 0.1539778919508018, "grad_norm": 2.3241474628448486, "learning_rate": 4.452948310334655e-06, "loss": 0.7937, "step": 4945 }, { "epoch": 0.15413358243811304, "grad_norm": 2.353044033050537, "learning_rate": 4.452128880002622e-06, "loss": 0.8031, "step": 4950 }, { "epoch": 0.15428927292542427, "grad_norm": 2.1306846141815186, "learning_rate": 4.451309449670589e-06, "loss": 0.8418, "step": 4955 }, { "epoch": 0.15444496341273548, "grad_norm": 2.6664650440216064, "learning_rate": 4.450490019338556e-06, "loss": 0.8071, "step": 4960 }, { "epoch": 0.1546006539000467, "grad_norm": 2.1143767833709717, "learning_rate": 4.449670589006523e-06, "loss": 0.847, "step": 4965 }, { "epoch": 0.15475634438735794, "grad_norm": 2.120201826095581, "learning_rate": 4.44885115867449e-06, "loss": 0.7979, "step": 4970 }, { "epoch": 0.15491203487466915, "grad_norm": 2.230572462081909, "learning_rate": 4.448031728342457e-06, "loss": 0.8997, "step": 4975 }, { "epoch": 0.15506772536198038, "grad_norm": 1.9824223518371582, "learning_rate": 4.4472122980104235e-06, "loss": 0.8424, "step": 4980 }, { "epoch": 0.1552234158492916, "grad_norm": 2.1641504764556885, "learning_rate": 4.44639286767839e-06, "loss": 0.8409, "step": 4985 }, { "epoch": 0.15537910633660285, "grad_norm": 1.901978850364685, "learning_rate": 4.445573437346357e-06, "loss": 0.8156, "step": 4990 }, { "epoch": 0.15553479682391405, "grad_norm": 2.55707049369812, "learning_rate": 4.444754007014324e-06, "loss": 0.9003, "step": 4995 }, { "epoch": 0.15569048731122528, "grad_norm": 2.4227755069732666, "learning_rate": 4.443934576682291e-06, "loss": 0.9143, "step": 5000 }, { "epoch": 0.15584617779853652, "grad_norm": 2.24989914894104, "learning_rate": 4.443115146350258e-06, "loss": 0.78, "step": 5005 }, { "epoch": 0.15600186828584772, "grad_norm": 2.240879774093628, "learning_rate": 4.442295716018225e-06, "loss": 0.8995, "step": 5010 }, { "epoch": 0.15615755877315896, "grad_norm": 1.845123529434204, "learning_rate": 4.4414762856861916e-06, "loss": 0.7477, "step": 5015 }, { "epoch": 0.1563132492604702, "grad_norm": 2.6580350399017334, "learning_rate": 4.440656855354158e-06, "loss": 0.9113, "step": 5020 }, { "epoch": 0.15646893974778142, "grad_norm": 2.3661653995513916, "learning_rate": 4.439837425022125e-06, "loss": 0.8848, "step": 5025 }, { "epoch": 0.15662463023509263, "grad_norm": 2.1160614490509033, "learning_rate": 4.439017994690092e-06, "loss": 0.8506, "step": 5030 }, { "epoch": 0.15678032072240386, "grad_norm": 2.2441718578338623, "learning_rate": 4.438198564358059e-06, "loss": 0.7901, "step": 5035 }, { "epoch": 0.1569360112097151, "grad_norm": 2.1336910724639893, "learning_rate": 4.437379134026025e-06, "loss": 0.8774, "step": 5040 }, { "epoch": 0.1570917016970263, "grad_norm": 2.144185781478882, "learning_rate": 4.436559703693992e-06, "loss": 0.8604, "step": 5045 }, { "epoch": 0.15724739218433753, "grad_norm": 2.231241464614868, "learning_rate": 4.43574027336196e-06, "loss": 0.8191, "step": 5050 }, { "epoch": 0.15740308267164876, "grad_norm": 2.1232237815856934, "learning_rate": 4.4349208430299265e-06, "loss": 0.8583, "step": 5055 }, { "epoch": 0.15755877315896, "grad_norm": 1.9219319820404053, "learning_rate": 4.4341014126978925e-06, "loss": 0.8431, "step": 5060 }, { "epoch": 0.1577144636462712, "grad_norm": 2.366776943206787, "learning_rate": 4.433281982365859e-06, "loss": 0.8781, "step": 5065 }, { "epoch": 0.15787015413358244, "grad_norm": 2.221095561981201, "learning_rate": 4.432462552033826e-06, "loss": 0.8693, "step": 5070 }, { "epoch": 0.15802584462089367, "grad_norm": 2.145472764968872, "learning_rate": 4.431643121701793e-06, "loss": 0.8682, "step": 5075 }, { "epoch": 0.1581815351082049, "grad_norm": 2.184908151626587, "learning_rate": 4.43082369136976e-06, "loss": 0.8178, "step": 5080 }, { "epoch": 0.1583372255955161, "grad_norm": 1.9847878217697144, "learning_rate": 4.430004261037727e-06, "loss": 0.8757, "step": 5085 }, { "epoch": 0.15849291608282734, "grad_norm": 2.207984685897827, "learning_rate": 4.429184830705694e-06, "loss": 0.7804, "step": 5090 }, { "epoch": 0.15864860657013857, "grad_norm": 2.3548359870910645, "learning_rate": 4.428365400373661e-06, "loss": 0.8131, "step": 5095 }, { "epoch": 0.15880429705744978, "grad_norm": 2.0693185329437256, "learning_rate": 4.4275459700416275e-06, "loss": 0.9223, "step": 5100 }, { "epoch": 0.158959987544761, "grad_norm": 2.4842658042907715, "learning_rate": 4.426726539709594e-06, "loss": 0.856, "step": 5105 }, { "epoch": 0.15911567803207224, "grad_norm": 2.2832517623901367, "learning_rate": 4.425907109377561e-06, "loss": 0.7783, "step": 5110 }, { "epoch": 0.15927136851938348, "grad_norm": 2.1813735961914062, "learning_rate": 4.425087679045528e-06, "loss": 0.7581, "step": 5115 }, { "epoch": 0.15942705900669468, "grad_norm": 2.5264649391174316, "learning_rate": 4.424268248713495e-06, "loss": 0.843, "step": 5120 }, { "epoch": 0.15958274949400592, "grad_norm": 2.2178685665130615, "learning_rate": 4.423448818381462e-06, "loss": 0.8855, "step": 5125 }, { "epoch": 0.15973843998131715, "grad_norm": 2.2047972679138184, "learning_rate": 4.422629388049429e-06, "loss": 0.8883, "step": 5130 }, { "epoch": 0.15989413046862835, "grad_norm": 2.340724468231201, "learning_rate": 4.421809957717395e-06, "loss": 0.8294, "step": 5135 }, { "epoch": 0.1600498209559396, "grad_norm": 2.5423595905303955, "learning_rate": 4.4209905273853625e-06, "loss": 0.7997, "step": 5140 }, { "epoch": 0.16020551144325082, "grad_norm": 2.060715436935425, "learning_rate": 4.420171097053329e-06, "loss": 0.8591, "step": 5145 }, { "epoch": 0.16036120193056205, "grad_norm": 1.9545613527297974, "learning_rate": 4.419351666721295e-06, "loss": 0.8189, "step": 5150 }, { "epoch": 0.16051689241787326, "grad_norm": 2.058147668838501, "learning_rate": 4.418532236389262e-06, "loss": 0.8355, "step": 5155 }, { "epoch": 0.1606725829051845, "grad_norm": 2.297954797744751, "learning_rate": 4.417712806057229e-06, "loss": 0.7751, "step": 5160 }, { "epoch": 0.16082827339249572, "grad_norm": 2.462841272354126, "learning_rate": 4.416893375725197e-06, "loss": 0.8557, "step": 5165 }, { "epoch": 0.16098396387980696, "grad_norm": 2.1128079891204834, "learning_rate": 4.416073945393163e-06, "loss": 0.8338, "step": 5170 }, { "epoch": 0.16113965436711816, "grad_norm": 2.549293279647827, "learning_rate": 4.41525451506113e-06, "loss": 0.8009, "step": 5175 }, { "epoch": 0.1612953448544294, "grad_norm": 2.195167303085327, "learning_rate": 4.4144350847290966e-06, "loss": 0.8646, "step": 5180 }, { "epoch": 0.16145103534174063, "grad_norm": 2.519803762435913, "learning_rate": 4.4136156543970634e-06, "loss": 0.8209, "step": 5185 }, { "epoch": 0.16160672582905183, "grad_norm": 2.0936081409454346, "learning_rate": 4.41279622406503e-06, "loss": 0.9072, "step": 5190 }, { "epoch": 0.16176241631636307, "grad_norm": 2.1711432933807373, "learning_rate": 4.411976793732997e-06, "loss": 0.9095, "step": 5195 }, { "epoch": 0.1619181068036743, "grad_norm": 2.3746213912963867, "learning_rate": 4.411157363400964e-06, "loss": 0.8462, "step": 5200 }, { "epoch": 0.16207379729098553, "grad_norm": 2.1848597526550293, "learning_rate": 4.410337933068931e-06, "loss": 0.9056, "step": 5205 }, { "epoch": 0.16222948777829674, "grad_norm": 2.4528849124908447, "learning_rate": 4.409518502736898e-06, "loss": 0.8202, "step": 5210 }, { "epoch": 0.16238517826560797, "grad_norm": 2.2896883487701416, "learning_rate": 4.408699072404865e-06, "loss": 0.7522, "step": 5215 }, { "epoch": 0.1625408687529192, "grad_norm": 3.0400760173797607, "learning_rate": 4.4078796420728315e-06, "loss": 0.8839, "step": 5220 }, { "epoch": 0.1626965592402304, "grad_norm": 2.060401439666748, "learning_rate": 4.4070602117407975e-06, "loss": 0.7995, "step": 5225 }, { "epoch": 0.16285224972754164, "grad_norm": 2.2662487030029297, "learning_rate": 4.406240781408765e-06, "loss": 0.8558, "step": 5230 }, { "epoch": 0.16300794021485288, "grad_norm": 1.9563267230987549, "learning_rate": 4.405421351076732e-06, "loss": 0.7634, "step": 5235 }, { "epoch": 0.1631636307021641, "grad_norm": 1.9069252014160156, "learning_rate": 4.404601920744699e-06, "loss": 0.8345, "step": 5240 }, { "epoch": 0.1633193211894753, "grad_norm": 1.9403265714645386, "learning_rate": 4.403782490412665e-06, "loss": 0.8206, "step": 5245 }, { "epoch": 0.16347501167678655, "grad_norm": 2.4439048767089844, "learning_rate": 4.402963060080632e-06, "loss": 0.8757, "step": 5250 }, { "epoch": 0.16363070216409778, "grad_norm": 2.6312313079833984, "learning_rate": 4.4021436297486e-06, "loss": 0.8235, "step": 5255 }, { "epoch": 0.16378639265140899, "grad_norm": 2.1159651279449463, "learning_rate": 4.4013241994165665e-06, "loss": 0.9053, "step": 5260 }, { "epoch": 0.16394208313872022, "grad_norm": 2.1085331439971924, "learning_rate": 4.4005047690845325e-06, "loss": 0.863, "step": 5265 }, { "epoch": 0.16409777362603145, "grad_norm": 2.776810646057129, "learning_rate": 4.399685338752499e-06, "loss": 0.92, "step": 5270 }, { "epoch": 0.16425346411334268, "grad_norm": 2.423015832901001, "learning_rate": 4.398865908420466e-06, "loss": 0.7462, "step": 5275 }, { "epoch": 0.1644091546006539, "grad_norm": 1.939535140991211, "learning_rate": 4.398046478088434e-06, "loss": 0.8659, "step": 5280 }, { "epoch": 0.16456484508796512, "grad_norm": 1.7569829225540161, "learning_rate": 4.3972270477564e-06, "loss": 0.917, "step": 5285 }, { "epoch": 0.16472053557527636, "grad_norm": 2.116337299346924, "learning_rate": 4.396407617424367e-06, "loss": 0.9364, "step": 5290 }, { "epoch": 0.1648762260625876, "grad_norm": 2.24186372756958, "learning_rate": 4.395588187092334e-06, "loss": 0.83, "step": 5295 }, { "epoch": 0.1650319165498988, "grad_norm": 2.0614655017852783, "learning_rate": 4.394768756760301e-06, "loss": 0.8065, "step": 5300 }, { "epoch": 0.16518760703721003, "grad_norm": 2.339306592941284, "learning_rate": 4.3939493264282675e-06, "loss": 0.8665, "step": 5305 }, { "epoch": 0.16534329752452126, "grad_norm": 2.7791121006011963, "learning_rate": 4.393129896096234e-06, "loss": 0.7783, "step": 5310 }, { "epoch": 0.16549898801183247, "grad_norm": 1.9560863971710205, "learning_rate": 4.392310465764201e-06, "loss": 0.8417, "step": 5315 }, { "epoch": 0.1656546784991437, "grad_norm": 2.0337729454040527, "learning_rate": 4.391491035432168e-06, "loss": 0.8522, "step": 5320 }, { "epoch": 0.16581036898645493, "grad_norm": 2.4419989585876465, "learning_rate": 4.390671605100135e-06, "loss": 0.9141, "step": 5325 }, { "epoch": 0.16596605947376616, "grad_norm": 2.100072145462036, "learning_rate": 4.389852174768102e-06, "loss": 0.8871, "step": 5330 }, { "epoch": 0.16612174996107737, "grad_norm": 1.857796311378479, "learning_rate": 4.389032744436069e-06, "loss": 0.8096, "step": 5335 }, { "epoch": 0.1662774404483886, "grad_norm": 2.3345563411712646, "learning_rate": 4.388213314104035e-06, "loss": 0.8496, "step": 5340 }, { "epoch": 0.16643313093569984, "grad_norm": 2.1148931980133057, "learning_rate": 4.387393883772002e-06, "loss": 0.8155, "step": 5345 }, { "epoch": 0.16658882142301104, "grad_norm": 2.218453884124756, "learning_rate": 4.386574453439969e-06, "loss": 0.8043, "step": 5350 }, { "epoch": 0.16674451191032227, "grad_norm": 2.4121880531311035, "learning_rate": 4.385755023107936e-06, "loss": 0.8244, "step": 5355 }, { "epoch": 0.1669002023976335, "grad_norm": 1.8566899299621582, "learning_rate": 4.384935592775902e-06, "loss": 0.8382, "step": 5360 }, { "epoch": 0.16705589288494474, "grad_norm": 2.021416187286377, "learning_rate": 4.384116162443869e-06, "loss": 0.8537, "step": 5365 }, { "epoch": 0.16721158337225595, "grad_norm": 2.070321798324585, "learning_rate": 4.383296732111837e-06, "loss": 0.8648, "step": 5370 }, { "epoch": 0.16736727385956718, "grad_norm": 2.2255797386169434, "learning_rate": 4.382477301779803e-06, "loss": 0.9006, "step": 5375 }, { "epoch": 0.1675229643468784, "grad_norm": 2.394930362701416, "learning_rate": 4.38165787144777e-06, "loss": 0.8545, "step": 5380 }, { "epoch": 0.16767865483418964, "grad_norm": 2.200451374053955, "learning_rate": 4.3808384411157365e-06, "loss": 0.889, "step": 5385 }, { "epoch": 0.16783434532150085, "grad_norm": 2.358041763305664, "learning_rate": 4.380019010783703e-06, "loss": 0.9011, "step": 5390 }, { "epoch": 0.16799003580881208, "grad_norm": 2.2817485332489014, "learning_rate": 4.37919958045167e-06, "loss": 0.8187, "step": 5395 }, { "epoch": 0.16814572629612332, "grad_norm": 2.1750218868255615, "learning_rate": 4.378380150119637e-06, "loss": 0.8928, "step": 5400 }, { "epoch": 0.16830141678343452, "grad_norm": 2.13179612159729, "learning_rate": 4.377560719787604e-06, "loss": 0.8024, "step": 5405 }, { "epoch": 0.16845710727074575, "grad_norm": 2.3835337162017822, "learning_rate": 4.376741289455571e-06, "loss": 0.7859, "step": 5410 }, { "epoch": 0.168612797758057, "grad_norm": 2.500035285949707, "learning_rate": 4.375921859123538e-06, "loss": 0.8508, "step": 5415 }, { "epoch": 0.16876848824536822, "grad_norm": 1.9487508535385132, "learning_rate": 4.375102428791505e-06, "loss": 0.8479, "step": 5420 }, { "epoch": 0.16892417873267943, "grad_norm": 2.141026258468628, "learning_rate": 4.3742829984594715e-06, "loss": 0.8537, "step": 5425 }, { "epoch": 0.16907986921999066, "grad_norm": 2.2177555561065674, "learning_rate": 4.3734635681274375e-06, "loss": 0.889, "step": 5430 }, { "epoch": 0.1692355597073019, "grad_norm": 2.149988889694214, "learning_rate": 4.372644137795405e-06, "loss": 0.7971, "step": 5435 }, { "epoch": 0.1693912501946131, "grad_norm": 1.978331208229065, "learning_rate": 4.371824707463372e-06, "loss": 0.8397, "step": 5440 }, { "epoch": 0.16954694068192433, "grad_norm": 2.132972002029419, "learning_rate": 4.371005277131339e-06, "loss": 0.8239, "step": 5445 }, { "epoch": 0.16970263116923556, "grad_norm": 2.225266933441162, "learning_rate": 4.370185846799305e-06, "loss": 0.857, "step": 5450 }, { "epoch": 0.1698583216565468, "grad_norm": 1.8121079206466675, "learning_rate": 4.369366416467272e-06, "loss": 0.892, "step": 5455 }, { "epoch": 0.170014012143858, "grad_norm": 2.049814462661743, "learning_rate": 4.3685469861352396e-06, "loss": 0.8924, "step": 5460 }, { "epoch": 0.17016970263116923, "grad_norm": 2.166435956954956, "learning_rate": 4.3677275558032064e-06, "loss": 0.8118, "step": 5465 }, { "epoch": 0.17032539311848047, "grad_norm": 2.0388309955596924, "learning_rate": 4.3669081254711725e-06, "loss": 0.8769, "step": 5470 }, { "epoch": 0.17048108360579167, "grad_norm": 2.3386335372924805, "learning_rate": 4.366088695139139e-06, "loss": 0.8027, "step": 5475 }, { "epoch": 0.1706367740931029, "grad_norm": 2.0535054206848145, "learning_rate": 4.365269264807106e-06, "loss": 0.8872, "step": 5480 }, { "epoch": 0.17079246458041414, "grad_norm": 2.0268850326538086, "learning_rate": 4.364449834475074e-06, "loss": 0.8283, "step": 5485 }, { "epoch": 0.17094815506772537, "grad_norm": 2.0212080478668213, "learning_rate": 4.36363040414304e-06, "loss": 0.8878, "step": 5490 }, { "epoch": 0.17110384555503658, "grad_norm": 2.1437482833862305, "learning_rate": 4.362810973811007e-06, "loss": 0.8304, "step": 5495 }, { "epoch": 0.1712595360423478, "grad_norm": 2.094193458557129, "learning_rate": 4.361991543478974e-06, "loss": 0.8068, "step": 5500 }, { "epoch": 0.17141522652965904, "grad_norm": 2.2155113220214844, "learning_rate": 4.3611721131469405e-06, "loss": 0.8193, "step": 5505 }, { "epoch": 0.17157091701697028, "grad_norm": 2.4712979793548584, "learning_rate": 4.360352682814907e-06, "loss": 0.846, "step": 5510 }, { "epoch": 0.17172660750428148, "grad_norm": 2.4685957431793213, "learning_rate": 4.359533252482874e-06, "loss": 0.8865, "step": 5515 }, { "epoch": 0.17188229799159271, "grad_norm": 2.192850112915039, "learning_rate": 4.358713822150841e-06, "loss": 0.794, "step": 5520 }, { "epoch": 0.17203798847890395, "grad_norm": 2.2076382637023926, "learning_rate": 4.357894391818808e-06, "loss": 0.8676, "step": 5525 }, { "epoch": 0.17219367896621515, "grad_norm": 2.6585371494293213, "learning_rate": 4.357074961486775e-06, "loss": 0.8613, "step": 5530 }, { "epoch": 0.17234936945352639, "grad_norm": 2.034376382827759, "learning_rate": 4.356255531154742e-06, "loss": 0.8521, "step": 5535 }, { "epoch": 0.17250505994083762, "grad_norm": 2.1458797454833984, "learning_rate": 4.355436100822709e-06, "loss": 0.739, "step": 5540 }, { "epoch": 0.17266075042814885, "grad_norm": 2.086189031600952, "learning_rate": 4.354616670490675e-06, "loss": 0.8414, "step": 5545 }, { "epoch": 0.17281644091546006, "grad_norm": 2.565664768218994, "learning_rate": 4.353797240158642e-06, "loss": 0.8627, "step": 5550 }, { "epoch": 0.1729721314027713, "grad_norm": 2.144747257232666, "learning_rate": 4.352977809826609e-06, "loss": 0.8523, "step": 5555 }, { "epoch": 0.17312782189008252, "grad_norm": 2.697878360748291, "learning_rate": 4.352158379494576e-06, "loss": 0.8583, "step": 5560 }, { "epoch": 0.17328351237739373, "grad_norm": 2.312253952026367, "learning_rate": 4.351338949162542e-06, "loss": 0.9341, "step": 5565 }, { "epoch": 0.17343920286470496, "grad_norm": 2.3515713214874268, "learning_rate": 4.350519518830509e-06, "loss": 0.8412, "step": 5570 }, { "epoch": 0.1735948933520162, "grad_norm": 2.7180473804473877, "learning_rate": 4.349700088498477e-06, "loss": 0.8176, "step": 5575 }, { "epoch": 0.17375058383932743, "grad_norm": 2.2708098888397217, "learning_rate": 4.348880658166443e-06, "loss": 0.8531, "step": 5580 }, { "epoch": 0.17390627432663863, "grad_norm": 2.017293691635132, "learning_rate": 4.34806122783441e-06, "loss": 0.8284, "step": 5585 }, { "epoch": 0.17406196481394987, "grad_norm": 2.1315197944641113, "learning_rate": 4.3472417975023765e-06, "loss": 0.8139, "step": 5590 }, { "epoch": 0.1742176553012611, "grad_norm": 2.314835786819458, "learning_rate": 4.346422367170343e-06, "loss": 0.8914, "step": 5595 }, { "epoch": 0.17437334578857233, "grad_norm": 2.3204896450042725, "learning_rate": 4.34560293683831e-06, "loss": 0.902, "step": 5600 }, { "epoch": 0.17452903627588354, "grad_norm": 1.992165207862854, "learning_rate": 4.344783506506277e-06, "loss": 0.8821, "step": 5605 }, { "epoch": 0.17468472676319477, "grad_norm": 3.5363097190856934, "learning_rate": 4.343964076174244e-06, "loss": 0.845, "step": 5610 }, { "epoch": 0.174840417250506, "grad_norm": 1.8949450254440308, "learning_rate": 4.343144645842211e-06, "loss": 0.8209, "step": 5615 }, { "epoch": 0.1749961077378172, "grad_norm": 2.21087646484375, "learning_rate": 4.342325215510178e-06, "loss": 0.8936, "step": 5620 }, { "epoch": 0.17515179822512844, "grad_norm": 1.9762651920318604, "learning_rate": 4.3415057851781446e-06, "loss": 0.8659, "step": 5625 }, { "epoch": 0.17530748871243967, "grad_norm": 2.2141523361206055, "learning_rate": 4.3406863548461114e-06, "loss": 0.811, "step": 5630 }, { "epoch": 0.1754631791997509, "grad_norm": 1.9872905015945435, "learning_rate": 4.339866924514078e-06, "loss": 0.8581, "step": 5635 }, { "epoch": 0.1756188696870621, "grad_norm": 2.234013795852661, "learning_rate": 4.339047494182045e-06, "loss": 0.8422, "step": 5640 }, { "epoch": 0.17577456017437335, "grad_norm": 2.027029514312744, "learning_rate": 4.338228063850012e-06, "loss": 0.8812, "step": 5645 }, { "epoch": 0.17593025066168458, "grad_norm": 2.3712823390960693, "learning_rate": 4.337408633517979e-06, "loss": 0.8143, "step": 5650 }, { "epoch": 0.17608594114899578, "grad_norm": 2.1156442165374756, "learning_rate": 4.336589203185945e-06, "loss": 0.8667, "step": 5655 }, { "epoch": 0.17624163163630702, "grad_norm": 2.077629804611206, "learning_rate": 4.335769772853912e-06, "loss": 0.8169, "step": 5660 }, { "epoch": 0.17639732212361825, "grad_norm": 1.948521614074707, "learning_rate": 4.3349503425218795e-06, "loss": 0.8244, "step": 5665 }, { "epoch": 0.17655301261092948, "grad_norm": 1.9569650888442993, "learning_rate": 4.334130912189846e-06, "loss": 0.8425, "step": 5670 }, { "epoch": 0.1767087030982407, "grad_norm": 2.3287160396575928, "learning_rate": 4.333311481857812e-06, "loss": 0.7755, "step": 5675 }, { "epoch": 0.17686439358555192, "grad_norm": 2.681758165359497, "learning_rate": 4.332492051525779e-06, "loss": 0.8213, "step": 5680 }, { "epoch": 0.17702008407286315, "grad_norm": 1.8722271919250488, "learning_rate": 4.331672621193746e-06, "loss": 0.8556, "step": 5685 }, { "epoch": 0.1771757745601744, "grad_norm": 1.923965334892273, "learning_rate": 4.330853190861714e-06, "loss": 0.8181, "step": 5690 }, { "epoch": 0.1773314650474856, "grad_norm": 2.1575894355773926, "learning_rate": 4.33003376052968e-06, "loss": 0.8802, "step": 5695 }, { "epoch": 0.17748715553479683, "grad_norm": 2.5305702686309814, "learning_rate": 4.329214330197647e-06, "loss": 0.8463, "step": 5700 }, { "epoch": 0.17764284602210806, "grad_norm": 2.1569876670837402, "learning_rate": 4.328394899865614e-06, "loss": 0.8157, "step": 5705 }, { "epoch": 0.17779853650941926, "grad_norm": 2.117487668991089, "learning_rate": 4.3275754695335805e-06, "loss": 0.8998, "step": 5710 }, { "epoch": 0.1779542269967305, "grad_norm": 2.066032886505127, "learning_rate": 4.326756039201547e-06, "loss": 0.8049, "step": 5715 }, { "epoch": 0.17810991748404173, "grad_norm": 2.225698947906494, "learning_rate": 4.325936608869514e-06, "loss": 0.8381, "step": 5720 }, { "epoch": 0.17826560797135296, "grad_norm": 2.8029346466064453, "learning_rate": 4.325117178537481e-06, "loss": 0.8942, "step": 5725 }, { "epoch": 0.17842129845866417, "grad_norm": 2.240114212036133, "learning_rate": 4.324297748205448e-06, "loss": 0.8391, "step": 5730 }, { "epoch": 0.1785769889459754, "grad_norm": 2.940363645553589, "learning_rate": 4.323478317873415e-06, "loss": 0.8307, "step": 5735 }, { "epoch": 0.17873267943328663, "grad_norm": 1.874420166015625, "learning_rate": 4.322658887541382e-06, "loss": 0.8657, "step": 5740 }, { "epoch": 0.17888836992059784, "grad_norm": 2.297536611557007, "learning_rate": 4.321839457209349e-06, "loss": 0.9037, "step": 5745 }, { "epoch": 0.17904406040790907, "grad_norm": 2.366318702697754, "learning_rate": 4.3210200268773155e-06, "loss": 0.8546, "step": 5750 }, { "epoch": 0.1791997508952203, "grad_norm": 3.4325027465820312, "learning_rate": 4.320200596545282e-06, "loss": 0.8792, "step": 5755 }, { "epoch": 0.17935544138253154, "grad_norm": 2.1297779083251953, "learning_rate": 4.319381166213249e-06, "loss": 0.8208, "step": 5760 }, { "epoch": 0.17951113186984274, "grad_norm": 2.1324312686920166, "learning_rate": 4.318561735881216e-06, "loss": 0.785, "step": 5765 }, { "epoch": 0.17966682235715398, "grad_norm": 2.300025701522827, "learning_rate": 4.317742305549182e-06, "loss": 0.8267, "step": 5770 }, { "epoch": 0.1798225128444652, "grad_norm": 1.8773940801620483, "learning_rate": 4.316922875217149e-06, "loss": 0.8442, "step": 5775 }, { "epoch": 0.17997820333177642, "grad_norm": 2.023031234741211, "learning_rate": 4.316103444885117e-06, "loss": 0.8256, "step": 5780 }, { "epoch": 0.18013389381908765, "grad_norm": 1.725677490234375, "learning_rate": 4.3152840145530835e-06, "loss": 0.8832, "step": 5785 }, { "epoch": 0.18028958430639888, "grad_norm": 2.307124137878418, "learning_rate": 4.3144645842210496e-06, "loss": 0.9293, "step": 5790 }, { "epoch": 0.18044527479371011, "grad_norm": 1.9926342964172363, "learning_rate": 4.3136451538890164e-06, "loss": 0.8616, "step": 5795 }, { "epoch": 0.18060096528102132, "grad_norm": 2.150270700454712, "learning_rate": 4.312825723556983e-06, "loss": 0.8545, "step": 5800 }, { "epoch": 0.18075665576833255, "grad_norm": 2.516529083251953, "learning_rate": 4.31200629322495e-06, "loss": 0.8122, "step": 5805 }, { "epoch": 0.18091234625564379, "grad_norm": 2.109006643295288, "learning_rate": 4.311186862892917e-06, "loss": 0.8217, "step": 5810 }, { "epoch": 0.18106803674295502, "grad_norm": 2.0922884941101074, "learning_rate": 4.310367432560884e-06, "loss": 0.7943, "step": 5815 }, { "epoch": 0.18122372723026622, "grad_norm": 2.314455270767212, "learning_rate": 4.309548002228851e-06, "loss": 0.8219, "step": 5820 }, { "epoch": 0.18137941771757746, "grad_norm": 3.259751319885254, "learning_rate": 4.308728571896818e-06, "loss": 0.8366, "step": 5825 }, { "epoch": 0.1815351082048887, "grad_norm": 2.424942970275879, "learning_rate": 4.3079091415647845e-06, "loss": 0.9236, "step": 5830 }, { "epoch": 0.1816907986921999, "grad_norm": 2.1335582733154297, "learning_rate": 4.307089711232751e-06, "loss": 0.8561, "step": 5835 }, { "epoch": 0.18184648917951113, "grad_norm": 1.9702082872390747, "learning_rate": 4.306270280900718e-06, "loss": 0.766, "step": 5840 }, { "epoch": 0.18200217966682236, "grad_norm": 1.889363169670105, "learning_rate": 4.305450850568685e-06, "loss": 0.7904, "step": 5845 }, { "epoch": 0.1821578701541336, "grad_norm": 2.0053112506866455, "learning_rate": 4.304631420236652e-06, "loss": 0.7914, "step": 5850 }, { "epoch": 0.1823135606414448, "grad_norm": 2.0299394130706787, "learning_rate": 4.303811989904619e-06, "loss": 0.8996, "step": 5855 }, { "epoch": 0.18246925112875603, "grad_norm": 2.132533311843872, "learning_rate": 4.302992559572585e-06, "loss": 0.834, "step": 5860 }, { "epoch": 0.18262494161606727, "grad_norm": 2.341862916946411, "learning_rate": 4.302173129240553e-06, "loss": 0.9329, "step": 5865 }, { "epoch": 0.18278063210337847, "grad_norm": 2.4259214401245117, "learning_rate": 4.3013536989085195e-06, "loss": 0.7971, "step": 5870 }, { "epoch": 0.1829363225906897, "grad_norm": 2.7561252117156982, "learning_rate": 4.300534268576486e-06, "loss": 0.7896, "step": 5875 }, { "epoch": 0.18309201307800094, "grad_norm": 2.4723756313323975, "learning_rate": 4.299714838244452e-06, "loss": 0.8476, "step": 5880 }, { "epoch": 0.18324770356531217, "grad_norm": 2.2497317790985107, "learning_rate": 4.298895407912419e-06, "loss": 0.7982, "step": 5885 }, { "epoch": 0.18340339405262338, "grad_norm": 2.292165994644165, "learning_rate": 4.298075977580386e-06, "loss": 0.854, "step": 5890 }, { "epoch": 0.1835590845399346, "grad_norm": 1.8411747217178345, "learning_rate": 4.297256547248354e-06, "loss": 0.8328, "step": 5895 }, { "epoch": 0.18371477502724584, "grad_norm": 1.862614393234253, "learning_rate": 4.29643711691632e-06, "loss": 0.9043, "step": 5900 }, { "epoch": 0.18387046551455707, "grad_norm": 2.2432310581207275, "learning_rate": 4.295617686584287e-06, "loss": 0.7545, "step": 5905 }, { "epoch": 0.18402615600186828, "grad_norm": 2.219866991043091, "learning_rate": 4.294798256252254e-06, "loss": 0.8068, "step": 5910 }, { "epoch": 0.1841818464891795, "grad_norm": 2.383572578430176, "learning_rate": 4.2939788259202205e-06, "loss": 0.784, "step": 5915 }, { "epoch": 0.18433753697649075, "grad_norm": 2.6113784313201904, "learning_rate": 4.293159395588187e-06, "loss": 0.801, "step": 5920 }, { "epoch": 0.18449322746380195, "grad_norm": 1.9454386234283447, "learning_rate": 4.292339965256154e-06, "loss": 0.7925, "step": 5925 }, { "epoch": 0.18464891795111318, "grad_norm": 1.855411410331726, "learning_rate": 4.291520534924121e-06, "loss": 0.7988, "step": 5930 }, { "epoch": 0.18480460843842442, "grad_norm": 1.9861040115356445, "learning_rate": 4.290701104592088e-06, "loss": 0.8001, "step": 5935 }, { "epoch": 0.18496029892573565, "grad_norm": 2.601616144180298, "learning_rate": 4.289881674260055e-06, "loss": 0.8868, "step": 5940 }, { "epoch": 0.18511598941304686, "grad_norm": 2.147611379623413, "learning_rate": 4.289062243928022e-06, "loss": 0.8289, "step": 5945 }, { "epoch": 0.1852716799003581, "grad_norm": 2.07558274269104, "learning_rate": 4.2882428135959885e-06, "loss": 0.9683, "step": 5950 }, { "epoch": 0.18542737038766932, "grad_norm": 2.21073055267334, "learning_rate": 4.287423383263955e-06, "loss": 0.8332, "step": 5955 }, { "epoch": 0.18558306087498053, "grad_norm": 1.9213372468948364, "learning_rate": 4.286603952931922e-06, "loss": 0.9804, "step": 5960 }, { "epoch": 0.18573875136229176, "grad_norm": 4.073292255401611, "learning_rate": 4.285784522599889e-06, "loss": 0.8128, "step": 5965 }, { "epoch": 0.185894441849603, "grad_norm": 2.1020779609680176, "learning_rate": 4.284965092267856e-06, "loss": 0.89, "step": 5970 }, { "epoch": 0.18605013233691423, "grad_norm": 2.3488049507141113, "learning_rate": 4.284145661935822e-06, "loss": 0.8401, "step": 5975 }, { "epoch": 0.18620582282422543, "grad_norm": 3.6160151958465576, "learning_rate": 4.28332623160379e-06, "loss": 0.8459, "step": 5980 }, { "epoch": 0.18636151331153666, "grad_norm": 2.487563371658325, "learning_rate": 4.282506801271757e-06, "loss": 0.8255, "step": 5985 }, { "epoch": 0.1865172037988479, "grad_norm": 2.32141375541687, "learning_rate": 4.2816873709397235e-06, "loss": 0.8764, "step": 5990 }, { "epoch": 0.1866728942861591, "grad_norm": 2.2264034748077393, "learning_rate": 4.2808679406076895e-06, "loss": 0.825, "step": 5995 }, { "epoch": 0.18682858477347034, "grad_norm": 2.3819334506988525, "learning_rate": 4.280048510275656e-06, "loss": 0.7802, "step": 6000 }, { "epoch": 0.18698427526078157, "grad_norm": 2.2201180458068848, "learning_rate": 4.279229079943624e-06, "loss": 0.8143, "step": 6005 }, { "epoch": 0.1871399657480928, "grad_norm": 2.1442952156066895, "learning_rate": 4.27840964961159e-06, "loss": 0.7332, "step": 6010 }, { "epoch": 0.187295656235404, "grad_norm": 2.2135839462280273, "learning_rate": 4.277590219279557e-06, "loss": 0.8296, "step": 6015 }, { "epoch": 0.18745134672271524, "grad_norm": 2.559251546859741, "learning_rate": 4.276770788947524e-06, "loss": 0.8497, "step": 6020 }, { "epoch": 0.18760703721002647, "grad_norm": 1.9566985368728638, "learning_rate": 4.275951358615491e-06, "loss": 0.8052, "step": 6025 }, { "epoch": 0.1877627276973377, "grad_norm": 2.244408130645752, "learning_rate": 4.275131928283458e-06, "loss": 0.8422, "step": 6030 }, { "epoch": 0.1879184181846489, "grad_norm": 2.049684762954712, "learning_rate": 4.2743124979514245e-06, "loss": 0.7786, "step": 6035 }, { "epoch": 0.18807410867196014, "grad_norm": 2.0833327770233154, "learning_rate": 4.273493067619391e-06, "loss": 0.7874, "step": 6040 }, { "epoch": 0.18822979915927138, "grad_norm": 1.9852747917175293, "learning_rate": 4.272673637287358e-06, "loss": 0.8165, "step": 6045 }, { "epoch": 0.18838548964658258, "grad_norm": 2.401332378387451, "learning_rate": 4.271854206955325e-06, "loss": 0.8128, "step": 6050 }, { "epoch": 0.18854118013389382, "grad_norm": 2.0841970443725586, "learning_rate": 4.271034776623292e-06, "loss": 0.8102, "step": 6055 }, { "epoch": 0.18869687062120505, "grad_norm": 2.063305616378784, "learning_rate": 4.270215346291259e-06, "loss": 0.8116, "step": 6060 }, { "epoch": 0.18885256110851628, "grad_norm": 2.785865068435669, "learning_rate": 4.269395915959226e-06, "loss": 0.89, "step": 6065 }, { "epoch": 0.1890082515958275, "grad_norm": 2.3502914905548096, "learning_rate": 4.2685764856271926e-06, "loss": 0.8604, "step": 6070 }, { "epoch": 0.18916394208313872, "grad_norm": 6.709879398345947, "learning_rate": 4.2677570552951594e-06, "loss": 0.8496, "step": 6075 }, { "epoch": 0.18931963257044995, "grad_norm": 2.2493345737457275, "learning_rate": 4.266937624963126e-06, "loss": 0.9001, "step": 6080 }, { "epoch": 0.18947532305776116, "grad_norm": 1.8575046062469482, "learning_rate": 4.266118194631092e-06, "loss": 0.7804, "step": 6085 }, { "epoch": 0.1896310135450724, "grad_norm": 1.7721598148345947, "learning_rate": 4.265298764299059e-06, "loss": 0.8848, "step": 6090 }, { "epoch": 0.18978670403238362, "grad_norm": 2.2682740688323975, "learning_rate": 4.264479333967027e-06, "loss": 0.8436, "step": 6095 }, { "epoch": 0.18994239451969486, "grad_norm": 2.212176561355591, "learning_rate": 4.263659903634994e-06, "loss": 0.7972, "step": 6100 }, { "epoch": 0.19009808500700606, "grad_norm": 2.3351666927337646, "learning_rate": 4.26284047330296e-06, "loss": 0.8141, "step": 6105 }, { "epoch": 0.1902537754943173, "grad_norm": 2.3131062984466553, "learning_rate": 4.262021042970927e-06, "loss": 0.814, "step": 6110 }, { "epoch": 0.19040946598162853, "grad_norm": 2.273505449295044, "learning_rate": 4.2612016126388935e-06, "loss": 0.8208, "step": 6115 }, { "epoch": 0.19056515646893976, "grad_norm": 2.2434241771698, "learning_rate": 4.260382182306861e-06, "loss": 0.7758, "step": 6120 }, { "epoch": 0.19072084695625097, "grad_norm": 1.9676765203475952, "learning_rate": 4.259562751974827e-06, "loss": 0.923, "step": 6125 }, { "epoch": 0.1908765374435622, "grad_norm": 1.9673675298690796, "learning_rate": 4.258743321642794e-06, "loss": 0.8233, "step": 6130 }, { "epoch": 0.19103222793087343, "grad_norm": 1.8827406167984009, "learning_rate": 4.257923891310761e-06, "loss": 0.8347, "step": 6135 }, { "epoch": 0.19118791841818464, "grad_norm": 2.3906397819519043, "learning_rate": 4.257104460978728e-06, "loss": 0.9321, "step": 6140 }, { "epoch": 0.19134360890549587, "grad_norm": 2.556053400039673, "learning_rate": 4.256285030646695e-06, "loss": 0.7955, "step": 6145 }, { "epoch": 0.1914992993928071, "grad_norm": 2.1357812881469727, "learning_rate": 4.255465600314662e-06, "loss": 0.7853, "step": 6150 }, { "epoch": 0.19165498988011834, "grad_norm": 2.421515941619873, "learning_rate": 4.2546461699826285e-06, "loss": 0.8188, "step": 6155 }, { "epoch": 0.19181068036742954, "grad_norm": 2.4330978393554688, "learning_rate": 4.253826739650595e-06, "loss": 0.8934, "step": 6160 }, { "epoch": 0.19196637085474078, "grad_norm": 1.9715044498443604, "learning_rate": 4.253007309318562e-06, "loss": 0.8316, "step": 6165 }, { "epoch": 0.192122061342052, "grad_norm": 2.390254259109497, "learning_rate": 4.252187878986529e-06, "loss": 0.8649, "step": 6170 }, { "epoch": 0.1922777518293632, "grad_norm": 1.9275269508361816, "learning_rate": 4.251368448654496e-06, "loss": 0.926, "step": 6175 }, { "epoch": 0.19243344231667445, "grad_norm": 2.3456573486328125, "learning_rate": 4.250549018322462e-06, "loss": 0.7989, "step": 6180 }, { "epoch": 0.19258913280398568, "grad_norm": 4.410299301147461, "learning_rate": 4.24972958799043e-06, "loss": 0.9099, "step": 6185 }, { "epoch": 0.1927448232912969, "grad_norm": 2.305147171020508, "learning_rate": 4.248910157658397e-06, "loss": 0.8332, "step": 6190 }, { "epoch": 0.19290051377860812, "grad_norm": 2.3862078189849854, "learning_rate": 4.2480907273263635e-06, "loss": 0.8105, "step": 6195 }, { "epoch": 0.19305620426591935, "grad_norm": 1.9854096174240112, "learning_rate": 4.2472712969943295e-06, "loss": 0.7683, "step": 6200 }, { "epoch": 0.19321189475323058, "grad_norm": 1.9062918424606323, "learning_rate": 4.246451866662296e-06, "loss": 0.7981, "step": 6205 }, { "epoch": 0.1933675852405418, "grad_norm": 1.9443817138671875, "learning_rate": 4.245632436330264e-06, "loss": 0.8667, "step": 6210 }, { "epoch": 0.19352327572785302, "grad_norm": 1.9063620567321777, "learning_rate": 4.244813005998231e-06, "loss": 0.8538, "step": 6215 }, { "epoch": 0.19367896621516426, "grad_norm": 1.938868522644043, "learning_rate": 4.243993575666197e-06, "loss": 0.8546, "step": 6220 }, { "epoch": 0.1938346567024755, "grad_norm": 2.544550895690918, "learning_rate": 4.243174145334164e-06, "loss": 0.9174, "step": 6225 }, { "epoch": 0.1939903471897867, "grad_norm": 1.8157238960266113, "learning_rate": 4.242354715002131e-06, "loss": 0.8645, "step": 6230 }, { "epoch": 0.19414603767709793, "grad_norm": 1.940098762512207, "learning_rate": 4.2415352846700976e-06, "loss": 0.8397, "step": 6235 }, { "epoch": 0.19430172816440916, "grad_norm": 2.6403396129608154, "learning_rate": 4.2407158543380644e-06, "loss": 0.8656, "step": 6240 }, { "epoch": 0.1944574186517204, "grad_norm": 2.5808868408203125, "learning_rate": 4.239896424006031e-06, "loss": 0.7722, "step": 6245 }, { "epoch": 0.1946131091390316, "grad_norm": 1.6903711557388306, "learning_rate": 4.239076993673998e-06, "loss": 0.8132, "step": 6250 }, { "epoch": 0.19476879962634283, "grad_norm": 1.9535936117172241, "learning_rate": 4.238257563341965e-06, "loss": 0.8136, "step": 6255 }, { "epoch": 0.19492449011365406, "grad_norm": 2.4885551929473877, "learning_rate": 4.237438133009932e-06, "loss": 0.8887, "step": 6260 }, { "epoch": 0.19508018060096527, "grad_norm": 2.2231833934783936, "learning_rate": 4.236618702677899e-06, "loss": 0.8378, "step": 6265 }, { "epoch": 0.1952358710882765, "grad_norm": 2.0374650955200195, "learning_rate": 4.235799272345866e-06, "loss": 0.7587, "step": 6270 }, { "epoch": 0.19539156157558774, "grad_norm": 2.012476921081543, "learning_rate": 4.2349798420138325e-06, "loss": 0.8291, "step": 6275 }, { "epoch": 0.19554725206289897, "grad_norm": 1.8950145244598389, "learning_rate": 4.234160411681799e-06, "loss": 0.8047, "step": 6280 }, { "epoch": 0.19570294255021017, "grad_norm": 2.1786704063415527, "learning_rate": 4.233340981349766e-06, "loss": 0.8578, "step": 6285 }, { "epoch": 0.1958586330375214, "grad_norm": 2.1542584896087646, "learning_rate": 4.232521551017732e-06, "loss": 0.8227, "step": 6290 }, { "epoch": 0.19601432352483264, "grad_norm": 2.5967135429382324, "learning_rate": 4.231702120685699e-06, "loss": 0.8315, "step": 6295 }, { "epoch": 0.19617001401214385, "grad_norm": 1.9760197401046753, "learning_rate": 4.230882690353667e-06, "loss": 0.8296, "step": 6300 }, { "epoch": 0.19632570449945508, "grad_norm": 2.2786355018615723, "learning_rate": 4.230063260021634e-06, "loss": 0.8382, "step": 6305 }, { "epoch": 0.1964813949867663, "grad_norm": 2.1212189197540283, "learning_rate": 4.2292438296896e-06, "loss": 0.8274, "step": 6310 }, { "epoch": 0.19663708547407754, "grad_norm": 2.319551467895508, "learning_rate": 4.228424399357567e-06, "loss": 0.8784, "step": 6315 }, { "epoch": 0.19679277596138875, "grad_norm": 1.9837534427642822, "learning_rate": 4.2276049690255335e-06, "loss": 0.8698, "step": 6320 }, { "epoch": 0.19694846644869998, "grad_norm": 2.100247621536255, "learning_rate": 4.226785538693501e-06, "loss": 0.7788, "step": 6325 }, { "epoch": 0.19710415693601122, "grad_norm": 2.445542573928833, "learning_rate": 4.225966108361467e-06, "loss": 0.8834, "step": 6330 }, { "epoch": 0.19725984742332245, "grad_norm": 1.9963898658752441, "learning_rate": 4.225146678029434e-06, "loss": 0.84, "step": 6335 }, { "epoch": 0.19741553791063365, "grad_norm": 2.1742630004882812, "learning_rate": 4.224327247697401e-06, "loss": 0.8641, "step": 6340 }, { "epoch": 0.1975712283979449, "grad_norm": 2.137125253677368, "learning_rate": 4.223507817365368e-06, "loss": 0.7672, "step": 6345 }, { "epoch": 0.19772691888525612, "grad_norm": 2.0553677082061768, "learning_rate": 4.222688387033335e-06, "loss": 0.823, "step": 6350 }, { "epoch": 0.19788260937256733, "grad_norm": 1.7321666479110718, "learning_rate": 4.221868956701302e-06, "loss": 0.8412, "step": 6355 }, { "epoch": 0.19803829985987856, "grad_norm": 1.8666033744812012, "learning_rate": 4.2210495263692684e-06, "loss": 0.7913, "step": 6360 }, { "epoch": 0.1981939903471898, "grad_norm": 2.1635775566101074, "learning_rate": 4.220230096037235e-06, "loss": 0.7774, "step": 6365 }, { "epoch": 0.19834968083450102, "grad_norm": 1.9649049043655396, "learning_rate": 4.219410665705202e-06, "loss": 0.8271, "step": 6370 }, { "epoch": 0.19850537132181223, "grad_norm": 2.2650094032287598, "learning_rate": 4.218591235373169e-06, "loss": 0.9257, "step": 6375 }, { "epoch": 0.19866106180912346, "grad_norm": 2.2699997425079346, "learning_rate": 4.217771805041136e-06, "loss": 0.8471, "step": 6380 }, { "epoch": 0.1988167522964347, "grad_norm": 2.492231607437134, "learning_rate": 4.216952374709102e-06, "loss": 0.8652, "step": 6385 }, { "epoch": 0.1989724427837459, "grad_norm": 2.2747433185577393, "learning_rate": 4.21613294437707e-06, "loss": 0.8077, "step": 6390 }, { "epoch": 0.19912813327105713, "grad_norm": 2.189537525177002, "learning_rate": 4.2153135140450365e-06, "loss": 0.8978, "step": 6395 }, { "epoch": 0.19928382375836837, "grad_norm": 2.2064197063446045, "learning_rate": 4.214494083713003e-06, "loss": 0.7765, "step": 6400 }, { "epoch": 0.1994395142456796, "grad_norm": 2.1456716060638428, "learning_rate": 4.2136746533809694e-06, "loss": 0.8743, "step": 6405 }, { "epoch": 0.1995952047329908, "grad_norm": 2.0344581604003906, "learning_rate": 4.212855223048936e-06, "loss": 0.8333, "step": 6410 }, { "epoch": 0.19975089522030204, "grad_norm": 2.552791118621826, "learning_rate": 4.212035792716904e-06, "loss": 0.8397, "step": 6415 }, { "epoch": 0.19990658570761327, "grad_norm": 2.3544182777404785, "learning_rate": 4.211216362384871e-06, "loss": 0.7921, "step": 6420 }, { "epoch": 0.20006227619492448, "grad_norm": 2.04618501663208, "learning_rate": 4.210396932052837e-06, "loss": 0.848, "step": 6425 }, { "epoch": 0.2002179666822357, "grad_norm": 2.0861403942108154, "learning_rate": 4.209577501720804e-06, "loss": 0.8184, "step": 6430 }, { "epoch": 0.20037365716954694, "grad_norm": 2.4220261573791504, "learning_rate": 4.208758071388771e-06, "loss": 0.8952, "step": 6435 }, { "epoch": 0.20052934765685818, "grad_norm": 2.0711402893066406, "learning_rate": 4.2079386410567375e-06, "loss": 0.8254, "step": 6440 }, { "epoch": 0.20068503814416938, "grad_norm": 2.463491439819336, "learning_rate": 4.207119210724704e-06, "loss": 0.8024, "step": 6445 }, { "epoch": 0.20084072863148061, "grad_norm": 2.3838658332824707, "learning_rate": 4.206299780392671e-06, "loss": 0.8669, "step": 6450 }, { "epoch": 0.20099641911879185, "grad_norm": 2.0854461193084717, "learning_rate": 4.205480350060638e-06, "loss": 0.8157, "step": 6455 }, { "epoch": 0.20115210960610308, "grad_norm": 2.004016876220703, "learning_rate": 4.204660919728605e-06, "loss": 0.8797, "step": 6460 }, { "epoch": 0.20130780009341429, "grad_norm": 2.2413582801818848, "learning_rate": 4.203841489396572e-06, "loss": 0.8227, "step": 6465 }, { "epoch": 0.20146349058072552, "grad_norm": 1.867682933807373, "learning_rate": 4.203022059064539e-06, "loss": 0.8483, "step": 6470 }, { "epoch": 0.20161918106803675, "grad_norm": 2.164456844329834, "learning_rate": 4.202202628732506e-06, "loss": 0.7823, "step": 6475 }, { "epoch": 0.20177487155534796, "grad_norm": 2.458446741104126, "learning_rate": 4.2013831984004725e-06, "loss": 0.8839, "step": 6480 }, { "epoch": 0.2019305620426592, "grad_norm": 1.8970510959625244, "learning_rate": 4.200563768068439e-06, "loss": 0.8243, "step": 6485 }, { "epoch": 0.20208625252997042, "grad_norm": 2.0655930042266846, "learning_rate": 4.199744337736406e-06, "loss": 0.8602, "step": 6490 }, { "epoch": 0.20224194301728166, "grad_norm": 2.5191051959991455, "learning_rate": 4.198924907404373e-06, "loss": 0.7904, "step": 6495 }, { "epoch": 0.20239763350459286, "grad_norm": 2.0264320373535156, "learning_rate": 4.198105477072339e-06, "loss": 0.79, "step": 6500 }, { "epoch": 0.2025533239919041, "grad_norm": 1.8731263875961304, "learning_rate": 4.197286046740307e-06, "loss": 0.7714, "step": 6505 }, { "epoch": 0.20270901447921533, "grad_norm": 2.099196195602417, "learning_rate": 4.196466616408274e-06, "loss": 0.8214, "step": 6510 }, { "epoch": 0.20286470496652653, "grad_norm": 1.9507365226745605, "learning_rate": 4.19564718607624e-06, "loss": 0.8135, "step": 6515 }, { "epoch": 0.20302039545383777, "grad_norm": 2.1406664848327637, "learning_rate": 4.194827755744207e-06, "loss": 0.8218, "step": 6520 }, { "epoch": 0.203176085941149, "grad_norm": 2.8067595958709717, "learning_rate": 4.1940083254121734e-06, "loss": 0.7142, "step": 6525 }, { "epoch": 0.20333177642846023, "grad_norm": 2.4620790481567383, "learning_rate": 4.193188895080141e-06, "loss": 0.7927, "step": 6530 }, { "epoch": 0.20348746691577144, "grad_norm": 2.7948286533355713, "learning_rate": 4.192369464748107e-06, "loss": 0.9019, "step": 6535 }, { "epoch": 0.20364315740308267, "grad_norm": 1.9520727396011353, "learning_rate": 4.191550034416074e-06, "loss": 0.8666, "step": 6540 }, { "epoch": 0.2037988478903939, "grad_norm": 1.8143844604492188, "learning_rate": 4.190730604084041e-06, "loss": 0.9042, "step": 6545 }, { "epoch": 0.20395453837770514, "grad_norm": 2.3503899574279785, "learning_rate": 4.189911173752008e-06, "loss": 0.874, "step": 6550 }, { "epoch": 0.20411022886501634, "grad_norm": 2.331395149230957, "learning_rate": 4.189091743419975e-06, "loss": 0.885, "step": 6555 }, { "epoch": 0.20426591935232757, "grad_norm": 2.252469301223755, "learning_rate": 4.1882723130879415e-06, "loss": 0.8985, "step": 6560 }, { "epoch": 0.2044216098396388, "grad_norm": 1.8110697269439697, "learning_rate": 4.187452882755908e-06, "loss": 0.8085, "step": 6565 }, { "epoch": 0.20457730032695, "grad_norm": 2.374177932739258, "learning_rate": 4.186633452423875e-06, "loss": 0.8675, "step": 6570 }, { "epoch": 0.20473299081426125, "grad_norm": 2.0586512088775635, "learning_rate": 4.185814022091842e-06, "loss": 0.8093, "step": 6575 }, { "epoch": 0.20488868130157248, "grad_norm": 2.007845878601074, "learning_rate": 4.184994591759809e-06, "loss": 0.8733, "step": 6580 }, { "epoch": 0.2050443717888837, "grad_norm": 2.1340372562408447, "learning_rate": 4.184175161427776e-06, "loss": 0.8393, "step": 6585 }, { "epoch": 0.20520006227619492, "grad_norm": 1.9887737035751343, "learning_rate": 4.183355731095743e-06, "loss": 0.8249, "step": 6590 }, { "epoch": 0.20535575276350615, "grad_norm": 1.9929063320159912, "learning_rate": 4.18253630076371e-06, "loss": 0.8119, "step": 6595 }, { "epoch": 0.20551144325081738, "grad_norm": 2.3077776432037354, "learning_rate": 4.1817168704316765e-06, "loss": 0.7381, "step": 6600 }, { "epoch": 0.2056671337381286, "grad_norm": 2.288452625274658, "learning_rate": 4.180897440099643e-06, "loss": 0.8069, "step": 6605 }, { "epoch": 0.20582282422543982, "grad_norm": 2.005281686782837, "learning_rate": 4.180078009767609e-06, "loss": 0.8621, "step": 6610 }, { "epoch": 0.20597851471275105, "grad_norm": 2.3807594776153564, "learning_rate": 4.179258579435576e-06, "loss": 0.8472, "step": 6615 }, { "epoch": 0.2061342052000623, "grad_norm": 2.0019590854644775, "learning_rate": 4.178439149103544e-06, "loss": 0.849, "step": 6620 }, { "epoch": 0.2062898956873735, "grad_norm": 2.5652925968170166, "learning_rate": 4.177619718771511e-06, "loss": 0.8885, "step": 6625 }, { "epoch": 0.20644558617468473, "grad_norm": 2.267880439758301, "learning_rate": 4.176800288439477e-06, "loss": 0.7301, "step": 6630 }, { "epoch": 0.20660127666199596, "grad_norm": 2.2931947708129883, "learning_rate": 4.175980858107444e-06, "loss": 0.8592, "step": 6635 }, { "epoch": 0.20675696714930716, "grad_norm": 2.4831974506378174, "learning_rate": 4.175161427775411e-06, "loss": 0.7806, "step": 6640 }, { "epoch": 0.2069126576366184, "grad_norm": 2.1888668537139893, "learning_rate": 4.174341997443378e-06, "loss": 0.8856, "step": 6645 }, { "epoch": 0.20706834812392963, "grad_norm": 2.206017017364502, "learning_rate": 4.173522567111344e-06, "loss": 0.8703, "step": 6650 }, { "epoch": 0.20722403861124086, "grad_norm": 2.7038471698760986, "learning_rate": 4.172703136779311e-06, "loss": 0.852, "step": 6655 }, { "epoch": 0.20737972909855207, "grad_norm": 2.1589953899383545, "learning_rate": 4.171883706447278e-06, "loss": 0.8173, "step": 6660 }, { "epoch": 0.2075354195858633, "grad_norm": 2.7420217990875244, "learning_rate": 4.171064276115245e-06, "loss": 0.8765, "step": 6665 }, { "epoch": 0.20769111007317453, "grad_norm": 2.2568860054016113, "learning_rate": 4.170244845783212e-06, "loss": 0.8995, "step": 6670 }, { "epoch": 0.20784680056048577, "grad_norm": 2.0984232425689697, "learning_rate": 4.169425415451179e-06, "loss": 0.83, "step": 6675 }, { "epoch": 0.20800249104779697, "grad_norm": 2.594890594482422, "learning_rate": 4.1686059851191456e-06, "loss": 0.8259, "step": 6680 }, { "epoch": 0.2081581815351082, "grad_norm": 2.2074902057647705, "learning_rate": 4.1677865547871124e-06, "loss": 0.8161, "step": 6685 }, { "epoch": 0.20831387202241944, "grad_norm": 2.1114563941955566, "learning_rate": 4.166967124455079e-06, "loss": 0.8033, "step": 6690 }, { "epoch": 0.20846956250973064, "grad_norm": 2.0053422451019287, "learning_rate": 4.166147694123046e-06, "loss": 0.8688, "step": 6695 }, { "epoch": 0.20862525299704188, "grad_norm": 1.978340744972229, "learning_rate": 4.165328263791013e-06, "loss": 0.8272, "step": 6700 }, { "epoch": 0.2087809434843531, "grad_norm": 2.185070753097534, "learning_rate": 4.16450883345898e-06, "loss": 0.8283, "step": 6705 }, { "epoch": 0.20893663397166434, "grad_norm": 2.542189598083496, "learning_rate": 4.163689403126947e-06, "loss": 0.9202, "step": 6710 }, { "epoch": 0.20909232445897555, "grad_norm": 2.0383248329162598, "learning_rate": 4.162869972794914e-06, "loss": 0.8703, "step": 6715 }, { "epoch": 0.20924801494628678, "grad_norm": 2.215056896209717, "learning_rate": 4.16205054246288e-06, "loss": 0.8697, "step": 6720 }, { "epoch": 0.20940370543359801, "grad_norm": 2.013138771057129, "learning_rate": 4.1612311121308465e-06, "loss": 0.7898, "step": 6725 }, { "epoch": 0.20955939592090922, "grad_norm": 1.9809224605560303, "learning_rate": 4.160411681798813e-06, "loss": 0.8387, "step": 6730 }, { "epoch": 0.20971508640822045, "grad_norm": 2.0650222301483154, "learning_rate": 4.159592251466781e-06, "loss": 0.8181, "step": 6735 }, { "epoch": 0.20987077689553169, "grad_norm": 2.3129031658172607, "learning_rate": 4.158772821134747e-06, "loss": 0.8613, "step": 6740 }, { "epoch": 0.21002646738284292, "grad_norm": 2.16253924369812, "learning_rate": 4.157953390802714e-06, "loss": 0.7829, "step": 6745 }, { "epoch": 0.21018215787015412, "grad_norm": 2.0595037937164307, "learning_rate": 4.157133960470681e-06, "loss": 0.7734, "step": 6750 }, { "epoch": 0.21033784835746536, "grad_norm": 2.541996955871582, "learning_rate": 4.156314530138648e-06, "loss": 0.7996, "step": 6755 }, { "epoch": 0.2104935388447766, "grad_norm": 2.166623592376709, "learning_rate": 4.155495099806615e-06, "loss": 0.8726, "step": 6760 }, { "epoch": 0.21064922933208782, "grad_norm": 2.087785482406616, "learning_rate": 4.1546756694745815e-06, "loss": 0.8474, "step": 6765 }, { "epoch": 0.21080491981939903, "grad_norm": 1.7544705867767334, "learning_rate": 4.153856239142548e-06, "loss": 0.8629, "step": 6770 }, { "epoch": 0.21096061030671026, "grad_norm": 2.686365842819214, "learning_rate": 4.153036808810515e-06, "loss": 0.903, "step": 6775 }, { "epoch": 0.2111163007940215, "grad_norm": 2.1644630432128906, "learning_rate": 4.152217378478482e-06, "loss": 0.8293, "step": 6780 }, { "epoch": 0.2112719912813327, "grad_norm": 1.9083870649337769, "learning_rate": 4.151397948146449e-06, "loss": 0.8153, "step": 6785 }, { "epoch": 0.21142768176864393, "grad_norm": 2.149704694747925, "learning_rate": 4.150578517814416e-06, "loss": 0.8726, "step": 6790 }, { "epoch": 0.21158337225595517, "grad_norm": 2.4879720211029053, "learning_rate": 4.149759087482383e-06, "loss": 0.788, "step": 6795 }, { "epoch": 0.2117390627432664, "grad_norm": 2.3105669021606445, "learning_rate": 4.14893965715035e-06, "loss": 0.7638, "step": 6800 }, { "epoch": 0.2118947532305776, "grad_norm": 2.227503538131714, "learning_rate": 4.1481202268183164e-06, "loss": 0.8625, "step": 6805 }, { "epoch": 0.21205044371788884, "grad_norm": 1.9104280471801758, "learning_rate": 4.147300796486283e-06, "loss": 0.8375, "step": 6810 }, { "epoch": 0.21220613420520007, "grad_norm": 2.0081849098205566, "learning_rate": 4.146481366154249e-06, "loss": 0.9074, "step": 6815 }, { "epoch": 0.21236182469251128, "grad_norm": 2.53279447555542, "learning_rate": 4.145661935822217e-06, "loss": 0.7745, "step": 6820 }, { "epoch": 0.2125175151798225, "grad_norm": 2.572136402130127, "learning_rate": 4.144842505490184e-06, "loss": 0.7428, "step": 6825 }, { "epoch": 0.21267320566713374, "grad_norm": 2.1192009449005127, "learning_rate": 4.144023075158151e-06, "loss": 0.8767, "step": 6830 }, { "epoch": 0.21282889615444497, "grad_norm": 1.873154640197754, "learning_rate": 4.143203644826117e-06, "loss": 0.8585, "step": 6835 }, { "epoch": 0.21298458664175618, "grad_norm": 2.712212324142456, "learning_rate": 4.142384214494084e-06, "loss": 0.9152, "step": 6840 }, { "epoch": 0.2131402771290674, "grad_norm": 2.3799517154693604, "learning_rate": 4.141564784162051e-06, "loss": 0.843, "step": 6845 }, { "epoch": 0.21329596761637865, "grad_norm": 2.127462387084961, "learning_rate": 4.140745353830018e-06, "loss": 0.8767, "step": 6850 }, { "epoch": 0.21345165810368985, "grad_norm": 2.0099434852600098, "learning_rate": 4.139925923497984e-06, "loss": 0.8651, "step": 6855 }, { "epoch": 0.21360734859100108, "grad_norm": 2.178203582763672, "learning_rate": 4.139106493165951e-06, "loss": 0.8592, "step": 6860 }, { "epoch": 0.21376303907831232, "grad_norm": 1.9794440269470215, "learning_rate": 4.138287062833918e-06, "loss": 0.822, "step": 6865 }, { "epoch": 0.21391872956562355, "grad_norm": 2.328331708908081, "learning_rate": 4.137467632501885e-06, "loss": 0.833, "step": 6870 }, { "epoch": 0.21407442005293476, "grad_norm": 2.1072638034820557, "learning_rate": 4.136648202169852e-06, "loss": 0.7907, "step": 6875 }, { "epoch": 0.214230110540246, "grad_norm": 2.2162458896636963, "learning_rate": 4.135828771837819e-06, "loss": 0.8238, "step": 6880 }, { "epoch": 0.21438580102755722, "grad_norm": 2.2307779788970947, "learning_rate": 4.1350093415057855e-06, "loss": 0.8739, "step": 6885 }, { "epoch": 0.21454149151486845, "grad_norm": 2.210283041000366, "learning_rate": 4.134189911173752e-06, "loss": 0.9395, "step": 6890 }, { "epoch": 0.21469718200217966, "grad_norm": 1.9863277673721313, "learning_rate": 4.133370480841719e-06, "loss": 0.7924, "step": 6895 }, { "epoch": 0.2148528724894909, "grad_norm": 2.400404930114746, "learning_rate": 4.132551050509686e-06, "loss": 0.8763, "step": 6900 }, { "epoch": 0.21500856297680213, "grad_norm": 2.058394193649292, "learning_rate": 4.131731620177653e-06, "loss": 0.7647, "step": 6905 }, { "epoch": 0.21516425346411333, "grad_norm": 2.437596082687378, "learning_rate": 4.13091218984562e-06, "loss": 0.8438, "step": 6910 }, { "epoch": 0.21531994395142456, "grad_norm": 1.9378023147583008, "learning_rate": 4.130092759513587e-06, "loss": 0.8151, "step": 6915 }, { "epoch": 0.2154756344387358, "grad_norm": 2.2490813732147217, "learning_rate": 4.129273329181554e-06, "loss": 0.8322, "step": 6920 }, { "epoch": 0.21563132492604703, "grad_norm": 1.9712376594543457, "learning_rate": 4.1284538988495205e-06, "loss": 0.8106, "step": 6925 }, { "epoch": 0.21578701541335824, "grad_norm": 1.8738828897476196, "learning_rate": 4.1276344685174865e-06, "loss": 0.8006, "step": 6930 }, { "epoch": 0.21594270590066947, "grad_norm": 2.355872869491577, "learning_rate": 4.126815038185454e-06, "loss": 0.8477, "step": 6935 }, { "epoch": 0.2160983963879807, "grad_norm": 2.2224888801574707, "learning_rate": 4.125995607853421e-06, "loss": 0.8935, "step": 6940 }, { "epoch": 0.2162540868752919, "grad_norm": 2.069570302963257, "learning_rate": 4.125176177521387e-06, "loss": 0.8458, "step": 6945 }, { "epoch": 0.21640977736260314, "grad_norm": 1.982688546180725, "learning_rate": 4.124356747189354e-06, "loss": 0.8068, "step": 6950 }, { "epoch": 0.21656546784991437, "grad_norm": 2.3874411582946777, "learning_rate": 4.123537316857321e-06, "loss": 0.8298, "step": 6955 }, { "epoch": 0.2167211583372256, "grad_norm": 1.9647815227508545, "learning_rate": 4.1227178865252886e-06, "loss": 0.8552, "step": 6960 }, { "epoch": 0.2168768488245368, "grad_norm": 1.774656891822815, "learning_rate": 4.121898456193255e-06, "loss": 0.8052, "step": 6965 }, { "epoch": 0.21703253931184804, "grad_norm": 2.111085891723633, "learning_rate": 4.1210790258612214e-06, "loss": 0.7689, "step": 6970 }, { "epoch": 0.21718822979915928, "grad_norm": 2.3051936626434326, "learning_rate": 4.120259595529188e-06, "loss": 0.7748, "step": 6975 }, { "epoch": 0.2173439202864705, "grad_norm": 2.350425958633423, "learning_rate": 4.119440165197155e-06, "loss": 0.8015, "step": 6980 }, { "epoch": 0.21749961077378172, "grad_norm": 2.118988275527954, "learning_rate": 4.118620734865122e-06, "loss": 0.7626, "step": 6985 }, { "epoch": 0.21765530126109295, "grad_norm": 2.2296009063720703, "learning_rate": 4.117801304533089e-06, "loss": 0.7742, "step": 6990 }, { "epoch": 0.21781099174840418, "grad_norm": 2.0748491287231445, "learning_rate": 4.116981874201056e-06, "loss": 0.7926, "step": 6995 }, { "epoch": 0.2179666822357154, "grad_norm": 1.9788402318954468, "learning_rate": 4.116162443869023e-06, "loss": 0.8161, "step": 7000 }, { "epoch": 0.21812237272302662, "grad_norm": 1.927450180053711, "learning_rate": 4.1153430135369895e-06, "loss": 0.7969, "step": 7005 }, { "epoch": 0.21827806321033785, "grad_norm": 1.876054048538208, "learning_rate": 4.114523583204956e-06, "loss": 0.8022, "step": 7010 }, { "epoch": 0.21843375369764909, "grad_norm": 2.5122592449188232, "learning_rate": 4.113704152872923e-06, "loss": 0.8535, "step": 7015 }, { "epoch": 0.2185894441849603, "grad_norm": 2.382160186767578, "learning_rate": 4.112884722540889e-06, "loss": 0.7904, "step": 7020 }, { "epoch": 0.21874513467227152, "grad_norm": 2.0582687854766846, "learning_rate": 4.112065292208857e-06, "loss": 0.7914, "step": 7025 }, { "epoch": 0.21890082515958276, "grad_norm": 2.1553235054016113, "learning_rate": 4.111245861876824e-06, "loss": 0.8703, "step": 7030 }, { "epoch": 0.21905651564689396, "grad_norm": 2.24690580368042, "learning_rate": 4.110426431544791e-06, "loss": 0.7676, "step": 7035 }, { "epoch": 0.2192122061342052, "grad_norm": 2.207500457763672, "learning_rate": 4.109607001212757e-06, "loss": 0.8617, "step": 7040 }, { "epoch": 0.21936789662151643, "grad_norm": 2.0742645263671875, "learning_rate": 4.108787570880724e-06, "loss": 0.8229, "step": 7045 }, { "epoch": 0.21952358710882766, "grad_norm": 2.4433534145355225, "learning_rate": 4.107968140548691e-06, "loss": 0.8213, "step": 7050 }, { "epoch": 0.21967927759613887, "grad_norm": 2.4873759746551514, "learning_rate": 4.107148710216658e-06, "loss": 0.865, "step": 7055 }, { "epoch": 0.2198349680834501, "grad_norm": 1.8824107646942139, "learning_rate": 4.106329279884624e-06, "loss": 0.8778, "step": 7060 }, { "epoch": 0.21999065857076133, "grad_norm": 1.9007360935211182, "learning_rate": 4.105509849552591e-06, "loss": 0.8566, "step": 7065 }, { "epoch": 0.22014634905807254, "grad_norm": 2.408503293991089, "learning_rate": 4.104690419220558e-06, "loss": 0.8714, "step": 7070 }, { "epoch": 0.22030203954538377, "grad_norm": 1.9524818658828735, "learning_rate": 4.103870988888526e-06, "loss": 0.7995, "step": 7075 }, { "epoch": 0.220457730032695, "grad_norm": 1.9913926124572754, "learning_rate": 4.103051558556492e-06, "loss": 0.8134, "step": 7080 }, { "epoch": 0.22061342052000624, "grad_norm": 2.0821034908294678, "learning_rate": 4.102232128224459e-06, "loss": 0.8523, "step": 7085 }, { "epoch": 0.22076911100731744, "grad_norm": 2.179324150085449, "learning_rate": 4.1014126978924255e-06, "loss": 0.8528, "step": 7090 }, { "epoch": 0.22092480149462868, "grad_norm": 2.123481035232544, "learning_rate": 4.100593267560392e-06, "loss": 0.857, "step": 7095 }, { "epoch": 0.2210804919819399, "grad_norm": 2.272094964981079, "learning_rate": 4.099773837228359e-06, "loss": 0.8358, "step": 7100 }, { "epoch": 0.22123618246925114, "grad_norm": 2.3068861961364746, "learning_rate": 4.098954406896326e-06, "loss": 0.8293, "step": 7105 }, { "epoch": 0.22139187295656235, "grad_norm": 1.9520374536514282, "learning_rate": 4.098134976564293e-06, "loss": 0.7823, "step": 7110 }, { "epoch": 0.22154756344387358, "grad_norm": 1.9922089576721191, "learning_rate": 4.09731554623226e-06, "loss": 0.7678, "step": 7115 }, { "epoch": 0.2217032539311848, "grad_norm": 2.115525722503662, "learning_rate": 4.096496115900227e-06, "loss": 0.808, "step": 7120 }, { "epoch": 0.22185894441849602, "grad_norm": 2.1176869869232178, "learning_rate": 4.0956766855681936e-06, "loss": 0.8288, "step": 7125 }, { "epoch": 0.22201463490580725, "grad_norm": 2.179840326309204, "learning_rate": 4.0948572552361604e-06, "loss": 0.7851, "step": 7130 }, { "epoch": 0.22217032539311848, "grad_norm": 1.6483625173568726, "learning_rate": 4.0940378249041264e-06, "loss": 0.8934, "step": 7135 }, { "epoch": 0.22232601588042972, "grad_norm": 1.8364272117614746, "learning_rate": 4.093218394572094e-06, "loss": 0.8473, "step": 7140 }, { "epoch": 0.22248170636774092, "grad_norm": 2.2542941570281982, "learning_rate": 4.092398964240061e-06, "loss": 0.8021, "step": 7145 }, { "epoch": 0.22263739685505216, "grad_norm": 2.003249406814575, "learning_rate": 4.091579533908027e-06, "loss": 0.8665, "step": 7150 }, { "epoch": 0.2227930873423634, "grad_norm": 2.1613330841064453, "learning_rate": 4.090760103575994e-06, "loss": 0.85, "step": 7155 }, { "epoch": 0.2229487778296746, "grad_norm": 2.117668628692627, "learning_rate": 4.089940673243961e-06, "loss": 0.8936, "step": 7160 }, { "epoch": 0.22310446831698583, "grad_norm": 2.133073568344116, "learning_rate": 4.0891212429119285e-06, "loss": 0.7949, "step": 7165 }, { "epoch": 0.22326015880429706, "grad_norm": 2.9912683963775635, "learning_rate": 4.0883018125798945e-06, "loss": 0.834, "step": 7170 }, { "epoch": 0.2234158492916083, "grad_norm": 2.310751438140869, "learning_rate": 4.087482382247861e-06, "loss": 0.8278, "step": 7175 }, { "epoch": 0.2235715397789195, "grad_norm": 2.1889379024505615, "learning_rate": 4.086662951915828e-06, "loss": 0.6914, "step": 7180 }, { "epoch": 0.22372723026623073, "grad_norm": 2.7901394367218018, "learning_rate": 4.085843521583795e-06, "loss": 0.8137, "step": 7185 }, { "epoch": 0.22388292075354196, "grad_norm": 2.3982996940612793, "learning_rate": 4.085024091251762e-06, "loss": 0.8095, "step": 7190 }, { "epoch": 0.2240386112408532, "grad_norm": 2.3470394611358643, "learning_rate": 4.084204660919729e-06, "loss": 0.8667, "step": 7195 }, { "epoch": 0.2241943017281644, "grad_norm": 1.9153913259506226, "learning_rate": 4.083385230587696e-06, "loss": 0.8115, "step": 7200 }, { "epoch": 0.22434999221547564, "grad_norm": 2.321101188659668, "learning_rate": 4.082565800255663e-06, "loss": 0.8246, "step": 7205 }, { "epoch": 0.22450568270278687, "grad_norm": 1.9483132362365723, "learning_rate": 4.0817463699236295e-06, "loss": 0.8055, "step": 7210 }, { "epoch": 0.22466137319009807, "grad_norm": 2.4717564582824707, "learning_rate": 4.080926939591596e-06, "loss": 0.8738, "step": 7215 }, { "epoch": 0.2248170636774093, "grad_norm": 2.0497121810913086, "learning_rate": 4.080107509259563e-06, "loss": 0.8212, "step": 7220 }, { "epoch": 0.22497275416472054, "grad_norm": 2.0530784130096436, "learning_rate": 4.079288078927529e-06, "loss": 0.7623, "step": 7225 }, { "epoch": 0.22512844465203177, "grad_norm": 2.2748401165008545, "learning_rate": 4.078468648595497e-06, "loss": 0.8776, "step": 7230 }, { "epoch": 0.22528413513934298, "grad_norm": 1.8849689960479736, "learning_rate": 4.077649218263464e-06, "loss": 0.7011, "step": 7235 }, { "epoch": 0.2254398256266542, "grad_norm": 2.1246869564056396, "learning_rate": 4.076829787931431e-06, "loss": 0.8313, "step": 7240 }, { "epoch": 0.22559551611396544, "grad_norm": 2.1276907920837402, "learning_rate": 4.076010357599397e-06, "loss": 0.7791, "step": 7245 }, { "epoch": 0.22575120660127665, "grad_norm": 2.184978723526001, "learning_rate": 4.075190927267364e-06, "loss": 0.7447, "step": 7250 }, { "epoch": 0.22590689708858788, "grad_norm": 2.191155433654785, "learning_rate": 4.074371496935331e-06, "loss": 0.7526, "step": 7255 }, { "epoch": 0.22606258757589912, "grad_norm": 1.8225866556167603, "learning_rate": 4.073552066603298e-06, "loss": 0.868, "step": 7260 }, { "epoch": 0.22621827806321035, "grad_norm": 2.305983066558838, "learning_rate": 4.072732636271264e-06, "loss": 0.8765, "step": 7265 }, { "epoch": 0.22637396855052155, "grad_norm": 2.1050121784210205, "learning_rate": 4.071913205939231e-06, "loss": 0.8663, "step": 7270 }, { "epoch": 0.2265296590378328, "grad_norm": 2.252382755279541, "learning_rate": 4.071093775607198e-06, "loss": 0.7576, "step": 7275 }, { "epoch": 0.22668534952514402, "grad_norm": 2.3045589923858643, "learning_rate": 4.070274345275166e-06, "loss": 0.8474, "step": 7280 }, { "epoch": 0.22684104001245525, "grad_norm": 3.6101372241973877, "learning_rate": 4.069454914943132e-06, "loss": 0.8368, "step": 7285 }, { "epoch": 0.22699673049976646, "grad_norm": 2.405020236968994, "learning_rate": 4.0686354846110986e-06, "loss": 0.8671, "step": 7290 }, { "epoch": 0.2271524209870777, "grad_norm": 2.302595853805542, "learning_rate": 4.0678160542790654e-06, "loss": 0.8004, "step": 7295 }, { "epoch": 0.22730811147438892, "grad_norm": 1.9179757833480835, "learning_rate": 4.066996623947032e-06, "loss": 0.7876, "step": 7300 }, { "epoch": 0.22746380196170013, "grad_norm": 2.3345694541931152, "learning_rate": 4.066177193614999e-06, "loss": 0.7972, "step": 7305 }, { "epoch": 0.22761949244901136, "grad_norm": 1.9965609312057495, "learning_rate": 4.065357763282966e-06, "loss": 0.7462, "step": 7310 }, { "epoch": 0.2277751829363226, "grad_norm": 2.320011854171753, "learning_rate": 4.064538332950933e-06, "loss": 0.7752, "step": 7315 }, { "epoch": 0.22793087342363383, "grad_norm": 1.7369252443313599, "learning_rate": 4.0637189026189e-06, "loss": 0.8194, "step": 7320 }, { "epoch": 0.22808656391094503, "grad_norm": 2.4824235439300537, "learning_rate": 4.062899472286867e-06, "loss": 0.7795, "step": 7325 }, { "epoch": 0.22824225439825627, "grad_norm": 2.4139463901519775, "learning_rate": 4.0620800419548335e-06, "loss": 0.857, "step": 7330 }, { "epoch": 0.2283979448855675, "grad_norm": 1.9904236793518066, "learning_rate": 4.0612606116228e-06, "loss": 0.862, "step": 7335 }, { "epoch": 0.2285536353728787, "grad_norm": 1.8441646099090576, "learning_rate": 4.060441181290766e-06, "loss": 0.8033, "step": 7340 }, { "epoch": 0.22870932586018994, "grad_norm": 1.9013663530349731, "learning_rate": 4.059621750958734e-06, "loss": 0.8107, "step": 7345 }, { "epoch": 0.22886501634750117, "grad_norm": 2.8347220420837402, "learning_rate": 4.058802320626701e-06, "loss": 0.7978, "step": 7350 }, { "epoch": 0.2290207068348124, "grad_norm": 2.23042631149292, "learning_rate": 4.057982890294668e-06, "loss": 0.7748, "step": 7355 }, { "epoch": 0.2291763973221236, "grad_norm": 2.5924124717712402, "learning_rate": 4.057163459962634e-06, "loss": 0.7647, "step": 7360 }, { "epoch": 0.22933208780943484, "grad_norm": 2.493475914001465, "learning_rate": 4.056344029630601e-06, "loss": 0.8688, "step": 7365 }, { "epoch": 0.22948777829674608, "grad_norm": 1.753089427947998, "learning_rate": 4.0555245992985685e-06, "loss": 0.8344, "step": 7370 }, { "epoch": 0.22964346878405728, "grad_norm": 2.128600597381592, "learning_rate": 4.0547051689665345e-06, "loss": 0.7795, "step": 7375 }, { "epoch": 0.2297991592713685, "grad_norm": 2.3946104049682617, "learning_rate": 4.053885738634501e-06, "loss": 0.8823, "step": 7380 }, { "epoch": 0.22995484975867975, "grad_norm": 2.1551997661590576, "learning_rate": 4.053066308302468e-06, "loss": 0.9451, "step": 7385 }, { "epoch": 0.23011054024599098, "grad_norm": 2.096571445465088, "learning_rate": 4.052246877970435e-06, "loss": 0.8574, "step": 7390 }, { "epoch": 0.23026623073330219, "grad_norm": 2.074284076690674, "learning_rate": 4.051427447638402e-06, "loss": 0.8189, "step": 7395 }, { "epoch": 0.23042192122061342, "grad_norm": 2.1618568897247314, "learning_rate": 4.050608017306369e-06, "loss": 0.7872, "step": 7400 }, { "epoch": 0.23057761170792465, "grad_norm": 2.1171884536743164, "learning_rate": 4.049788586974336e-06, "loss": 0.833, "step": 7405 }, { "epoch": 0.23073330219523588, "grad_norm": 2.3123581409454346, "learning_rate": 4.048969156642303e-06, "loss": 0.8035, "step": 7410 }, { "epoch": 0.2308889926825471, "grad_norm": 2.6515915393829346, "learning_rate": 4.0481497263102694e-06, "loss": 0.9131, "step": 7415 }, { "epoch": 0.23104468316985832, "grad_norm": 2.468559503555298, "learning_rate": 4.047330295978236e-06, "loss": 0.8774, "step": 7420 }, { "epoch": 0.23120037365716956, "grad_norm": 2.126229763031006, "learning_rate": 4.046510865646203e-06, "loss": 0.7734, "step": 7425 }, { "epoch": 0.23135606414448076, "grad_norm": 2.2753541469573975, "learning_rate": 4.04569143531417e-06, "loss": 0.8541, "step": 7430 }, { "epoch": 0.231511754631792, "grad_norm": 2.268052339553833, "learning_rate": 4.044872004982137e-06, "loss": 0.7967, "step": 7435 }, { "epoch": 0.23166744511910323, "grad_norm": 1.991653323173523, "learning_rate": 4.044052574650104e-06, "loss": 0.8362, "step": 7440 }, { "epoch": 0.23182313560641446, "grad_norm": 1.9142411947250366, "learning_rate": 4.043233144318071e-06, "loss": 0.8474, "step": 7445 }, { "epoch": 0.23197882609372567, "grad_norm": 2.22780179977417, "learning_rate": 4.042413713986037e-06, "loss": 0.8145, "step": 7450 }, { "epoch": 0.2321345165810369, "grad_norm": 2.1273140907287598, "learning_rate": 4.0415942836540036e-06, "loss": 0.7408, "step": 7455 }, { "epoch": 0.23229020706834813, "grad_norm": 2.2923049926757812, "learning_rate": 4.040774853321971e-06, "loss": 0.8897, "step": 7460 }, { "epoch": 0.23244589755565934, "grad_norm": 2.234722375869751, "learning_rate": 4.039955422989938e-06, "loss": 0.8372, "step": 7465 }, { "epoch": 0.23260158804297057, "grad_norm": 2.2019455432891846, "learning_rate": 4.039135992657904e-06, "loss": 0.8155, "step": 7470 }, { "epoch": 0.2327572785302818, "grad_norm": 1.9493032693862915, "learning_rate": 4.038316562325871e-06, "loss": 0.8384, "step": 7475 }, { "epoch": 0.23291296901759304, "grad_norm": 1.9531068801879883, "learning_rate": 4.037497131993838e-06, "loss": 0.7808, "step": 7480 }, { "epoch": 0.23306865950490424, "grad_norm": 2.160487413406372, "learning_rate": 4.036677701661806e-06, "loss": 0.7485, "step": 7485 }, { "epoch": 0.23322434999221547, "grad_norm": 2.092299222946167, "learning_rate": 4.035858271329772e-06, "loss": 0.7748, "step": 7490 }, { "epoch": 0.2333800404795267, "grad_norm": 2.250291347503662, "learning_rate": 4.0350388409977385e-06, "loss": 0.7869, "step": 7495 }, { "epoch": 0.23353573096683794, "grad_norm": 2.00376033782959, "learning_rate": 4.034219410665705e-06, "loss": 0.8323, "step": 7500 }, { "epoch": 0.23369142145414915, "grad_norm": 2.0555007457733154, "learning_rate": 4.033399980333672e-06, "loss": 0.8703, "step": 7505 }, { "epoch": 0.23384711194146038, "grad_norm": 2.2569997310638428, "learning_rate": 4.032580550001639e-06, "loss": 0.8515, "step": 7510 }, { "epoch": 0.2340028024287716, "grad_norm": 2.1287078857421875, "learning_rate": 4.031761119669606e-06, "loss": 0.8499, "step": 7515 }, { "epoch": 0.23415849291608282, "grad_norm": 2.1902663707733154, "learning_rate": 4.030941689337573e-06, "loss": 0.7909, "step": 7520 }, { "epoch": 0.23431418340339405, "grad_norm": 2.513153553009033, "learning_rate": 4.03012225900554e-06, "loss": 0.7818, "step": 7525 }, { "epoch": 0.23446987389070528, "grad_norm": 2.1833078861236572, "learning_rate": 4.029302828673507e-06, "loss": 0.7736, "step": 7530 }, { "epoch": 0.23462556437801652, "grad_norm": 2.243927478790283, "learning_rate": 4.0284833983414735e-06, "loss": 0.7924, "step": 7535 }, { "epoch": 0.23478125486532772, "grad_norm": 2.062227725982666, "learning_rate": 4.02766396800944e-06, "loss": 0.8341, "step": 7540 }, { "epoch": 0.23493694535263895, "grad_norm": 2.0104496479034424, "learning_rate": 4.026844537677407e-06, "loss": 0.8275, "step": 7545 }, { "epoch": 0.2350926358399502, "grad_norm": 2.7596352100372314, "learning_rate": 4.026025107345374e-06, "loss": 0.8611, "step": 7550 }, { "epoch": 0.2352483263272614, "grad_norm": 2.1496877670288086, "learning_rate": 4.025205677013341e-06, "loss": 0.8549, "step": 7555 }, { "epoch": 0.23540401681457263, "grad_norm": 1.635653018951416, "learning_rate": 4.024386246681308e-06, "loss": 0.8326, "step": 7560 }, { "epoch": 0.23555970730188386, "grad_norm": 2.2375528812408447, "learning_rate": 4.023566816349274e-06, "loss": 0.8295, "step": 7565 }, { "epoch": 0.2357153977891951, "grad_norm": 2.2815518379211426, "learning_rate": 4.022747386017241e-06, "loss": 0.8548, "step": 7570 }, { "epoch": 0.2358710882765063, "grad_norm": 2.08514666557312, "learning_rate": 4.0219279556852084e-06, "loss": 0.8845, "step": 7575 }, { "epoch": 0.23602677876381753, "grad_norm": 2.236286163330078, "learning_rate": 4.0211085253531744e-06, "loss": 0.786, "step": 7580 }, { "epoch": 0.23618246925112876, "grad_norm": 1.942417025566101, "learning_rate": 4.020289095021141e-06, "loss": 0.7739, "step": 7585 }, { "epoch": 0.23633815973843997, "grad_norm": 2.1622564792633057, "learning_rate": 4.019469664689108e-06, "loss": 0.8411, "step": 7590 }, { "epoch": 0.2364938502257512, "grad_norm": 1.972784161567688, "learning_rate": 4.018650234357075e-06, "loss": 0.7963, "step": 7595 }, { "epoch": 0.23664954071306243, "grad_norm": 2.046875, "learning_rate": 4.017830804025042e-06, "loss": 0.8287, "step": 7600 }, { "epoch": 0.23680523120037367, "grad_norm": 2.189091205596924, "learning_rate": 4.017011373693009e-06, "loss": 0.7414, "step": 7605 }, { "epoch": 0.23696092168768487, "grad_norm": 1.8283278942108154, "learning_rate": 4.016191943360976e-06, "loss": 0.8738, "step": 7610 }, { "epoch": 0.2371166121749961, "grad_norm": 1.9412646293640137, "learning_rate": 4.0153725130289425e-06, "loss": 0.8221, "step": 7615 }, { "epoch": 0.23727230266230734, "grad_norm": 2.107886791229248, "learning_rate": 4.014553082696909e-06, "loss": 0.8093, "step": 7620 }, { "epoch": 0.23742799314961857, "grad_norm": 2.024857521057129, "learning_rate": 4.013733652364876e-06, "loss": 0.78, "step": 7625 }, { "epoch": 0.23758368363692978, "grad_norm": 2.2192702293395996, "learning_rate": 4.012914222032843e-06, "loss": 0.875, "step": 7630 }, { "epoch": 0.237739374124241, "grad_norm": 2.312843084335327, "learning_rate": 4.01209479170081e-06, "loss": 0.8234, "step": 7635 }, { "epoch": 0.23789506461155224, "grad_norm": 2.4317333698272705, "learning_rate": 4.011275361368777e-06, "loss": 0.8585, "step": 7640 }, { "epoch": 0.23805075509886345, "grad_norm": 2.1113874912261963, "learning_rate": 4.010455931036744e-06, "loss": 0.7908, "step": 7645 }, { "epoch": 0.23820644558617468, "grad_norm": 2.1029601097106934, "learning_rate": 4.009636500704711e-06, "loss": 0.7601, "step": 7650 }, { "epoch": 0.23836213607348591, "grad_norm": 2.505934953689575, "learning_rate": 4.008817070372677e-06, "loss": 0.8822, "step": 7655 }, { "epoch": 0.23851782656079715, "grad_norm": 2.0215535163879395, "learning_rate": 4.007997640040644e-06, "loss": 0.7381, "step": 7660 }, { "epoch": 0.23867351704810835, "grad_norm": 2.2529823780059814, "learning_rate": 4.007178209708611e-06, "loss": 0.8156, "step": 7665 }, { "epoch": 0.23882920753541959, "grad_norm": 2.4654178619384766, "learning_rate": 4.006358779376578e-06, "loss": 0.7837, "step": 7670 }, { "epoch": 0.23898489802273082, "grad_norm": 2.3770639896392822, "learning_rate": 4.005539349044544e-06, "loss": 0.8276, "step": 7675 }, { "epoch": 0.23914058851004202, "grad_norm": 2.157303810119629, "learning_rate": 4.004719918712511e-06, "loss": 0.8164, "step": 7680 }, { "epoch": 0.23929627899735326, "grad_norm": 2.4783382415771484, "learning_rate": 4.003900488380479e-06, "loss": 0.8254, "step": 7685 }, { "epoch": 0.2394519694846645, "grad_norm": 2.2299304008483887, "learning_rate": 4.0030810580484456e-06, "loss": 0.8043, "step": 7690 }, { "epoch": 0.23960765997197572, "grad_norm": 2.8877029418945312, "learning_rate": 4.002261627716412e-06, "loss": 0.8706, "step": 7695 }, { "epoch": 0.23976335045928693, "grad_norm": 2.1351253986358643, "learning_rate": 4.0014421973843785e-06, "loss": 0.8073, "step": 7700 }, { "epoch": 0.23991904094659816, "grad_norm": 2.3229928016662598, "learning_rate": 4.000622767052345e-06, "loss": 0.802, "step": 7705 }, { "epoch": 0.2400747314339094, "grad_norm": 2.2033703327178955, "learning_rate": 3.999803336720312e-06, "loss": 0.8331, "step": 7710 }, { "epoch": 0.24023042192122063, "grad_norm": 2.2994725704193115, "learning_rate": 3.998983906388279e-06, "loss": 0.8408, "step": 7715 }, { "epoch": 0.24038611240853183, "grad_norm": 2.8454113006591797, "learning_rate": 3.998164476056246e-06, "loss": 0.8023, "step": 7720 }, { "epoch": 0.24054180289584307, "grad_norm": 2.2568726539611816, "learning_rate": 3.997345045724213e-06, "loss": 0.8417, "step": 7725 }, { "epoch": 0.2406974933831543, "grad_norm": 2.2293508052825928, "learning_rate": 3.99652561539218e-06, "loss": 0.8835, "step": 7730 }, { "epoch": 0.2408531838704655, "grad_norm": 1.909530758857727, "learning_rate": 3.9957061850601466e-06, "loss": 0.8136, "step": 7735 }, { "epoch": 0.24100887435777674, "grad_norm": 2.026780128479004, "learning_rate": 3.9948867547281134e-06, "loss": 0.8647, "step": 7740 }, { "epoch": 0.24116456484508797, "grad_norm": 2.172496795654297, "learning_rate": 3.99406732439608e-06, "loss": 0.9091, "step": 7745 }, { "epoch": 0.2413202553323992, "grad_norm": 2.413907289505005, "learning_rate": 3.993247894064047e-06, "loss": 0.8409, "step": 7750 }, { "epoch": 0.2414759458197104, "grad_norm": 2.4889612197875977, "learning_rate": 3.992428463732014e-06, "loss": 0.7772, "step": 7755 }, { "epoch": 0.24163163630702164, "grad_norm": 2.5223875045776367, "learning_rate": 3.991609033399981e-06, "loss": 0.8978, "step": 7760 }, { "epoch": 0.24178732679433287, "grad_norm": 2.3656532764434814, "learning_rate": 3.990789603067948e-06, "loss": 0.8499, "step": 7765 }, { "epoch": 0.24194301728164408, "grad_norm": 2.548638343811035, "learning_rate": 3.989970172735914e-06, "loss": 0.8456, "step": 7770 }, { "epoch": 0.2420987077689553, "grad_norm": 2.4156742095947266, "learning_rate": 3.9891507424038815e-06, "loss": 0.8451, "step": 7775 }, { "epoch": 0.24225439825626655, "grad_norm": 2.034412145614624, "learning_rate": 3.988331312071848e-06, "loss": 0.8782, "step": 7780 }, { "epoch": 0.24241008874357778, "grad_norm": 2.630352258682251, "learning_rate": 3.987511881739815e-06, "loss": 0.8252, "step": 7785 }, { "epoch": 0.24256577923088898, "grad_norm": 2.233874797821045, "learning_rate": 3.986692451407781e-06, "loss": 0.8542, "step": 7790 }, { "epoch": 0.24272146971820022, "grad_norm": 2.178621530532837, "learning_rate": 3.985873021075748e-06, "loss": 0.8645, "step": 7795 }, { "epoch": 0.24287716020551145, "grad_norm": 2.1693694591522217, "learning_rate": 3.985053590743716e-06, "loss": 0.859, "step": 7800 }, { "epoch": 0.24303285069282266, "grad_norm": 1.7122329473495483, "learning_rate": 3.984234160411682e-06, "loss": 0.7198, "step": 7805 }, { "epoch": 0.2431885411801339, "grad_norm": 2.1749913692474365, "learning_rate": 3.983414730079649e-06, "loss": 0.757, "step": 7810 }, { "epoch": 0.24334423166744512, "grad_norm": 2.258507251739502, "learning_rate": 3.982595299747616e-06, "loss": 0.8161, "step": 7815 }, { "epoch": 0.24349992215475635, "grad_norm": 2.288970708847046, "learning_rate": 3.9817758694155825e-06, "loss": 0.8928, "step": 7820 }, { "epoch": 0.24365561264206756, "grad_norm": 2.173678159713745, "learning_rate": 3.980956439083549e-06, "loss": 0.8114, "step": 7825 }, { "epoch": 0.2438113031293788, "grad_norm": 2.039473295211792, "learning_rate": 3.980137008751516e-06, "loss": 0.9029, "step": 7830 }, { "epoch": 0.24396699361669003, "grad_norm": 2.3666293621063232, "learning_rate": 3.979317578419483e-06, "loss": 0.8087, "step": 7835 }, { "epoch": 0.24412268410400126, "grad_norm": 2.416086196899414, "learning_rate": 3.97849814808745e-06, "loss": 0.7959, "step": 7840 }, { "epoch": 0.24427837459131246, "grad_norm": 2.514375925064087, "learning_rate": 3.977678717755417e-06, "loss": 0.8098, "step": 7845 }, { "epoch": 0.2444340650786237, "grad_norm": 2.273054838180542, "learning_rate": 3.976859287423384e-06, "loss": 0.806, "step": 7850 }, { "epoch": 0.24458975556593493, "grad_norm": 2.065432548522949, "learning_rate": 3.9760398570913506e-06, "loss": 0.8033, "step": 7855 }, { "epoch": 0.24474544605324614, "grad_norm": 2.250357151031494, "learning_rate": 3.9752204267593174e-06, "loss": 0.8268, "step": 7860 }, { "epoch": 0.24490113654055737, "grad_norm": 2.0132296085357666, "learning_rate": 3.974400996427284e-06, "loss": 0.7876, "step": 7865 }, { "epoch": 0.2450568270278686, "grad_norm": 2.8934364318847656, "learning_rate": 3.973581566095251e-06, "loss": 0.8314, "step": 7870 }, { "epoch": 0.24521251751517983, "grad_norm": 2.1307101249694824, "learning_rate": 3.972762135763218e-06, "loss": 0.7399, "step": 7875 }, { "epoch": 0.24536820800249104, "grad_norm": 2.143662214279175, "learning_rate": 3.971942705431184e-06, "loss": 0.812, "step": 7880 }, { "epoch": 0.24552389848980227, "grad_norm": 2.0079050064086914, "learning_rate": 3.971123275099151e-06, "loss": 0.7891, "step": 7885 }, { "epoch": 0.2456795889771135, "grad_norm": 2.4335391521453857, "learning_rate": 3.970303844767119e-06, "loss": 0.8246, "step": 7890 }, { "epoch": 0.2458352794644247, "grad_norm": 2.3436789512634277, "learning_rate": 3.9694844144350855e-06, "loss": 0.8776, "step": 7895 }, { "epoch": 0.24599096995173594, "grad_norm": 2.5492703914642334, "learning_rate": 3.9686649841030516e-06, "loss": 0.8339, "step": 7900 }, { "epoch": 0.24614666043904718, "grad_norm": 2.3936450481414795, "learning_rate": 3.967845553771018e-06, "loss": 0.8308, "step": 7905 }, { "epoch": 0.2463023509263584, "grad_norm": 2.411764621734619, "learning_rate": 3.967026123438985e-06, "loss": 0.8096, "step": 7910 }, { "epoch": 0.24645804141366962, "grad_norm": 2.6928436756134033, "learning_rate": 3.966206693106953e-06, "loss": 0.8777, "step": 7915 }, { "epoch": 0.24661373190098085, "grad_norm": 2.3129541873931885, "learning_rate": 3.965387262774919e-06, "loss": 0.9157, "step": 7920 }, { "epoch": 0.24676942238829208, "grad_norm": 2.477638006210327, "learning_rate": 3.964567832442886e-06, "loss": 0.8932, "step": 7925 }, { "epoch": 0.24692511287560331, "grad_norm": 2.009692668914795, "learning_rate": 3.963748402110853e-06, "loss": 0.7817, "step": 7930 }, { "epoch": 0.24708080336291452, "grad_norm": 2.397150754928589, "learning_rate": 3.96292897177882e-06, "loss": 0.7354, "step": 7935 }, { "epoch": 0.24723649385022575, "grad_norm": 2.4967124462127686, "learning_rate": 3.9621095414467865e-06, "loss": 0.8514, "step": 7940 }, { "epoch": 0.24739218433753699, "grad_norm": 2.4448890686035156, "learning_rate": 3.961290111114753e-06, "loss": 0.8659, "step": 7945 }, { "epoch": 0.2475478748248482, "grad_norm": 2.1105518341064453, "learning_rate": 3.96047068078272e-06, "loss": 0.8578, "step": 7950 }, { "epoch": 0.24770356531215942, "grad_norm": 1.9993144273757935, "learning_rate": 3.959651250450687e-06, "loss": 0.7908, "step": 7955 }, { "epoch": 0.24785925579947066, "grad_norm": 2.136861562728882, "learning_rate": 3.958831820118654e-06, "loss": 0.7918, "step": 7960 }, { "epoch": 0.2480149462867819, "grad_norm": 2.4820995330810547, "learning_rate": 3.958012389786621e-06, "loss": 0.8266, "step": 7965 }, { "epoch": 0.2481706367740931, "grad_norm": 2.825849771499634, "learning_rate": 3.957192959454588e-06, "loss": 0.823, "step": 7970 }, { "epoch": 0.24832632726140433, "grad_norm": 2.625868558883667, "learning_rate": 3.956373529122554e-06, "loss": 0.7989, "step": 7975 }, { "epoch": 0.24848201774871556, "grad_norm": 2.121126890182495, "learning_rate": 3.9555540987905215e-06, "loss": 0.8447, "step": 7980 }, { "epoch": 0.24863770823602677, "grad_norm": 2.714202404022217, "learning_rate": 3.954734668458488e-06, "loss": 0.8347, "step": 7985 }, { "epoch": 0.248793398723338, "grad_norm": 2.041186571121216, "learning_rate": 3.953915238126455e-06, "loss": 0.8789, "step": 7990 }, { "epoch": 0.24894908921064923, "grad_norm": 2.0319225788116455, "learning_rate": 3.953095807794421e-06, "loss": 0.8438, "step": 7995 }, { "epoch": 0.24910477969796047, "grad_norm": 1.6805351972579956, "learning_rate": 3.952276377462388e-06, "loss": 0.7922, "step": 8000 }, { "epoch": 0.24926047018527167, "grad_norm": 2.186257839202881, "learning_rate": 3.951456947130356e-06, "loss": 0.8284, "step": 8005 }, { "epoch": 0.2494161606725829, "grad_norm": 2.1433260440826416, "learning_rate": 3.950637516798322e-06, "loss": 0.8739, "step": 8010 }, { "epoch": 0.24957185115989414, "grad_norm": 2.5441906452178955, "learning_rate": 3.949818086466289e-06, "loss": 0.8061, "step": 8015 }, { "epoch": 0.24972754164720534, "grad_norm": 2.5893218517303467, "learning_rate": 3.9489986561342556e-06, "loss": 0.8518, "step": 8020 }, { "epoch": 0.24988323213451658, "grad_norm": 2.059692859649658, "learning_rate": 3.9481792258022224e-06, "loss": 0.7825, "step": 8025 }, { "epoch": 0.2500389226218278, "grad_norm": 2.3424224853515625, "learning_rate": 3.947359795470189e-06, "loss": 0.7878, "step": 8030 }, { "epoch": 0.25019461310913904, "grad_norm": 2.3414628505706787, "learning_rate": 3.946540365138156e-06, "loss": 0.7844, "step": 8035 }, { "epoch": 0.2503503035964503, "grad_norm": 2.039336919784546, "learning_rate": 3.945720934806123e-06, "loss": 0.8801, "step": 8040 }, { "epoch": 0.2505059940837615, "grad_norm": 1.8397727012634277, "learning_rate": 3.94490150447409e-06, "loss": 0.7948, "step": 8045 }, { "epoch": 0.2506616845710727, "grad_norm": 2.0594701766967773, "learning_rate": 3.944082074142057e-06, "loss": 0.7623, "step": 8050 }, { "epoch": 0.2508173750583839, "grad_norm": 2.2838029861450195, "learning_rate": 3.943262643810024e-06, "loss": 0.7766, "step": 8055 }, { "epoch": 0.25097306554569515, "grad_norm": 1.889347791671753, "learning_rate": 3.9424432134779905e-06, "loss": 0.853, "step": 8060 }, { "epoch": 0.2511287560330064, "grad_norm": 2.132585048675537, "learning_rate": 3.941623783145957e-06, "loss": 0.7694, "step": 8065 }, { "epoch": 0.2512844465203176, "grad_norm": 2.126568555831909, "learning_rate": 3.940804352813924e-06, "loss": 0.8169, "step": 8070 }, { "epoch": 0.25144013700762885, "grad_norm": 2.006751298904419, "learning_rate": 3.939984922481891e-06, "loss": 0.8808, "step": 8075 }, { "epoch": 0.2515958274949401, "grad_norm": 2.171025514602661, "learning_rate": 3.939165492149858e-06, "loss": 0.7447, "step": 8080 }, { "epoch": 0.25175151798225126, "grad_norm": 2.2733118534088135, "learning_rate": 3.938346061817824e-06, "loss": 0.831, "step": 8085 }, { "epoch": 0.2519072084695625, "grad_norm": 2.2273521423339844, "learning_rate": 3.937526631485791e-06, "loss": 0.739, "step": 8090 }, { "epoch": 0.2520628989568737, "grad_norm": 2.30985426902771, "learning_rate": 3.936707201153759e-06, "loss": 0.7913, "step": 8095 }, { "epoch": 0.25221858944418496, "grad_norm": 2.1907031536102295, "learning_rate": 3.9358877708217255e-06, "loss": 0.8888, "step": 8100 }, { "epoch": 0.2523742799314962, "grad_norm": 2.3780250549316406, "learning_rate": 3.9350683404896915e-06, "loss": 0.8723, "step": 8105 }, { "epoch": 0.2525299704188074, "grad_norm": 2.276857852935791, "learning_rate": 3.934248910157658e-06, "loss": 0.7619, "step": 8110 }, { "epoch": 0.25268566090611866, "grad_norm": 2.2094733715057373, "learning_rate": 3.933429479825625e-06, "loss": 0.8053, "step": 8115 }, { "epoch": 0.25284135139342984, "grad_norm": 2.445166826248169, "learning_rate": 3.932610049493593e-06, "loss": 0.8921, "step": 8120 }, { "epoch": 0.25299704188074107, "grad_norm": 2.2223057746887207, "learning_rate": 3.931790619161559e-06, "loss": 0.8064, "step": 8125 }, { "epoch": 0.2531527323680523, "grad_norm": 2.2528891563415527, "learning_rate": 3.930971188829526e-06, "loss": 0.8311, "step": 8130 }, { "epoch": 0.25330842285536354, "grad_norm": 2.1214792728424072, "learning_rate": 3.930151758497493e-06, "loss": 0.8359, "step": 8135 }, { "epoch": 0.25346411334267477, "grad_norm": 2.410438299179077, "learning_rate": 3.92933232816546e-06, "loss": 0.8223, "step": 8140 }, { "epoch": 0.253619803829986, "grad_norm": 1.8039658069610596, "learning_rate": 3.9285128978334265e-06, "loss": 0.776, "step": 8145 }, { "epoch": 0.25377549431729723, "grad_norm": 2.4893290996551514, "learning_rate": 3.927693467501393e-06, "loss": 0.8277, "step": 8150 }, { "epoch": 0.2539311848046084, "grad_norm": 2.347656488418579, "learning_rate": 3.92687403716936e-06, "loss": 0.75, "step": 8155 }, { "epoch": 0.25408687529191964, "grad_norm": 2.126288652420044, "learning_rate": 3.926054606837327e-06, "loss": 0.8909, "step": 8160 }, { "epoch": 0.2542425657792309, "grad_norm": 2.208160877227783, "learning_rate": 3.925235176505294e-06, "loss": 0.8529, "step": 8165 }, { "epoch": 0.2543982562665421, "grad_norm": 1.9659610986709595, "learning_rate": 3.924415746173261e-06, "loss": 0.8163, "step": 8170 }, { "epoch": 0.25455394675385334, "grad_norm": 2.2483513355255127, "learning_rate": 3.923596315841228e-06, "loss": 0.7872, "step": 8175 }, { "epoch": 0.2547096372411646, "grad_norm": 2.1416900157928467, "learning_rate": 3.922776885509194e-06, "loss": 0.7904, "step": 8180 }, { "epoch": 0.2548653277284758, "grad_norm": 2.3952181339263916, "learning_rate": 3.921957455177161e-06, "loss": 0.8546, "step": 8185 }, { "epoch": 0.25502101821578704, "grad_norm": 2.1872854232788086, "learning_rate": 3.921138024845128e-06, "loss": 0.8819, "step": 8190 }, { "epoch": 0.2551767087030982, "grad_norm": 2.364805221557617, "learning_rate": 3.920318594513095e-06, "loss": 0.8489, "step": 8195 }, { "epoch": 0.25533239919040945, "grad_norm": 2.1426846981048584, "learning_rate": 3.919499164181061e-06, "loss": 0.8205, "step": 8200 }, { "epoch": 0.2554880896777207, "grad_norm": 1.9633291959762573, "learning_rate": 3.918679733849028e-06, "loss": 0.8388, "step": 8205 }, { "epoch": 0.2556437801650319, "grad_norm": 2.01847505569458, "learning_rate": 3.917860303516996e-06, "loss": 0.8453, "step": 8210 }, { "epoch": 0.25579947065234315, "grad_norm": 2.2857611179351807, "learning_rate": 3.917040873184963e-06, "loss": 0.8329, "step": 8215 }, { "epoch": 0.2559551611396544, "grad_norm": 2.001810312271118, "learning_rate": 3.916221442852929e-06, "loss": 0.8143, "step": 8220 }, { "epoch": 0.2561108516269656, "grad_norm": 2.269902229309082, "learning_rate": 3.9154020125208955e-06, "loss": 0.7815, "step": 8225 }, { "epoch": 0.2562665421142768, "grad_norm": 2.383138656616211, "learning_rate": 3.914582582188862e-06, "loss": 0.831, "step": 8230 }, { "epoch": 0.25642223260158803, "grad_norm": 2.593519926071167, "learning_rate": 3.913763151856829e-06, "loss": 0.8588, "step": 8235 }, { "epoch": 0.25657792308889926, "grad_norm": 2.2331032752990723, "learning_rate": 3.912943721524796e-06, "loss": 0.7339, "step": 8240 }, { "epoch": 0.2567336135762105, "grad_norm": 2.2246618270874023, "learning_rate": 3.912124291192763e-06, "loss": 0.8758, "step": 8245 }, { "epoch": 0.25688930406352173, "grad_norm": 2.2144885063171387, "learning_rate": 3.91130486086073e-06, "loss": 0.7837, "step": 8250 }, { "epoch": 0.25704499455083296, "grad_norm": 2.2380080223083496, "learning_rate": 3.910485430528697e-06, "loss": 0.7884, "step": 8255 }, { "epoch": 0.2572006850381442, "grad_norm": 2.2893056869506836, "learning_rate": 3.909666000196664e-06, "loss": 0.8682, "step": 8260 }, { "epoch": 0.25735637552545537, "grad_norm": 1.9067960977554321, "learning_rate": 3.9088465698646305e-06, "loss": 0.7274, "step": 8265 }, { "epoch": 0.2575120660127666, "grad_norm": 2.2944231033325195, "learning_rate": 3.908027139532597e-06, "loss": 0.768, "step": 8270 }, { "epoch": 0.25766775650007784, "grad_norm": 2.170111894607544, "learning_rate": 3.907207709200564e-06, "loss": 0.844, "step": 8275 }, { "epoch": 0.25782344698738907, "grad_norm": 1.751772165298462, "learning_rate": 3.906388278868531e-06, "loss": 0.8744, "step": 8280 }, { "epoch": 0.2579791374747003, "grad_norm": 2.028012275695801, "learning_rate": 3.905568848536498e-06, "loss": 0.9003, "step": 8285 }, { "epoch": 0.25813482796201154, "grad_norm": 2.69138240814209, "learning_rate": 3.904749418204465e-06, "loss": 0.8604, "step": 8290 }, { "epoch": 0.25829051844932277, "grad_norm": 2.0799105167388916, "learning_rate": 3.903929987872431e-06, "loss": 0.8673, "step": 8295 }, { "epoch": 0.25844620893663395, "grad_norm": 2.319126605987549, "learning_rate": 3.9031105575403986e-06, "loss": 0.7594, "step": 8300 }, { "epoch": 0.2586018994239452, "grad_norm": 2.0441789627075195, "learning_rate": 3.9022911272083654e-06, "loss": 0.8891, "step": 8305 }, { "epoch": 0.2587575899112564, "grad_norm": 2.4357352256774902, "learning_rate": 3.9014716968763315e-06, "loss": 0.871, "step": 8310 }, { "epoch": 0.25891328039856765, "grad_norm": 2.0064594745635986, "learning_rate": 3.900652266544298e-06, "loss": 0.8705, "step": 8315 }, { "epoch": 0.2590689708858789, "grad_norm": 2.625074863433838, "learning_rate": 3.899832836212265e-06, "loss": 0.8317, "step": 8320 }, { "epoch": 0.2592246613731901, "grad_norm": 1.9414966106414795, "learning_rate": 3.899013405880233e-06, "loss": 0.8329, "step": 8325 }, { "epoch": 0.25938035186050135, "grad_norm": 2.1252079010009766, "learning_rate": 3.898193975548199e-06, "loss": 0.805, "step": 8330 }, { "epoch": 0.2595360423478125, "grad_norm": 2.1143505573272705, "learning_rate": 3.897374545216166e-06, "loss": 0.7999, "step": 8335 }, { "epoch": 0.25969173283512376, "grad_norm": 2.1445112228393555, "learning_rate": 3.896555114884133e-06, "loss": 0.9067, "step": 8340 }, { "epoch": 0.259847423322435, "grad_norm": 2.1397156715393066, "learning_rate": 3.8957356845520996e-06, "loss": 0.7252, "step": 8345 }, { "epoch": 0.2600031138097462, "grad_norm": 1.901000738143921, "learning_rate": 3.894916254220066e-06, "loss": 0.7763, "step": 8350 }, { "epoch": 0.26015880429705746, "grad_norm": 2.4371707439422607, "learning_rate": 3.894096823888033e-06, "loss": 0.8781, "step": 8355 }, { "epoch": 0.2603144947843687, "grad_norm": 2.092404842376709, "learning_rate": 3.893277393556e-06, "loss": 0.8557, "step": 8360 }, { "epoch": 0.2604701852716799, "grad_norm": 2.321331262588501, "learning_rate": 3.892457963223967e-06, "loss": 0.9006, "step": 8365 }, { "epoch": 0.2606258757589911, "grad_norm": 2.041555166244507, "learning_rate": 3.891638532891934e-06, "loss": 0.8224, "step": 8370 }, { "epoch": 0.26078156624630233, "grad_norm": 1.8129746913909912, "learning_rate": 3.890819102559901e-06, "loss": 0.7688, "step": 8375 }, { "epoch": 0.26093725673361357, "grad_norm": 1.9606953859329224, "learning_rate": 3.889999672227868e-06, "loss": 0.8484, "step": 8380 }, { "epoch": 0.2610929472209248, "grad_norm": 2.3950858116149902, "learning_rate": 3.8891802418958345e-06, "loss": 0.8717, "step": 8385 }, { "epoch": 0.26124863770823603, "grad_norm": 2.416135549545288, "learning_rate": 3.888360811563801e-06, "loss": 0.8481, "step": 8390 }, { "epoch": 0.26140432819554726, "grad_norm": 2.081617832183838, "learning_rate": 3.887541381231768e-06, "loss": 0.8317, "step": 8395 }, { "epoch": 0.2615600186828585, "grad_norm": 2.2693958282470703, "learning_rate": 3.886721950899735e-06, "loss": 0.8126, "step": 8400 }, { "epoch": 0.26171570917016973, "grad_norm": 1.872719407081604, "learning_rate": 3.885902520567701e-06, "loss": 0.8994, "step": 8405 }, { "epoch": 0.2618713996574809, "grad_norm": 2.2475593090057373, "learning_rate": 3.885083090235668e-06, "loss": 0.8849, "step": 8410 }, { "epoch": 0.26202709014479214, "grad_norm": 2.1771762371063232, "learning_rate": 3.884263659903636e-06, "loss": 0.8431, "step": 8415 }, { "epoch": 0.2621827806321034, "grad_norm": 1.8231983184814453, "learning_rate": 3.883444229571603e-06, "loss": 0.7433, "step": 8420 }, { "epoch": 0.2623384711194146, "grad_norm": 2.0780301094055176, "learning_rate": 3.882624799239569e-06, "loss": 0.8594, "step": 8425 }, { "epoch": 0.26249416160672584, "grad_norm": 2.087771415710449, "learning_rate": 3.8818053689075355e-06, "loss": 0.8816, "step": 8430 }, { "epoch": 0.2626498520940371, "grad_norm": 2.036008596420288, "learning_rate": 3.880985938575502e-06, "loss": 0.8427, "step": 8435 }, { "epoch": 0.2628055425813483, "grad_norm": 1.8763377666473389, "learning_rate": 3.880166508243469e-06, "loss": 0.753, "step": 8440 }, { "epoch": 0.2629612330686595, "grad_norm": 2.449145555496216, "learning_rate": 3.879347077911436e-06, "loss": 0.8329, "step": 8445 }, { "epoch": 0.2631169235559707, "grad_norm": 2.1895878314971924, "learning_rate": 3.878527647579403e-06, "loss": 0.7877, "step": 8450 }, { "epoch": 0.26327261404328195, "grad_norm": 1.8923367261886597, "learning_rate": 3.87770821724737e-06, "loss": 0.8141, "step": 8455 }, { "epoch": 0.2634283045305932, "grad_norm": 2.2918803691864014, "learning_rate": 3.876888786915337e-06, "loss": 0.7954, "step": 8460 }, { "epoch": 0.2635839950179044, "grad_norm": 2.3328640460968018, "learning_rate": 3.8760693565833036e-06, "loss": 0.8328, "step": 8465 }, { "epoch": 0.26373968550521565, "grad_norm": 2.066530704498291, "learning_rate": 3.8752499262512704e-06, "loss": 0.7507, "step": 8470 }, { "epoch": 0.2638953759925269, "grad_norm": 2.0105385780334473, "learning_rate": 3.874430495919237e-06, "loss": 0.8305, "step": 8475 }, { "epoch": 0.26405106647983806, "grad_norm": 2.5133957862854004, "learning_rate": 3.873611065587204e-06, "loss": 0.8709, "step": 8480 }, { "epoch": 0.2642067569671493, "grad_norm": 2.487514019012451, "learning_rate": 3.872791635255171e-06, "loss": 0.8202, "step": 8485 }, { "epoch": 0.2643624474544605, "grad_norm": 2.1499805450439453, "learning_rate": 3.871972204923138e-06, "loss": 0.8395, "step": 8490 }, { "epoch": 0.26451813794177176, "grad_norm": 2.143308162689209, "learning_rate": 3.871152774591105e-06, "loss": 0.8786, "step": 8495 }, { "epoch": 0.264673828429083, "grad_norm": 2.0097901821136475, "learning_rate": 3.870333344259072e-06, "loss": 0.8901, "step": 8500 }, { "epoch": 0.2648295189163942, "grad_norm": 2.177795648574829, "learning_rate": 3.8695139139270385e-06, "loss": 0.7295, "step": 8505 }, { "epoch": 0.26498520940370546, "grad_norm": 2.5332653522491455, "learning_rate": 3.868694483595005e-06, "loss": 0.8286, "step": 8510 }, { "epoch": 0.26514089989101663, "grad_norm": 2.1319265365600586, "learning_rate": 3.867875053262971e-06, "loss": 0.8971, "step": 8515 }, { "epoch": 0.26529659037832787, "grad_norm": 2.0589847564697266, "learning_rate": 3.867055622930938e-06, "loss": 0.7541, "step": 8520 }, { "epoch": 0.2654522808656391, "grad_norm": 1.9589674472808838, "learning_rate": 3.866236192598906e-06, "loss": 0.8481, "step": 8525 }, { "epoch": 0.26560797135295033, "grad_norm": 1.9876517057418823, "learning_rate": 3.865416762266873e-06, "loss": 0.7822, "step": 8530 }, { "epoch": 0.26576366184026157, "grad_norm": 2.264004945755005, "learning_rate": 3.864597331934839e-06, "loss": 0.8652, "step": 8535 }, { "epoch": 0.2659193523275728, "grad_norm": 2.4531025886535645, "learning_rate": 3.863777901602806e-06, "loss": 0.8047, "step": 8540 }, { "epoch": 0.26607504281488403, "grad_norm": 2.2093546390533447, "learning_rate": 3.862958471270773e-06, "loss": 0.8804, "step": 8545 }, { "epoch": 0.2662307333021952, "grad_norm": 2.3848111629486084, "learning_rate": 3.8621390409387395e-06, "loss": 0.8595, "step": 8550 }, { "epoch": 0.26638642378950644, "grad_norm": 2.5824830532073975, "learning_rate": 3.861319610606706e-06, "loss": 0.8623, "step": 8555 }, { "epoch": 0.2665421142768177, "grad_norm": 2.273392915725708, "learning_rate": 3.860500180274673e-06, "loss": 0.8244, "step": 8560 }, { "epoch": 0.2666978047641289, "grad_norm": 2.498789072036743, "learning_rate": 3.85968074994264e-06, "loss": 0.7907, "step": 8565 }, { "epoch": 0.26685349525144014, "grad_norm": 1.9756805896759033, "learning_rate": 3.858861319610607e-06, "loss": 0.7104, "step": 8570 }, { "epoch": 0.2670091857387514, "grad_norm": 1.8668391704559326, "learning_rate": 3.858041889278574e-06, "loss": 0.8543, "step": 8575 }, { "epoch": 0.2671648762260626, "grad_norm": 2.317455768585205, "learning_rate": 3.857222458946541e-06, "loss": 0.7605, "step": 8580 }, { "epoch": 0.2673205667133738, "grad_norm": 2.1029348373413086, "learning_rate": 3.856403028614508e-06, "loss": 0.8025, "step": 8585 }, { "epoch": 0.267476257200685, "grad_norm": 2.452324867248535, "learning_rate": 3.8555835982824745e-06, "loss": 0.873, "step": 8590 }, { "epoch": 0.26763194768799625, "grad_norm": 1.8889188766479492, "learning_rate": 3.854764167950441e-06, "loss": 0.7942, "step": 8595 }, { "epoch": 0.2677876381753075, "grad_norm": 2.123441457748413, "learning_rate": 3.853944737618408e-06, "loss": 0.8321, "step": 8600 }, { "epoch": 0.2679433286626187, "grad_norm": 2.50577449798584, "learning_rate": 3.853125307286375e-06, "loss": 0.8202, "step": 8605 }, { "epoch": 0.26809901914992995, "grad_norm": 1.9260977506637573, "learning_rate": 3.852305876954341e-06, "loss": 0.8099, "step": 8610 }, { "epoch": 0.2682547096372412, "grad_norm": 2.279188394546509, "learning_rate": 3.851486446622309e-06, "loss": 0.8533, "step": 8615 }, { "epoch": 0.2684104001245524, "grad_norm": 2.125196933746338, "learning_rate": 3.850667016290276e-06, "loss": 0.8026, "step": 8620 }, { "epoch": 0.2685660906118636, "grad_norm": 2.411081314086914, "learning_rate": 3.8498475859582426e-06, "loss": 0.9511, "step": 8625 }, { "epoch": 0.26872178109917483, "grad_norm": 2.5435166358947754, "learning_rate": 3.8490281556262086e-06, "loss": 0.8336, "step": 8630 }, { "epoch": 0.26887747158648606, "grad_norm": 2.2257132530212402, "learning_rate": 3.8482087252941754e-06, "loss": 0.7969, "step": 8635 }, { "epoch": 0.2690331620737973, "grad_norm": 2.739088535308838, "learning_rate": 3.847389294962143e-06, "loss": 0.8291, "step": 8640 }, { "epoch": 0.2691888525611085, "grad_norm": 2.447866916656494, "learning_rate": 3.84656986463011e-06, "loss": 0.7659, "step": 8645 }, { "epoch": 0.26934454304841976, "grad_norm": 2.1917614936828613, "learning_rate": 3.845750434298076e-06, "loss": 0.9203, "step": 8650 }, { "epoch": 0.269500233535731, "grad_norm": 1.9804939031600952, "learning_rate": 3.844931003966043e-06, "loss": 0.7779, "step": 8655 }, { "epoch": 0.26965592402304217, "grad_norm": 2.2489845752716064, "learning_rate": 3.84411157363401e-06, "loss": 0.8765, "step": 8660 }, { "epoch": 0.2698116145103534, "grad_norm": 2.245689630508423, "learning_rate": 3.843292143301977e-06, "loss": 0.7949, "step": 8665 }, { "epoch": 0.26996730499766464, "grad_norm": 2.134840965270996, "learning_rate": 3.8424727129699435e-06, "loss": 0.8003, "step": 8670 }, { "epoch": 0.27012299548497587, "grad_norm": 2.1424694061279297, "learning_rate": 3.84165328263791e-06, "loss": 0.827, "step": 8675 }, { "epoch": 0.2702786859722871, "grad_norm": 1.9292160272598267, "learning_rate": 3.840833852305877e-06, "loss": 0.6783, "step": 8680 }, { "epoch": 0.27043437645959834, "grad_norm": 2.000875949859619, "learning_rate": 3.840014421973844e-06, "loss": 0.7467, "step": 8685 }, { "epoch": 0.27059006694690957, "grad_norm": 1.882279396057129, "learning_rate": 3.839194991641811e-06, "loss": 0.8298, "step": 8690 }, { "epoch": 0.27074575743422075, "grad_norm": 2.020681142807007, "learning_rate": 3.838375561309778e-06, "loss": 0.8318, "step": 8695 }, { "epoch": 0.270901447921532, "grad_norm": 1.9555383920669556, "learning_rate": 3.837556130977745e-06, "loss": 0.7847, "step": 8700 }, { "epoch": 0.2710571384088432, "grad_norm": 2.3935482501983643, "learning_rate": 3.836736700645712e-06, "loss": 0.7743, "step": 8705 }, { "epoch": 0.27121282889615445, "grad_norm": 2.004554033279419, "learning_rate": 3.8359172703136785e-06, "loss": 0.8341, "step": 8710 }, { "epoch": 0.2713685193834657, "grad_norm": 1.881767749786377, "learning_rate": 3.835097839981645e-06, "loss": 0.8492, "step": 8715 }, { "epoch": 0.2715242098707769, "grad_norm": 2.7567660808563232, "learning_rate": 3.834278409649612e-06, "loss": 0.7962, "step": 8720 }, { "epoch": 0.27167990035808814, "grad_norm": 2.1303274631500244, "learning_rate": 3.833458979317578e-06, "loss": 0.7822, "step": 8725 }, { "epoch": 0.2718355908453993, "grad_norm": 2.1496047973632812, "learning_rate": 3.832639548985546e-06, "loss": 0.8183, "step": 8730 }, { "epoch": 0.27199128133271055, "grad_norm": 2.106736421585083, "learning_rate": 3.831820118653513e-06, "loss": 0.7879, "step": 8735 }, { "epoch": 0.2721469718200218, "grad_norm": 2.456458568572998, "learning_rate": 3.831000688321479e-06, "loss": 0.8306, "step": 8740 }, { "epoch": 0.272302662307333, "grad_norm": 3.1316490173339844, "learning_rate": 3.830181257989446e-06, "loss": 0.9226, "step": 8745 }, { "epoch": 0.27245835279464425, "grad_norm": 2.0318400859832764, "learning_rate": 3.829361827657413e-06, "loss": 0.8327, "step": 8750 }, { "epoch": 0.2726140432819555, "grad_norm": 2.0907950401306152, "learning_rate": 3.82854239732538e-06, "loss": 0.7646, "step": 8755 }, { "epoch": 0.2727697337692667, "grad_norm": 1.9527640342712402, "learning_rate": 3.827722966993346e-06, "loss": 0.8533, "step": 8760 }, { "epoch": 0.2729254242565779, "grad_norm": 2.33003306388855, "learning_rate": 3.826903536661313e-06, "loss": 0.7544, "step": 8765 }, { "epoch": 0.27308111474388913, "grad_norm": 2.6043434143066406, "learning_rate": 3.82608410632928e-06, "loss": 0.816, "step": 8770 }, { "epoch": 0.27323680523120036, "grad_norm": 2.484513282775879, "learning_rate": 3.825264675997247e-06, "loss": 0.8197, "step": 8775 }, { "epoch": 0.2733924957185116, "grad_norm": 2.1870651245117188, "learning_rate": 3.824445245665214e-06, "loss": 0.8091, "step": 8780 }, { "epoch": 0.27354818620582283, "grad_norm": 1.9930411577224731, "learning_rate": 3.823625815333181e-06, "loss": 0.7544, "step": 8785 }, { "epoch": 0.27370387669313406, "grad_norm": 2.527296543121338, "learning_rate": 3.8228063850011476e-06, "loss": 0.7944, "step": 8790 }, { "epoch": 0.2738595671804453, "grad_norm": 2.3717358112335205, "learning_rate": 3.821986954669114e-06, "loss": 0.8832, "step": 8795 }, { "epoch": 0.2740152576677565, "grad_norm": 2.234308958053589, "learning_rate": 3.821167524337081e-06, "loss": 0.7597, "step": 8800 }, { "epoch": 0.2741709481550677, "grad_norm": 2.1092135906219482, "learning_rate": 3.820348094005048e-06, "loss": 0.7863, "step": 8805 }, { "epoch": 0.27432663864237894, "grad_norm": 1.8603535890579224, "learning_rate": 3.819528663673015e-06, "loss": 0.8731, "step": 8810 }, { "epoch": 0.2744823291296902, "grad_norm": 2.2936763763427734, "learning_rate": 3.818709233340981e-06, "loss": 0.7644, "step": 8815 }, { "epoch": 0.2746380196170014, "grad_norm": 1.8087701797485352, "learning_rate": 3.817889803008949e-06, "loss": 0.8091, "step": 8820 }, { "epoch": 0.27479371010431264, "grad_norm": 1.7537189722061157, "learning_rate": 3.817070372676916e-06, "loss": 0.8438, "step": 8825 }, { "epoch": 0.27494940059162387, "grad_norm": 2.3216991424560547, "learning_rate": 3.8162509423448825e-06, "loss": 0.8041, "step": 8830 }, { "epoch": 0.2751050910789351, "grad_norm": 1.9951385259628296, "learning_rate": 3.8154315120128485e-06, "loss": 0.8209, "step": 8835 }, { "epoch": 0.2752607815662463, "grad_norm": 1.9182933568954468, "learning_rate": 3.814612081680816e-06, "loss": 0.8604, "step": 8840 }, { "epoch": 0.2754164720535575, "grad_norm": 2.132505178451538, "learning_rate": 3.8137926513487827e-06, "loss": 0.8245, "step": 8845 }, { "epoch": 0.27557216254086875, "grad_norm": 2.0308704376220703, "learning_rate": 3.8129732210167496e-06, "loss": 0.7808, "step": 8850 }, { "epoch": 0.27572785302818, "grad_norm": 2.541843891143799, "learning_rate": 3.812153790684716e-06, "loss": 0.7954, "step": 8855 }, { "epoch": 0.2758835435154912, "grad_norm": 2.1441473960876465, "learning_rate": 3.8113343603526833e-06, "loss": 0.7383, "step": 8860 }, { "epoch": 0.27603923400280245, "grad_norm": 2.4691059589385986, "learning_rate": 3.81051493002065e-06, "loss": 0.7676, "step": 8865 }, { "epoch": 0.2761949244901137, "grad_norm": 2.6317496299743652, "learning_rate": 3.8096954996886166e-06, "loss": 0.8148, "step": 8870 }, { "epoch": 0.27635061497742486, "grad_norm": 1.7272148132324219, "learning_rate": 3.8088760693565835e-06, "loss": 0.7979, "step": 8875 }, { "epoch": 0.2765063054647361, "grad_norm": 1.979234218597412, "learning_rate": 3.8080566390245504e-06, "loss": 0.8323, "step": 8880 }, { "epoch": 0.2766619959520473, "grad_norm": 2.623952865600586, "learning_rate": 3.8072372086925176e-06, "loss": 0.8249, "step": 8885 }, { "epoch": 0.27681768643935856, "grad_norm": 1.8707823753356934, "learning_rate": 3.8064177783604837e-06, "loss": 0.8271, "step": 8890 }, { "epoch": 0.2769733769266698, "grad_norm": 2.0287303924560547, "learning_rate": 3.805598348028451e-06, "loss": 0.8261, "step": 8895 }, { "epoch": 0.277129067413981, "grad_norm": 3.13638973236084, "learning_rate": 3.804778917696418e-06, "loss": 0.7721, "step": 8900 }, { "epoch": 0.27728475790129226, "grad_norm": 2.23653244972229, "learning_rate": 3.8039594873643847e-06, "loss": 0.7847, "step": 8905 }, { "epoch": 0.27744044838860343, "grad_norm": 2.333169937133789, "learning_rate": 3.803140057032351e-06, "loss": 0.8424, "step": 8910 }, { "epoch": 0.27759613887591467, "grad_norm": 2.678864002227783, "learning_rate": 3.802320626700318e-06, "loss": 0.8254, "step": 8915 }, { "epoch": 0.2777518293632259, "grad_norm": 2.394540548324585, "learning_rate": 3.8015011963682853e-06, "loss": 0.7111, "step": 8920 }, { "epoch": 0.27790751985053713, "grad_norm": 2.114196300506592, "learning_rate": 3.800681766036252e-06, "loss": 0.7273, "step": 8925 }, { "epoch": 0.27806321033784837, "grad_norm": 2.2363810539245605, "learning_rate": 3.7998623357042186e-06, "loss": 0.8056, "step": 8930 }, { "epoch": 0.2782189008251596, "grad_norm": 2.1664204597473145, "learning_rate": 3.7990429053721855e-06, "loss": 0.8308, "step": 8935 }, { "epoch": 0.27837459131247083, "grad_norm": 2.4343135356903076, "learning_rate": 3.7982234750401524e-06, "loss": 0.8197, "step": 8940 }, { "epoch": 0.278530281799782, "grad_norm": 2.82647967338562, "learning_rate": 3.797404044708119e-06, "loss": 0.9243, "step": 8945 }, { "epoch": 0.27868597228709324, "grad_norm": 1.935091257095337, "learning_rate": 3.796584614376086e-06, "loss": 0.8202, "step": 8950 }, { "epoch": 0.2788416627744045, "grad_norm": 2.1486501693725586, "learning_rate": 3.795765184044053e-06, "loss": 0.8001, "step": 8955 }, { "epoch": 0.2789973532617157, "grad_norm": 1.9327610731124878, "learning_rate": 3.79494575371202e-06, "loss": 0.7486, "step": 8960 }, { "epoch": 0.27915304374902694, "grad_norm": 2.018120288848877, "learning_rate": 3.7941263233799863e-06, "loss": 0.8536, "step": 8965 }, { "epoch": 0.2793087342363382, "grad_norm": 2.2347638607025146, "learning_rate": 3.793306893047953e-06, "loss": 0.8144, "step": 8970 }, { "epoch": 0.2794644247236494, "grad_norm": 2.189312696456909, "learning_rate": 3.7924874627159205e-06, "loss": 0.8144, "step": 8975 }, { "epoch": 0.2796201152109606, "grad_norm": 2.3036890029907227, "learning_rate": 3.7916680323838873e-06, "loss": 0.7795, "step": 8980 }, { "epoch": 0.2797758056982718, "grad_norm": 2.171196937561035, "learning_rate": 3.7908486020518538e-06, "loss": 0.7681, "step": 8985 }, { "epoch": 0.27993149618558305, "grad_norm": 2.057676076889038, "learning_rate": 3.7900291717198206e-06, "loss": 0.7755, "step": 8990 }, { "epoch": 0.2800871866728943, "grad_norm": 2.219013214111328, "learning_rate": 3.7892097413877875e-06, "loss": 0.8039, "step": 8995 }, { "epoch": 0.2802428771602055, "grad_norm": 2.0604512691497803, "learning_rate": 3.788390311055755e-06, "loss": 0.7893, "step": 9000 }, { "epoch": 0.28039856764751675, "grad_norm": 2.0260212421417236, "learning_rate": 3.787570880723721e-06, "loss": 0.799, "step": 9005 }, { "epoch": 0.280554258134828, "grad_norm": 2.0678462982177734, "learning_rate": 3.786751450391688e-06, "loss": 0.7717, "step": 9010 }, { "epoch": 0.28070994862213916, "grad_norm": 2.261289358139038, "learning_rate": 3.785932020059655e-06, "loss": 0.8441, "step": 9015 }, { "epoch": 0.2808656391094504, "grad_norm": 2.1257426738739014, "learning_rate": 3.7851125897276214e-06, "loss": 0.83, "step": 9020 }, { "epoch": 0.2810213295967616, "grad_norm": 2.0787041187286377, "learning_rate": 3.7842931593955883e-06, "loss": 0.7421, "step": 9025 }, { "epoch": 0.28117702008407286, "grad_norm": 2.2203972339630127, "learning_rate": 3.783473729063555e-06, "loss": 0.7567, "step": 9030 }, { "epoch": 0.2813327105713841, "grad_norm": 1.855427622795105, "learning_rate": 3.7826542987315225e-06, "loss": 0.8848, "step": 9035 }, { "epoch": 0.2814884010586953, "grad_norm": 1.9826242923736572, "learning_rate": 3.781834868399489e-06, "loss": 0.7339, "step": 9040 }, { "epoch": 0.28164409154600656, "grad_norm": 2.0237460136413574, "learning_rate": 3.7810154380674558e-06, "loss": 0.858, "step": 9045 }, { "epoch": 0.2817997820333178, "grad_norm": 2.430854320526123, "learning_rate": 3.7801960077354226e-06, "loss": 0.8296, "step": 9050 }, { "epoch": 0.28195547252062897, "grad_norm": 2.518751859664917, "learning_rate": 3.7793765774033895e-06, "loss": 0.8003, "step": 9055 }, { "epoch": 0.2821111630079402, "grad_norm": 2.3995628356933594, "learning_rate": 3.778557147071356e-06, "loss": 0.8089, "step": 9060 }, { "epoch": 0.28226685349525144, "grad_norm": 1.8219636678695679, "learning_rate": 3.7777377167393233e-06, "loss": 0.8513, "step": 9065 }, { "epoch": 0.28242254398256267, "grad_norm": 2.327407121658325, "learning_rate": 3.77691828640729e-06, "loss": 0.8527, "step": 9070 }, { "epoch": 0.2825782344698739, "grad_norm": 2.177553176879883, "learning_rate": 3.776098856075257e-06, "loss": 0.8036, "step": 9075 }, { "epoch": 0.28273392495718513, "grad_norm": 2.353069305419922, "learning_rate": 3.7752794257432234e-06, "loss": 0.8269, "step": 9080 }, { "epoch": 0.28288961544449637, "grad_norm": 1.8987529277801514, "learning_rate": 3.7744599954111903e-06, "loss": 0.8771, "step": 9085 }, { "epoch": 0.28304530593180754, "grad_norm": 2.501072645187378, "learning_rate": 3.7736405650791576e-06, "loss": 0.872, "step": 9090 }, { "epoch": 0.2832009964191188, "grad_norm": 2.1366231441497803, "learning_rate": 3.772821134747124e-06, "loss": 0.8487, "step": 9095 }, { "epoch": 0.28335668690643, "grad_norm": 2.181898593902588, "learning_rate": 3.772001704415091e-06, "loss": 0.8524, "step": 9100 }, { "epoch": 0.28351237739374124, "grad_norm": 1.9886646270751953, "learning_rate": 3.771182274083058e-06, "loss": 0.7963, "step": 9105 }, { "epoch": 0.2836680678810525, "grad_norm": 1.9385735988616943, "learning_rate": 3.7703628437510247e-06, "loss": 0.7958, "step": 9110 }, { "epoch": 0.2838237583683637, "grad_norm": 2.1059489250183105, "learning_rate": 3.769543413418991e-06, "loss": 0.8886, "step": 9115 }, { "epoch": 0.28397944885567494, "grad_norm": 2.1060895919799805, "learning_rate": 3.7687239830869584e-06, "loss": 0.7571, "step": 9120 }, { "epoch": 0.2841351393429861, "grad_norm": 2.7102065086364746, "learning_rate": 3.7679045527549253e-06, "loss": 0.8141, "step": 9125 }, { "epoch": 0.28429082983029735, "grad_norm": 2.203922748565674, "learning_rate": 3.767085122422892e-06, "loss": 0.913, "step": 9130 }, { "epoch": 0.2844465203176086, "grad_norm": 1.7877392768859863, "learning_rate": 3.7662656920908586e-06, "loss": 0.8341, "step": 9135 }, { "epoch": 0.2846022108049198, "grad_norm": 2.0986714363098145, "learning_rate": 3.7654462617588255e-06, "loss": 0.7531, "step": 9140 }, { "epoch": 0.28475790129223105, "grad_norm": 2.180964708328247, "learning_rate": 3.7646268314267923e-06, "loss": 0.8385, "step": 9145 }, { "epoch": 0.2849135917795423, "grad_norm": 2.0187575817108154, "learning_rate": 3.7638074010947596e-06, "loss": 0.7916, "step": 9150 }, { "epoch": 0.2850692822668535, "grad_norm": 2.068692207336426, "learning_rate": 3.762987970762726e-06, "loss": 0.7373, "step": 9155 }, { "epoch": 0.2852249727541647, "grad_norm": 2.318464994430542, "learning_rate": 3.762168540430693e-06, "loss": 0.7577, "step": 9160 }, { "epoch": 0.28538066324147593, "grad_norm": 2.522848129272461, "learning_rate": 3.76134911009866e-06, "loss": 0.7963, "step": 9165 }, { "epoch": 0.28553635372878716, "grad_norm": 2.1077139377593994, "learning_rate": 3.7605296797666262e-06, "loss": 0.7826, "step": 9170 }, { "epoch": 0.2856920442160984, "grad_norm": 2.066443681716919, "learning_rate": 3.759710249434593e-06, "loss": 0.8825, "step": 9175 }, { "epoch": 0.28584773470340963, "grad_norm": 1.7600017786026, "learning_rate": 3.7588908191025604e-06, "loss": 0.8182, "step": 9180 }, { "epoch": 0.28600342519072086, "grad_norm": 2.266317367553711, "learning_rate": 3.7580713887705273e-06, "loss": 0.7691, "step": 9185 }, { "epoch": 0.2861591156780321, "grad_norm": 2.351933479309082, "learning_rate": 3.7572519584384937e-06, "loss": 0.7455, "step": 9190 }, { "epoch": 0.28631480616534327, "grad_norm": 1.851676106452942, "learning_rate": 3.7564325281064606e-06, "loss": 0.8198, "step": 9195 }, { "epoch": 0.2864704966526545, "grad_norm": 2.3065192699432373, "learning_rate": 3.7556130977744275e-06, "loss": 0.7956, "step": 9200 }, { "epoch": 0.28662618713996574, "grad_norm": 2.3421826362609863, "learning_rate": 3.7547936674423948e-06, "loss": 0.9234, "step": 9205 }, { "epoch": 0.28678187762727697, "grad_norm": 1.84872305393219, "learning_rate": 3.753974237110361e-06, "loss": 0.7992, "step": 9210 }, { "epoch": 0.2869375681145882, "grad_norm": 1.9933736324310303, "learning_rate": 3.753154806778328e-06, "loss": 0.8908, "step": 9215 }, { "epoch": 0.28709325860189944, "grad_norm": 2.8443844318389893, "learning_rate": 3.752335376446295e-06, "loss": 0.7994, "step": 9220 }, { "epoch": 0.28724894908921067, "grad_norm": 1.9629337787628174, "learning_rate": 3.7515159461142614e-06, "loss": 0.8145, "step": 9225 }, { "epoch": 0.28740463957652185, "grad_norm": 1.9691550731658936, "learning_rate": 3.7506965157822283e-06, "loss": 0.7949, "step": 9230 }, { "epoch": 0.2875603300638331, "grad_norm": 2.266523599624634, "learning_rate": 3.7498770854501955e-06, "loss": 0.8265, "step": 9235 }, { "epoch": 0.2877160205511443, "grad_norm": 1.9884454011917114, "learning_rate": 3.7490576551181624e-06, "loss": 0.7655, "step": 9240 }, { "epoch": 0.28787171103845555, "grad_norm": 1.9247690439224243, "learning_rate": 3.748238224786129e-06, "loss": 0.808, "step": 9245 }, { "epoch": 0.2880274015257668, "grad_norm": 2.0846104621887207, "learning_rate": 3.7474187944540957e-06, "loss": 0.7785, "step": 9250 }, { "epoch": 0.288183092013078, "grad_norm": 2.237348794937134, "learning_rate": 3.7465993641220626e-06, "loss": 0.8297, "step": 9255 }, { "epoch": 0.28833878250038925, "grad_norm": 1.9507629871368408, "learning_rate": 3.74577993379003e-06, "loss": 0.8364, "step": 9260 }, { "epoch": 0.2884944729877005, "grad_norm": 1.9784719944000244, "learning_rate": 3.744960503457996e-06, "loss": 0.8728, "step": 9265 }, { "epoch": 0.28865016347501166, "grad_norm": 2.292827844619751, "learning_rate": 3.7441410731259632e-06, "loss": 0.7497, "step": 9270 }, { "epoch": 0.2888058539623229, "grad_norm": 2.1829535961151123, "learning_rate": 3.74332164279393e-06, "loss": 0.7977, "step": 9275 }, { "epoch": 0.2889615444496341, "grad_norm": 2.300802230834961, "learning_rate": 3.742502212461897e-06, "loss": 0.835, "step": 9280 }, { "epoch": 0.28911723493694536, "grad_norm": 2.4842963218688965, "learning_rate": 3.7416827821298634e-06, "loss": 0.8506, "step": 9285 }, { "epoch": 0.2892729254242566, "grad_norm": 2.0386173725128174, "learning_rate": 3.7408633517978303e-06, "loss": 0.7556, "step": 9290 }, { "epoch": 0.2894286159115678, "grad_norm": 1.9345123767852783, "learning_rate": 3.7400439214657976e-06, "loss": 0.8358, "step": 9295 }, { "epoch": 0.28958430639887905, "grad_norm": 2.263641357421875, "learning_rate": 3.739224491133764e-06, "loss": 0.8497, "step": 9300 }, { "epoch": 0.28973999688619023, "grad_norm": 2.0116612911224365, "learning_rate": 3.738405060801731e-06, "loss": 0.8171, "step": 9305 }, { "epoch": 0.28989568737350146, "grad_norm": 2.4836208820343018, "learning_rate": 3.7375856304696977e-06, "loss": 0.8801, "step": 9310 }, { "epoch": 0.2900513778608127, "grad_norm": 1.90793776512146, "learning_rate": 3.7367662001376646e-06, "loss": 0.7757, "step": 9315 }, { "epoch": 0.29020706834812393, "grad_norm": 2.0211377143859863, "learning_rate": 3.735946769805631e-06, "loss": 0.8282, "step": 9320 }, { "epoch": 0.29036275883543516, "grad_norm": 1.816876769065857, "learning_rate": 3.7351273394735984e-06, "loss": 0.7859, "step": 9325 }, { "epoch": 0.2905184493227464, "grad_norm": 2.360604763031006, "learning_rate": 3.7343079091415652e-06, "loss": 0.8275, "step": 9330 }, { "epoch": 0.29067413981005763, "grad_norm": 2.2443103790283203, "learning_rate": 3.733488478809532e-06, "loss": 0.7965, "step": 9335 }, { "epoch": 0.2908298302973688, "grad_norm": 2.1334986686706543, "learning_rate": 3.7326690484774985e-06, "loss": 0.7831, "step": 9340 }, { "epoch": 0.29098552078468004, "grad_norm": 1.817060112953186, "learning_rate": 3.7318496181454654e-06, "loss": 0.8401, "step": 9345 }, { "epoch": 0.2911412112719913, "grad_norm": 1.927741289138794, "learning_rate": 3.7310301878134327e-06, "loss": 0.8186, "step": 9350 }, { "epoch": 0.2912969017593025, "grad_norm": 1.8069721460342407, "learning_rate": 3.7302107574813996e-06, "loss": 0.7978, "step": 9355 }, { "epoch": 0.29145259224661374, "grad_norm": 1.941782832145691, "learning_rate": 3.729391327149366e-06, "loss": 0.8715, "step": 9360 }, { "epoch": 0.291608282733925, "grad_norm": 1.8589553833007812, "learning_rate": 3.728571896817333e-06, "loss": 0.7865, "step": 9365 }, { "epoch": 0.2917639732212362, "grad_norm": 1.9367533922195435, "learning_rate": 3.7277524664852998e-06, "loss": 0.7727, "step": 9370 }, { "epoch": 0.2919196637085474, "grad_norm": 2.1773617267608643, "learning_rate": 3.726933036153266e-06, "loss": 0.798, "step": 9375 }, { "epoch": 0.2920753541958586, "grad_norm": 2.073425769805908, "learning_rate": 3.726113605821233e-06, "loss": 0.8459, "step": 9380 }, { "epoch": 0.29223104468316985, "grad_norm": 2.4241483211517334, "learning_rate": 3.7252941754892004e-06, "loss": 0.7414, "step": 9385 }, { "epoch": 0.2923867351704811, "grad_norm": 2.195293664932251, "learning_rate": 3.7244747451571672e-06, "loss": 0.7723, "step": 9390 }, { "epoch": 0.2925424256577923, "grad_norm": 1.8553720712661743, "learning_rate": 3.7236553148251337e-06, "loss": 0.8463, "step": 9395 }, { "epoch": 0.29269811614510355, "grad_norm": 2.288586139678955, "learning_rate": 3.7228358844931005e-06, "loss": 0.8512, "step": 9400 }, { "epoch": 0.2928538066324148, "grad_norm": 2.4933297634124756, "learning_rate": 3.7220164541610674e-06, "loss": 0.8778, "step": 9405 }, { "epoch": 0.29300949711972596, "grad_norm": 2.476285696029663, "learning_rate": 3.7211970238290347e-06, "loss": 0.791, "step": 9410 }, { "epoch": 0.2931651876070372, "grad_norm": 1.8632206916809082, "learning_rate": 3.720377593497001e-06, "loss": 0.853, "step": 9415 }, { "epoch": 0.2933208780943484, "grad_norm": 2.2245333194732666, "learning_rate": 3.719558163164968e-06, "loss": 0.8279, "step": 9420 }, { "epoch": 0.29347656858165966, "grad_norm": 2.1542160511016846, "learning_rate": 3.718738732832935e-06, "loss": 0.7925, "step": 9425 }, { "epoch": 0.2936322590689709, "grad_norm": 2.255016803741455, "learning_rate": 3.7179193025009018e-06, "loss": 0.8718, "step": 9430 }, { "epoch": 0.2937879495562821, "grad_norm": 2.247122287750244, "learning_rate": 3.7170998721688682e-06, "loss": 0.8385, "step": 9435 }, { "epoch": 0.29394364004359336, "grad_norm": 2.1287682056427, "learning_rate": 3.7162804418368355e-06, "loss": 0.8122, "step": 9440 }, { "epoch": 0.29409933053090453, "grad_norm": 2.3947293758392334, "learning_rate": 3.7154610115048024e-06, "loss": 0.7539, "step": 9445 }, { "epoch": 0.29425502101821577, "grad_norm": 2.1337783336639404, "learning_rate": 3.714641581172769e-06, "loss": 0.8642, "step": 9450 }, { "epoch": 0.294410711505527, "grad_norm": 2.0244970321655273, "learning_rate": 3.7138221508407357e-06, "loss": 0.8128, "step": 9455 }, { "epoch": 0.29456640199283823, "grad_norm": 1.7729511260986328, "learning_rate": 3.7130027205087026e-06, "loss": 0.7522, "step": 9460 }, { "epoch": 0.29472209248014947, "grad_norm": 2.7317113876342773, "learning_rate": 3.71218329017667e-06, "loss": 0.7911, "step": 9465 }, { "epoch": 0.2948777829674607, "grad_norm": 1.9378437995910645, "learning_rate": 3.7113638598446363e-06, "loss": 0.8398, "step": 9470 }, { "epoch": 0.29503347345477193, "grad_norm": 1.8008944988250732, "learning_rate": 3.710544429512603e-06, "loss": 0.852, "step": 9475 }, { "epoch": 0.29518916394208317, "grad_norm": 2.3014473915100098, "learning_rate": 3.70972499918057e-06, "loss": 0.7949, "step": 9480 }, { "epoch": 0.29534485442939434, "grad_norm": 1.7720763683319092, "learning_rate": 3.708905568848537e-06, "loss": 0.8688, "step": 9485 }, { "epoch": 0.2955005449167056, "grad_norm": 2.1224544048309326, "learning_rate": 3.7080861385165034e-06, "loss": 0.822, "step": 9490 }, { "epoch": 0.2956562354040168, "grad_norm": 2.163248062133789, "learning_rate": 3.7072667081844702e-06, "loss": 0.7983, "step": 9495 }, { "epoch": 0.29581192589132804, "grad_norm": 2.0184788703918457, "learning_rate": 3.7064472778524375e-06, "loss": 0.8309, "step": 9500 }, { "epoch": 0.2959676163786393, "grad_norm": 2.067173957824707, "learning_rate": 3.7056278475204044e-06, "loss": 0.8612, "step": 9505 }, { "epoch": 0.2961233068659505, "grad_norm": 2.134979248046875, "learning_rate": 3.704808417188371e-06, "loss": 0.8467, "step": 9510 }, { "epoch": 0.29627899735326174, "grad_norm": 2.4939746856689453, "learning_rate": 3.7039889868563377e-06, "loss": 0.7594, "step": 9515 }, { "epoch": 0.2964346878405729, "grad_norm": 2.4104723930358887, "learning_rate": 3.7031695565243046e-06, "loss": 0.7897, "step": 9520 }, { "epoch": 0.29659037832788415, "grad_norm": 2.1026957035064697, "learning_rate": 3.702350126192271e-06, "loss": 0.8895, "step": 9525 }, { "epoch": 0.2967460688151954, "grad_norm": 2.2288870811462402, "learning_rate": 3.7015306958602383e-06, "loss": 0.8938, "step": 9530 }, { "epoch": 0.2969017593025066, "grad_norm": 2.284355878829956, "learning_rate": 3.700711265528205e-06, "loss": 0.8323, "step": 9535 }, { "epoch": 0.29705744978981785, "grad_norm": 2.1252894401550293, "learning_rate": 3.699891835196172e-06, "loss": 0.7629, "step": 9540 }, { "epoch": 0.2972131402771291, "grad_norm": 2.3662238121032715, "learning_rate": 3.6990724048641385e-06, "loss": 0.8129, "step": 9545 }, { "epoch": 0.2973688307644403, "grad_norm": 2.0564613342285156, "learning_rate": 3.6982529745321054e-06, "loss": 0.761, "step": 9550 }, { "epoch": 0.2975245212517515, "grad_norm": 1.9122052192687988, "learning_rate": 3.6974335442000727e-06, "loss": 0.8732, "step": 9555 }, { "epoch": 0.29768021173906273, "grad_norm": 2.0353996753692627, "learning_rate": 3.6966141138680395e-06, "loss": 0.7905, "step": 9560 }, { "epoch": 0.29783590222637396, "grad_norm": 2.2015063762664795, "learning_rate": 3.695794683536006e-06, "loss": 0.7958, "step": 9565 }, { "epoch": 0.2979915927136852, "grad_norm": 2.0203628540039062, "learning_rate": 3.694975253203973e-06, "loss": 0.8229, "step": 9570 }, { "epoch": 0.2981472832009964, "grad_norm": 2.4477787017822266, "learning_rate": 3.6941558228719397e-06, "loss": 0.8144, "step": 9575 }, { "epoch": 0.29830297368830766, "grad_norm": 1.9531193971633911, "learning_rate": 3.693336392539907e-06, "loss": 0.8527, "step": 9580 }, { "epoch": 0.2984586641756189, "grad_norm": 1.915258526802063, "learning_rate": 3.6925169622078735e-06, "loss": 0.8692, "step": 9585 }, { "epoch": 0.29861435466293007, "grad_norm": 2.177274227142334, "learning_rate": 3.6916975318758403e-06, "loss": 0.8502, "step": 9590 }, { "epoch": 0.2987700451502413, "grad_norm": 1.9813120365142822, "learning_rate": 3.690878101543807e-06, "loss": 0.8461, "step": 9595 }, { "epoch": 0.29892573563755254, "grad_norm": 2.6729719638824463, "learning_rate": 3.6900586712117736e-06, "loss": 0.7719, "step": 9600 }, { "epoch": 0.29908142612486377, "grad_norm": 1.9438040256500244, "learning_rate": 3.6892392408797405e-06, "loss": 0.7811, "step": 9605 }, { "epoch": 0.299237116612175, "grad_norm": 2.0309195518493652, "learning_rate": 3.688419810547708e-06, "loss": 0.8372, "step": 9610 }, { "epoch": 0.29939280709948624, "grad_norm": 2.0365378856658936, "learning_rate": 3.6876003802156747e-06, "loss": 0.8296, "step": 9615 }, { "epoch": 0.29954849758679747, "grad_norm": 2.0112555027008057, "learning_rate": 3.686780949883641e-06, "loss": 0.7344, "step": 9620 }, { "epoch": 0.29970418807410865, "grad_norm": 2.40269136428833, "learning_rate": 3.685961519551608e-06, "loss": 0.8591, "step": 9625 }, { "epoch": 0.2998598785614199, "grad_norm": 2.2879691123962402, "learning_rate": 3.685142089219575e-06, "loss": 0.9042, "step": 9630 }, { "epoch": 0.3000155690487311, "grad_norm": 2.3844070434570312, "learning_rate": 3.6843226588875417e-06, "loss": 0.7762, "step": 9635 }, { "epoch": 0.30017125953604235, "grad_norm": 2.0271120071411133, "learning_rate": 3.683503228555508e-06, "loss": 0.8139, "step": 9640 }, { "epoch": 0.3003269500233536, "grad_norm": 2.1288883686065674, "learning_rate": 3.6826837982234755e-06, "loss": 0.814, "step": 9645 }, { "epoch": 0.3004826405106648, "grad_norm": 2.634899616241455, "learning_rate": 3.6818643678914423e-06, "loss": 0.8231, "step": 9650 }, { "epoch": 0.30063833099797604, "grad_norm": 2.127939462661743, "learning_rate": 3.6810449375594088e-06, "loss": 0.8411, "step": 9655 }, { "epoch": 0.3007940214852872, "grad_norm": 2.3068044185638428, "learning_rate": 3.6802255072273756e-06, "loss": 0.7221, "step": 9660 }, { "epoch": 0.30094971197259845, "grad_norm": 2.0716092586517334, "learning_rate": 3.6794060768953425e-06, "loss": 0.7529, "step": 9665 }, { "epoch": 0.3011054024599097, "grad_norm": 1.8690119981765747, "learning_rate": 3.67858664656331e-06, "loss": 0.7967, "step": 9670 }, { "epoch": 0.3012610929472209, "grad_norm": 2.4518773555755615, "learning_rate": 3.6777672162312763e-06, "loss": 0.8756, "step": 9675 }, { "epoch": 0.30141678343453215, "grad_norm": 2.929454803466797, "learning_rate": 3.676947785899243e-06, "loss": 0.9095, "step": 9680 }, { "epoch": 0.3015724739218434, "grad_norm": 1.9392356872558594, "learning_rate": 3.67612835556721e-06, "loss": 0.8021, "step": 9685 }, { "epoch": 0.3017281644091546, "grad_norm": 1.934813141822815, "learning_rate": 3.675308925235177e-06, "loss": 0.8074, "step": 9690 }, { "epoch": 0.30188385489646585, "grad_norm": 2.119175434112549, "learning_rate": 3.6744894949031433e-06, "loss": 0.7538, "step": 9695 }, { "epoch": 0.30203954538377703, "grad_norm": 2.2564501762390137, "learning_rate": 3.6736700645711106e-06, "loss": 0.8559, "step": 9700 }, { "epoch": 0.30219523587108826, "grad_norm": 2.261545181274414, "learning_rate": 3.6728506342390775e-06, "loss": 0.8017, "step": 9705 }, { "epoch": 0.3023509263583995, "grad_norm": 1.9509832859039307, "learning_rate": 3.6720312039070443e-06, "loss": 0.8302, "step": 9710 }, { "epoch": 0.30250661684571073, "grad_norm": 2.1275227069854736, "learning_rate": 3.6712117735750108e-06, "loss": 0.8258, "step": 9715 }, { "epoch": 0.30266230733302196, "grad_norm": 2.434011936187744, "learning_rate": 3.6703923432429777e-06, "loss": 0.8878, "step": 9720 }, { "epoch": 0.3028179978203332, "grad_norm": 2.09475040435791, "learning_rate": 3.669572912910945e-06, "loss": 0.8357, "step": 9725 }, { "epoch": 0.30297368830764443, "grad_norm": 2.2430973052978516, "learning_rate": 3.668753482578911e-06, "loss": 0.7992, "step": 9730 }, { "epoch": 0.3031293787949556, "grad_norm": 1.918562412261963, "learning_rate": 3.6679340522468783e-06, "loss": 0.7185, "step": 9735 }, { "epoch": 0.30328506928226684, "grad_norm": 2.4847583770751953, "learning_rate": 3.667114621914845e-06, "loss": 0.8304, "step": 9740 }, { "epoch": 0.3034407597695781, "grad_norm": 2.3277969360351562, "learning_rate": 3.666295191582812e-06, "loss": 0.8008, "step": 9745 }, { "epoch": 0.3035964502568893, "grad_norm": 2.2012181282043457, "learning_rate": 3.6654757612507784e-06, "loss": 0.7748, "step": 9750 }, { "epoch": 0.30375214074420054, "grad_norm": 2.314701795578003, "learning_rate": 3.6646563309187453e-06, "loss": 0.8528, "step": 9755 }, { "epoch": 0.30390783123151177, "grad_norm": 2.02898907661438, "learning_rate": 3.6638369005867126e-06, "loss": 0.8057, "step": 9760 }, { "epoch": 0.304063521718823, "grad_norm": 2.023075819015503, "learning_rate": 3.6630174702546795e-06, "loss": 0.8064, "step": 9765 }, { "epoch": 0.3042192122061342, "grad_norm": 2.0126571655273438, "learning_rate": 3.662198039922646e-06, "loss": 0.8173, "step": 9770 }, { "epoch": 0.3043749026934454, "grad_norm": 1.8205914497375488, "learning_rate": 3.661378609590613e-06, "loss": 0.7522, "step": 9775 }, { "epoch": 0.30453059318075665, "grad_norm": 2.0832338333129883, "learning_rate": 3.6605591792585797e-06, "loss": 0.8589, "step": 9780 }, { "epoch": 0.3046862836680679, "grad_norm": 2.0912418365478516, "learning_rate": 3.659739748926547e-06, "loss": 0.8661, "step": 9785 }, { "epoch": 0.3048419741553791, "grad_norm": 2.3415772914886475, "learning_rate": 3.6589203185945134e-06, "loss": 0.9106, "step": 9790 }, { "epoch": 0.30499766464269035, "grad_norm": 2.78208589553833, "learning_rate": 3.6581008882624803e-06, "loss": 0.8971, "step": 9795 }, { "epoch": 0.3051533551300016, "grad_norm": 1.933606743812561, "learning_rate": 3.657281457930447e-06, "loss": 0.8743, "step": 9800 }, { "epoch": 0.30530904561731276, "grad_norm": 1.8075592517852783, "learning_rate": 3.6564620275984136e-06, "loss": 0.8361, "step": 9805 }, { "epoch": 0.305464736104624, "grad_norm": 2.099163770675659, "learning_rate": 3.6556425972663805e-06, "loss": 0.743, "step": 9810 }, { "epoch": 0.3056204265919352, "grad_norm": 1.9956165552139282, "learning_rate": 3.6548231669343478e-06, "loss": 0.7939, "step": 9815 }, { "epoch": 0.30577611707924646, "grad_norm": 2.038628578186035, "learning_rate": 3.6540037366023146e-06, "loss": 0.7893, "step": 9820 }, { "epoch": 0.3059318075665577, "grad_norm": 2.0673296451568604, "learning_rate": 3.653184306270281e-06, "loss": 0.786, "step": 9825 }, { "epoch": 0.3060874980538689, "grad_norm": 1.9249213933944702, "learning_rate": 3.652364875938248e-06, "loss": 0.7801, "step": 9830 }, { "epoch": 0.30624318854118016, "grad_norm": 2.4273176193237305, "learning_rate": 3.651545445606215e-06, "loss": 0.828, "step": 9835 }, { "epoch": 0.30639887902849133, "grad_norm": 2.2270445823669434, "learning_rate": 3.650726015274182e-06, "loss": 0.7485, "step": 9840 }, { "epoch": 0.30655456951580257, "grad_norm": 2.050851583480835, "learning_rate": 3.649906584942148e-06, "loss": 0.8084, "step": 9845 }, { "epoch": 0.3067102600031138, "grad_norm": 2.013960599899292, "learning_rate": 3.6490871546101154e-06, "loss": 0.8325, "step": 9850 }, { "epoch": 0.30686595049042503, "grad_norm": 1.9259347915649414, "learning_rate": 3.6482677242780823e-06, "loss": 0.8312, "step": 9855 }, { "epoch": 0.30702164097773627, "grad_norm": 2.3272783756256104, "learning_rate": 3.647448293946049e-06, "loss": 0.8339, "step": 9860 }, { "epoch": 0.3071773314650475, "grad_norm": 2.6997389793395996, "learning_rate": 3.6466288636140156e-06, "loss": 0.8525, "step": 9865 }, { "epoch": 0.30733302195235873, "grad_norm": 2.3030917644500732, "learning_rate": 3.6458094332819825e-06, "loss": 0.8409, "step": 9870 }, { "epoch": 0.3074887124396699, "grad_norm": 2.7194409370422363, "learning_rate": 3.6449900029499498e-06, "loss": 0.8323, "step": 9875 }, { "epoch": 0.30764440292698114, "grad_norm": 2.082319974899292, "learning_rate": 3.644170572617916e-06, "loss": 0.7444, "step": 9880 }, { "epoch": 0.3078000934142924, "grad_norm": 2.349153995513916, "learning_rate": 3.643351142285883e-06, "loss": 0.7813, "step": 9885 }, { "epoch": 0.3079557839016036, "grad_norm": 2.2672438621520996, "learning_rate": 3.64253171195385e-06, "loss": 0.8104, "step": 9890 }, { "epoch": 0.30811147438891484, "grad_norm": 1.9378798007965088, "learning_rate": 3.641712281621817e-06, "loss": 0.8942, "step": 9895 }, { "epoch": 0.3082671648762261, "grad_norm": 2.3622164726257324, "learning_rate": 3.6408928512897833e-06, "loss": 0.9988, "step": 9900 }, { "epoch": 0.3084228553635373, "grad_norm": 2.0483829975128174, "learning_rate": 3.6400734209577506e-06, "loss": 0.8223, "step": 9905 }, { "epoch": 0.30857854585084854, "grad_norm": 2.306988000869751, "learning_rate": 3.6392539906257174e-06, "loss": 0.8408, "step": 9910 }, { "epoch": 0.3087342363381597, "grad_norm": 2.0905094146728516, "learning_rate": 3.6384345602936843e-06, "loss": 0.8296, "step": 9915 }, { "epoch": 0.30888992682547095, "grad_norm": 1.9906445741653442, "learning_rate": 3.6376151299616507e-06, "loss": 0.8059, "step": 9920 }, { "epoch": 0.3090456173127822, "grad_norm": 2.6415929794311523, "learning_rate": 3.6367956996296176e-06, "loss": 0.8844, "step": 9925 }, { "epoch": 0.3092013078000934, "grad_norm": 2.6061880588531494, "learning_rate": 3.635976269297585e-06, "loss": 0.9304, "step": 9930 }, { "epoch": 0.30935699828740465, "grad_norm": 2.1301209926605225, "learning_rate": 3.6351568389655518e-06, "loss": 0.7812, "step": 9935 }, { "epoch": 0.3095126887747159, "grad_norm": 2.4712839126586914, "learning_rate": 3.6343374086335182e-06, "loss": 0.7359, "step": 9940 }, { "epoch": 0.3096683792620271, "grad_norm": 2.498213768005371, "learning_rate": 3.633517978301485e-06, "loss": 0.8375, "step": 9945 }, { "epoch": 0.3098240697493383, "grad_norm": 2.6188247203826904, "learning_rate": 3.632698547969452e-06, "loss": 0.8101, "step": 9950 }, { "epoch": 0.3099797602366495, "grad_norm": 2.5004520416259766, "learning_rate": 3.6318791176374184e-06, "loss": 0.8094, "step": 9955 }, { "epoch": 0.31013545072396076, "grad_norm": 2.125661611557007, "learning_rate": 3.6310596873053857e-06, "loss": 0.9098, "step": 9960 }, { "epoch": 0.310291141211272, "grad_norm": 2.1470861434936523, "learning_rate": 3.6302402569733526e-06, "loss": 0.7904, "step": 9965 }, { "epoch": 0.3104468316985832, "grad_norm": 2.321880340576172, "learning_rate": 3.6294208266413194e-06, "loss": 0.7794, "step": 9970 }, { "epoch": 0.31060252218589446, "grad_norm": 2.0833187103271484, "learning_rate": 3.628601396309286e-06, "loss": 0.8043, "step": 9975 }, { "epoch": 0.3107582126732057, "grad_norm": 2.481963634490967, "learning_rate": 3.6277819659772528e-06, "loss": 0.8034, "step": 9980 }, { "epoch": 0.31091390316051687, "grad_norm": 2.3676412105560303, "learning_rate": 3.6269625356452196e-06, "loss": 0.8381, "step": 9985 }, { "epoch": 0.3110695936478281, "grad_norm": 2.2044601440429688, "learning_rate": 3.626143105313187e-06, "loss": 0.8122, "step": 9990 }, { "epoch": 0.31122528413513934, "grad_norm": 2.3047900199890137, "learning_rate": 3.6253236749811534e-06, "loss": 0.8105, "step": 9995 }, { "epoch": 0.31138097462245057, "grad_norm": 1.7496418952941895, "learning_rate": 3.6245042446491202e-06, "loss": 0.8089, "step": 10000 }, { "epoch": 0.3115366651097618, "grad_norm": 2.4471547603607178, "learning_rate": 3.623684814317087e-06, "loss": 0.8271, "step": 10005 }, { "epoch": 0.31169235559707303, "grad_norm": 1.8745688199996948, "learning_rate": 3.622865383985054e-06, "loss": 0.9111, "step": 10010 }, { "epoch": 0.31184804608438427, "grad_norm": 2.1051316261291504, "learning_rate": 3.6220459536530204e-06, "loss": 0.7776, "step": 10015 }, { "epoch": 0.31200373657169544, "grad_norm": 1.8703484535217285, "learning_rate": 3.6212265233209877e-06, "loss": 0.8338, "step": 10020 }, { "epoch": 0.3121594270590067, "grad_norm": 1.9315778017044067, "learning_rate": 3.6204070929889546e-06, "loss": 0.8334, "step": 10025 }, { "epoch": 0.3123151175463179, "grad_norm": 1.9939898252487183, "learning_rate": 3.619587662656921e-06, "loss": 0.8553, "step": 10030 }, { "epoch": 0.31247080803362914, "grad_norm": 2.25555419921875, "learning_rate": 3.618768232324888e-06, "loss": 0.8597, "step": 10035 }, { "epoch": 0.3126264985209404, "grad_norm": 2.3278489112854004, "learning_rate": 3.6179488019928548e-06, "loss": 0.8424, "step": 10040 }, { "epoch": 0.3127821890082516, "grad_norm": 2.336583375930786, "learning_rate": 3.617129371660822e-06, "loss": 0.7891, "step": 10045 }, { "epoch": 0.31293787949556284, "grad_norm": 2.1796765327453613, "learning_rate": 3.6163099413287885e-06, "loss": 0.7381, "step": 10050 }, { "epoch": 0.313093569982874, "grad_norm": 2.5595972537994385, "learning_rate": 3.6154905109967554e-06, "loss": 0.822, "step": 10055 }, { "epoch": 0.31324926047018525, "grad_norm": 2.232551097869873, "learning_rate": 3.6146710806647222e-06, "loss": 0.7598, "step": 10060 }, { "epoch": 0.3134049509574965, "grad_norm": 2.6108107566833496, "learning_rate": 3.613851650332689e-06, "loss": 0.8156, "step": 10065 }, { "epoch": 0.3135606414448077, "grad_norm": 2.50107479095459, "learning_rate": 3.6130322200006556e-06, "loss": 0.8256, "step": 10070 }, { "epoch": 0.31371633193211895, "grad_norm": 2.3034019470214844, "learning_rate": 3.612212789668623e-06, "loss": 0.8237, "step": 10075 }, { "epoch": 0.3138720224194302, "grad_norm": 2.068666458129883, "learning_rate": 3.6113933593365897e-06, "loss": 0.8397, "step": 10080 }, { "epoch": 0.3140277129067414, "grad_norm": 2.0493478775024414, "learning_rate": 3.610573929004556e-06, "loss": 0.8102, "step": 10085 }, { "epoch": 0.3141834033940526, "grad_norm": 2.0678489208221436, "learning_rate": 3.609754498672523e-06, "loss": 0.8316, "step": 10090 }, { "epoch": 0.31433909388136383, "grad_norm": 2.393348217010498, "learning_rate": 3.60893506834049e-06, "loss": 0.8537, "step": 10095 }, { "epoch": 0.31449478436867506, "grad_norm": 2.099565029144287, "learning_rate": 3.608115638008457e-06, "loss": 0.8383, "step": 10100 }, { "epoch": 0.3146504748559863, "grad_norm": 1.8435999155044556, "learning_rate": 3.6072962076764232e-06, "loss": 0.8121, "step": 10105 }, { "epoch": 0.31480616534329753, "grad_norm": 2.14517879486084, "learning_rate": 3.6064767773443905e-06, "loss": 0.7626, "step": 10110 }, { "epoch": 0.31496185583060876, "grad_norm": 1.984793782234192, "learning_rate": 3.6056573470123574e-06, "loss": 0.7992, "step": 10115 }, { "epoch": 0.31511754631792, "grad_norm": 2.1368699073791504, "learning_rate": 3.6048379166803243e-06, "loss": 0.8384, "step": 10120 }, { "epoch": 0.3152732368052312, "grad_norm": 2.069117307662964, "learning_rate": 3.6040184863482907e-06, "loss": 0.7856, "step": 10125 }, { "epoch": 0.3154289272925424, "grad_norm": 2.1201298236846924, "learning_rate": 3.6031990560162576e-06, "loss": 0.8392, "step": 10130 }, { "epoch": 0.31558461777985364, "grad_norm": 2.060596227645874, "learning_rate": 3.602379625684225e-06, "loss": 0.8399, "step": 10135 }, { "epoch": 0.31574030826716487, "grad_norm": 2.4200665950775146, "learning_rate": 3.6015601953521917e-06, "loss": 0.8426, "step": 10140 }, { "epoch": 0.3158959987544761, "grad_norm": 2.131847381591797, "learning_rate": 3.600740765020158e-06, "loss": 0.834, "step": 10145 }, { "epoch": 0.31605168924178734, "grad_norm": 2.3326120376586914, "learning_rate": 3.599921334688125e-06, "loss": 0.7781, "step": 10150 }, { "epoch": 0.31620737972909857, "grad_norm": 2.0909433364868164, "learning_rate": 3.599101904356092e-06, "loss": 0.8128, "step": 10155 }, { "epoch": 0.3163630702164098, "grad_norm": 1.9694315195083618, "learning_rate": 3.5982824740240584e-06, "loss": 0.7938, "step": 10160 }, { "epoch": 0.316518760703721, "grad_norm": 1.9234870672225952, "learning_rate": 3.5974630436920257e-06, "loss": 0.8206, "step": 10165 }, { "epoch": 0.3166744511910322, "grad_norm": 1.863610029220581, "learning_rate": 3.5966436133599925e-06, "loss": 0.7755, "step": 10170 }, { "epoch": 0.31683014167834345, "grad_norm": 2.162919282913208, "learning_rate": 3.5958241830279594e-06, "loss": 0.843, "step": 10175 }, { "epoch": 0.3169858321656547, "grad_norm": 2.4943926334381104, "learning_rate": 3.595004752695926e-06, "loss": 0.8216, "step": 10180 }, { "epoch": 0.3171415226529659, "grad_norm": 2.5486676692962646, "learning_rate": 3.5941853223638927e-06, "loss": 0.7527, "step": 10185 }, { "epoch": 0.31729721314027715, "grad_norm": 2.1736342906951904, "learning_rate": 3.59336589203186e-06, "loss": 0.8352, "step": 10190 }, { "epoch": 0.3174529036275884, "grad_norm": 2.2775564193725586, "learning_rate": 3.592546461699827e-06, "loss": 0.7015, "step": 10195 }, { "epoch": 0.31760859411489956, "grad_norm": 2.4581832885742188, "learning_rate": 3.5917270313677933e-06, "loss": 0.8386, "step": 10200 }, { "epoch": 0.3177642846022108, "grad_norm": 2.16003680229187, "learning_rate": 3.59090760103576e-06, "loss": 0.8454, "step": 10205 }, { "epoch": 0.317919975089522, "grad_norm": 2.1523287296295166, "learning_rate": 3.590088170703727e-06, "loss": 0.7726, "step": 10210 }, { "epoch": 0.31807566557683326, "grad_norm": 2.137249708175659, "learning_rate": 3.5892687403716943e-06, "loss": 0.8236, "step": 10215 }, { "epoch": 0.3182313560641445, "grad_norm": 2.111661911010742, "learning_rate": 3.5884493100396604e-06, "loss": 0.8264, "step": 10220 }, { "epoch": 0.3183870465514557, "grad_norm": 2.0184009075164795, "learning_rate": 3.5876298797076277e-06, "loss": 0.8607, "step": 10225 }, { "epoch": 0.31854273703876695, "grad_norm": 2.158478021621704, "learning_rate": 3.5868104493755945e-06, "loss": 0.8495, "step": 10230 }, { "epoch": 0.31869842752607813, "grad_norm": 2.758975028991699, "learning_rate": 3.585991019043561e-06, "loss": 0.8113, "step": 10235 }, { "epoch": 0.31885411801338936, "grad_norm": 2.917931318283081, "learning_rate": 3.585171588711528e-06, "loss": 0.8279, "step": 10240 }, { "epoch": 0.3190098085007006, "grad_norm": 2.0053141117095947, "learning_rate": 3.5843521583794947e-06, "loss": 0.7907, "step": 10245 }, { "epoch": 0.31916549898801183, "grad_norm": 1.856824517250061, "learning_rate": 3.583532728047462e-06, "loss": 0.8268, "step": 10250 }, { "epoch": 0.31932118947532306, "grad_norm": 2.303931713104248, "learning_rate": 3.5827132977154285e-06, "loss": 0.8023, "step": 10255 }, { "epoch": 0.3194768799626343, "grad_norm": 1.854084849357605, "learning_rate": 3.5818938673833953e-06, "loss": 0.7889, "step": 10260 }, { "epoch": 0.31963257044994553, "grad_norm": 2.0147266387939453, "learning_rate": 3.581074437051362e-06, "loss": 0.8483, "step": 10265 }, { "epoch": 0.3197882609372567, "grad_norm": 2.5774543285369873, "learning_rate": 3.580255006719329e-06, "loss": 0.8471, "step": 10270 }, { "epoch": 0.31994395142456794, "grad_norm": 2.140476703643799, "learning_rate": 3.5794355763872955e-06, "loss": 0.8471, "step": 10275 }, { "epoch": 0.3200996419118792, "grad_norm": 2.117168426513672, "learning_rate": 3.578616146055263e-06, "loss": 0.8949, "step": 10280 }, { "epoch": 0.3202553323991904, "grad_norm": 2.013580322265625, "learning_rate": 3.5777967157232297e-06, "loss": 0.7569, "step": 10285 }, { "epoch": 0.32041102288650164, "grad_norm": 1.7841801643371582, "learning_rate": 3.5769772853911965e-06, "loss": 0.8065, "step": 10290 }, { "epoch": 0.3205667133738129, "grad_norm": 2.388169050216675, "learning_rate": 3.576157855059163e-06, "loss": 0.8408, "step": 10295 }, { "epoch": 0.3207224038611241, "grad_norm": 2.230550765991211, "learning_rate": 3.57533842472713e-06, "loss": 0.7551, "step": 10300 }, { "epoch": 0.3208780943484353, "grad_norm": 2.0785486698150635, "learning_rate": 3.574518994395097e-06, "loss": 0.8089, "step": 10305 }, { "epoch": 0.3210337848357465, "grad_norm": 2.394530773162842, "learning_rate": 3.5736995640630636e-06, "loss": 0.8611, "step": 10310 }, { "epoch": 0.32118947532305775, "grad_norm": 1.9237080812454224, "learning_rate": 3.5728801337310305e-06, "loss": 0.7534, "step": 10315 }, { "epoch": 0.321345165810369, "grad_norm": 1.9405070543289185, "learning_rate": 3.5720607033989973e-06, "loss": 0.7763, "step": 10320 }, { "epoch": 0.3215008562976802, "grad_norm": 2.2885324954986572, "learning_rate": 3.571241273066964e-06, "loss": 0.8417, "step": 10325 }, { "epoch": 0.32165654678499145, "grad_norm": 2.208047389984131, "learning_rate": 3.5704218427349307e-06, "loss": 0.8246, "step": 10330 }, { "epoch": 0.3218122372723027, "grad_norm": 1.9755587577819824, "learning_rate": 3.5696024124028975e-06, "loss": 0.777, "step": 10335 }, { "epoch": 0.3219679277596139, "grad_norm": 2.2543227672576904, "learning_rate": 3.568782982070865e-06, "loss": 0.7748, "step": 10340 }, { "epoch": 0.3221236182469251, "grad_norm": 2.0768070220947266, "learning_rate": 3.5679635517388317e-06, "loss": 0.7842, "step": 10345 }, { "epoch": 0.3222793087342363, "grad_norm": 1.9848986864089966, "learning_rate": 3.567144121406798e-06, "loss": 0.781, "step": 10350 }, { "epoch": 0.32243499922154756, "grad_norm": 2.163912296295166, "learning_rate": 3.566324691074765e-06, "loss": 0.7724, "step": 10355 }, { "epoch": 0.3225906897088588, "grad_norm": 2.0854361057281494, "learning_rate": 3.565505260742732e-06, "loss": 0.8828, "step": 10360 }, { "epoch": 0.32274638019617, "grad_norm": 2.1601157188415527, "learning_rate": 3.564685830410699e-06, "loss": 0.8387, "step": 10365 }, { "epoch": 0.32290207068348126, "grad_norm": 2.0546023845672607, "learning_rate": 3.5638664000786656e-06, "loss": 0.8152, "step": 10370 }, { "epoch": 0.3230577611707925, "grad_norm": 2.142986536026001, "learning_rate": 3.5630469697466325e-06, "loss": 0.8796, "step": 10375 }, { "epoch": 0.32321345165810367, "grad_norm": 2.404796600341797, "learning_rate": 3.5622275394145993e-06, "loss": 0.8914, "step": 10380 }, { "epoch": 0.3233691421454149, "grad_norm": 2.3505327701568604, "learning_rate": 3.561408109082566e-06, "loss": 0.7795, "step": 10385 }, { "epoch": 0.32352483263272613, "grad_norm": 2.5659332275390625, "learning_rate": 3.5605886787505327e-06, "loss": 0.7803, "step": 10390 }, { "epoch": 0.32368052312003737, "grad_norm": 1.9990476369857788, "learning_rate": 3.5597692484185e-06, "loss": 0.7016, "step": 10395 }, { "epoch": 0.3238362136073486, "grad_norm": 1.9971007108688354, "learning_rate": 3.558949818086467e-06, "loss": 0.8015, "step": 10400 }, { "epoch": 0.32399190409465983, "grad_norm": 1.835050106048584, "learning_rate": 3.5581303877544333e-06, "loss": 0.7598, "step": 10405 }, { "epoch": 0.32414759458197107, "grad_norm": 2.117694616317749, "learning_rate": 3.5573109574224e-06, "loss": 0.8203, "step": 10410 }, { "epoch": 0.32430328506928224, "grad_norm": 2.283860683441162, "learning_rate": 3.556491527090367e-06, "loss": 0.8721, "step": 10415 }, { "epoch": 0.3244589755565935, "grad_norm": 1.940263271331787, "learning_rate": 3.5556720967583343e-06, "loss": 0.8496, "step": 10420 }, { "epoch": 0.3246146660439047, "grad_norm": 2.887402057647705, "learning_rate": 3.5548526664263008e-06, "loss": 0.8326, "step": 10425 }, { "epoch": 0.32477035653121594, "grad_norm": 2.0379199981689453, "learning_rate": 3.5540332360942676e-06, "loss": 0.7748, "step": 10430 }, { "epoch": 0.3249260470185272, "grad_norm": 2.54207706451416, "learning_rate": 3.5532138057622345e-06, "loss": 0.9041, "step": 10435 }, { "epoch": 0.3250817375058384, "grad_norm": 2.179654598236084, "learning_rate": 3.5523943754302014e-06, "loss": 0.77, "step": 10440 }, { "epoch": 0.32523742799314964, "grad_norm": 1.9245054721832275, "learning_rate": 3.551574945098168e-06, "loss": 0.8103, "step": 10445 }, { "epoch": 0.3253931184804608, "grad_norm": 3.2172820568084717, "learning_rate": 3.550755514766135e-06, "loss": 0.8196, "step": 10450 }, { "epoch": 0.32554880896777205, "grad_norm": 2.031177282333374, "learning_rate": 3.549936084434102e-06, "loss": 0.8001, "step": 10455 }, { "epoch": 0.3257044994550833, "grad_norm": 1.875224232673645, "learning_rate": 3.5491166541020684e-06, "loss": 0.7996, "step": 10460 }, { "epoch": 0.3258601899423945, "grad_norm": 2.307318687438965, "learning_rate": 3.5482972237700353e-06, "loss": 0.7681, "step": 10465 }, { "epoch": 0.32601588042970575, "grad_norm": 2.285524845123291, "learning_rate": 3.547477793438002e-06, "loss": 0.8096, "step": 10470 }, { "epoch": 0.326171570917017, "grad_norm": 1.8206688165664673, "learning_rate": 3.546658363105969e-06, "loss": 0.8588, "step": 10475 }, { "epoch": 0.3263272614043282, "grad_norm": 1.7606775760650635, "learning_rate": 3.5458389327739355e-06, "loss": 0.8058, "step": 10480 }, { "epoch": 0.3264829518916394, "grad_norm": 2.1256935596466064, "learning_rate": 3.5450195024419028e-06, "loss": 0.7826, "step": 10485 }, { "epoch": 0.3266386423789506, "grad_norm": 2.3529205322265625, "learning_rate": 3.5442000721098696e-06, "loss": 0.8075, "step": 10490 }, { "epoch": 0.32679433286626186, "grad_norm": 2.604861259460449, "learning_rate": 3.5433806417778365e-06, "loss": 0.8959, "step": 10495 }, { "epoch": 0.3269500233535731, "grad_norm": 1.7520031929016113, "learning_rate": 3.542561211445803e-06, "loss": 0.8491, "step": 10500 }, { "epoch": 0.3271057138408843, "grad_norm": 2.0044350624084473, "learning_rate": 3.54174178111377e-06, "loss": 0.7456, "step": 10505 }, { "epoch": 0.32726140432819556, "grad_norm": 2.359410524368286, "learning_rate": 3.540922350781737e-06, "loss": 0.7946, "step": 10510 }, { "epoch": 0.3274170948155068, "grad_norm": 2.3047170639038086, "learning_rate": 3.5401029204497036e-06, "loss": 0.8474, "step": 10515 }, { "epoch": 0.32757278530281797, "grad_norm": 3.0044825077056885, "learning_rate": 3.5392834901176704e-06, "loss": 0.864, "step": 10520 }, { "epoch": 0.3277284757901292, "grad_norm": 1.8123804330825806, "learning_rate": 3.5384640597856373e-06, "loss": 0.8532, "step": 10525 }, { "epoch": 0.32788416627744044, "grad_norm": 2.215425968170166, "learning_rate": 3.537644629453604e-06, "loss": 0.8645, "step": 10530 }, { "epoch": 0.32803985676475167, "grad_norm": 2.4048848152160645, "learning_rate": 3.5368251991215706e-06, "loss": 0.8256, "step": 10535 }, { "epoch": 0.3281955472520629, "grad_norm": 1.9972492456436157, "learning_rate": 3.536005768789538e-06, "loss": 0.8559, "step": 10540 }, { "epoch": 0.32835123773937414, "grad_norm": 2.0519914627075195, "learning_rate": 3.5351863384575048e-06, "loss": 0.7548, "step": 10545 }, { "epoch": 0.32850692822668537, "grad_norm": 1.9906408786773682, "learning_rate": 3.5343669081254716e-06, "loss": 0.7673, "step": 10550 }, { "epoch": 0.3286626187139966, "grad_norm": 2.316880226135254, "learning_rate": 3.533547477793438e-06, "loss": 0.8277, "step": 10555 }, { "epoch": 0.3288183092013078, "grad_norm": 2.809948682785034, "learning_rate": 3.532728047461405e-06, "loss": 0.8781, "step": 10560 }, { "epoch": 0.328973999688619, "grad_norm": 2.300095558166504, "learning_rate": 3.5319086171293723e-06, "loss": 0.7382, "step": 10565 }, { "epoch": 0.32912969017593025, "grad_norm": 2.3239083290100098, "learning_rate": 3.531089186797339e-06, "loss": 0.8636, "step": 10570 }, { "epoch": 0.3292853806632415, "grad_norm": 1.7624084949493408, "learning_rate": 3.5302697564653056e-06, "loss": 0.8196, "step": 10575 }, { "epoch": 0.3294410711505527, "grad_norm": 2.1292941570281982, "learning_rate": 3.5294503261332724e-06, "loss": 0.7811, "step": 10580 }, { "epoch": 0.32959676163786394, "grad_norm": 2.142068862915039, "learning_rate": 3.5286308958012393e-06, "loss": 0.7589, "step": 10585 }, { "epoch": 0.3297524521251752, "grad_norm": 3.154853343963623, "learning_rate": 3.5278114654692058e-06, "loss": 0.8839, "step": 10590 }, { "epoch": 0.32990814261248635, "grad_norm": 2.3330304622650146, "learning_rate": 3.5269920351371726e-06, "loss": 0.7992, "step": 10595 }, { "epoch": 0.3300638330997976, "grad_norm": 2.0810434818267822, "learning_rate": 3.52617260480514e-06, "loss": 0.7808, "step": 10600 }, { "epoch": 0.3302195235871088, "grad_norm": 1.8621824979782104, "learning_rate": 3.5253531744731068e-06, "loss": 0.7238, "step": 10605 }, { "epoch": 0.33037521407442005, "grad_norm": 2.057253360748291, "learning_rate": 3.5245337441410732e-06, "loss": 0.7347, "step": 10610 }, { "epoch": 0.3305309045617313, "grad_norm": 2.1704349517822266, "learning_rate": 3.52371431380904e-06, "loss": 0.73, "step": 10615 }, { "epoch": 0.3306865950490425, "grad_norm": 1.9989922046661377, "learning_rate": 3.522894883477007e-06, "loss": 0.8754, "step": 10620 }, { "epoch": 0.33084228553635375, "grad_norm": 2.2577619552612305, "learning_rate": 3.5220754531449743e-06, "loss": 0.7718, "step": 10625 }, { "epoch": 0.33099797602366493, "grad_norm": 2.2878005504608154, "learning_rate": 3.5212560228129407e-06, "loss": 0.7543, "step": 10630 }, { "epoch": 0.33115366651097616, "grad_norm": 2.320361852645874, "learning_rate": 3.5204365924809076e-06, "loss": 0.8797, "step": 10635 }, { "epoch": 0.3313093569982874, "grad_norm": 2.262129545211792, "learning_rate": 3.5196171621488744e-06, "loss": 0.8073, "step": 10640 }, { "epoch": 0.33146504748559863, "grad_norm": 1.7809191942214966, "learning_rate": 3.5187977318168413e-06, "loss": 0.8248, "step": 10645 }, { "epoch": 0.33162073797290986, "grad_norm": 2.0001087188720703, "learning_rate": 3.5179783014848078e-06, "loss": 0.7873, "step": 10650 }, { "epoch": 0.3317764284602211, "grad_norm": 2.4939351081848145, "learning_rate": 3.517158871152775e-06, "loss": 0.8075, "step": 10655 }, { "epoch": 0.33193211894753233, "grad_norm": 2.897261142730713, "learning_rate": 3.516339440820742e-06, "loss": 0.8338, "step": 10660 }, { "epoch": 0.3320878094348435, "grad_norm": 1.9421519041061401, "learning_rate": 3.5155200104887084e-06, "loss": 0.8647, "step": 10665 }, { "epoch": 0.33224349992215474, "grad_norm": 2.700812339782715, "learning_rate": 3.5147005801566752e-06, "loss": 0.8273, "step": 10670 }, { "epoch": 0.33239919040946597, "grad_norm": 2.4779129028320312, "learning_rate": 3.513881149824642e-06, "loss": 0.794, "step": 10675 }, { "epoch": 0.3325548808967772, "grad_norm": 2.0844154357910156, "learning_rate": 3.5130617194926094e-06, "loss": 0.8527, "step": 10680 }, { "epoch": 0.33271057138408844, "grad_norm": 2.1641671657562256, "learning_rate": 3.5122422891605754e-06, "loss": 0.8623, "step": 10685 }, { "epoch": 0.33286626187139967, "grad_norm": 2.0269699096679688, "learning_rate": 3.5114228588285427e-06, "loss": 0.7439, "step": 10690 }, { "epoch": 0.3330219523587109, "grad_norm": 1.893662691116333, "learning_rate": 3.5106034284965096e-06, "loss": 0.7854, "step": 10695 }, { "epoch": 0.3331776428460221, "grad_norm": 2.2493629455566406, "learning_rate": 3.5097839981644765e-06, "loss": 0.7838, "step": 10700 }, { "epoch": 0.3333333333333333, "grad_norm": 2.466794967651367, "learning_rate": 3.508964567832443e-06, "loss": 0.8121, "step": 10705 }, { "epoch": 0.33348902382064455, "grad_norm": 1.806740403175354, "learning_rate": 3.5081451375004098e-06, "loss": 0.7793, "step": 10710 }, { "epoch": 0.3336447143079558, "grad_norm": 2.711190700531006, "learning_rate": 3.507325707168377e-06, "loss": 0.7738, "step": 10715 }, { "epoch": 0.333800404795267, "grad_norm": 2.431136131286621, "learning_rate": 3.506506276836344e-06, "loss": 0.7339, "step": 10720 }, { "epoch": 0.33395609528257825, "grad_norm": 2.1690194606781006, "learning_rate": 3.5056868465043104e-06, "loss": 0.8911, "step": 10725 }, { "epoch": 0.3341117857698895, "grad_norm": 2.352970838546753, "learning_rate": 3.5048674161722773e-06, "loss": 0.8321, "step": 10730 }, { "epoch": 0.33426747625720066, "grad_norm": 1.8741061687469482, "learning_rate": 3.504047985840244e-06, "loss": 0.8276, "step": 10735 }, { "epoch": 0.3344231667445119, "grad_norm": 1.9060108661651611, "learning_rate": 3.5032285555082106e-06, "loss": 0.8506, "step": 10740 }, { "epoch": 0.3345788572318231, "grad_norm": 2.03912091255188, "learning_rate": 3.502409125176178e-06, "loss": 0.8342, "step": 10745 }, { "epoch": 0.33473454771913436, "grad_norm": 2.036721706390381, "learning_rate": 3.5015896948441447e-06, "loss": 0.8051, "step": 10750 }, { "epoch": 0.3348902382064456, "grad_norm": 1.9955687522888184, "learning_rate": 3.5007702645121116e-06, "loss": 0.8262, "step": 10755 }, { "epoch": 0.3350459286937568, "grad_norm": 2.553379774093628, "learning_rate": 3.499950834180078e-06, "loss": 0.7564, "step": 10760 }, { "epoch": 0.33520161918106806, "grad_norm": 2.3542776107788086, "learning_rate": 3.499131403848045e-06, "loss": 0.8155, "step": 10765 }, { "epoch": 0.3353573096683793, "grad_norm": 1.7462449073791504, "learning_rate": 3.498311973516012e-06, "loss": 0.7512, "step": 10770 }, { "epoch": 0.33551300015569047, "grad_norm": 2.324857711791992, "learning_rate": 3.497492543183979e-06, "loss": 0.814, "step": 10775 }, { "epoch": 0.3356686906430017, "grad_norm": 2.078493118286133, "learning_rate": 3.4966731128519455e-06, "loss": 0.7479, "step": 10780 }, { "epoch": 0.33582438113031293, "grad_norm": 2.243496894836426, "learning_rate": 3.4958536825199124e-06, "loss": 0.8077, "step": 10785 }, { "epoch": 0.33598007161762417, "grad_norm": 2.129973888397217, "learning_rate": 3.4950342521878793e-06, "loss": 0.8492, "step": 10790 }, { "epoch": 0.3361357621049354, "grad_norm": 2.1845948696136475, "learning_rate": 3.4942148218558466e-06, "loss": 0.8041, "step": 10795 }, { "epoch": 0.33629145259224663, "grad_norm": 2.244553804397583, "learning_rate": 3.493395391523813e-06, "loss": 0.875, "step": 10800 }, { "epoch": 0.33644714307955786, "grad_norm": 1.8708151578903198, "learning_rate": 3.49257596119178e-06, "loss": 0.8008, "step": 10805 }, { "epoch": 0.33660283356686904, "grad_norm": 2.1838996410369873, "learning_rate": 3.4917565308597467e-06, "loss": 0.7427, "step": 10810 }, { "epoch": 0.3367585240541803, "grad_norm": 2.76155161857605, "learning_rate": 3.490937100527713e-06, "loss": 0.7538, "step": 10815 }, { "epoch": 0.3369142145414915, "grad_norm": 1.8864333629608154, "learning_rate": 3.49011767019568e-06, "loss": 0.7698, "step": 10820 }, { "epoch": 0.33706990502880274, "grad_norm": 2.0682644844055176, "learning_rate": 3.489298239863647e-06, "loss": 0.7776, "step": 10825 }, { "epoch": 0.337225595516114, "grad_norm": 2.2527034282684326, "learning_rate": 3.4884788095316142e-06, "loss": 0.8117, "step": 10830 }, { "epoch": 0.3373812860034252, "grad_norm": 2.3814733028411865, "learning_rate": 3.4876593791995807e-06, "loss": 0.8524, "step": 10835 }, { "epoch": 0.33753697649073644, "grad_norm": 2.5638089179992676, "learning_rate": 3.4868399488675475e-06, "loss": 0.7871, "step": 10840 }, { "epoch": 0.3376926669780476, "grad_norm": 2.415503740310669, "learning_rate": 3.4860205185355144e-06, "loss": 0.778, "step": 10845 }, { "epoch": 0.33784835746535885, "grad_norm": 2.041818141937256, "learning_rate": 3.4852010882034813e-06, "loss": 0.7732, "step": 10850 }, { "epoch": 0.3380040479526701, "grad_norm": 3.0252630710601807, "learning_rate": 3.4843816578714477e-06, "loss": 0.9243, "step": 10855 }, { "epoch": 0.3381597384399813, "grad_norm": 2.4616904258728027, "learning_rate": 3.483562227539415e-06, "loss": 0.8441, "step": 10860 }, { "epoch": 0.33831542892729255, "grad_norm": 1.9418234825134277, "learning_rate": 3.482742797207382e-06, "loss": 0.8371, "step": 10865 }, { "epoch": 0.3384711194146038, "grad_norm": 1.9870290756225586, "learning_rate": 3.4819233668753483e-06, "loss": 0.814, "step": 10870 }, { "epoch": 0.338626809901915, "grad_norm": 2.251129627227783, "learning_rate": 3.481103936543315e-06, "loss": 0.8122, "step": 10875 }, { "epoch": 0.3387825003892262, "grad_norm": 2.174529790878296, "learning_rate": 3.480284506211282e-06, "loss": 0.8158, "step": 10880 }, { "epoch": 0.3389381908765374, "grad_norm": 1.8124334812164307, "learning_rate": 3.4794650758792494e-06, "loss": 0.8027, "step": 10885 }, { "epoch": 0.33909388136384866, "grad_norm": 2.602018356323242, "learning_rate": 3.478645645547216e-06, "loss": 0.8203, "step": 10890 }, { "epoch": 0.3392495718511599, "grad_norm": 2.24841046333313, "learning_rate": 3.4778262152151827e-06, "loss": 0.8148, "step": 10895 }, { "epoch": 0.3394052623384711, "grad_norm": 1.6334220170974731, "learning_rate": 3.4770067848831495e-06, "loss": 0.753, "step": 10900 }, { "epoch": 0.33956095282578236, "grad_norm": 2.3519537448883057, "learning_rate": 3.4761873545511164e-06, "loss": 0.7938, "step": 10905 }, { "epoch": 0.3397166433130936, "grad_norm": 2.0920660495758057, "learning_rate": 3.475367924219083e-06, "loss": 0.787, "step": 10910 }, { "epoch": 0.33987233380040477, "grad_norm": 1.869102120399475, "learning_rate": 3.47454849388705e-06, "loss": 0.8092, "step": 10915 }, { "epoch": 0.340028024287716, "grad_norm": 2.0688560009002686, "learning_rate": 3.473729063555017e-06, "loss": 0.8421, "step": 10920 }, { "epoch": 0.34018371477502723, "grad_norm": 1.8107738494873047, "learning_rate": 3.472909633222984e-06, "loss": 0.7303, "step": 10925 }, { "epoch": 0.34033940526233847, "grad_norm": 2.5249805450439453, "learning_rate": 3.4720902028909503e-06, "loss": 0.8701, "step": 10930 }, { "epoch": 0.3404950957496497, "grad_norm": 1.8367732763290405, "learning_rate": 3.471270772558917e-06, "loss": 0.9371, "step": 10935 }, { "epoch": 0.34065078623696093, "grad_norm": 1.8700639009475708, "learning_rate": 3.4704513422268845e-06, "loss": 0.8422, "step": 10940 }, { "epoch": 0.34080647672427217, "grad_norm": 2.0589394569396973, "learning_rate": 3.4696319118948505e-06, "loss": 0.7976, "step": 10945 }, { "epoch": 0.34096216721158334, "grad_norm": 1.9861880540847778, "learning_rate": 3.468812481562818e-06, "loss": 0.7889, "step": 10950 }, { "epoch": 0.3411178576988946, "grad_norm": 1.7290558815002441, "learning_rate": 3.4679930512307847e-06, "loss": 0.9257, "step": 10955 }, { "epoch": 0.3412735481862058, "grad_norm": 2.2105162143707275, "learning_rate": 3.4671736208987516e-06, "loss": 0.7619, "step": 10960 }, { "epoch": 0.34142923867351704, "grad_norm": 2.141098976135254, "learning_rate": 3.466354190566718e-06, "loss": 0.7698, "step": 10965 }, { "epoch": 0.3415849291608283, "grad_norm": 2.005021572113037, "learning_rate": 3.465534760234685e-06, "loss": 0.7985, "step": 10970 }, { "epoch": 0.3417406196481395, "grad_norm": 2.0356290340423584, "learning_rate": 3.464715329902652e-06, "loss": 0.7801, "step": 10975 }, { "epoch": 0.34189631013545074, "grad_norm": 2.3624041080474854, "learning_rate": 3.463895899570619e-06, "loss": 0.7823, "step": 10980 }, { "epoch": 0.342052000622762, "grad_norm": 2.039722442626953, "learning_rate": 3.4630764692385855e-06, "loss": 0.8665, "step": 10985 }, { "epoch": 0.34220769111007315, "grad_norm": 2.3046369552612305, "learning_rate": 3.4622570389065523e-06, "loss": 0.8327, "step": 10990 }, { "epoch": 0.3423633815973844, "grad_norm": 2.0703985691070557, "learning_rate": 3.4614376085745192e-06, "loss": 0.7827, "step": 10995 }, { "epoch": 0.3425190720846956, "grad_norm": 2.013594150543213, "learning_rate": 3.4606181782424865e-06, "loss": 0.8191, "step": 11000 }, { "epoch": 0.34267476257200685, "grad_norm": 2.4861900806427, "learning_rate": 3.459798747910453e-06, "loss": 0.8641, "step": 11005 }, { "epoch": 0.3428304530593181, "grad_norm": 2.149782419204712, "learning_rate": 3.45897931757842e-06, "loss": 0.7903, "step": 11010 }, { "epoch": 0.3429861435466293, "grad_norm": 2.29890513420105, "learning_rate": 3.4581598872463867e-06, "loss": 0.771, "step": 11015 }, { "epoch": 0.34314183403394055, "grad_norm": 2.252154588699341, "learning_rate": 3.457340456914353e-06, "loss": 0.8011, "step": 11020 }, { "epoch": 0.34329752452125173, "grad_norm": 2.0843985080718994, "learning_rate": 3.45652102658232e-06, "loss": 0.7902, "step": 11025 }, { "epoch": 0.34345321500856296, "grad_norm": 2.245764970779419, "learning_rate": 3.4557015962502873e-06, "loss": 0.7797, "step": 11030 }, { "epoch": 0.3436089054958742, "grad_norm": 2.1560158729553223, "learning_rate": 3.454882165918254e-06, "loss": 0.8031, "step": 11035 }, { "epoch": 0.34376459598318543, "grad_norm": 2.152374505996704, "learning_rate": 3.4540627355862206e-06, "loss": 0.8537, "step": 11040 }, { "epoch": 0.34392028647049666, "grad_norm": 2.4454166889190674, "learning_rate": 3.4532433052541875e-06, "loss": 0.8508, "step": 11045 }, { "epoch": 0.3440759769578079, "grad_norm": 2.1429781913757324, "learning_rate": 3.4524238749221544e-06, "loss": 0.7954, "step": 11050 }, { "epoch": 0.3442316674451191, "grad_norm": 2.4228270053863525, "learning_rate": 3.4516044445901217e-06, "loss": 0.8308, "step": 11055 }, { "epoch": 0.3443873579324303, "grad_norm": 2.5099291801452637, "learning_rate": 3.4507850142580877e-06, "loss": 0.778, "step": 11060 }, { "epoch": 0.34454304841974154, "grad_norm": 2.5718934535980225, "learning_rate": 3.449965583926055e-06, "loss": 0.8184, "step": 11065 }, { "epoch": 0.34469873890705277, "grad_norm": 2.68961501121521, "learning_rate": 3.449146153594022e-06, "loss": 0.8662, "step": 11070 }, { "epoch": 0.344854429394364, "grad_norm": 2.045213460922241, "learning_rate": 3.4483267232619887e-06, "loss": 0.7577, "step": 11075 }, { "epoch": 0.34501011988167524, "grad_norm": 2.1788697242736816, "learning_rate": 3.447507292929955e-06, "loss": 0.7772, "step": 11080 }, { "epoch": 0.34516581036898647, "grad_norm": 1.9854917526245117, "learning_rate": 3.446687862597922e-06, "loss": 0.8793, "step": 11085 }, { "epoch": 0.3453215008562977, "grad_norm": 2.7239222526550293, "learning_rate": 3.4458684322658893e-06, "loss": 0.8386, "step": 11090 }, { "epoch": 0.3454771913436089, "grad_norm": 2.5228629112243652, "learning_rate": 3.4450490019338558e-06, "loss": 0.6925, "step": 11095 }, { "epoch": 0.3456328818309201, "grad_norm": 1.7672538757324219, "learning_rate": 3.4442295716018226e-06, "loss": 0.7357, "step": 11100 }, { "epoch": 0.34578857231823135, "grad_norm": 3.004814863204956, "learning_rate": 3.4434101412697895e-06, "loss": 0.77, "step": 11105 }, { "epoch": 0.3459442628055426, "grad_norm": 2.049859046936035, "learning_rate": 3.4425907109377564e-06, "loss": 0.7815, "step": 11110 }, { "epoch": 0.3460999532928538, "grad_norm": 2.486403703689575, "learning_rate": 3.441771280605723e-06, "loss": 0.9075, "step": 11115 }, { "epoch": 0.34625564378016505, "grad_norm": 2.1320695877075195, "learning_rate": 3.44095185027369e-06, "loss": 0.8311, "step": 11120 }, { "epoch": 0.3464113342674763, "grad_norm": 3.411667823791504, "learning_rate": 3.440132419941657e-06, "loss": 0.8091, "step": 11125 }, { "epoch": 0.34656702475478746, "grad_norm": 3.1619467735290527, "learning_rate": 3.439312989609624e-06, "loss": 0.8172, "step": 11130 }, { "epoch": 0.3467227152420987, "grad_norm": 3.0674474239349365, "learning_rate": 3.4384935592775903e-06, "loss": 0.849, "step": 11135 }, { "epoch": 0.3468784057294099, "grad_norm": 1.9319267272949219, "learning_rate": 3.437674128945557e-06, "loss": 0.8274, "step": 11140 }, { "epoch": 0.34703409621672116, "grad_norm": 2.0450265407562256, "learning_rate": 3.4368546986135245e-06, "loss": 0.7814, "step": 11145 }, { "epoch": 0.3471897867040324, "grad_norm": 1.7630400657653809, "learning_rate": 3.4360352682814913e-06, "loss": 0.7838, "step": 11150 }, { "epoch": 0.3473454771913436, "grad_norm": 2.4598429203033447, "learning_rate": 3.4352158379494578e-06, "loss": 0.803, "step": 11155 }, { "epoch": 0.34750116767865485, "grad_norm": 1.9678964614868164, "learning_rate": 3.4343964076174246e-06, "loss": 0.7856, "step": 11160 }, { "epoch": 0.34765685816596603, "grad_norm": 1.900251030921936, "learning_rate": 3.4335769772853915e-06, "loss": 0.7957, "step": 11165 }, { "epoch": 0.34781254865327726, "grad_norm": 2.17801833152771, "learning_rate": 3.432757546953358e-06, "loss": 0.8057, "step": 11170 }, { "epoch": 0.3479682391405885, "grad_norm": 1.9947242736816406, "learning_rate": 3.431938116621325e-06, "loss": 0.817, "step": 11175 }, { "epoch": 0.34812392962789973, "grad_norm": 2.2601592540740967, "learning_rate": 3.431118686289292e-06, "loss": 0.8137, "step": 11180 }, { "epoch": 0.34827962011521096, "grad_norm": 2.691236734390259, "learning_rate": 3.430299255957259e-06, "loss": 0.8518, "step": 11185 }, { "epoch": 0.3484353106025222, "grad_norm": 4.065630912780762, "learning_rate": 3.4294798256252254e-06, "loss": 0.8143, "step": 11190 }, { "epoch": 0.34859100108983343, "grad_norm": 2.0814058780670166, "learning_rate": 3.4286603952931923e-06, "loss": 0.7245, "step": 11195 }, { "epoch": 0.34874669157714466, "grad_norm": 2.0141234397888184, "learning_rate": 3.427840964961159e-06, "loss": 0.7576, "step": 11200 }, { "epoch": 0.34890238206445584, "grad_norm": 2.0667288303375244, "learning_rate": 3.4270215346291265e-06, "loss": 0.7798, "step": 11205 }, { "epoch": 0.3490580725517671, "grad_norm": 1.8493425846099854, "learning_rate": 3.426202104297093e-06, "loss": 0.7999, "step": 11210 }, { "epoch": 0.3492137630390783, "grad_norm": 2.4412922859191895, "learning_rate": 3.4253826739650598e-06, "loss": 0.8079, "step": 11215 }, { "epoch": 0.34936945352638954, "grad_norm": 2.214355945587158, "learning_rate": 3.4245632436330267e-06, "loss": 0.8657, "step": 11220 }, { "epoch": 0.3495251440137008, "grad_norm": 2.2975292205810547, "learning_rate": 3.4237438133009935e-06, "loss": 0.8332, "step": 11225 }, { "epoch": 0.349680834501012, "grad_norm": 2.0743556022644043, "learning_rate": 3.42292438296896e-06, "loss": 0.7687, "step": 11230 }, { "epoch": 0.34983652498832324, "grad_norm": 1.8554295301437378, "learning_rate": 3.4221049526369273e-06, "loss": 0.8318, "step": 11235 }, { "epoch": 0.3499922154756344, "grad_norm": 2.153111696243286, "learning_rate": 3.421285522304894e-06, "loss": 0.8624, "step": 11240 }, { "epoch": 0.35014790596294565, "grad_norm": 2.7328178882598877, "learning_rate": 3.4204660919728606e-06, "loss": 0.8871, "step": 11245 }, { "epoch": 0.3503035964502569, "grad_norm": 2.2299766540527344, "learning_rate": 3.4196466616408274e-06, "loss": 0.7718, "step": 11250 }, { "epoch": 0.3504592869375681, "grad_norm": 2.067863941192627, "learning_rate": 3.4188272313087943e-06, "loss": 0.7729, "step": 11255 }, { "epoch": 0.35061497742487935, "grad_norm": 2.1147894859313965, "learning_rate": 3.4180078009767616e-06, "loss": 0.754, "step": 11260 }, { "epoch": 0.3507706679121906, "grad_norm": 2.3486649990081787, "learning_rate": 3.417188370644728e-06, "loss": 0.7554, "step": 11265 }, { "epoch": 0.3509263583995018, "grad_norm": 2.025604009628296, "learning_rate": 3.416368940312695e-06, "loss": 0.8279, "step": 11270 }, { "epoch": 0.351082048886813, "grad_norm": 1.72270929813385, "learning_rate": 3.415549509980662e-06, "loss": 0.7379, "step": 11275 }, { "epoch": 0.3512377393741242, "grad_norm": 2.223379373550415, "learning_rate": 3.4147300796486287e-06, "loss": 0.8691, "step": 11280 }, { "epoch": 0.35139342986143546, "grad_norm": 2.3764472007751465, "learning_rate": 3.413910649316595e-06, "loss": 0.8006, "step": 11285 }, { "epoch": 0.3515491203487467, "grad_norm": 2.517911434173584, "learning_rate": 3.4130912189845624e-06, "loss": 0.735, "step": 11290 }, { "epoch": 0.3517048108360579, "grad_norm": 2.1197054386138916, "learning_rate": 3.4122717886525293e-06, "loss": 0.8273, "step": 11295 }, { "epoch": 0.35186050132336916, "grad_norm": 2.2029435634613037, "learning_rate": 3.4114523583204957e-06, "loss": 0.8174, "step": 11300 }, { "epoch": 0.3520161918106804, "grad_norm": 2.5016260147094727, "learning_rate": 3.4106329279884626e-06, "loss": 0.8135, "step": 11305 }, { "epoch": 0.35217188229799157, "grad_norm": 2.2262048721313477, "learning_rate": 3.4098134976564295e-06, "loss": 0.7972, "step": 11310 }, { "epoch": 0.3523275727853028, "grad_norm": 2.3445627689361572, "learning_rate": 3.4089940673243963e-06, "loss": 0.8179, "step": 11315 }, { "epoch": 0.35248326327261403, "grad_norm": 1.7742832899093628, "learning_rate": 3.4081746369923628e-06, "loss": 0.8049, "step": 11320 }, { "epoch": 0.35263895375992527, "grad_norm": 1.970386028289795, "learning_rate": 3.40735520666033e-06, "loss": 0.8376, "step": 11325 }, { "epoch": 0.3527946442472365, "grad_norm": 1.9638872146606445, "learning_rate": 3.406535776328297e-06, "loss": 0.7716, "step": 11330 }, { "epoch": 0.35295033473454773, "grad_norm": 1.7961838245391846, "learning_rate": 3.405716345996264e-06, "loss": 0.7423, "step": 11335 }, { "epoch": 0.35310602522185897, "grad_norm": 2.0625576972961426, "learning_rate": 3.4048969156642302e-06, "loss": 0.8539, "step": 11340 }, { "epoch": 0.35326171570917014, "grad_norm": 2.233635902404785, "learning_rate": 3.404077485332197e-06, "loss": 0.7811, "step": 11345 }, { "epoch": 0.3534174061964814, "grad_norm": 1.8487764596939087, "learning_rate": 3.4032580550001644e-06, "loss": 0.7628, "step": 11350 }, { "epoch": 0.3535730966837926, "grad_norm": 2.1016316413879395, "learning_rate": 3.4024386246681313e-06, "loss": 0.7603, "step": 11355 }, { "epoch": 0.35372878717110384, "grad_norm": 2.564016103744507, "learning_rate": 3.4016191943360977e-06, "loss": 0.8046, "step": 11360 }, { "epoch": 0.3538844776584151, "grad_norm": 2.3871865272521973, "learning_rate": 3.4007997640040646e-06, "loss": 0.8452, "step": 11365 }, { "epoch": 0.3540401681457263, "grad_norm": 2.1068942546844482, "learning_rate": 3.3999803336720315e-06, "loss": 0.8749, "step": 11370 }, { "epoch": 0.35419585863303754, "grad_norm": 1.7172279357910156, "learning_rate": 3.399160903339998e-06, "loss": 0.7745, "step": 11375 }, { "epoch": 0.3543515491203488, "grad_norm": 2.1470911502838135, "learning_rate": 3.398341473007965e-06, "loss": 0.8003, "step": 11380 }, { "epoch": 0.35450723960765995, "grad_norm": 1.9910612106323242, "learning_rate": 3.397522042675932e-06, "loss": 0.764, "step": 11385 }, { "epoch": 0.3546629300949712, "grad_norm": 1.9391525983810425, "learning_rate": 3.396702612343899e-06, "loss": 0.825, "step": 11390 }, { "epoch": 0.3548186205822824, "grad_norm": 2.1333673000335693, "learning_rate": 3.3958831820118654e-06, "loss": 0.8256, "step": 11395 }, { "epoch": 0.35497431106959365, "grad_norm": 2.019531011581421, "learning_rate": 3.3950637516798323e-06, "loss": 0.8515, "step": 11400 }, { "epoch": 0.3551300015569049, "grad_norm": 1.913193941116333, "learning_rate": 3.3942443213477996e-06, "loss": 0.8358, "step": 11405 }, { "epoch": 0.3552856920442161, "grad_norm": 1.9813499450683594, "learning_rate": 3.3934248910157664e-06, "loss": 0.8178, "step": 11410 }, { "epoch": 0.35544138253152735, "grad_norm": 2.0066545009613037, "learning_rate": 3.392605460683733e-06, "loss": 0.7236, "step": 11415 }, { "epoch": 0.3555970730188385, "grad_norm": 1.8532991409301758, "learning_rate": 3.3917860303516997e-06, "loss": 0.7276, "step": 11420 }, { "epoch": 0.35575276350614976, "grad_norm": 2.114351749420166, "learning_rate": 3.3909666000196666e-06, "loss": 0.8728, "step": 11425 }, { "epoch": 0.355908453993461, "grad_norm": 2.1462409496307373, "learning_rate": 3.3901471696876335e-06, "loss": 0.7654, "step": 11430 }, { "epoch": 0.3560641444807722, "grad_norm": 2.1651079654693604, "learning_rate": 3.3893277393556e-06, "loss": 0.8201, "step": 11435 }, { "epoch": 0.35621983496808346, "grad_norm": 2.048037052154541, "learning_rate": 3.3885083090235672e-06, "loss": 0.8341, "step": 11440 }, { "epoch": 0.3563755254553947, "grad_norm": 2.45245623588562, "learning_rate": 3.387688878691534e-06, "loss": 0.7667, "step": 11445 }, { "epoch": 0.3565312159427059, "grad_norm": 1.9931336641311646, "learning_rate": 3.3868694483595005e-06, "loss": 0.7527, "step": 11450 }, { "epoch": 0.3566869064300171, "grad_norm": 2.224912405014038, "learning_rate": 3.3860500180274674e-06, "loss": 0.8241, "step": 11455 }, { "epoch": 0.35684259691732834, "grad_norm": 2.1872310638427734, "learning_rate": 3.3852305876954343e-06, "loss": 0.7522, "step": 11460 }, { "epoch": 0.35699828740463957, "grad_norm": 1.8587095737457275, "learning_rate": 3.3844111573634016e-06, "loss": 0.76, "step": 11465 }, { "epoch": 0.3571539778919508, "grad_norm": 2.376906394958496, "learning_rate": 3.383591727031368e-06, "loss": 0.8521, "step": 11470 }, { "epoch": 0.35730966837926204, "grad_norm": 2.1127920150756836, "learning_rate": 3.382772296699335e-06, "loss": 0.8087, "step": 11475 }, { "epoch": 0.35746535886657327, "grad_norm": 1.936508059501648, "learning_rate": 3.3819528663673017e-06, "loss": 0.7926, "step": 11480 }, { "epoch": 0.3576210493538845, "grad_norm": 2.0324714183807373, "learning_rate": 3.3811334360352686e-06, "loss": 0.7637, "step": 11485 }, { "epoch": 0.3577767398411957, "grad_norm": 2.015150785446167, "learning_rate": 3.380314005703235e-06, "loss": 0.8186, "step": 11490 }, { "epoch": 0.3579324303285069, "grad_norm": 2.36008882522583, "learning_rate": 3.3794945753712024e-06, "loss": 0.8391, "step": 11495 }, { "epoch": 0.35808812081581815, "grad_norm": 2.2216646671295166, "learning_rate": 3.3786751450391692e-06, "loss": 0.7913, "step": 11500 }, { "epoch": 0.3582438113031294, "grad_norm": 2.212763786315918, "learning_rate": 3.377855714707136e-06, "loss": 0.7829, "step": 11505 }, { "epoch": 0.3583995017904406, "grad_norm": 2.1765849590301514, "learning_rate": 3.3770362843751025e-06, "loss": 0.7411, "step": 11510 }, { "epoch": 0.35855519227775184, "grad_norm": 2.032787561416626, "learning_rate": 3.3762168540430694e-06, "loss": 0.7318, "step": 11515 }, { "epoch": 0.3587108827650631, "grad_norm": 2.4344022274017334, "learning_rate": 3.3753974237110367e-06, "loss": 0.8173, "step": 11520 }, { "epoch": 0.35886657325237425, "grad_norm": 2.8679919242858887, "learning_rate": 3.3745779933790027e-06, "loss": 0.8543, "step": 11525 }, { "epoch": 0.3590222637396855, "grad_norm": 2.0397937297821045, "learning_rate": 3.37375856304697e-06, "loss": 0.78, "step": 11530 }, { "epoch": 0.3591779542269967, "grad_norm": 2.1337664127349854, "learning_rate": 3.372939132714937e-06, "loss": 0.8317, "step": 11535 }, { "epoch": 0.35933364471430795, "grad_norm": 2.3442389965057373, "learning_rate": 3.3721197023829038e-06, "loss": 0.7816, "step": 11540 }, { "epoch": 0.3594893352016192, "grad_norm": 1.7646821737289429, "learning_rate": 3.37130027205087e-06, "loss": 0.7723, "step": 11545 }, { "epoch": 0.3596450256889304, "grad_norm": 3.3756940364837646, "learning_rate": 3.370480841718837e-06, "loss": 0.7679, "step": 11550 }, { "epoch": 0.35980071617624165, "grad_norm": 2.009077787399292, "learning_rate": 3.3696614113868044e-06, "loss": 0.794, "step": 11555 }, { "epoch": 0.35995640666355283, "grad_norm": 1.9864920377731323, "learning_rate": 3.3688419810547712e-06, "loss": 0.7239, "step": 11560 }, { "epoch": 0.36011209715086406, "grad_norm": 1.996185541152954, "learning_rate": 3.3680225507227377e-06, "loss": 0.7989, "step": 11565 }, { "epoch": 0.3602677876381753, "grad_norm": 1.9319438934326172, "learning_rate": 3.3672031203907046e-06, "loss": 0.88, "step": 11570 }, { "epoch": 0.36042347812548653, "grad_norm": 1.9830034971237183, "learning_rate": 3.3663836900586714e-06, "loss": 0.8343, "step": 11575 }, { "epoch": 0.36057916861279776, "grad_norm": 2.412731170654297, "learning_rate": 3.3655642597266387e-06, "loss": 0.8641, "step": 11580 }, { "epoch": 0.360734859100109, "grad_norm": 1.7059993743896484, "learning_rate": 3.364744829394605e-06, "loss": 0.7545, "step": 11585 }, { "epoch": 0.36089054958742023, "grad_norm": 2.0653717517852783, "learning_rate": 3.363925399062572e-06, "loss": 0.7583, "step": 11590 }, { "epoch": 0.36104624007473146, "grad_norm": 1.8797532320022583, "learning_rate": 3.363105968730539e-06, "loss": 0.801, "step": 11595 }, { "epoch": 0.36120193056204264, "grad_norm": 2.7397260665893555, "learning_rate": 3.3622865383985053e-06, "loss": 0.8148, "step": 11600 }, { "epoch": 0.36135762104935387, "grad_norm": 3.0308876037597656, "learning_rate": 3.3614671080664722e-06, "loss": 0.8391, "step": 11605 }, { "epoch": 0.3615133115366651, "grad_norm": 1.7730613946914673, "learning_rate": 3.3606476777344395e-06, "loss": 0.795, "step": 11610 }, { "epoch": 0.36166900202397634, "grad_norm": 1.9365955591201782, "learning_rate": 3.3598282474024064e-06, "loss": 0.7972, "step": 11615 }, { "epoch": 0.36182469251128757, "grad_norm": 1.969569444656372, "learning_rate": 3.359008817070373e-06, "loss": 0.6893, "step": 11620 }, { "epoch": 0.3619803829985988, "grad_norm": 2.154012680053711, "learning_rate": 3.3581893867383397e-06, "loss": 0.8356, "step": 11625 }, { "epoch": 0.36213607348591004, "grad_norm": 2.12337064743042, "learning_rate": 3.3573699564063066e-06, "loss": 0.8011, "step": 11630 }, { "epoch": 0.3622917639732212, "grad_norm": 2.076167345046997, "learning_rate": 3.356550526074274e-06, "loss": 0.8659, "step": 11635 }, { "epoch": 0.36244745446053245, "grad_norm": 2.1812171936035156, "learning_rate": 3.3557310957422403e-06, "loss": 0.8032, "step": 11640 }, { "epoch": 0.3626031449478437, "grad_norm": 1.8987849950790405, "learning_rate": 3.354911665410207e-06, "loss": 0.8475, "step": 11645 }, { "epoch": 0.3627588354351549, "grad_norm": 1.852985143661499, "learning_rate": 3.354092235078174e-06, "loss": 0.9117, "step": 11650 }, { "epoch": 0.36291452592246615, "grad_norm": 2.1158041954040527, "learning_rate": 3.353272804746141e-06, "loss": 0.8373, "step": 11655 }, { "epoch": 0.3630702164097774, "grad_norm": 2.3903281688690186, "learning_rate": 3.3524533744141074e-06, "loss": 0.8471, "step": 11660 }, { "epoch": 0.3632259068970886, "grad_norm": 2.1278626918792725, "learning_rate": 3.3516339440820742e-06, "loss": 0.7932, "step": 11665 }, { "epoch": 0.3633815973843998, "grad_norm": 1.710691213607788, "learning_rate": 3.3508145137500415e-06, "loss": 0.8112, "step": 11670 }, { "epoch": 0.363537287871711, "grad_norm": 2.7342424392700195, "learning_rate": 3.349995083418008e-06, "loss": 0.7495, "step": 11675 }, { "epoch": 0.36369297835902226, "grad_norm": 2.0775790214538574, "learning_rate": 3.349175653085975e-06, "loss": 0.7973, "step": 11680 }, { "epoch": 0.3638486688463335, "grad_norm": 2.4621407985687256, "learning_rate": 3.3483562227539417e-06, "loss": 0.7911, "step": 11685 }, { "epoch": 0.3640043593336447, "grad_norm": 2.1944258213043213, "learning_rate": 3.3475367924219086e-06, "loss": 0.7836, "step": 11690 }, { "epoch": 0.36416004982095596, "grad_norm": 2.089184522628784, "learning_rate": 3.346717362089875e-06, "loss": 0.8711, "step": 11695 }, { "epoch": 0.3643157403082672, "grad_norm": 2.4727511405944824, "learning_rate": 3.3458979317578423e-06, "loss": 0.7733, "step": 11700 }, { "epoch": 0.36447143079557837, "grad_norm": 2.2274160385131836, "learning_rate": 3.345078501425809e-06, "loss": 0.8433, "step": 11705 }, { "epoch": 0.3646271212828896, "grad_norm": 1.8939409255981445, "learning_rate": 3.344259071093776e-06, "loss": 0.7611, "step": 11710 }, { "epoch": 0.36478281177020083, "grad_norm": 2.128046989440918, "learning_rate": 3.3434396407617425e-06, "loss": 0.8594, "step": 11715 }, { "epoch": 0.36493850225751207, "grad_norm": 2.3468995094299316, "learning_rate": 3.3426202104297094e-06, "loss": 0.7414, "step": 11720 }, { "epoch": 0.3650941927448233, "grad_norm": 2.024493932723999, "learning_rate": 3.3418007800976767e-06, "loss": 0.7436, "step": 11725 }, { "epoch": 0.36524988323213453, "grad_norm": 2.7996346950531006, "learning_rate": 3.340981349765643e-06, "loss": 0.8275, "step": 11730 }, { "epoch": 0.36540557371944576, "grad_norm": 2.1184334754943848, "learning_rate": 3.34016191943361e-06, "loss": 0.8389, "step": 11735 }, { "epoch": 0.36556126420675694, "grad_norm": 1.9216718673706055, "learning_rate": 3.339342489101577e-06, "loss": 0.7616, "step": 11740 }, { "epoch": 0.3657169546940682, "grad_norm": 2.043119192123413, "learning_rate": 3.3385230587695437e-06, "loss": 0.8004, "step": 11745 }, { "epoch": 0.3658726451813794, "grad_norm": 1.9472289085388184, "learning_rate": 3.33770362843751e-06, "loss": 0.7899, "step": 11750 }, { "epoch": 0.36602833566869064, "grad_norm": 1.9418368339538574, "learning_rate": 3.3368841981054775e-06, "loss": 0.7736, "step": 11755 }, { "epoch": 0.3661840261560019, "grad_norm": 2.0400986671447754, "learning_rate": 3.3360647677734443e-06, "loss": 0.7699, "step": 11760 }, { "epoch": 0.3663397166433131, "grad_norm": 2.3027596473693848, "learning_rate": 3.335245337441411e-06, "loss": 0.8601, "step": 11765 }, { "epoch": 0.36649540713062434, "grad_norm": 2.091459274291992, "learning_rate": 3.3344259071093776e-06, "loss": 0.7817, "step": 11770 }, { "epoch": 0.3666510976179355, "grad_norm": 2.2420716285705566, "learning_rate": 3.3336064767773445e-06, "loss": 0.8082, "step": 11775 }, { "epoch": 0.36680678810524675, "grad_norm": 2.139906167984009, "learning_rate": 3.332787046445312e-06, "loss": 0.8251, "step": 11780 }, { "epoch": 0.366962478592558, "grad_norm": 2.62746524810791, "learning_rate": 3.3319676161132787e-06, "loss": 0.8159, "step": 11785 }, { "epoch": 0.3671181690798692, "grad_norm": 2.4422714710235596, "learning_rate": 3.331148185781245e-06, "loss": 0.751, "step": 11790 }, { "epoch": 0.36727385956718045, "grad_norm": 2.316038131713867, "learning_rate": 3.330328755449212e-06, "loss": 0.8117, "step": 11795 }, { "epoch": 0.3674295500544917, "grad_norm": 2.587416648864746, "learning_rate": 3.329509325117179e-06, "loss": 0.8161, "step": 11800 }, { "epoch": 0.3675852405418029, "grad_norm": 2.312556743621826, "learning_rate": 3.3286898947851453e-06, "loss": 0.8061, "step": 11805 }, { "epoch": 0.36774093102911415, "grad_norm": 3.355590343475342, "learning_rate": 3.327870464453112e-06, "loss": 0.82, "step": 11810 }, { "epoch": 0.3678966215164253, "grad_norm": 2.0407955646514893, "learning_rate": 3.3270510341210795e-06, "loss": 0.8583, "step": 11815 }, { "epoch": 0.36805231200373656, "grad_norm": 2.1787431240081787, "learning_rate": 3.3262316037890463e-06, "loss": 0.733, "step": 11820 }, { "epoch": 0.3682080024910478, "grad_norm": 2.170001745223999, "learning_rate": 3.3254121734570128e-06, "loss": 0.7828, "step": 11825 }, { "epoch": 0.368363692978359, "grad_norm": 2.1197988986968994, "learning_rate": 3.3245927431249796e-06, "loss": 0.8453, "step": 11830 }, { "epoch": 0.36851938346567026, "grad_norm": 2.1901752948760986, "learning_rate": 3.3237733127929465e-06, "loss": 0.8175, "step": 11835 }, { "epoch": 0.3686750739529815, "grad_norm": 2.26810359954834, "learning_rate": 3.322953882460914e-06, "loss": 0.8007, "step": 11840 }, { "epoch": 0.3688307644402927, "grad_norm": 1.757181167602539, "learning_rate": 3.3221344521288803e-06, "loss": 0.7868, "step": 11845 }, { "epoch": 0.3689864549276039, "grad_norm": 2.702521562576294, "learning_rate": 3.321315021796847e-06, "loss": 0.8273, "step": 11850 }, { "epoch": 0.36914214541491513, "grad_norm": 2.098705768585205, "learning_rate": 3.320495591464814e-06, "loss": 0.7914, "step": 11855 }, { "epoch": 0.36929783590222637, "grad_norm": 2.1231377124786377, "learning_rate": 3.319676161132781e-06, "loss": 0.7421, "step": 11860 }, { "epoch": 0.3694535263895376, "grad_norm": 2.215454339981079, "learning_rate": 3.3188567308007473e-06, "loss": 0.7304, "step": 11865 }, { "epoch": 0.36960921687684883, "grad_norm": 2.391075611114502, "learning_rate": 3.3180373004687146e-06, "loss": 0.8974, "step": 11870 }, { "epoch": 0.36976490736416007, "grad_norm": 2.054159164428711, "learning_rate": 3.3172178701366815e-06, "loss": 0.7351, "step": 11875 }, { "epoch": 0.3699205978514713, "grad_norm": 2.1810719966888428, "learning_rate": 3.316398439804648e-06, "loss": 0.8296, "step": 11880 }, { "epoch": 0.3700762883387825, "grad_norm": 2.336212158203125, "learning_rate": 3.315579009472615e-06, "loss": 0.7321, "step": 11885 }, { "epoch": 0.3702319788260937, "grad_norm": 2.098031520843506, "learning_rate": 3.3147595791405817e-06, "loss": 0.8322, "step": 11890 }, { "epoch": 0.37038766931340494, "grad_norm": 2.2100307941436768, "learning_rate": 3.313940148808549e-06, "loss": 0.7465, "step": 11895 }, { "epoch": 0.3705433598007162, "grad_norm": 2.701361894607544, "learning_rate": 3.313120718476515e-06, "loss": 0.86, "step": 11900 }, { "epoch": 0.3706990502880274, "grad_norm": 2.02945613861084, "learning_rate": 3.3123012881444823e-06, "loss": 0.7127, "step": 11905 }, { "epoch": 0.37085474077533864, "grad_norm": 2.262557029724121, "learning_rate": 3.311481857812449e-06, "loss": 0.8777, "step": 11910 }, { "epoch": 0.3710104312626499, "grad_norm": 2.0447020530700684, "learning_rate": 3.310662427480416e-06, "loss": 0.8563, "step": 11915 }, { "epoch": 0.37116612174996105, "grad_norm": 1.989819049835205, "learning_rate": 3.3098429971483825e-06, "loss": 0.6929, "step": 11920 }, { "epoch": 0.3713218122372723, "grad_norm": 1.8719359636306763, "learning_rate": 3.3090235668163493e-06, "loss": 0.8306, "step": 11925 }, { "epoch": 0.3714775027245835, "grad_norm": 2.058535575866699, "learning_rate": 3.3082041364843166e-06, "loss": 0.7768, "step": 11930 }, { "epoch": 0.37163319321189475, "grad_norm": 2.6702401638031006, "learning_rate": 3.3073847061522835e-06, "loss": 0.7894, "step": 11935 }, { "epoch": 0.371788883699206, "grad_norm": 2.234600782394409, "learning_rate": 3.30656527582025e-06, "loss": 0.8128, "step": 11940 }, { "epoch": 0.3719445741865172, "grad_norm": 2.1894078254699707, "learning_rate": 3.305745845488217e-06, "loss": 0.8001, "step": 11945 }, { "epoch": 0.37210026467382845, "grad_norm": 2.130246162414551, "learning_rate": 3.3049264151561837e-06, "loss": 0.8199, "step": 11950 }, { "epoch": 0.37225595516113963, "grad_norm": 2.0279362201690674, "learning_rate": 3.30410698482415e-06, "loss": 0.7945, "step": 11955 }, { "epoch": 0.37241164564845086, "grad_norm": 1.981247067451477, "learning_rate": 3.3032875544921174e-06, "loss": 0.8259, "step": 11960 }, { "epoch": 0.3725673361357621, "grad_norm": 2.1630918979644775, "learning_rate": 3.3024681241600843e-06, "loss": 0.7387, "step": 11965 }, { "epoch": 0.37272302662307333, "grad_norm": 2.1702792644500732, "learning_rate": 3.301648693828051e-06, "loss": 0.7998, "step": 11970 }, { "epoch": 0.37287871711038456, "grad_norm": 1.829454779624939, "learning_rate": 3.3008292634960176e-06, "loss": 0.8797, "step": 11975 }, { "epoch": 0.3730344075976958, "grad_norm": 1.8696855306625366, "learning_rate": 3.3000098331639845e-06, "loss": 0.7647, "step": 11980 }, { "epoch": 0.373190098085007, "grad_norm": 1.906919002532959, "learning_rate": 3.2991904028319518e-06, "loss": 0.7529, "step": 11985 }, { "epoch": 0.3733457885723182, "grad_norm": 2.0680813789367676, "learning_rate": 3.2983709724999186e-06, "loss": 0.8198, "step": 11990 }, { "epoch": 0.37350147905962944, "grad_norm": 2.069528102874756, "learning_rate": 3.297551542167885e-06, "loss": 0.7706, "step": 11995 }, { "epoch": 0.37365716954694067, "grad_norm": 2.123650550842285, "learning_rate": 3.296732111835852e-06, "loss": 0.7775, "step": 12000 }, { "epoch": 0.3738128600342519, "grad_norm": 2.412431478500366, "learning_rate": 3.295912681503819e-06, "loss": 0.7779, "step": 12005 }, { "epoch": 0.37396855052156314, "grad_norm": 2.198991060256958, "learning_rate": 3.295093251171786e-06, "loss": 0.7775, "step": 12010 }, { "epoch": 0.37412424100887437, "grad_norm": 2.024510622024536, "learning_rate": 3.294273820839752e-06, "loss": 0.8029, "step": 12015 }, { "epoch": 0.3742799314961856, "grad_norm": 1.7897207736968994, "learning_rate": 3.2934543905077194e-06, "loss": 0.7898, "step": 12020 }, { "epoch": 0.37443562198349684, "grad_norm": 2.011000156402588, "learning_rate": 3.2926349601756863e-06, "loss": 0.7265, "step": 12025 }, { "epoch": 0.374591312470808, "grad_norm": 1.8146910667419434, "learning_rate": 3.2918155298436527e-06, "loss": 0.8263, "step": 12030 }, { "epoch": 0.37474700295811925, "grad_norm": 2.287693500518799, "learning_rate": 3.2909960995116196e-06, "loss": 0.7469, "step": 12035 }, { "epoch": 0.3749026934454305, "grad_norm": 1.9649739265441895, "learning_rate": 3.2901766691795865e-06, "loss": 0.7807, "step": 12040 }, { "epoch": 0.3750583839327417, "grad_norm": 1.9164575338363647, "learning_rate": 3.2893572388475538e-06, "loss": 0.7688, "step": 12045 }, { "epoch": 0.37521407442005295, "grad_norm": 2.584155797958374, "learning_rate": 3.2885378085155202e-06, "loss": 0.8165, "step": 12050 }, { "epoch": 0.3753697649073642, "grad_norm": 2.214937448501587, "learning_rate": 3.287718378183487e-06, "loss": 0.7929, "step": 12055 }, { "epoch": 0.3755254553946754, "grad_norm": 1.9794259071350098, "learning_rate": 3.286898947851454e-06, "loss": 0.7721, "step": 12060 }, { "epoch": 0.3756811458819866, "grad_norm": 2.882521152496338, "learning_rate": 3.286079517519421e-06, "loss": 0.81, "step": 12065 }, { "epoch": 0.3758368363692978, "grad_norm": 2.1323702335357666, "learning_rate": 3.2852600871873873e-06, "loss": 0.7409, "step": 12070 }, { "epoch": 0.37599252685660906, "grad_norm": 2.2217602729797363, "learning_rate": 3.2844406568553546e-06, "loss": 0.7375, "step": 12075 }, { "epoch": 0.3761482173439203, "grad_norm": 2.3161439895629883, "learning_rate": 3.2836212265233214e-06, "loss": 0.7482, "step": 12080 }, { "epoch": 0.3763039078312315, "grad_norm": 1.9363577365875244, "learning_rate": 3.2828017961912883e-06, "loss": 0.8292, "step": 12085 }, { "epoch": 0.37645959831854275, "grad_norm": 2.0603418350219727, "learning_rate": 3.2819823658592547e-06, "loss": 0.8068, "step": 12090 }, { "epoch": 0.376615288805854, "grad_norm": 2.042330265045166, "learning_rate": 3.2811629355272216e-06, "loss": 0.7219, "step": 12095 }, { "epoch": 0.37677097929316516, "grad_norm": 1.985053300857544, "learning_rate": 3.280343505195189e-06, "loss": 0.837, "step": 12100 }, { "epoch": 0.3769266697804764, "grad_norm": 1.7681702375411987, "learning_rate": 3.2795240748631554e-06, "loss": 0.8024, "step": 12105 }, { "epoch": 0.37708236026778763, "grad_norm": 2.042457342147827, "learning_rate": 3.2787046445311222e-06, "loss": 0.7533, "step": 12110 }, { "epoch": 0.37723805075509886, "grad_norm": 2.618462324142456, "learning_rate": 3.277885214199089e-06, "loss": 0.7841, "step": 12115 }, { "epoch": 0.3773937412424101, "grad_norm": 2.2591137886047363, "learning_rate": 3.277065783867056e-06, "loss": 0.8061, "step": 12120 }, { "epoch": 0.37754943172972133, "grad_norm": 1.6708102226257324, "learning_rate": 3.2762463535350224e-06, "loss": 0.8039, "step": 12125 }, { "epoch": 0.37770512221703256, "grad_norm": 2.51212477684021, "learning_rate": 3.2754269232029897e-06, "loss": 0.8537, "step": 12130 }, { "epoch": 0.37786081270434374, "grad_norm": 1.9665396213531494, "learning_rate": 3.2746074928709566e-06, "loss": 0.7027, "step": 12135 }, { "epoch": 0.378016503191655, "grad_norm": 1.96918785572052, "learning_rate": 3.2737880625389234e-06, "loss": 0.7494, "step": 12140 }, { "epoch": 0.3781721936789662, "grad_norm": 2.250408887863159, "learning_rate": 3.27296863220689e-06, "loss": 0.8514, "step": 12145 }, { "epoch": 0.37832788416627744, "grad_norm": 1.9184596538543701, "learning_rate": 3.2721492018748568e-06, "loss": 0.7449, "step": 12150 }, { "epoch": 0.3784835746535887, "grad_norm": 1.978955626487732, "learning_rate": 3.2713297715428236e-06, "loss": 0.7635, "step": 12155 }, { "epoch": 0.3786392651408999, "grad_norm": 2.1364009380340576, "learning_rate": 3.27051034121079e-06, "loss": 0.7184, "step": 12160 }, { "epoch": 0.37879495562821114, "grad_norm": 2.297900676727295, "learning_rate": 3.2696909108787574e-06, "loss": 0.8474, "step": 12165 }, { "epoch": 0.3789506461155223, "grad_norm": 2.2371740341186523, "learning_rate": 3.2688714805467242e-06, "loss": 0.7366, "step": 12170 }, { "epoch": 0.37910633660283355, "grad_norm": 1.8691701889038086, "learning_rate": 3.268052050214691e-06, "loss": 0.7702, "step": 12175 }, { "epoch": 0.3792620270901448, "grad_norm": 1.999115228652954, "learning_rate": 3.2672326198826576e-06, "loss": 0.8028, "step": 12180 }, { "epoch": 0.379417717577456, "grad_norm": 2.5121402740478516, "learning_rate": 3.2664131895506244e-06, "loss": 0.8126, "step": 12185 }, { "epoch": 0.37957340806476725, "grad_norm": 2.013821601867676, "learning_rate": 3.2655937592185917e-06, "loss": 0.7213, "step": 12190 }, { "epoch": 0.3797290985520785, "grad_norm": 2.1531057357788086, "learning_rate": 3.2647743288865586e-06, "loss": 0.7654, "step": 12195 }, { "epoch": 0.3798847890393897, "grad_norm": 2.1148509979248047, "learning_rate": 3.263954898554525e-06, "loss": 0.7609, "step": 12200 }, { "epoch": 0.3800404795267009, "grad_norm": 2.4777562618255615, "learning_rate": 3.263135468222492e-06, "loss": 0.804, "step": 12205 }, { "epoch": 0.3801961700140121, "grad_norm": 2.2175347805023193, "learning_rate": 3.2623160378904588e-06, "loss": 0.9467, "step": 12210 }, { "epoch": 0.38035186050132336, "grad_norm": 2.20306396484375, "learning_rate": 3.261496607558426e-06, "loss": 0.8404, "step": 12215 }, { "epoch": 0.3805075509886346, "grad_norm": 2.3416361808776855, "learning_rate": 3.2606771772263925e-06, "loss": 0.8167, "step": 12220 }, { "epoch": 0.3806632414759458, "grad_norm": 2.042141914367676, "learning_rate": 3.2598577468943594e-06, "loss": 0.839, "step": 12225 }, { "epoch": 0.38081893196325706, "grad_norm": 2.197232961654663, "learning_rate": 3.2590383165623262e-06, "loss": 0.8262, "step": 12230 }, { "epoch": 0.3809746224505683, "grad_norm": 2.3606417179107666, "learning_rate": 3.2582188862302927e-06, "loss": 0.7853, "step": 12235 }, { "epoch": 0.3811303129378795, "grad_norm": 2.185873031616211, "learning_rate": 3.2573994558982596e-06, "loss": 0.8069, "step": 12240 }, { "epoch": 0.3812860034251907, "grad_norm": 2.039916515350342, "learning_rate": 3.256580025566227e-06, "loss": 0.8225, "step": 12245 }, { "epoch": 0.38144169391250193, "grad_norm": 2.0653066635131836, "learning_rate": 3.2557605952341937e-06, "loss": 0.7304, "step": 12250 }, { "epoch": 0.38159738439981317, "grad_norm": 2.4914252758026123, "learning_rate": 3.25494116490216e-06, "loss": 0.8692, "step": 12255 }, { "epoch": 0.3817530748871244, "grad_norm": 1.9901870489120483, "learning_rate": 3.254121734570127e-06, "loss": 0.7371, "step": 12260 }, { "epoch": 0.38190876537443563, "grad_norm": 1.7261254787445068, "learning_rate": 3.253302304238094e-06, "loss": 0.7977, "step": 12265 }, { "epoch": 0.38206445586174687, "grad_norm": 2.137098550796509, "learning_rate": 3.2524828739060608e-06, "loss": 0.7642, "step": 12270 }, { "epoch": 0.3822201463490581, "grad_norm": 2.1538758277893066, "learning_rate": 3.2516634435740272e-06, "loss": 0.7685, "step": 12275 }, { "epoch": 0.3823758368363693, "grad_norm": 2.170478582382202, "learning_rate": 3.2508440132419945e-06, "loss": 0.8061, "step": 12280 }, { "epoch": 0.3825315273236805, "grad_norm": 2.1760644912719727, "learning_rate": 3.2500245829099614e-06, "loss": 0.7168, "step": 12285 }, { "epoch": 0.38268721781099174, "grad_norm": 1.8090318441390991, "learning_rate": 3.2492051525779283e-06, "loss": 0.7927, "step": 12290 }, { "epoch": 0.382842908298303, "grad_norm": 2.1403329372406006, "learning_rate": 3.2483857222458947e-06, "loss": 0.8169, "step": 12295 }, { "epoch": 0.3829985987856142, "grad_norm": 2.127054452896118, "learning_rate": 3.2475662919138616e-06, "loss": 0.8488, "step": 12300 }, { "epoch": 0.38315428927292544, "grad_norm": 1.9606190919876099, "learning_rate": 3.246746861581829e-06, "loss": 0.7517, "step": 12305 }, { "epoch": 0.3833099797602367, "grad_norm": 2.280975103378296, "learning_rate": 3.2459274312497953e-06, "loss": 0.704, "step": 12310 }, { "epoch": 0.38346567024754785, "grad_norm": 2.4874958992004395, "learning_rate": 3.245108000917762e-06, "loss": 0.8004, "step": 12315 }, { "epoch": 0.3836213607348591, "grad_norm": 1.950250267982483, "learning_rate": 3.244288570585729e-06, "loss": 0.7869, "step": 12320 }, { "epoch": 0.3837770512221703, "grad_norm": 2.604252576828003, "learning_rate": 3.243469140253696e-06, "loss": 0.7476, "step": 12325 }, { "epoch": 0.38393274170948155, "grad_norm": 1.9076988697052002, "learning_rate": 3.2426497099216624e-06, "loss": 0.7432, "step": 12330 }, { "epoch": 0.3840884321967928, "grad_norm": 2.2485458850860596, "learning_rate": 3.2418302795896297e-06, "loss": 0.7427, "step": 12335 }, { "epoch": 0.384244122684104, "grad_norm": 2.038015842437744, "learning_rate": 3.2410108492575965e-06, "loss": 0.7695, "step": 12340 }, { "epoch": 0.38439981317141525, "grad_norm": 2.267770767211914, "learning_rate": 3.2401914189255634e-06, "loss": 0.8157, "step": 12345 }, { "epoch": 0.3845555036587264, "grad_norm": 2.1716456413269043, "learning_rate": 3.23937198859353e-06, "loss": 0.8072, "step": 12350 }, { "epoch": 0.38471119414603766, "grad_norm": 1.9498050212860107, "learning_rate": 3.2385525582614967e-06, "loss": 0.8443, "step": 12355 }, { "epoch": 0.3848668846333489, "grad_norm": 1.9213920831680298, "learning_rate": 3.237733127929464e-06, "loss": 0.8296, "step": 12360 }, { "epoch": 0.3850225751206601, "grad_norm": 2.187166929244995, "learning_rate": 3.236913697597431e-06, "loss": 0.8231, "step": 12365 }, { "epoch": 0.38517826560797136, "grad_norm": 2.3455278873443604, "learning_rate": 3.2360942672653973e-06, "loss": 0.737, "step": 12370 }, { "epoch": 0.3853339560952826, "grad_norm": 1.8778960704803467, "learning_rate": 3.235274836933364e-06, "loss": 0.8663, "step": 12375 }, { "epoch": 0.3854896465825938, "grad_norm": 1.7973036766052246, "learning_rate": 3.234455406601331e-06, "loss": 0.7685, "step": 12380 }, { "epoch": 0.385645337069905, "grad_norm": 2.405735731124878, "learning_rate": 3.2336359762692975e-06, "loss": 0.8021, "step": 12385 }, { "epoch": 0.38580102755721624, "grad_norm": 2.0787057876586914, "learning_rate": 3.2328165459372644e-06, "loss": 0.8024, "step": 12390 }, { "epoch": 0.38595671804452747, "grad_norm": 2.551778554916382, "learning_rate": 3.2319971156052317e-06, "loss": 0.8509, "step": 12395 }, { "epoch": 0.3861124085318387, "grad_norm": 2.8773159980773926, "learning_rate": 3.2311776852731985e-06, "loss": 0.7857, "step": 12400 }, { "epoch": 0.38626809901914994, "grad_norm": 2.340360641479492, "learning_rate": 3.230358254941165e-06, "loss": 0.8429, "step": 12405 }, { "epoch": 0.38642378950646117, "grad_norm": 3.030585765838623, "learning_rate": 3.229538824609132e-06, "loss": 0.7658, "step": 12410 }, { "epoch": 0.3865794799937724, "grad_norm": 2.1066009998321533, "learning_rate": 3.2287193942770987e-06, "loss": 0.7716, "step": 12415 }, { "epoch": 0.3867351704810836, "grad_norm": 2.076045274734497, "learning_rate": 3.227899963945066e-06, "loss": 0.818, "step": 12420 }, { "epoch": 0.3868908609683948, "grad_norm": 1.9141508340835571, "learning_rate": 3.2270805336130325e-06, "loss": 0.8161, "step": 12425 }, { "epoch": 0.38704655145570604, "grad_norm": 2.69189453125, "learning_rate": 3.2262611032809993e-06, "loss": 0.8631, "step": 12430 }, { "epoch": 0.3872022419430173, "grad_norm": 2.2416181564331055, "learning_rate": 3.225441672948966e-06, "loss": 0.7948, "step": 12435 }, { "epoch": 0.3873579324303285, "grad_norm": 2.237800121307373, "learning_rate": 3.224622242616933e-06, "loss": 0.8547, "step": 12440 }, { "epoch": 0.38751362291763974, "grad_norm": 2.1397900581359863, "learning_rate": 3.2238028122848995e-06, "loss": 0.8613, "step": 12445 }, { "epoch": 0.387669313404951, "grad_norm": 2.8111603260040283, "learning_rate": 3.222983381952867e-06, "loss": 0.7369, "step": 12450 }, { "epoch": 0.3878250038922622, "grad_norm": 2.26181697845459, "learning_rate": 3.2221639516208337e-06, "loss": 0.7823, "step": 12455 }, { "epoch": 0.3879806943795734, "grad_norm": 2.1688690185546875, "learning_rate": 3.2213445212888e-06, "loss": 0.7952, "step": 12460 }, { "epoch": 0.3881363848668846, "grad_norm": 2.3796329498291016, "learning_rate": 3.220525090956767e-06, "loss": 0.7732, "step": 12465 }, { "epoch": 0.38829207535419585, "grad_norm": 1.9599984884262085, "learning_rate": 3.219705660624734e-06, "loss": 0.8142, "step": 12470 }, { "epoch": 0.3884477658415071, "grad_norm": 1.764557123184204, "learning_rate": 3.218886230292701e-06, "loss": 0.82, "step": 12475 }, { "epoch": 0.3886034563288183, "grad_norm": 1.9811931848526, "learning_rate": 3.2180667999606676e-06, "loss": 0.8294, "step": 12480 }, { "epoch": 0.38875914681612955, "grad_norm": 2.0283639430999756, "learning_rate": 3.2172473696286345e-06, "loss": 0.8532, "step": 12485 }, { "epoch": 0.3889148373034408, "grad_norm": 2.126713514328003, "learning_rate": 3.2164279392966013e-06, "loss": 0.6831, "step": 12490 }, { "epoch": 0.38907052779075196, "grad_norm": 2.6020431518554688, "learning_rate": 3.2156085089645682e-06, "loss": 0.7984, "step": 12495 }, { "epoch": 0.3892262182780632, "grad_norm": 2.6163690090179443, "learning_rate": 3.2147890786325347e-06, "loss": 0.8001, "step": 12500 }, { "epoch": 0.38938190876537443, "grad_norm": 2.108591079711914, "learning_rate": 3.2139696483005015e-06, "loss": 0.8165, "step": 12505 }, { "epoch": 0.38953759925268566, "grad_norm": 1.9935729503631592, "learning_rate": 3.213150217968469e-06, "loss": 0.8415, "step": 12510 }, { "epoch": 0.3896932897399969, "grad_norm": 2.693328380584717, "learning_rate": 3.2123307876364357e-06, "loss": 0.8222, "step": 12515 }, { "epoch": 0.38984898022730813, "grad_norm": 2.434457302093506, "learning_rate": 3.211511357304402e-06, "loss": 0.809, "step": 12520 }, { "epoch": 0.39000467071461936, "grad_norm": 1.846578598022461, "learning_rate": 3.210691926972369e-06, "loss": 0.8131, "step": 12525 }, { "epoch": 0.39016036120193054, "grad_norm": 2.0568690299987793, "learning_rate": 3.209872496640336e-06, "loss": 0.7781, "step": 12530 }, { "epoch": 0.39031605168924177, "grad_norm": 2.273071050643921, "learning_rate": 3.2090530663083023e-06, "loss": 0.7897, "step": 12535 }, { "epoch": 0.390471742176553, "grad_norm": 2.107590675354004, "learning_rate": 3.2082336359762696e-06, "loss": 0.788, "step": 12540 }, { "epoch": 0.39062743266386424, "grad_norm": 2.072071075439453, "learning_rate": 3.2074142056442365e-06, "loss": 0.7905, "step": 12545 }, { "epoch": 0.39078312315117547, "grad_norm": 1.9460923671722412, "learning_rate": 3.2065947753122034e-06, "loss": 0.8197, "step": 12550 }, { "epoch": 0.3909388136384867, "grad_norm": 2.2971103191375732, "learning_rate": 3.20577534498017e-06, "loss": 0.8262, "step": 12555 }, { "epoch": 0.39109450412579794, "grad_norm": 1.9215067625045776, "learning_rate": 3.2049559146481367e-06, "loss": 0.84, "step": 12560 }, { "epoch": 0.3912501946131091, "grad_norm": 2.1434121131896973, "learning_rate": 3.204136484316104e-06, "loss": 0.8176, "step": 12565 }, { "epoch": 0.39140588510042035, "grad_norm": 1.6450175046920776, "learning_rate": 3.203317053984071e-06, "loss": 0.7817, "step": 12570 }, { "epoch": 0.3915615755877316, "grad_norm": 1.8679169416427612, "learning_rate": 3.2024976236520373e-06, "loss": 0.7138, "step": 12575 }, { "epoch": 0.3917172660750428, "grad_norm": 2.0485990047454834, "learning_rate": 3.201678193320004e-06, "loss": 0.7812, "step": 12580 }, { "epoch": 0.39187295656235405, "grad_norm": 2.7382893562316895, "learning_rate": 3.200858762987971e-06, "loss": 0.8937, "step": 12585 }, { "epoch": 0.3920286470496653, "grad_norm": 2.27289080619812, "learning_rate": 3.2000393326559375e-06, "loss": 0.7528, "step": 12590 }, { "epoch": 0.3921843375369765, "grad_norm": 2.298896312713623, "learning_rate": 3.1992199023239048e-06, "loss": 0.7551, "step": 12595 }, { "epoch": 0.3923400280242877, "grad_norm": 2.017191171646118, "learning_rate": 3.1984004719918716e-06, "loss": 0.8321, "step": 12600 }, { "epoch": 0.3924957185115989, "grad_norm": 1.8882354497909546, "learning_rate": 3.1975810416598385e-06, "loss": 0.7505, "step": 12605 }, { "epoch": 0.39265140899891016, "grad_norm": 1.8644320964813232, "learning_rate": 3.196761611327805e-06, "loss": 0.8328, "step": 12610 }, { "epoch": 0.3928070994862214, "grad_norm": 2.199554204940796, "learning_rate": 3.195942180995772e-06, "loss": 0.7598, "step": 12615 }, { "epoch": 0.3929627899735326, "grad_norm": 2.050727605819702, "learning_rate": 3.195122750663739e-06, "loss": 0.8064, "step": 12620 }, { "epoch": 0.39311848046084386, "grad_norm": 2.210737705230713, "learning_rate": 3.194303320331706e-06, "loss": 0.7142, "step": 12625 }, { "epoch": 0.3932741709481551, "grad_norm": 2.077033519744873, "learning_rate": 3.1934838899996724e-06, "loss": 0.7817, "step": 12630 }, { "epoch": 0.39342986143546627, "grad_norm": 1.6887425184249878, "learning_rate": 3.1926644596676393e-06, "loss": 0.8441, "step": 12635 }, { "epoch": 0.3935855519227775, "grad_norm": 2.1521527767181396, "learning_rate": 3.191845029335606e-06, "loss": 0.8118, "step": 12640 }, { "epoch": 0.39374124241008873, "grad_norm": 2.0851681232452393, "learning_rate": 3.191025599003573e-06, "loss": 0.8147, "step": 12645 }, { "epoch": 0.39389693289739997, "grad_norm": 2.4282517433166504, "learning_rate": 3.1902061686715395e-06, "loss": 0.763, "step": 12650 }, { "epoch": 0.3940526233847112, "grad_norm": 1.8959665298461914, "learning_rate": 3.1893867383395068e-06, "loss": 0.7754, "step": 12655 }, { "epoch": 0.39420831387202243, "grad_norm": 2.4851315021514893, "learning_rate": 3.1885673080074736e-06, "loss": 0.8248, "step": 12660 }, { "epoch": 0.39436400435933366, "grad_norm": 2.105236291885376, "learning_rate": 3.18774787767544e-06, "loss": 0.8346, "step": 12665 }, { "epoch": 0.3945196948466449, "grad_norm": 1.713736653327942, "learning_rate": 3.186928447343407e-06, "loss": 0.7515, "step": 12670 }, { "epoch": 0.3946753853339561, "grad_norm": 2.1729624271392822, "learning_rate": 3.186109017011374e-06, "loss": 0.8587, "step": 12675 }, { "epoch": 0.3948310758212673, "grad_norm": 1.995887041091919, "learning_rate": 3.185289586679341e-06, "loss": 0.7465, "step": 12680 }, { "epoch": 0.39498676630857854, "grad_norm": 2.174442768096924, "learning_rate": 3.1844701563473076e-06, "loss": 0.766, "step": 12685 }, { "epoch": 0.3951424567958898, "grad_norm": 2.9540088176727295, "learning_rate": 3.1836507260152744e-06, "loss": 0.7323, "step": 12690 }, { "epoch": 0.395298147283201, "grad_norm": 2.4783101081848145, "learning_rate": 3.1828312956832413e-06, "loss": 0.8092, "step": 12695 }, { "epoch": 0.39545383777051224, "grad_norm": 2.351886034011841, "learning_rate": 3.182011865351208e-06, "loss": 0.7508, "step": 12700 }, { "epoch": 0.3956095282578235, "grad_norm": 2.228043556213379, "learning_rate": 3.1811924350191746e-06, "loss": 0.8567, "step": 12705 }, { "epoch": 0.39576521874513465, "grad_norm": 2.008012533187866, "learning_rate": 3.180373004687142e-06, "loss": 0.7339, "step": 12710 }, { "epoch": 0.3959209092324459, "grad_norm": 2.0263850688934326, "learning_rate": 3.1795535743551088e-06, "loss": 0.8053, "step": 12715 }, { "epoch": 0.3960765997197571, "grad_norm": 1.9863290786743164, "learning_rate": 3.1787341440230756e-06, "loss": 0.8274, "step": 12720 }, { "epoch": 0.39623229020706835, "grad_norm": 2.1425068378448486, "learning_rate": 3.177914713691042e-06, "loss": 0.8346, "step": 12725 }, { "epoch": 0.3963879806943796, "grad_norm": 2.059844732284546, "learning_rate": 3.177095283359009e-06, "loss": 0.7851, "step": 12730 }, { "epoch": 0.3965436711816908, "grad_norm": 2.598580837249756, "learning_rate": 3.1762758530269763e-06, "loss": 0.8552, "step": 12735 }, { "epoch": 0.39669936166900205, "grad_norm": 2.2523906230926514, "learning_rate": 3.1754564226949423e-06, "loss": 0.8387, "step": 12740 }, { "epoch": 0.3968550521563132, "grad_norm": 2.1398372650146484, "learning_rate": 3.1746369923629096e-06, "loss": 0.8134, "step": 12745 }, { "epoch": 0.39701074264362446, "grad_norm": 2.1605238914489746, "learning_rate": 3.1738175620308764e-06, "loss": 0.8491, "step": 12750 }, { "epoch": 0.3971664331309357, "grad_norm": 2.4323904514312744, "learning_rate": 3.1729981316988433e-06, "loss": 0.7784, "step": 12755 }, { "epoch": 0.3973221236182469, "grad_norm": 2.531496286392212, "learning_rate": 3.1721787013668098e-06, "loss": 0.7343, "step": 12760 }, { "epoch": 0.39747781410555816, "grad_norm": 2.25883412361145, "learning_rate": 3.1713592710347766e-06, "loss": 0.7698, "step": 12765 }, { "epoch": 0.3976335045928694, "grad_norm": 1.9553961753845215, "learning_rate": 3.170539840702744e-06, "loss": 0.8361, "step": 12770 }, { "epoch": 0.3977891950801806, "grad_norm": 2.219313383102417, "learning_rate": 3.1697204103707108e-06, "loss": 0.8201, "step": 12775 }, { "epoch": 0.3979448855674918, "grad_norm": 1.8708455562591553, "learning_rate": 3.1689009800386772e-06, "loss": 0.8407, "step": 12780 }, { "epoch": 0.39810057605480303, "grad_norm": 2.225745439529419, "learning_rate": 3.168081549706644e-06, "loss": 0.7648, "step": 12785 }, { "epoch": 0.39825626654211427, "grad_norm": 1.9761215448379517, "learning_rate": 3.167262119374611e-06, "loss": 0.741, "step": 12790 }, { "epoch": 0.3984119570294255, "grad_norm": 2.2333829402923584, "learning_rate": 3.1664426890425783e-06, "loss": 0.8238, "step": 12795 }, { "epoch": 0.39856764751673673, "grad_norm": 2.1884372234344482, "learning_rate": 3.1656232587105447e-06, "loss": 0.7811, "step": 12800 }, { "epoch": 0.39872333800404797, "grad_norm": 2.2955150604248047, "learning_rate": 3.1648038283785116e-06, "loss": 0.7669, "step": 12805 }, { "epoch": 0.3988790284913592, "grad_norm": 1.664554476737976, "learning_rate": 3.1639843980464785e-06, "loss": 0.7396, "step": 12810 }, { "epoch": 0.3990347189786704, "grad_norm": 2.098034143447876, "learning_rate": 3.163164967714445e-06, "loss": 0.8154, "step": 12815 }, { "epoch": 0.3991904094659816, "grad_norm": 2.057340621948242, "learning_rate": 3.1623455373824118e-06, "loss": 0.7869, "step": 12820 }, { "epoch": 0.39934609995329284, "grad_norm": 2.220696449279785, "learning_rate": 3.161526107050379e-06, "loss": 0.7347, "step": 12825 }, { "epoch": 0.3995017904406041, "grad_norm": 2.3138771057128906, "learning_rate": 3.160706676718346e-06, "loss": 0.8192, "step": 12830 }, { "epoch": 0.3996574809279153, "grad_norm": 1.8456510305404663, "learning_rate": 3.1598872463863124e-06, "loss": 0.8291, "step": 12835 }, { "epoch": 0.39981317141522654, "grad_norm": 1.7327516078948975, "learning_rate": 3.1590678160542792e-06, "loss": 0.7338, "step": 12840 }, { "epoch": 0.3999688619025378, "grad_norm": 2.5290539264678955, "learning_rate": 3.158248385722246e-06, "loss": 0.843, "step": 12845 }, { "epoch": 0.40012455238984895, "grad_norm": 2.387026786804199, "learning_rate": 3.1574289553902134e-06, "loss": 0.8403, "step": 12850 }, { "epoch": 0.4002802428771602, "grad_norm": 3.0616002082824707, "learning_rate": 3.1566095250581794e-06, "loss": 0.8206, "step": 12855 }, { "epoch": 0.4004359333644714, "grad_norm": 2.1998212337493896, "learning_rate": 3.1557900947261467e-06, "loss": 0.783, "step": 12860 }, { "epoch": 0.40059162385178265, "grad_norm": 2.040221929550171, "learning_rate": 3.1549706643941136e-06, "loss": 0.8039, "step": 12865 }, { "epoch": 0.4007473143390939, "grad_norm": 1.8476839065551758, "learning_rate": 3.1541512340620805e-06, "loss": 0.7783, "step": 12870 }, { "epoch": 0.4009030048264051, "grad_norm": 2.20424485206604, "learning_rate": 3.153331803730047e-06, "loss": 0.7988, "step": 12875 }, { "epoch": 0.40105869531371635, "grad_norm": 1.8207478523254395, "learning_rate": 3.1525123733980138e-06, "loss": 0.7961, "step": 12880 }, { "epoch": 0.4012143858010276, "grad_norm": 1.9143280982971191, "learning_rate": 3.151692943065981e-06, "loss": 0.7654, "step": 12885 }, { "epoch": 0.40137007628833876, "grad_norm": 1.9733577966690063, "learning_rate": 3.1508735127339475e-06, "loss": 0.7709, "step": 12890 }, { "epoch": 0.40152576677565, "grad_norm": 2.031527042388916, "learning_rate": 3.1500540824019144e-06, "loss": 0.7654, "step": 12895 }, { "epoch": 0.40168145726296123, "grad_norm": 1.8441249132156372, "learning_rate": 3.1492346520698813e-06, "loss": 0.8302, "step": 12900 }, { "epoch": 0.40183714775027246, "grad_norm": 2.2401936054229736, "learning_rate": 3.148415221737848e-06, "loss": 0.8379, "step": 12905 }, { "epoch": 0.4019928382375837, "grad_norm": 2.435145616531372, "learning_rate": 3.1475957914058146e-06, "loss": 0.8313, "step": 12910 }, { "epoch": 0.4021485287248949, "grad_norm": 2.14208722114563, "learning_rate": 3.146776361073782e-06, "loss": 0.8191, "step": 12915 }, { "epoch": 0.40230421921220616, "grad_norm": 2.092670440673828, "learning_rate": 3.1459569307417487e-06, "loss": 0.7209, "step": 12920 }, { "epoch": 0.40245990969951734, "grad_norm": 1.8127923011779785, "learning_rate": 3.1451375004097156e-06, "loss": 0.8277, "step": 12925 }, { "epoch": 0.40261560018682857, "grad_norm": 2.307558298110962, "learning_rate": 3.144318070077682e-06, "loss": 0.6592, "step": 12930 }, { "epoch": 0.4027712906741398, "grad_norm": 2.1175436973571777, "learning_rate": 3.143498639745649e-06, "loss": 0.9159, "step": 12935 }, { "epoch": 0.40292698116145104, "grad_norm": 1.9766708612442017, "learning_rate": 3.142679209413616e-06, "loss": 0.7566, "step": 12940 }, { "epoch": 0.40308267164876227, "grad_norm": 1.7688367366790771, "learning_rate": 3.141859779081583e-06, "loss": 0.8334, "step": 12945 }, { "epoch": 0.4032383621360735, "grad_norm": 1.8208800554275513, "learning_rate": 3.1410403487495495e-06, "loss": 0.86, "step": 12950 }, { "epoch": 0.40339405262338474, "grad_norm": 2.2223126888275146, "learning_rate": 3.1402209184175164e-06, "loss": 0.8116, "step": 12955 }, { "epoch": 0.4035497431106959, "grad_norm": 1.8440351486206055, "learning_rate": 3.1394014880854833e-06, "loss": 0.763, "step": 12960 }, { "epoch": 0.40370543359800715, "grad_norm": 2.5103471279144287, "learning_rate": 3.1385820577534497e-06, "loss": 0.8239, "step": 12965 }, { "epoch": 0.4038611240853184, "grad_norm": 2.0126562118530273, "learning_rate": 3.137762627421417e-06, "loss": 0.7846, "step": 12970 }, { "epoch": 0.4040168145726296, "grad_norm": 2.783601760864258, "learning_rate": 3.136943197089384e-06, "loss": 0.8646, "step": 12975 }, { "epoch": 0.40417250505994085, "grad_norm": 1.9739117622375488, "learning_rate": 3.1361237667573507e-06, "loss": 0.7934, "step": 12980 }, { "epoch": 0.4043281955472521, "grad_norm": 1.8489551544189453, "learning_rate": 3.135304336425317e-06, "loss": 0.6901, "step": 12985 }, { "epoch": 0.4044838860345633, "grad_norm": 1.969118595123291, "learning_rate": 3.134484906093284e-06, "loss": 0.8158, "step": 12990 }, { "epoch": 0.4046395765218745, "grad_norm": 2.276930332183838, "learning_rate": 3.133665475761251e-06, "loss": 0.7318, "step": 12995 }, { "epoch": 0.4047952670091857, "grad_norm": 2.1402721405029297, "learning_rate": 3.1328460454292182e-06, "loss": 0.8016, "step": 13000 }, { "epoch": 0.40495095749649695, "grad_norm": 2.0057880878448486, "learning_rate": 3.1320266150971847e-06, "loss": 0.7505, "step": 13005 }, { "epoch": 0.4051066479838082, "grad_norm": 1.9989991188049316, "learning_rate": 3.1312071847651515e-06, "loss": 0.8958, "step": 13010 }, { "epoch": 0.4052623384711194, "grad_norm": 2.334563970565796, "learning_rate": 3.1303877544331184e-06, "loss": 0.7915, "step": 13015 }, { "epoch": 0.40541802895843065, "grad_norm": 2.4232966899871826, "learning_rate": 3.129568324101085e-06, "loss": 0.8323, "step": 13020 }, { "epoch": 0.4055737194457419, "grad_norm": 2.332789421081543, "learning_rate": 3.1287488937690517e-06, "loss": 0.6973, "step": 13025 }, { "epoch": 0.40572940993305306, "grad_norm": 2.6427319049835205, "learning_rate": 3.127929463437019e-06, "loss": 0.8303, "step": 13030 }, { "epoch": 0.4058851004203643, "grad_norm": 1.9058457612991333, "learning_rate": 3.127110033104986e-06, "loss": 0.7696, "step": 13035 }, { "epoch": 0.40604079090767553, "grad_norm": 1.8894271850585938, "learning_rate": 3.1262906027729523e-06, "loss": 0.7632, "step": 13040 }, { "epoch": 0.40619648139498676, "grad_norm": 2.010582208633423, "learning_rate": 3.125471172440919e-06, "loss": 0.8507, "step": 13045 }, { "epoch": 0.406352171882298, "grad_norm": 2.3559064865112305, "learning_rate": 3.124651742108886e-06, "loss": 0.861, "step": 13050 }, { "epoch": 0.40650786236960923, "grad_norm": 2.318882465362549, "learning_rate": 3.1238323117768534e-06, "loss": 0.7785, "step": 13055 }, { "epoch": 0.40666355285692046, "grad_norm": 1.7815288305282593, "learning_rate": 3.12301288144482e-06, "loss": 0.8378, "step": 13060 }, { "epoch": 0.40681924334423164, "grad_norm": 2.0826687812805176, "learning_rate": 3.1221934511127867e-06, "loss": 0.828, "step": 13065 }, { "epoch": 0.4069749338315429, "grad_norm": 2.0522165298461914, "learning_rate": 3.1213740207807535e-06, "loss": 0.783, "step": 13070 }, { "epoch": 0.4071306243188541, "grad_norm": 2.0722906589508057, "learning_rate": 3.1205545904487204e-06, "loss": 0.7303, "step": 13075 }, { "epoch": 0.40728631480616534, "grad_norm": 2.630880117416382, "learning_rate": 3.119735160116687e-06, "loss": 0.8243, "step": 13080 }, { "epoch": 0.4074420052934766, "grad_norm": 1.9394335746765137, "learning_rate": 3.118915729784654e-06, "loss": 0.8713, "step": 13085 }, { "epoch": 0.4075976957807878, "grad_norm": 2.1129865646362305, "learning_rate": 3.118096299452621e-06, "loss": 0.7198, "step": 13090 }, { "epoch": 0.40775338626809904, "grad_norm": 2.5170326232910156, "learning_rate": 3.1172768691205875e-06, "loss": 0.8465, "step": 13095 }, { "epoch": 0.40790907675541027, "grad_norm": 2.1121740341186523, "learning_rate": 3.1164574387885543e-06, "loss": 0.8445, "step": 13100 }, { "epoch": 0.40806476724272145, "grad_norm": 2.1622273921966553, "learning_rate": 3.115638008456521e-06, "loss": 0.7542, "step": 13105 }, { "epoch": 0.4082204577300327, "grad_norm": 2.1193370819091797, "learning_rate": 3.114818578124488e-06, "loss": 0.7858, "step": 13110 }, { "epoch": 0.4083761482173439, "grad_norm": 1.963694453239441, "learning_rate": 3.1139991477924545e-06, "loss": 0.8486, "step": 13115 }, { "epoch": 0.40853183870465515, "grad_norm": 2.2616257667541504, "learning_rate": 3.113179717460422e-06, "loss": 0.7974, "step": 13120 }, { "epoch": 0.4086875291919664, "grad_norm": 3.773043632507324, "learning_rate": 3.1123602871283887e-06, "loss": 0.7933, "step": 13125 }, { "epoch": 0.4088432196792776, "grad_norm": 1.923654317855835, "learning_rate": 3.1115408567963556e-06, "loss": 0.6967, "step": 13130 }, { "epoch": 0.40899891016658885, "grad_norm": 2.145859718322754, "learning_rate": 3.110721426464322e-06, "loss": 0.7948, "step": 13135 }, { "epoch": 0.4091546006539, "grad_norm": 2.30155611038208, "learning_rate": 3.109901996132289e-06, "loss": 0.783, "step": 13140 }, { "epoch": 0.40931029114121126, "grad_norm": 1.9164904356002808, "learning_rate": 3.109082565800256e-06, "loss": 0.8032, "step": 13145 }, { "epoch": 0.4094659816285225, "grad_norm": 2.2297282218933105, "learning_rate": 3.108263135468223e-06, "loss": 0.7628, "step": 13150 }, { "epoch": 0.4096216721158337, "grad_norm": 1.965471863746643, "learning_rate": 3.1074437051361895e-06, "loss": 0.7447, "step": 13155 }, { "epoch": 0.40977736260314496, "grad_norm": 1.9934086799621582, "learning_rate": 3.1066242748041564e-06, "loss": 0.8201, "step": 13160 }, { "epoch": 0.4099330530904562, "grad_norm": 2.1836159229278564, "learning_rate": 3.1058048444721232e-06, "loss": 0.7759, "step": 13165 }, { "epoch": 0.4100887435777674, "grad_norm": 1.9760764837265015, "learning_rate": 3.1049854141400897e-06, "loss": 0.8528, "step": 13170 }, { "epoch": 0.4102444340650786, "grad_norm": 1.9356292486190796, "learning_rate": 3.104165983808057e-06, "loss": 0.7416, "step": 13175 }, { "epoch": 0.41040012455238983, "grad_norm": 2.1547234058380127, "learning_rate": 3.103346553476024e-06, "loss": 0.7518, "step": 13180 }, { "epoch": 0.41055581503970107, "grad_norm": 2.1905250549316406, "learning_rate": 3.1025271231439907e-06, "loss": 0.8216, "step": 13185 }, { "epoch": 0.4107115055270123, "grad_norm": 2.2486908435821533, "learning_rate": 3.101707692811957e-06, "loss": 0.8597, "step": 13190 }, { "epoch": 0.41086719601432353, "grad_norm": 2.2227134704589844, "learning_rate": 3.100888262479924e-06, "loss": 0.7922, "step": 13195 }, { "epoch": 0.41102288650163477, "grad_norm": 2.577580690383911, "learning_rate": 3.1000688321478913e-06, "loss": 0.7997, "step": 13200 }, { "epoch": 0.411178576988946, "grad_norm": 2.103489637374878, "learning_rate": 3.099249401815858e-06, "loss": 0.8072, "step": 13205 }, { "epoch": 0.4113342674762572, "grad_norm": 2.010565757751465, "learning_rate": 3.0984299714838246e-06, "loss": 0.8892, "step": 13210 }, { "epoch": 0.4114899579635684, "grad_norm": 2.3281517028808594, "learning_rate": 3.0976105411517915e-06, "loss": 0.7733, "step": 13215 }, { "epoch": 0.41164564845087964, "grad_norm": 1.9357573986053467, "learning_rate": 3.0967911108197584e-06, "loss": 0.7638, "step": 13220 }, { "epoch": 0.4118013389381909, "grad_norm": 2.2236602306365967, "learning_rate": 3.0959716804877257e-06, "loss": 0.7068, "step": 13225 }, { "epoch": 0.4119570294255021, "grad_norm": 2.1568071842193604, "learning_rate": 3.0951522501556917e-06, "loss": 0.7821, "step": 13230 }, { "epoch": 0.41211271991281334, "grad_norm": 1.7827173471450806, "learning_rate": 3.094332819823659e-06, "loss": 0.8751, "step": 13235 }, { "epoch": 0.4122684104001246, "grad_norm": 1.7312664985656738, "learning_rate": 3.093513389491626e-06, "loss": 0.8044, "step": 13240 }, { "epoch": 0.41242410088743575, "grad_norm": 1.9544111490249634, "learning_rate": 3.0926939591595923e-06, "loss": 0.7784, "step": 13245 }, { "epoch": 0.412579791374747, "grad_norm": 2.0611374378204346, "learning_rate": 3.091874528827559e-06, "loss": 0.7638, "step": 13250 }, { "epoch": 0.4127354818620582, "grad_norm": 2.018110513687134, "learning_rate": 3.091055098495526e-06, "loss": 0.8105, "step": 13255 }, { "epoch": 0.41289117234936945, "grad_norm": 2.106863021850586, "learning_rate": 3.0902356681634933e-06, "loss": 0.8342, "step": 13260 }, { "epoch": 0.4130468628366807, "grad_norm": 1.866456389427185, "learning_rate": 3.0894162378314598e-06, "loss": 0.7971, "step": 13265 }, { "epoch": 0.4132025533239919, "grad_norm": 2.1736180782318115, "learning_rate": 3.0885968074994266e-06, "loss": 0.8443, "step": 13270 }, { "epoch": 0.41335824381130315, "grad_norm": 1.9118188619613647, "learning_rate": 3.0877773771673935e-06, "loss": 0.8314, "step": 13275 }, { "epoch": 0.4135139342986143, "grad_norm": 2.455136299133301, "learning_rate": 3.0869579468353604e-06, "loss": 0.8284, "step": 13280 }, { "epoch": 0.41366962478592556, "grad_norm": 2.3688511848449707, "learning_rate": 3.086138516503327e-06, "loss": 0.8356, "step": 13285 }, { "epoch": 0.4138253152732368, "grad_norm": 2.219301462173462, "learning_rate": 3.085319086171294e-06, "loss": 0.8241, "step": 13290 }, { "epoch": 0.413981005760548, "grad_norm": 2.1406056880950928, "learning_rate": 3.084499655839261e-06, "loss": 0.8415, "step": 13295 }, { "epoch": 0.41413669624785926, "grad_norm": 2.169203996658325, "learning_rate": 3.083680225507228e-06, "loss": 0.7951, "step": 13300 }, { "epoch": 0.4142923867351705, "grad_norm": 3.0633959770202637, "learning_rate": 3.0828607951751943e-06, "loss": 0.7552, "step": 13305 }, { "epoch": 0.4144480772224817, "grad_norm": 2.1580069065093994, "learning_rate": 3.082041364843161e-06, "loss": 0.8947, "step": 13310 }, { "epoch": 0.41460376770979296, "grad_norm": 2.147008180618286, "learning_rate": 3.0812219345111285e-06, "loss": 0.8049, "step": 13315 }, { "epoch": 0.41475945819710414, "grad_norm": 2.214064359664917, "learning_rate": 3.080402504179095e-06, "loss": 0.7428, "step": 13320 }, { "epoch": 0.41491514868441537, "grad_norm": 2.277512550354004, "learning_rate": 3.0795830738470618e-06, "loss": 0.7812, "step": 13325 }, { "epoch": 0.4150708391717266, "grad_norm": 2.407313108444214, "learning_rate": 3.0787636435150286e-06, "loss": 0.7457, "step": 13330 }, { "epoch": 0.41522652965903784, "grad_norm": 1.8650486469268799, "learning_rate": 3.0779442131829955e-06, "loss": 0.8042, "step": 13335 }, { "epoch": 0.41538222014634907, "grad_norm": 1.8801015615463257, "learning_rate": 3.077124782850962e-06, "loss": 0.7478, "step": 13340 }, { "epoch": 0.4155379106336603, "grad_norm": 2.296083927154541, "learning_rate": 3.076305352518929e-06, "loss": 0.8642, "step": 13345 }, { "epoch": 0.41569360112097153, "grad_norm": 1.8748278617858887, "learning_rate": 3.075485922186896e-06, "loss": 0.7749, "step": 13350 }, { "epoch": 0.4158492916082827, "grad_norm": 2.1981706619262695, "learning_rate": 3.074666491854863e-06, "loss": 0.924, "step": 13355 }, { "epoch": 0.41600498209559394, "grad_norm": 2.208070755004883, "learning_rate": 3.0738470615228294e-06, "loss": 0.8007, "step": 13360 }, { "epoch": 0.4161606725829052, "grad_norm": 2.0522067546844482, "learning_rate": 3.0730276311907963e-06, "loss": 0.7448, "step": 13365 }, { "epoch": 0.4163163630702164, "grad_norm": 1.806470274925232, "learning_rate": 3.072208200858763e-06, "loss": 0.7993, "step": 13370 }, { "epoch": 0.41647205355752764, "grad_norm": 2.3670549392700195, "learning_rate": 3.0713887705267305e-06, "loss": 0.8198, "step": 13375 }, { "epoch": 0.4166277440448389, "grad_norm": 2.163137197494507, "learning_rate": 3.070569340194697e-06, "loss": 0.8264, "step": 13380 }, { "epoch": 0.4167834345321501, "grad_norm": 1.7763606309890747, "learning_rate": 3.0697499098626638e-06, "loss": 0.737, "step": 13385 }, { "epoch": 0.4169391250194613, "grad_norm": 2.322039842605591, "learning_rate": 3.0689304795306307e-06, "loss": 0.8257, "step": 13390 }, { "epoch": 0.4170948155067725, "grad_norm": 1.990544319152832, "learning_rate": 3.068111049198597e-06, "loss": 0.7627, "step": 13395 }, { "epoch": 0.41725050599408375, "grad_norm": 1.70354425907135, "learning_rate": 3.067291618866564e-06, "loss": 0.7818, "step": 13400 }, { "epoch": 0.417406196481395, "grad_norm": 2.1031110286712646, "learning_rate": 3.0664721885345313e-06, "loss": 0.8354, "step": 13405 }, { "epoch": 0.4175618869687062, "grad_norm": 2.1867563724517822, "learning_rate": 3.065652758202498e-06, "loss": 0.7902, "step": 13410 }, { "epoch": 0.41771757745601745, "grad_norm": 2.713710308074951, "learning_rate": 3.0648333278704646e-06, "loss": 0.8461, "step": 13415 }, { "epoch": 0.4178732679433287, "grad_norm": 2.2262134552001953, "learning_rate": 3.0640138975384314e-06, "loss": 0.7796, "step": 13420 }, { "epoch": 0.41802895843063986, "grad_norm": 2.1284022331237793, "learning_rate": 3.0631944672063983e-06, "loss": 0.7747, "step": 13425 }, { "epoch": 0.4181846489179511, "grad_norm": 2.1669812202453613, "learning_rate": 3.0623750368743656e-06, "loss": 0.7644, "step": 13430 }, { "epoch": 0.41834033940526233, "grad_norm": 2.5071332454681396, "learning_rate": 3.061555606542332e-06, "loss": 0.7972, "step": 13435 }, { "epoch": 0.41849602989257356, "grad_norm": 2.2873449325561523, "learning_rate": 3.060736176210299e-06, "loss": 0.8183, "step": 13440 }, { "epoch": 0.4186517203798848, "grad_norm": 2.323770761489868, "learning_rate": 3.059916745878266e-06, "loss": 0.7876, "step": 13445 }, { "epoch": 0.41880741086719603, "grad_norm": 1.951582431793213, "learning_rate": 3.0590973155462322e-06, "loss": 0.7428, "step": 13450 }, { "epoch": 0.41896310135450726, "grad_norm": 3.422929048538208, "learning_rate": 3.058277885214199e-06, "loss": 0.8241, "step": 13455 }, { "epoch": 0.41911879184181844, "grad_norm": 2.424652576446533, "learning_rate": 3.0574584548821664e-06, "loss": 0.8576, "step": 13460 }, { "epoch": 0.41927448232912967, "grad_norm": 2.2837626934051514, "learning_rate": 3.0566390245501333e-06, "loss": 0.7154, "step": 13465 }, { "epoch": 0.4194301728164409, "grad_norm": 2.4387824535369873, "learning_rate": 3.0558195942180997e-06, "loss": 0.8867, "step": 13470 }, { "epoch": 0.41958586330375214, "grad_norm": 2.2517402172088623, "learning_rate": 3.0550001638860666e-06, "loss": 0.7747, "step": 13475 }, { "epoch": 0.41974155379106337, "grad_norm": 3.1990444660186768, "learning_rate": 3.0541807335540335e-06, "loss": 0.8819, "step": 13480 }, { "epoch": 0.4198972442783746, "grad_norm": 1.6942952871322632, "learning_rate": 3.0533613032220003e-06, "loss": 0.7607, "step": 13485 }, { "epoch": 0.42005293476568584, "grad_norm": 2.2220449447631836, "learning_rate": 3.0525418728899668e-06, "loss": 0.7519, "step": 13490 }, { "epoch": 0.420208625252997, "grad_norm": 4.047983646392822, "learning_rate": 3.051722442557934e-06, "loss": 0.8314, "step": 13495 }, { "epoch": 0.42036431574030825, "grad_norm": 2.7514452934265137, "learning_rate": 3.050903012225901e-06, "loss": 0.8254, "step": 13500 }, { "epoch": 0.4205200062276195, "grad_norm": 2.040163516998291, "learning_rate": 3.050083581893868e-06, "loss": 0.8106, "step": 13505 }, { "epoch": 0.4206756967149307, "grad_norm": 2.225435733795166, "learning_rate": 3.0492641515618343e-06, "loss": 0.7461, "step": 13510 }, { "epoch": 0.42083138720224195, "grad_norm": 2.254944086074829, "learning_rate": 3.048444721229801e-06, "loss": 0.79, "step": 13515 }, { "epoch": 0.4209870776895532, "grad_norm": 2.1738972663879395, "learning_rate": 3.0476252908977684e-06, "loss": 0.8053, "step": 13520 }, { "epoch": 0.4211427681768644, "grad_norm": 2.0783064365386963, "learning_rate": 3.046805860565735e-06, "loss": 0.7684, "step": 13525 }, { "epoch": 0.42129845866417565, "grad_norm": 2.118757486343384, "learning_rate": 3.0459864302337017e-06, "loss": 0.8281, "step": 13530 }, { "epoch": 0.4214541491514868, "grad_norm": 2.1279897689819336, "learning_rate": 3.0451669999016686e-06, "loss": 0.798, "step": 13535 }, { "epoch": 0.42160983963879806, "grad_norm": 2.352376699447632, "learning_rate": 3.0443475695696355e-06, "loss": 0.7509, "step": 13540 }, { "epoch": 0.4217655301261093, "grad_norm": 1.901790976524353, "learning_rate": 3.043528139237602e-06, "loss": 0.8107, "step": 13545 }, { "epoch": 0.4219212206134205, "grad_norm": 2.1181676387786865, "learning_rate": 3.042708708905569e-06, "loss": 0.7957, "step": 13550 }, { "epoch": 0.42207691110073176, "grad_norm": 1.7356103658676147, "learning_rate": 3.041889278573536e-06, "loss": 0.7856, "step": 13555 }, { "epoch": 0.422232601588043, "grad_norm": 2.270698308944702, "learning_rate": 3.041069848241503e-06, "loss": 0.9132, "step": 13560 }, { "epoch": 0.4223882920753542, "grad_norm": 1.9208015203475952, "learning_rate": 3.0402504179094694e-06, "loss": 0.7729, "step": 13565 }, { "epoch": 0.4225439825626654, "grad_norm": 2.26824688911438, "learning_rate": 3.0394309875774363e-06, "loss": 0.7735, "step": 13570 }, { "epoch": 0.42269967304997663, "grad_norm": 2.0290184020996094, "learning_rate": 3.0386115572454036e-06, "loss": 0.7731, "step": 13575 }, { "epoch": 0.42285536353728786, "grad_norm": 2.4008193016052246, "learning_rate": 3.0377921269133704e-06, "loss": 0.8052, "step": 13580 }, { "epoch": 0.4230110540245991, "grad_norm": 2.237267255783081, "learning_rate": 3.036972696581337e-06, "loss": 0.8419, "step": 13585 }, { "epoch": 0.42316674451191033, "grad_norm": 2.5576021671295166, "learning_rate": 3.0361532662493037e-06, "loss": 0.8528, "step": 13590 }, { "epoch": 0.42332243499922156, "grad_norm": 1.9411817789077759, "learning_rate": 3.0353338359172706e-06, "loss": 0.8036, "step": 13595 }, { "epoch": 0.4234781254865328, "grad_norm": 2.106426477432251, "learning_rate": 3.034514405585237e-06, "loss": 0.7724, "step": 13600 }, { "epoch": 0.423633815973844, "grad_norm": 2.160773992538452, "learning_rate": 3.033694975253204e-06, "loss": 0.8629, "step": 13605 }, { "epoch": 0.4237895064611552, "grad_norm": 2.14762020111084, "learning_rate": 3.0328755449211712e-06, "loss": 0.6892, "step": 13610 }, { "epoch": 0.42394519694846644, "grad_norm": 2.433014154434204, "learning_rate": 3.032056114589138e-06, "loss": 0.7587, "step": 13615 }, { "epoch": 0.4241008874357777, "grad_norm": 2.030233144760132, "learning_rate": 3.0312366842571045e-06, "loss": 0.7135, "step": 13620 }, { "epoch": 0.4242565779230889, "grad_norm": 2.113502025604248, "learning_rate": 3.0304172539250714e-06, "loss": 0.867, "step": 13625 }, { "epoch": 0.42441226841040014, "grad_norm": 2.0541090965270996, "learning_rate": 3.0295978235930383e-06, "loss": 0.794, "step": 13630 }, { "epoch": 0.4245679588977114, "grad_norm": 2.1623127460479736, "learning_rate": 3.0287783932610056e-06, "loss": 0.7848, "step": 13635 }, { "epoch": 0.42472364938502255, "grad_norm": 1.9741672277450562, "learning_rate": 3.027958962928972e-06, "loss": 0.7511, "step": 13640 }, { "epoch": 0.4248793398723338, "grad_norm": 2.0400302410125732, "learning_rate": 3.027139532596939e-06, "loss": 0.8148, "step": 13645 }, { "epoch": 0.425035030359645, "grad_norm": 2.262099266052246, "learning_rate": 3.0263201022649058e-06, "loss": 0.7855, "step": 13650 }, { "epoch": 0.42519072084695625, "grad_norm": 2.2558300495147705, "learning_rate": 3.0255006719328726e-06, "loss": 0.79, "step": 13655 }, { "epoch": 0.4253464113342675, "grad_norm": 2.3043289184570312, "learning_rate": 3.024681241600839e-06, "loss": 0.8332, "step": 13660 }, { "epoch": 0.4255021018215787, "grad_norm": 2.258775234222412, "learning_rate": 3.0238618112688064e-06, "loss": 0.8512, "step": 13665 }, { "epoch": 0.42565779230888995, "grad_norm": 2.5714149475097656, "learning_rate": 3.0230423809367732e-06, "loss": 0.7856, "step": 13670 }, { "epoch": 0.4258134827962011, "grad_norm": 2.03263521194458, "learning_rate": 3.0222229506047397e-06, "loss": 0.7517, "step": 13675 }, { "epoch": 0.42596917328351236, "grad_norm": 2.1300783157348633, "learning_rate": 3.0214035202727065e-06, "loss": 0.7377, "step": 13680 }, { "epoch": 0.4261248637708236, "grad_norm": 2.0705013275146484, "learning_rate": 3.0205840899406734e-06, "loss": 0.7873, "step": 13685 }, { "epoch": 0.4262805542581348, "grad_norm": 2.0556066036224365, "learning_rate": 3.0197646596086407e-06, "loss": 0.8165, "step": 13690 }, { "epoch": 0.42643624474544606, "grad_norm": 1.9857594966888428, "learning_rate": 3.0189452292766067e-06, "loss": 0.7755, "step": 13695 }, { "epoch": 0.4265919352327573, "grad_norm": 2.0222463607788086, "learning_rate": 3.018125798944574e-06, "loss": 0.7853, "step": 13700 }, { "epoch": 0.4267476257200685, "grad_norm": 2.3842618465423584, "learning_rate": 3.017306368612541e-06, "loss": 0.7315, "step": 13705 }, { "epoch": 0.4269033162073797, "grad_norm": 2.0738065242767334, "learning_rate": 3.0164869382805078e-06, "loss": 0.7712, "step": 13710 }, { "epoch": 0.42705900669469093, "grad_norm": 2.400949239730835, "learning_rate": 3.015667507948474e-06, "loss": 0.7096, "step": 13715 }, { "epoch": 0.42721469718200217, "grad_norm": 2.463639497756958, "learning_rate": 3.014848077616441e-06, "loss": 0.9097, "step": 13720 }, { "epoch": 0.4273703876693134, "grad_norm": 2.0942351818084717, "learning_rate": 3.0140286472844084e-06, "loss": 0.8167, "step": 13725 }, { "epoch": 0.42752607815662463, "grad_norm": 1.8490722179412842, "learning_rate": 3.0132092169523752e-06, "loss": 0.7876, "step": 13730 }, { "epoch": 0.42768176864393587, "grad_norm": 2.366694450378418, "learning_rate": 3.0123897866203417e-06, "loss": 0.844, "step": 13735 }, { "epoch": 0.4278374591312471, "grad_norm": 2.2429099082946777, "learning_rate": 3.0115703562883086e-06, "loss": 0.8203, "step": 13740 }, { "epoch": 0.42799314961855833, "grad_norm": 2.0125083923339844, "learning_rate": 3.0107509259562754e-06, "loss": 0.8005, "step": 13745 }, { "epoch": 0.4281488401058695, "grad_norm": 2.3095030784606934, "learning_rate": 3.009931495624242e-06, "loss": 0.7666, "step": 13750 }, { "epoch": 0.42830453059318074, "grad_norm": 2.417649507522583, "learning_rate": 3.009112065292209e-06, "loss": 0.8461, "step": 13755 }, { "epoch": 0.428460221080492, "grad_norm": 1.9010289907455444, "learning_rate": 3.008292634960176e-06, "loss": 0.7849, "step": 13760 }, { "epoch": 0.4286159115678032, "grad_norm": 2.05843448638916, "learning_rate": 3.007473204628143e-06, "loss": 0.7163, "step": 13765 }, { "epoch": 0.42877160205511444, "grad_norm": 1.9458917379379272, "learning_rate": 3.0066537742961093e-06, "loss": 0.7539, "step": 13770 }, { "epoch": 0.4289272925424257, "grad_norm": 2.2006497383117676, "learning_rate": 3.0058343439640762e-06, "loss": 0.8251, "step": 13775 }, { "epoch": 0.4290829830297369, "grad_norm": 2.009782552719116, "learning_rate": 3.0050149136320435e-06, "loss": 0.8088, "step": 13780 }, { "epoch": 0.4292386735170481, "grad_norm": 1.995909571647644, "learning_rate": 3.0041954833000104e-06, "loss": 0.7739, "step": 13785 }, { "epoch": 0.4293943640043593, "grad_norm": 2.0796470642089844, "learning_rate": 3.003376052967977e-06, "loss": 0.8486, "step": 13790 }, { "epoch": 0.42955005449167055, "grad_norm": 2.024883508682251, "learning_rate": 3.0025566226359437e-06, "loss": 0.8203, "step": 13795 }, { "epoch": 0.4297057449789818, "grad_norm": 2.423394203186035, "learning_rate": 3.0017371923039106e-06, "loss": 0.8168, "step": 13800 }, { "epoch": 0.429861435466293, "grad_norm": 2.0175254344940186, "learning_rate": 3.000917761971877e-06, "loss": 0.8734, "step": 13805 }, { "epoch": 0.43001712595360425, "grad_norm": 2.131512403488159, "learning_rate": 3.0000983316398443e-06, "loss": 0.8072, "step": 13810 }, { "epoch": 0.4301728164409155, "grad_norm": 2.3161141872406006, "learning_rate": 2.999278901307811e-06, "loss": 0.8456, "step": 13815 }, { "epoch": 0.43032850692822666, "grad_norm": 2.02905535697937, "learning_rate": 2.998459470975778e-06, "loss": 0.8758, "step": 13820 }, { "epoch": 0.4304841974155379, "grad_norm": 2.2954585552215576, "learning_rate": 2.9976400406437445e-06, "loss": 0.8504, "step": 13825 }, { "epoch": 0.43063988790284913, "grad_norm": 2.008868932723999, "learning_rate": 2.9968206103117114e-06, "loss": 0.755, "step": 13830 }, { "epoch": 0.43079557839016036, "grad_norm": 2.0344784259796143, "learning_rate": 2.9960011799796782e-06, "loss": 0.7836, "step": 13835 }, { "epoch": 0.4309512688774716, "grad_norm": 1.8916431665420532, "learning_rate": 2.9951817496476455e-06, "loss": 0.821, "step": 13840 }, { "epoch": 0.4311069593647828, "grad_norm": 2.1317315101623535, "learning_rate": 2.994362319315612e-06, "loss": 0.7756, "step": 13845 }, { "epoch": 0.43126264985209406, "grad_norm": 2.055306911468506, "learning_rate": 2.993542888983579e-06, "loss": 0.7944, "step": 13850 }, { "epoch": 0.43141834033940524, "grad_norm": 2.0822384357452393, "learning_rate": 2.9927234586515457e-06, "loss": 0.802, "step": 13855 }, { "epoch": 0.43157403082671647, "grad_norm": 2.3977673053741455, "learning_rate": 2.9919040283195126e-06, "loss": 0.8399, "step": 13860 }, { "epoch": 0.4317297213140277, "grad_norm": 2.1798481941223145, "learning_rate": 2.991084597987479e-06, "loss": 0.8149, "step": 13865 }, { "epoch": 0.43188541180133894, "grad_norm": 1.9546767473220825, "learning_rate": 2.9902651676554463e-06, "loss": 0.8384, "step": 13870 }, { "epoch": 0.43204110228865017, "grad_norm": 2.05446720123291, "learning_rate": 2.989445737323413e-06, "loss": 0.7794, "step": 13875 }, { "epoch": 0.4321967927759614, "grad_norm": 1.8975105285644531, "learning_rate": 2.9886263069913796e-06, "loss": 0.7697, "step": 13880 }, { "epoch": 0.43235248326327264, "grad_norm": 2.019479990005493, "learning_rate": 2.9878068766593465e-06, "loss": 0.7389, "step": 13885 }, { "epoch": 0.4325081737505838, "grad_norm": 2.5734424591064453, "learning_rate": 2.9869874463273134e-06, "loss": 0.8151, "step": 13890 }, { "epoch": 0.43266386423789505, "grad_norm": 1.8918558359146118, "learning_rate": 2.9861680159952807e-06, "loss": 0.7983, "step": 13895 }, { "epoch": 0.4328195547252063, "grad_norm": 2.1177685260772705, "learning_rate": 2.985348585663247e-06, "loss": 0.8146, "step": 13900 }, { "epoch": 0.4329752452125175, "grad_norm": 1.8746541738510132, "learning_rate": 2.984529155331214e-06, "loss": 0.7575, "step": 13905 }, { "epoch": 0.43313093569982875, "grad_norm": 2.2994163036346436, "learning_rate": 2.983709724999181e-06, "loss": 0.7999, "step": 13910 }, { "epoch": 0.43328662618714, "grad_norm": 2.248389482498169, "learning_rate": 2.9828902946671477e-06, "loss": 0.8352, "step": 13915 }, { "epoch": 0.4334423166744512, "grad_norm": 2.292520046234131, "learning_rate": 2.982070864335114e-06, "loss": 0.8095, "step": 13920 }, { "epoch": 0.4335980071617624, "grad_norm": 2.6231815814971924, "learning_rate": 2.9812514340030815e-06, "loss": 0.8201, "step": 13925 }, { "epoch": 0.4337536976490736, "grad_norm": 2.1338541507720947, "learning_rate": 2.9804320036710483e-06, "loss": 0.8235, "step": 13930 }, { "epoch": 0.43390938813638485, "grad_norm": 1.8539053201675415, "learning_rate": 2.979612573339015e-06, "loss": 0.7785, "step": 13935 }, { "epoch": 0.4340650786236961, "grad_norm": 2.5411086082458496, "learning_rate": 2.9787931430069816e-06, "loss": 0.8235, "step": 13940 }, { "epoch": 0.4342207691110073, "grad_norm": 2.3772125244140625, "learning_rate": 2.9779737126749485e-06, "loss": 0.7869, "step": 13945 }, { "epoch": 0.43437645959831855, "grad_norm": 3.2574028968811035, "learning_rate": 2.9771542823429154e-06, "loss": 0.894, "step": 13950 }, { "epoch": 0.4345321500856298, "grad_norm": 2.325700044631958, "learning_rate": 2.976334852010882e-06, "loss": 0.7953, "step": 13955 }, { "epoch": 0.434687840572941, "grad_norm": 2.4619107246398926, "learning_rate": 2.975515421678849e-06, "loss": 0.8901, "step": 13960 }, { "epoch": 0.4348435310602522, "grad_norm": 1.8813344240188599, "learning_rate": 2.974695991346816e-06, "loss": 0.7535, "step": 13965 }, { "epoch": 0.43499922154756343, "grad_norm": 2.1694495677948, "learning_rate": 2.973876561014783e-06, "loss": 0.8572, "step": 13970 }, { "epoch": 0.43515491203487466, "grad_norm": 2.1441550254821777, "learning_rate": 2.9730571306827493e-06, "loss": 0.8588, "step": 13975 }, { "epoch": 0.4353106025221859, "grad_norm": 2.0288596153259277, "learning_rate": 2.972237700350716e-06, "loss": 0.7512, "step": 13980 }, { "epoch": 0.43546629300949713, "grad_norm": 2.8753652572631836, "learning_rate": 2.9714182700186835e-06, "loss": 0.7214, "step": 13985 }, { "epoch": 0.43562198349680836, "grad_norm": 1.9515068531036377, "learning_rate": 2.9705988396866503e-06, "loss": 0.7872, "step": 13990 }, { "epoch": 0.4357776739841196, "grad_norm": 2.058389186859131, "learning_rate": 2.9697794093546168e-06, "loss": 0.7516, "step": 13995 }, { "epoch": 0.4359333644714308, "grad_norm": 2.601066827774048, "learning_rate": 2.9689599790225837e-06, "loss": 0.7588, "step": 14000 }, { "epoch": 0.436089054958742, "grad_norm": 2.01212477684021, "learning_rate": 2.9681405486905505e-06, "loss": 0.8446, "step": 14005 }, { "epoch": 0.43624474544605324, "grad_norm": 1.922843337059021, "learning_rate": 2.967321118358518e-06, "loss": 0.7781, "step": 14010 }, { "epoch": 0.4364004359333645, "grad_norm": 2.075157403945923, "learning_rate": 2.9665016880264843e-06, "loss": 0.7706, "step": 14015 }, { "epoch": 0.4365561264206757, "grad_norm": 2.0097265243530273, "learning_rate": 2.965682257694451e-06, "loss": 0.8529, "step": 14020 }, { "epoch": 0.43671181690798694, "grad_norm": 2.134040355682373, "learning_rate": 2.964862827362418e-06, "loss": 0.826, "step": 14025 }, { "epoch": 0.43686750739529817, "grad_norm": 2.1847822666168213, "learning_rate": 2.9640433970303844e-06, "loss": 0.8331, "step": 14030 }, { "epoch": 0.43702319788260935, "grad_norm": 3.3098456859588623, "learning_rate": 2.9632239666983513e-06, "loss": 0.8174, "step": 14035 }, { "epoch": 0.4371788883699206, "grad_norm": 2.4525225162506104, "learning_rate": 2.9624045363663186e-06, "loss": 0.8053, "step": 14040 }, { "epoch": 0.4373345788572318, "grad_norm": 2.022251844406128, "learning_rate": 2.9615851060342855e-06, "loss": 0.8416, "step": 14045 }, { "epoch": 0.43749026934454305, "grad_norm": 1.8964534997940063, "learning_rate": 2.960765675702252e-06, "loss": 0.8672, "step": 14050 }, { "epoch": 0.4376459598318543, "grad_norm": 2.3790624141693115, "learning_rate": 2.959946245370219e-06, "loss": 0.7848, "step": 14055 }, { "epoch": 0.4378016503191655, "grad_norm": 2.2879159450531006, "learning_rate": 2.9591268150381857e-06, "loss": 0.8328, "step": 14060 }, { "epoch": 0.43795734080647675, "grad_norm": 1.7672314643859863, "learning_rate": 2.958307384706153e-06, "loss": 0.7504, "step": 14065 }, { "epoch": 0.4381130312937879, "grad_norm": 1.8800792694091797, "learning_rate": 2.957487954374119e-06, "loss": 0.8031, "step": 14070 }, { "epoch": 0.43826872178109916, "grad_norm": 2.088282585144043, "learning_rate": 2.9566685240420863e-06, "loss": 0.8245, "step": 14075 }, { "epoch": 0.4384244122684104, "grad_norm": 1.951485276222229, "learning_rate": 2.955849093710053e-06, "loss": 0.8433, "step": 14080 }, { "epoch": 0.4385801027557216, "grad_norm": 2.4101462364196777, "learning_rate": 2.95502966337802e-06, "loss": 0.7349, "step": 14085 }, { "epoch": 0.43873579324303286, "grad_norm": 2.2155263423919678, "learning_rate": 2.9542102330459865e-06, "loss": 0.8233, "step": 14090 }, { "epoch": 0.4388914837303441, "grad_norm": 1.6911743879318237, "learning_rate": 2.9533908027139533e-06, "loss": 0.7531, "step": 14095 }, { "epoch": 0.4390471742176553, "grad_norm": 2.4974958896636963, "learning_rate": 2.9525713723819206e-06, "loss": 0.8038, "step": 14100 }, { "epoch": 0.4392028647049665, "grad_norm": 2.5631086826324463, "learning_rate": 2.951751942049887e-06, "loss": 0.7655, "step": 14105 }, { "epoch": 0.43935855519227773, "grad_norm": 2.0136594772338867, "learning_rate": 2.950932511717854e-06, "loss": 0.7196, "step": 14110 }, { "epoch": 0.43951424567958897, "grad_norm": 2.0280532836914062, "learning_rate": 2.950113081385821e-06, "loss": 0.8282, "step": 14115 }, { "epoch": 0.4396699361669002, "grad_norm": 2.151984930038452, "learning_rate": 2.9492936510537877e-06, "loss": 0.8124, "step": 14120 }, { "epoch": 0.43982562665421143, "grad_norm": 2.140225410461426, "learning_rate": 2.948474220721754e-06, "loss": 0.8217, "step": 14125 }, { "epoch": 0.43998131714152267, "grad_norm": 2.072765827178955, "learning_rate": 2.9476547903897214e-06, "loss": 0.8366, "step": 14130 }, { "epoch": 0.4401370076288339, "grad_norm": 2.1958069801330566, "learning_rate": 2.9468353600576883e-06, "loss": 0.7463, "step": 14135 }, { "epoch": 0.4402926981161451, "grad_norm": 2.214543581008911, "learning_rate": 2.946015929725655e-06, "loss": 0.7836, "step": 14140 }, { "epoch": 0.4404483886034563, "grad_norm": 1.8123184442520142, "learning_rate": 2.9451964993936216e-06, "loss": 0.7378, "step": 14145 }, { "epoch": 0.44060407909076754, "grad_norm": 2.3441343307495117, "learning_rate": 2.9443770690615885e-06, "loss": 0.8401, "step": 14150 }, { "epoch": 0.4407597695780788, "grad_norm": 2.035686492919922, "learning_rate": 2.9435576387295558e-06, "loss": 0.7673, "step": 14155 }, { "epoch": 0.44091546006539, "grad_norm": 2.13787841796875, "learning_rate": 2.9427382083975226e-06, "loss": 0.8939, "step": 14160 }, { "epoch": 0.44107115055270124, "grad_norm": 2.252913236618042, "learning_rate": 2.941918778065489e-06, "loss": 0.8164, "step": 14165 }, { "epoch": 0.4412268410400125, "grad_norm": 2.208378553390503, "learning_rate": 2.941099347733456e-06, "loss": 0.7372, "step": 14170 }, { "epoch": 0.4413825315273237, "grad_norm": 1.8633642196655273, "learning_rate": 2.940279917401423e-06, "loss": 0.6961, "step": 14175 }, { "epoch": 0.4415382220146349, "grad_norm": 2.2011070251464844, "learning_rate": 2.9394604870693893e-06, "loss": 0.8115, "step": 14180 }, { "epoch": 0.4416939125019461, "grad_norm": 2.161130428314209, "learning_rate": 2.938641056737356e-06, "loss": 0.8082, "step": 14185 }, { "epoch": 0.44184960298925735, "grad_norm": 2.745062828063965, "learning_rate": 2.9378216264053234e-06, "loss": 0.7646, "step": 14190 }, { "epoch": 0.4420052934765686, "grad_norm": 2.042224407196045, "learning_rate": 2.9370021960732903e-06, "loss": 0.8164, "step": 14195 }, { "epoch": 0.4421609839638798, "grad_norm": 2.1944398880004883, "learning_rate": 2.9361827657412567e-06, "loss": 0.7959, "step": 14200 }, { "epoch": 0.44231667445119105, "grad_norm": 2.0938055515289307, "learning_rate": 2.9353633354092236e-06, "loss": 0.7902, "step": 14205 }, { "epoch": 0.4424723649385023, "grad_norm": 2.202910900115967, "learning_rate": 2.9345439050771905e-06, "loss": 0.8302, "step": 14210 }, { "epoch": 0.44262805542581346, "grad_norm": 2.149322032928467, "learning_rate": 2.9337244747451578e-06, "loss": 0.8465, "step": 14215 }, { "epoch": 0.4427837459131247, "grad_norm": 2.019604444503784, "learning_rate": 2.9329050444131242e-06, "loss": 0.7145, "step": 14220 }, { "epoch": 0.4429394364004359, "grad_norm": 2.065242052078247, "learning_rate": 2.932085614081091e-06, "loss": 0.7863, "step": 14225 }, { "epoch": 0.44309512688774716, "grad_norm": 2.168734312057495, "learning_rate": 2.931266183749058e-06, "loss": 0.8172, "step": 14230 }, { "epoch": 0.4432508173750584, "grad_norm": 1.802476406097412, "learning_rate": 2.9304467534170244e-06, "loss": 0.7576, "step": 14235 }, { "epoch": 0.4434065078623696, "grad_norm": 2.085218667984009, "learning_rate": 2.9296273230849913e-06, "loss": 0.8661, "step": 14240 }, { "epoch": 0.44356219834968086, "grad_norm": 1.8767619132995605, "learning_rate": 2.9288078927529586e-06, "loss": 0.7168, "step": 14245 }, { "epoch": 0.44371788883699204, "grad_norm": 2.118241786956787, "learning_rate": 2.9279884624209254e-06, "loss": 0.7998, "step": 14250 }, { "epoch": 0.44387357932430327, "grad_norm": 2.1018497943878174, "learning_rate": 2.927169032088892e-06, "loss": 0.8587, "step": 14255 }, { "epoch": 0.4440292698116145, "grad_norm": 1.8192695379257202, "learning_rate": 2.9263496017568588e-06, "loss": 0.7357, "step": 14260 }, { "epoch": 0.44418496029892574, "grad_norm": 2.034369707107544, "learning_rate": 2.9255301714248256e-06, "loss": 0.7487, "step": 14265 }, { "epoch": 0.44434065078623697, "grad_norm": 1.9183491468429565, "learning_rate": 2.924710741092793e-06, "loss": 0.822, "step": 14270 }, { "epoch": 0.4444963412735482, "grad_norm": 1.93926203250885, "learning_rate": 2.9238913107607594e-06, "loss": 0.7538, "step": 14275 }, { "epoch": 0.44465203176085943, "grad_norm": 2.134807586669922, "learning_rate": 2.9230718804287262e-06, "loss": 0.8575, "step": 14280 }, { "epoch": 0.4448077222481706, "grad_norm": 2.0214030742645264, "learning_rate": 2.922252450096693e-06, "loss": 0.8162, "step": 14285 }, { "epoch": 0.44496341273548184, "grad_norm": 2.4135780334472656, "learning_rate": 2.92143301976466e-06, "loss": 0.8007, "step": 14290 }, { "epoch": 0.4451191032227931, "grad_norm": 2.049395799636841, "learning_rate": 2.9206135894326264e-06, "loss": 0.8196, "step": 14295 }, { "epoch": 0.4452747937101043, "grad_norm": 2.6675198078155518, "learning_rate": 2.9197941591005937e-06, "loss": 0.835, "step": 14300 }, { "epoch": 0.44543048419741554, "grad_norm": 2.0716326236724854, "learning_rate": 2.9189747287685606e-06, "loss": 0.7673, "step": 14305 }, { "epoch": 0.4455861746847268, "grad_norm": 2.5465967655181885, "learning_rate": 2.918155298436527e-06, "loss": 0.8498, "step": 14310 }, { "epoch": 0.445741865172038, "grad_norm": 2.1335198879241943, "learning_rate": 2.917335868104494e-06, "loss": 0.8145, "step": 14315 }, { "epoch": 0.4458975556593492, "grad_norm": 2.0220046043395996, "learning_rate": 2.9165164377724608e-06, "loss": 0.8254, "step": 14320 }, { "epoch": 0.4460532461466604, "grad_norm": 2.3091135025024414, "learning_rate": 2.9156970074404276e-06, "loss": 0.7777, "step": 14325 }, { "epoch": 0.44620893663397165, "grad_norm": 2.7552311420440674, "learning_rate": 2.914877577108394e-06, "loss": 0.8342, "step": 14330 }, { "epoch": 0.4463646271212829, "grad_norm": 1.7970390319824219, "learning_rate": 2.9140581467763614e-06, "loss": 0.7467, "step": 14335 }, { "epoch": 0.4465203176085941, "grad_norm": 2.4174983501434326, "learning_rate": 2.9132387164443282e-06, "loss": 0.7196, "step": 14340 }, { "epoch": 0.44667600809590535, "grad_norm": 2.0206308364868164, "learning_rate": 2.912419286112295e-06, "loss": 0.769, "step": 14345 }, { "epoch": 0.4468316985832166, "grad_norm": 2.0627617835998535, "learning_rate": 2.9115998557802616e-06, "loss": 0.8075, "step": 14350 }, { "epoch": 0.44698738907052776, "grad_norm": 1.712224006652832, "learning_rate": 2.9107804254482284e-06, "loss": 0.7576, "step": 14355 }, { "epoch": 0.447143079557839, "grad_norm": 2.0881357192993164, "learning_rate": 2.9099609951161957e-06, "loss": 0.7805, "step": 14360 }, { "epoch": 0.44729877004515023, "grad_norm": 2.248185396194458, "learning_rate": 2.9091415647841626e-06, "loss": 0.7757, "step": 14365 }, { "epoch": 0.44745446053246146, "grad_norm": 2.406233310699463, "learning_rate": 2.908322134452129e-06, "loss": 0.7925, "step": 14370 }, { "epoch": 0.4476101510197727, "grad_norm": 1.9517446756362915, "learning_rate": 2.907502704120096e-06, "loss": 0.8341, "step": 14375 }, { "epoch": 0.44776584150708393, "grad_norm": 2.1122069358825684, "learning_rate": 2.9066832737880628e-06, "loss": 0.746, "step": 14380 }, { "epoch": 0.44792153199439516, "grad_norm": 1.9885926246643066, "learning_rate": 2.9058638434560292e-06, "loss": 0.8027, "step": 14385 }, { "epoch": 0.4480772224817064, "grad_norm": 2.0651214122772217, "learning_rate": 2.9050444131239965e-06, "loss": 0.8074, "step": 14390 }, { "epoch": 0.44823291296901757, "grad_norm": 2.106539726257324, "learning_rate": 2.9042249827919634e-06, "loss": 0.8088, "step": 14395 }, { "epoch": 0.4483886034563288, "grad_norm": 1.964404821395874, "learning_rate": 2.9034055524599302e-06, "loss": 0.7244, "step": 14400 }, { "epoch": 0.44854429394364004, "grad_norm": 1.9888672828674316, "learning_rate": 2.9025861221278967e-06, "loss": 0.8549, "step": 14405 }, { "epoch": 0.44869998443095127, "grad_norm": 2.2060492038726807, "learning_rate": 2.9017666917958636e-06, "loss": 0.739, "step": 14410 }, { "epoch": 0.4488556749182625, "grad_norm": 2.0663812160491943, "learning_rate": 2.900947261463831e-06, "loss": 0.7913, "step": 14415 }, { "epoch": 0.44901136540557374, "grad_norm": 2.131798028945923, "learning_rate": 2.9001278311317977e-06, "loss": 0.766, "step": 14420 }, { "epoch": 0.44916705589288497, "grad_norm": 2.2585642337799072, "learning_rate": 2.899308400799764e-06, "loss": 0.7552, "step": 14425 }, { "epoch": 0.44932274638019615, "grad_norm": 2.436196804046631, "learning_rate": 2.898488970467731e-06, "loss": 0.807, "step": 14430 }, { "epoch": 0.4494784368675074, "grad_norm": 1.9008671045303345, "learning_rate": 2.897669540135698e-06, "loss": 0.8265, "step": 14435 }, { "epoch": 0.4496341273548186, "grad_norm": 2.3312885761260986, "learning_rate": 2.8968501098036648e-06, "loss": 0.8346, "step": 14440 }, { "epoch": 0.44978981784212985, "grad_norm": 2.4662280082702637, "learning_rate": 2.8960306794716312e-06, "loss": 0.8369, "step": 14445 }, { "epoch": 0.4499455083294411, "grad_norm": 2.1708765029907227, "learning_rate": 2.8952112491395985e-06, "loss": 0.771, "step": 14450 }, { "epoch": 0.4501011988167523, "grad_norm": 2.431166172027588, "learning_rate": 2.8943918188075654e-06, "loss": 0.747, "step": 14455 }, { "epoch": 0.45025688930406355, "grad_norm": 1.7804001569747925, "learning_rate": 2.893572388475532e-06, "loss": 0.7882, "step": 14460 }, { "epoch": 0.4504125797913747, "grad_norm": 2.245576858520508, "learning_rate": 2.8927529581434987e-06, "loss": 0.7626, "step": 14465 }, { "epoch": 0.45056827027868596, "grad_norm": 2.339508056640625, "learning_rate": 2.8919335278114656e-06, "loss": 0.8008, "step": 14470 }, { "epoch": 0.4507239607659972, "grad_norm": 2.7198848724365234, "learning_rate": 2.891114097479433e-06, "loss": 0.7857, "step": 14475 }, { "epoch": 0.4508796512533084, "grad_norm": 2.08196759223938, "learning_rate": 2.8902946671473993e-06, "loss": 0.7896, "step": 14480 }, { "epoch": 0.45103534174061966, "grad_norm": 1.8896925449371338, "learning_rate": 2.889475236815366e-06, "loss": 0.8443, "step": 14485 }, { "epoch": 0.4511910322279309, "grad_norm": 2.2455334663391113, "learning_rate": 2.888655806483333e-06, "loss": 0.7988, "step": 14490 }, { "epoch": 0.4513467227152421, "grad_norm": 1.943755865097046, "learning_rate": 2.8878363761513e-06, "loss": 0.7467, "step": 14495 }, { "epoch": 0.4515024132025533, "grad_norm": 2.1858274936676025, "learning_rate": 2.8870169458192664e-06, "loss": 0.7869, "step": 14500 }, { "epoch": 0.45165810368986453, "grad_norm": 1.8501629829406738, "learning_rate": 2.8861975154872337e-06, "loss": 0.9065, "step": 14505 }, { "epoch": 0.45181379417717576, "grad_norm": 2.3020639419555664, "learning_rate": 2.8853780851552005e-06, "loss": 0.7493, "step": 14510 }, { "epoch": 0.451969484664487, "grad_norm": 2.179029703140259, "learning_rate": 2.8845586548231674e-06, "loss": 0.8027, "step": 14515 }, { "epoch": 0.45212517515179823, "grad_norm": 2.082723617553711, "learning_rate": 2.883739224491134e-06, "loss": 0.7659, "step": 14520 }, { "epoch": 0.45228086563910946, "grad_norm": 1.9559422731399536, "learning_rate": 2.8829197941591007e-06, "loss": 0.7729, "step": 14525 }, { "epoch": 0.4524365561264207, "grad_norm": 2.5195744037628174, "learning_rate": 2.882100363827068e-06, "loss": 0.7758, "step": 14530 }, { "epoch": 0.4525922466137319, "grad_norm": 2.101911783218384, "learning_rate": 2.881280933495034e-06, "loss": 0.8506, "step": 14535 }, { "epoch": 0.4527479371010431, "grad_norm": 2.776430130004883, "learning_rate": 2.8804615031630013e-06, "loss": 0.7757, "step": 14540 }, { "epoch": 0.45290362758835434, "grad_norm": 2.273723602294922, "learning_rate": 2.879642072830968e-06, "loss": 0.7475, "step": 14545 }, { "epoch": 0.4530593180756656, "grad_norm": 1.969170331954956, "learning_rate": 2.878822642498935e-06, "loss": 0.7259, "step": 14550 }, { "epoch": 0.4532150085629768, "grad_norm": 1.6651759147644043, "learning_rate": 2.8780032121669015e-06, "loss": 0.7683, "step": 14555 }, { "epoch": 0.45337069905028804, "grad_norm": 1.942535638809204, "learning_rate": 2.8771837818348684e-06, "loss": 0.7847, "step": 14560 }, { "epoch": 0.4535263895375993, "grad_norm": 1.8949469327926636, "learning_rate": 2.8763643515028357e-06, "loss": 0.8379, "step": 14565 }, { "epoch": 0.4536820800249105, "grad_norm": 2.229186773300171, "learning_rate": 2.8755449211708025e-06, "loss": 0.8651, "step": 14570 }, { "epoch": 0.4538377705122217, "grad_norm": 2.18025803565979, "learning_rate": 2.874725490838769e-06, "loss": 0.7691, "step": 14575 }, { "epoch": 0.4539934609995329, "grad_norm": 2.185898780822754, "learning_rate": 2.873906060506736e-06, "loss": 0.8723, "step": 14580 }, { "epoch": 0.45414915148684415, "grad_norm": 2.7986273765563965, "learning_rate": 2.8730866301747027e-06, "loss": 0.7817, "step": 14585 }, { "epoch": 0.4543048419741554, "grad_norm": 2.2412965297698975, "learning_rate": 2.87226719984267e-06, "loss": 0.808, "step": 14590 }, { "epoch": 0.4544605324614666, "grad_norm": 2.3582959175109863, "learning_rate": 2.8714477695106365e-06, "loss": 0.8097, "step": 14595 }, { "epoch": 0.45461622294877785, "grad_norm": 1.7612723112106323, "learning_rate": 2.8706283391786033e-06, "loss": 0.7923, "step": 14600 }, { "epoch": 0.4547719134360891, "grad_norm": 1.8625714778900146, "learning_rate": 2.86980890884657e-06, "loss": 0.8603, "step": 14605 }, { "epoch": 0.45492760392340026, "grad_norm": 1.7890281677246094, "learning_rate": 2.8689894785145367e-06, "loss": 0.7064, "step": 14610 }, { "epoch": 0.4550832944107115, "grad_norm": 2.0055036544799805, "learning_rate": 2.8681700481825035e-06, "loss": 0.7711, "step": 14615 }, { "epoch": 0.4552389848980227, "grad_norm": 2.016078472137451, "learning_rate": 2.867350617850471e-06, "loss": 0.7971, "step": 14620 }, { "epoch": 0.45539467538533396, "grad_norm": 2.1466832160949707, "learning_rate": 2.8665311875184377e-06, "loss": 0.7669, "step": 14625 }, { "epoch": 0.4555503658726452, "grad_norm": 2.2300548553466797, "learning_rate": 2.865711757186404e-06, "loss": 0.7885, "step": 14630 }, { "epoch": 0.4557060563599564, "grad_norm": 2.062824249267578, "learning_rate": 2.864892326854371e-06, "loss": 0.8199, "step": 14635 }, { "epoch": 0.45586174684726766, "grad_norm": 1.8957847356796265, "learning_rate": 2.864072896522338e-06, "loss": 0.7782, "step": 14640 }, { "epoch": 0.45601743733457883, "grad_norm": 2.461468458175659, "learning_rate": 2.863253466190305e-06, "loss": 0.7401, "step": 14645 }, { "epoch": 0.45617312782189007, "grad_norm": 1.9924417734146118, "learning_rate": 2.8624340358582716e-06, "loss": 0.8365, "step": 14650 }, { "epoch": 0.4563288183092013, "grad_norm": 2.120123863220215, "learning_rate": 2.8616146055262385e-06, "loss": 0.7554, "step": 14655 }, { "epoch": 0.45648450879651253, "grad_norm": 2.6818530559539795, "learning_rate": 2.8607951751942053e-06, "loss": 0.79, "step": 14660 }, { "epoch": 0.45664019928382377, "grad_norm": 2.178976058959961, "learning_rate": 2.859975744862172e-06, "loss": 0.7862, "step": 14665 }, { "epoch": 0.456795889771135, "grad_norm": 2.0930142402648926, "learning_rate": 2.8591563145301387e-06, "loss": 0.7491, "step": 14670 }, { "epoch": 0.45695158025844623, "grad_norm": 2.4351553916931152, "learning_rate": 2.8583368841981055e-06, "loss": 0.7742, "step": 14675 }, { "epoch": 0.4571072707457574, "grad_norm": 2.3581981658935547, "learning_rate": 2.857517453866073e-06, "loss": 0.8173, "step": 14680 }, { "epoch": 0.45726296123306864, "grad_norm": 2.032527208328247, "learning_rate": 2.8566980235340393e-06, "loss": 0.7749, "step": 14685 }, { "epoch": 0.4574186517203799, "grad_norm": 1.9000686407089233, "learning_rate": 2.855878593202006e-06, "loss": 0.8544, "step": 14690 }, { "epoch": 0.4575743422076911, "grad_norm": 1.8642325401306152, "learning_rate": 2.855059162869973e-06, "loss": 0.7998, "step": 14695 }, { "epoch": 0.45773003269500234, "grad_norm": 2.520719289779663, "learning_rate": 2.85423973253794e-06, "loss": 0.7715, "step": 14700 }, { "epoch": 0.4578857231823136, "grad_norm": 2.109828472137451, "learning_rate": 2.8534203022059063e-06, "loss": 0.8081, "step": 14705 }, { "epoch": 0.4580414136696248, "grad_norm": 2.12058162689209, "learning_rate": 2.8526008718738736e-06, "loss": 0.7504, "step": 14710 }, { "epoch": 0.458197104156936, "grad_norm": 2.13230037689209, "learning_rate": 2.8517814415418405e-06, "loss": 0.8292, "step": 14715 }, { "epoch": 0.4583527946442472, "grad_norm": 2.2190582752227783, "learning_rate": 2.8509620112098074e-06, "loss": 0.7634, "step": 14720 }, { "epoch": 0.45850848513155845, "grad_norm": 2.3159680366516113, "learning_rate": 2.850142580877774e-06, "loss": 0.7668, "step": 14725 }, { "epoch": 0.4586641756188697, "grad_norm": 2.017124652862549, "learning_rate": 2.8493231505457407e-06, "loss": 0.8179, "step": 14730 }, { "epoch": 0.4588198661061809, "grad_norm": 2.385345935821533, "learning_rate": 2.848503720213708e-06, "loss": 0.7842, "step": 14735 }, { "epoch": 0.45897555659349215, "grad_norm": 2.0806970596313477, "learning_rate": 2.8476842898816744e-06, "loss": 0.7736, "step": 14740 }, { "epoch": 0.4591312470808034, "grad_norm": 2.3275797367095947, "learning_rate": 2.8468648595496413e-06, "loss": 0.7985, "step": 14745 }, { "epoch": 0.45928693756811456, "grad_norm": 2.0540406703948975, "learning_rate": 2.846045429217608e-06, "loss": 0.8099, "step": 14750 }, { "epoch": 0.4594426280554258, "grad_norm": 2.062317371368408, "learning_rate": 2.845225998885575e-06, "loss": 0.8402, "step": 14755 }, { "epoch": 0.459598318542737, "grad_norm": 1.767553687095642, "learning_rate": 2.8444065685535415e-06, "loss": 0.7852, "step": 14760 }, { "epoch": 0.45975400903004826, "grad_norm": 1.954114556312561, "learning_rate": 2.8435871382215088e-06, "loss": 0.7822, "step": 14765 }, { "epoch": 0.4599096995173595, "grad_norm": 1.9248762130737305, "learning_rate": 2.8427677078894756e-06, "loss": 0.6908, "step": 14770 }, { "epoch": 0.4600653900046707, "grad_norm": 2.043982744216919, "learning_rate": 2.8419482775574425e-06, "loss": 0.7402, "step": 14775 }, { "epoch": 0.46022108049198196, "grad_norm": 1.7637909650802612, "learning_rate": 2.841128847225409e-06, "loss": 0.8363, "step": 14780 }, { "epoch": 0.4603767709792932, "grad_norm": 2.008272647857666, "learning_rate": 2.840309416893376e-06, "loss": 0.8248, "step": 14785 }, { "epoch": 0.46053246146660437, "grad_norm": 2.3480329513549805, "learning_rate": 2.8394899865613427e-06, "loss": 0.8078, "step": 14790 }, { "epoch": 0.4606881519539156, "grad_norm": 2.0355567932128906, "learning_rate": 2.83867055622931e-06, "loss": 0.8103, "step": 14795 }, { "epoch": 0.46084384244122684, "grad_norm": 1.8321492671966553, "learning_rate": 2.8378511258972764e-06, "loss": 0.8148, "step": 14800 }, { "epoch": 0.46099953292853807, "grad_norm": 2.1927003860473633, "learning_rate": 2.8370316955652433e-06, "loss": 0.759, "step": 14805 }, { "epoch": 0.4611552234158493, "grad_norm": 1.9473897218704224, "learning_rate": 2.83621226523321e-06, "loss": 0.7403, "step": 14810 }, { "epoch": 0.46131091390316054, "grad_norm": 1.8924212455749512, "learning_rate": 2.8353928349011766e-06, "loss": 0.8077, "step": 14815 }, { "epoch": 0.46146660439047177, "grad_norm": 2.1941325664520264, "learning_rate": 2.8345734045691435e-06, "loss": 0.8179, "step": 14820 }, { "epoch": 0.46162229487778295, "grad_norm": 2.8278555870056152, "learning_rate": 2.8337539742371108e-06, "loss": 0.8442, "step": 14825 }, { "epoch": 0.4617779853650942, "grad_norm": 2.2350165843963623, "learning_rate": 2.8329345439050776e-06, "loss": 0.7641, "step": 14830 }, { "epoch": 0.4619336758524054, "grad_norm": 1.9542553424835205, "learning_rate": 2.832115113573044e-06, "loss": 0.7756, "step": 14835 }, { "epoch": 0.46208936633971665, "grad_norm": 2.2883872985839844, "learning_rate": 2.831295683241011e-06, "loss": 0.7696, "step": 14840 }, { "epoch": 0.4622450568270279, "grad_norm": 2.6031508445739746, "learning_rate": 2.830476252908978e-06, "loss": 0.8459, "step": 14845 }, { "epoch": 0.4624007473143391, "grad_norm": 2.176377058029175, "learning_rate": 2.829656822576945e-06, "loss": 0.7542, "step": 14850 }, { "epoch": 0.46255643780165034, "grad_norm": 1.9947136640548706, "learning_rate": 2.8288373922449116e-06, "loss": 0.8032, "step": 14855 }, { "epoch": 0.4627121282889615, "grad_norm": 2.426485538482666, "learning_rate": 2.8280179619128784e-06, "loss": 0.8, "step": 14860 }, { "epoch": 0.46286781877627275, "grad_norm": 1.9874123334884644, "learning_rate": 2.8271985315808453e-06, "loss": 0.8245, "step": 14865 }, { "epoch": 0.463023509263584, "grad_norm": 3.825559616088867, "learning_rate": 2.826379101248812e-06, "loss": 0.7718, "step": 14870 }, { "epoch": 0.4631791997508952, "grad_norm": 2.0918331146240234, "learning_rate": 2.8255596709167786e-06, "loss": 0.7863, "step": 14875 }, { "epoch": 0.46333489023820645, "grad_norm": 2.4257280826568604, "learning_rate": 2.824740240584746e-06, "loss": 0.7446, "step": 14880 }, { "epoch": 0.4634905807255177, "grad_norm": 2.075526475906372, "learning_rate": 2.8239208102527128e-06, "loss": 0.8185, "step": 14885 }, { "epoch": 0.4636462712128289, "grad_norm": 2.6332814693450928, "learning_rate": 2.8231013799206792e-06, "loss": 0.8554, "step": 14890 }, { "epoch": 0.4638019617001401, "grad_norm": 2.260009288787842, "learning_rate": 2.822281949588646e-06, "loss": 0.8158, "step": 14895 }, { "epoch": 0.46395765218745133, "grad_norm": 2.380868434906006, "learning_rate": 2.821462519256613e-06, "loss": 0.7311, "step": 14900 }, { "epoch": 0.46411334267476256, "grad_norm": 2.4222683906555176, "learning_rate": 2.8206430889245803e-06, "loss": 0.7942, "step": 14905 }, { "epoch": 0.4642690331620738, "grad_norm": 2.5594515800476074, "learning_rate": 2.8198236585925463e-06, "loss": 0.81, "step": 14910 }, { "epoch": 0.46442472364938503, "grad_norm": 2.1153271198272705, "learning_rate": 2.8190042282605136e-06, "loss": 0.794, "step": 14915 }, { "epoch": 0.46458041413669626, "grad_norm": 1.9373985528945923, "learning_rate": 2.8181847979284804e-06, "loss": 0.8607, "step": 14920 }, { "epoch": 0.4647361046240075, "grad_norm": 1.9149552583694458, "learning_rate": 2.8173653675964473e-06, "loss": 0.7591, "step": 14925 }, { "epoch": 0.4648917951113187, "grad_norm": 2.277618408203125, "learning_rate": 2.8165459372644138e-06, "loss": 0.7864, "step": 14930 }, { "epoch": 0.4650474855986299, "grad_norm": 2.159595251083374, "learning_rate": 2.8157265069323806e-06, "loss": 0.7709, "step": 14935 }, { "epoch": 0.46520317608594114, "grad_norm": 2.1388471126556396, "learning_rate": 2.814907076600348e-06, "loss": 0.7772, "step": 14940 }, { "epoch": 0.46535886657325237, "grad_norm": 2.183729410171509, "learning_rate": 2.814087646268315e-06, "loss": 0.8303, "step": 14945 }, { "epoch": 0.4655145570605636, "grad_norm": 3.1695234775543213, "learning_rate": 2.8132682159362812e-06, "loss": 0.7693, "step": 14950 }, { "epoch": 0.46567024754787484, "grad_norm": 2.134094715118408, "learning_rate": 2.812448785604248e-06, "loss": 0.7814, "step": 14955 }, { "epoch": 0.46582593803518607, "grad_norm": 2.0301311016082764, "learning_rate": 2.811629355272215e-06, "loss": 0.9019, "step": 14960 }, { "epoch": 0.46598162852249725, "grad_norm": 2.545562982559204, "learning_rate": 2.8108099249401814e-06, "loss": 0.7813, "step": 14965 }, { "epoch": 0.4661373190098085, "grad_norm": 1.8586012125015259, "learning_rate": 2.8099904946081487e-06, "loss": 0.7754, "step": 14970 }, { "epoch": 0.4662930094971197, "grad_norm": 1.943566918373108, "learning_rate": 2.8091710642761156e-06, "loss": 0.8621, "step": 14975 }, { "epoch": 0.46644869998443095, "grad_norm": 2.358293294906616, "learning_rate": 2.8083516339440825e-06, "loss": 0.8642, "step": 14980 }, { "epoch": 0.4666043904717422, "grad_norm": 2.1425185203552246, "learning_rate": 2.807532203612049e-06, "loss": 0.8539, "step": 14985 }, { "epoch": 0.4667600809590534, "grad_norm": 2.0584943294525146, "learning_rate": 2.8067127732800158e-06, "loss": 0.7754, "step": 14990 }, { "epoch": 0.46691577144636465, "grad_norm": 2.1697521209716797, "learning_rate": 2.805893342947983e-06, "loss": 0.8141, "step": 14995 }, { "epoch": 0.4670714619336759, "grad_norm": 2.476196527481079, "learning_rate": 2.80507391261595e-06, "loss": 0.7438, "step": 15000 }, { "epoch": 0.46722715242098706, "grad_norm": 2.094558000564575, "learning_rate": 2.8042544822839164e-06, "loss": 0.825, "step": 15005 }, { "epoch": 0.4673828429082983, "grad_norm": 2.184968948364258, "learning_rate": 2.8034350519518832e-06, "loss": 0.7996, "step": 15010 }, { "epoch": 0.4675385333956095, "grad_norm": 1.9113894701004028, "learning_rate": 2.80261562161985e-06, "loss": 0.7476, "step": 15015 }, { "epoch": 0.46769422388292076, "grad_norm": 2.157996892929077, "learning_rate": 2.8017961912878174e-06, "loss": 0.7484, "step": 15020 }, { "epoch": 0.467849914370232, "grad_norm": 2.0339009761810303, "learning_rate": 2.8009767609557834e-06, "loss": 0.7626, "step": 15025 }, { "epoch": 0.4680056048575432, "grad_norm": 2.572075128555298, "learning_rate": 2.8001573306237507e-06, "loss": 0.7961, "step": 15030 }, { "epoch": 0.46816129534485446, "grad_norm": 2.343118190765381, "learning_rate": 2.7993379002917176e-06, "loss": 0.8211, "step": 15035 }, { "epoch": 0.46831698583216563, "grad_norm": 1.9392625093460083, "learning_rate": 2.798518469959684e-06, "loss": 0.864, "step": 15040 }, { "epoch": 0.46847267631947687, "grad_norm": 2.133180618286133, "learning_rate": 2.797699039627651e-06, "loss": 0.7773, "step": 15045 }, { "epoch": 0.4686283668067881, "grad_norm": 2.336014747619629, "learning_rate": 2.7968796092956178e-06, "loss": 0.8451, "step": 15050 }, { "epoch": 0.46878405729409933, "grad_norm": 2.168135643005371, "learning_rate": 2.796060178963585e-06, "loss": 0.7414, "step": 15055 }, { "epoch": 0.46893974778141057, "grad_norm": 2.1849348545074463, "learning_rate": 2.7952407486315515e-06, "loss": 0.7839, "step": 15060 }, { "epoch": 0.4690954382687218, "grad_norm": 2.022008180618286, "learning_rate": 2.7944213182995184e-06, "loss": 0.7339, "step": 15065 }, { "epoch": 0.46925112875603303, "grad_norm": 2.6496872901916504, "learning_rate": 2.7936018879674853e-06, "loss": 0.856, "step": 15070 }, { "epoch": 0.4694068192433442, "grad_norm": 2.354628801345825, "learning_rate": 2.792782457635452e-06, "loss": 0.7654, "step": 15075 }, { "epoch": 0.46956250973065544, "grad_norm": 1.8699716329574585, "learning_rate": 2.7919630273034186e-06, "loss": 0.7829, "step": 15080 }, { "epoch": 0.4697182002179667, "grad_norm": 2.038498878479004, "learning_rate": 2.791143596971386e-06, "loss": 0.7623, "step": 15085 }, { "epoch": 0.4698738907052779, "grad_norm": 1.8996630907058716, "learning_rate": 2.7903241666393527e-06, "loss": 0.8017, "step": 15090 }, { "epoch": 0.47002958119258914, "grad_norm": 2.2713630199432373, "learning_rate": 2.789504736307319e-06, "loss": 0.716, "step": 15095 }, { "epoch": 0.4701852716799004, "grad_norm": 1.9100912809371948, "learning_rate": 2.788685305975286e-06, "loss": 0.8282, "step": 15100 }, { "epoch": 0.4703409621672116, "grad_norm": 2.554546594619751, "learning_rate": 2.787865875643253e-06, "loss": 0.762, "step": 15105 }, { "epoch": 0.4704966526545228, "grad_norm": 1.794365406036377, "learning_rate": 2.7870464453112202e-06, "loss": 0.7945, "step": 15110 }, { "epoch": 0.470652343141834, "grad_norm": 2.0410921573638916, "learning_rate": 2.7862270149791867e-06, "loss": 0.7795, "step": 15115 }, { "epoch": 0.47080803362914525, "grad_norm": 2.3079640865325928, "learning_rate": 2.7854075846471535e-06, "loss": 0.7872, "step": 15120 }, { "epoch": 0.4709637241164565, "grad_norm": 2.3519108295440674, "learning_rate": 2.7845881543151204e-06, "loss": 0.7856, "step": 15125 }, { "epoch": 0.4711194146037677, "grad_norm": 2.3856184482574463, "learning_rate": 2.7837687239830873e-06, "loss": 0.835, "step": 15130 }, { "epoch": 0.47127510509107895, "grad_norm": 2.078765630722046, "learning_rate": 2.7829492936510537e-06, "loss": 0.8188, "step": 15135 }, { "epoch": 0.4714307955783902, "grad_norm": 1.9041680097579956, "learning_rate": 2.7821298633190206e-06, "loss": 0.8014, "step": 15140 }, { "epoch": 0.47158648606570136, "grad_norm": 2.1106960773468018, "learning_rate": 2.781310432986988e-06, "loss": 0.8027, "step": 15145 }, { "epoch": 0.4717421765530126, "grad_norm": 2.406804084777832, "learning_rate": 2.7804910026549547e-06, "loss": 0.7718, "step": 15150 }, { "epoch": 0.4718978670403238, "grad_norm": 2.462951421737671, "learning_rate": 2.779671572322921e-06, "loss": 0.7866, "step": 15155 }, { "epoch": 0.47205355752763506, "grad_norm": 2.494061231613159, "learning_rate": 2.778852141990888e-06, "loss": 0.8929, "step": 15160 }, { "epoch": 0.4722092480149463, "grad_norm": 2.044463872909546, "learning_rate": 2.778032711658855e-06, "loss": 0.8077, "step": 15165 }, { "epoch": 0.4723649385022575, "grad_norm": 2.451615810394287, "learning_rate": 2.7772132813268214e-06, "loss": 0.8009, "step": 15170 }, { "epoch": 0.47252062898956876, "grad_norm": 2.7416765689849854, "learning_rate": 2.7763938509947887e-06, "loss": 0.7995, "step": 15175 }, { "epoch": 0.47267631947687994, "grad_norm": 2.1526992321014404, "learning_rate": 2.7755744206627555e-06, "loss": 0.7503, "step": 15180 }, { "epoch": 0.47283200996419117, "grad_norm": 2.0625152587890625, "learning_rate": 2.7747549903307224e-06, "loss": 0.8048, "step": 15185 }, { "epoch": 0.4729877004515024, "grad_norm": 2.1308138370513916, "learning_rate": 2.773935559998689e-06, "loss": 0.7377, "step": 15190 }, { "epoch": 0.47314339093881363, "grad_norm": 1.9721055030822754, "learning_rate": 2.7731161296666557e-06, "loss": 0.7456, "step": 15195 }, { "epoch": 0.47329908142612487, "grad_norm": 1.9547721147537231, "learning_rate": 2.772296699334623e-06, "loss": 0.8358, "step": 15200 }, { "epoch": 0.4734547719134361, "grad_norm": 2.1042022705078125, "learning_rate": 2.77147726900259e-06, "loss": 0.7903, "step": 15205 }, { "epoch": 0.47361046240074733, "grad_norm": 2.144186019897461, "learning_rate": 2.7706578386705563e-06, "loss": 0.7853, "step": 15210 }, { "epoch": 0.47376615288805857, "grad_norm": 2.256258010864258, "learning_rate": 2.769838408338523e-06, "loss": 0.8123, "step": 15215 }, { "epoch": 0.47392184337536974, "grad_norm": 2.7542691230773926, "learning_rate": 2.76901897800649e-06, "loss": 0.8308, "step": 15220 }, { "epoch": 0.474077533862681, "grad_norm": 2.6401429176330566, "learning_rate": 2.7681995476744574e-06, "loss": 0.8172, "step": 15225 }, { "epoch": 0.4742332243499922, "grad_norm": 2.1597542762756348, "learning_rate": 2.767380117342424e-06, "loss": 0.8189, "step": 15230 }, { "epoch": 0.47438891483730344, "grad_norm": 2.1108806133270264, "learning_rate": 2.7665606870103907e-06, "loss": 0.7022, "step": 15235 }, { "epoch": 0.4745446053246147, "grad_norm": 2.131598472595215, "learning_rate": 2.7657412566783576e-06, "loss": 0.7748, "step": 15240 }, { "epoch": 0.4747002958119259, "grad_norm": 2.4257256984710693, "learning_rate": 2.764921826346324e-06, "loss": 0.8693, "step": 15245 }, { "epoch": 0.47485598629923714, "grad_norm": 2.263580322265625, "learning_rate": 2.764102396014291e-06, "loss": 0.7651, "step": 15250 }, { "epoch": 0.4750116767865483, "grad_norm": 1.7097724676132202, "learning_rate": 2.763282965682258e-06, "loss": 0.8162, "step": 15255 }, { "epoch": 0.47516736727385955, "grad_norm": 2.325366735458374, "learning_rate": 2.762463535350225e-06, "loss": 0.8538, "step": 15260 }, { "epoch": 0.4753230577611708, "grad_norm": 2.708568572998047, "learning_rate": 2.7616441050181915e-06, "loss": 0.863, "step": 15265 }, { "epoch": 0.475478748248482, "grad_norm": 2.148256778717041, "learning_rate": 2.7608246746861583e-06, "loss": 0.8157, "step": 15270 }, { "epoch": 0.47563443873579325, "grad_norm": 1.9404646158218384, "learning_rate": 2.7600052443541252e-06, "loss": 0.7753, "step": 15275 }, { "epoch": 0.4757901292231045, "grad_norm": 1.6779192686080933, "learning_rate": 2.759185814022092e-06, "loss": 0.7798, "step": 15280 }, { "epoch": 0.4759458197104157, "grad_norm": 2.3484489917755127, "learning_rate": 2.7583663836900585e-06, "loss": 0.7881, "step": 15285 }, { "epoch": 0.4761015101977269, "grad_norm": 2.254242181777954, "learning_rate": 2.757546953358026e-06, "loss": 0.7906, "step": 15290 }, { "epoch": 0.47625720068503813, "grad_norm": 2.296776056289673, "learning_rate": 2.7567275230259927e-06, "loss": 0.7066, "step": 15295 }, { "epoch": 0.47641289117234936, "grad_norm": 1.8387680053710938, "learning_rate": 2.7559080926939596e-06, "loss": 0.7977, "step": 15300 }, { "epoch": 0.4765685816596606, "grad_norm": 2.199002265930176, "learning_rate": 2.755088662361926e-06, "loss": 0.8414, "step": 15305 }, { "epoch": 0.47672427214697183, "grad_norm": 2.308215856552124, "learning_rate": 2.754269232029893e-06, "loss": 0.794, "step": 15310 }, { "epoch": 0.47687996263428306, "grad_norm": 2.3216450214385986, "learning_rate": 2.75344980169786e-06, "loss": 0.8355, "step": 15315 }, { "epoch": 0.4770356531215943, "grad_norm": 1.8205324411392212, "learning_rate": 2.7526303713658266e-06, "loss": 0.8193, "step": 15320 }, { "epoch": 0.47719134360890547, "grad_norm": 2.275029420852661, "learning_rate": 2.7518109410337935e-06, "loss": 0.7545, "step": 15325 }, { "epoch": 0.4773470340962167, "grad_norm": 2.3057940006256104, "learning_rate": 2.7509915107017604e-06, "loss": 0.79, "step": 15330 }, { "epoch": 0.47750272458352794, "grad_norm": 1.8163328170776367, "learning_rate": 2.7501720803697272e-06, "loss": 0.8167, "step": 15335 }, { "epoch": 0.47765841507083917, "grad_norm": 2.0611073970794678, "learning_rate": 2.7493526500376937e-06, "loss": 0.7454, "step": 15340 }, { "epoch": 0.4778141055581504, "grad_norm": 2.1000821590423584, "learning_rate": 2.748533219705661e-06, "loss": 0.7654, "step": 15345 }, { "epoch": 0.47796979604546164, "grad_norm": 2.222506284713745, "learning_rate": 2.747713789373628e-06, "loss": 0.862, "step": 15350 }, { "epoch": 0.47812548653277287, "grad_norm": 2.0327813625335693, "learning_rate": 2.7468943590415947e-06, "loss": 0.9397, "step": 15355 }, { "epoch": 0.47828117702008405, "grad_norm": 2.3580474853515625, "learning_rate": 2.746074928709561e-06, "loss": 0.7193, "step": 15360 }, { "epoch": 0.4784368675073953, "grad_norm": 2.15504789352417, "learning_rate": 2.745255498377528e-06, "loss": 0.7413, "step": 15365 }, { "epoch": 0.4785925579947065, "grad_norm": 2.512669563293457, "learning_rate": 2.7444360680454953e-06, "loss": 0.7907, "step": 15370 }, { "epoch": 0.47874824848201775, "grad_norm": 2.4806740283966064, "learning_rate": 2.743616637713462e-06, "loss": 0.8016, "step": 15375 }, { "epoch": 0.478903938969329, "grad_norm": 1.9517390727996826, "learning_rate": 2.7427972073814286e-06, "loss": 0.8185, "step": 15380 }, { "epoch": 0.4790596294566402, "grad_norm": 1.974878191947937, "learning_rate": 2.7419777770493955e-06, "loss": 0.8296, "step": 15385 }, { "epoch": 0.47921531994395145, "grad_norm": 2.835355043411255, "learning_rate": 2.7411583467173624e-06, "loss": 0.7605, "step": 15390 }, { "epoch": 0.4793710104312626, "grad_norm": 1.930693507194519, "learning_rate": 2.740338916385329e-06, "loss": 0.7635, "step": 15395 }, { "epoch": 0.47952670091857386, "grad_norm": 1.9030969142913818, "learning_rate": 2.7395194860532957e-06, "loss": 0.7566, "step": 15400 }, { "epoch": 0.4796823914058851, "grad_norm": 1.9704782962799072, "learning_rate": 2.738700055721263e-06, "loss": 0.7289, "step": 15405 }, { "epoch": 0.4798380818931963, "grad_norm": 1.88360595703125, "learning_rate": 2.73788062538923e-06, "loss": 0.7433, "step": 15410 }, { "epoch": 0.47999377238050756, "grad_norm": 1.919818639755249, "learning_rate": 2.7370611950571963e-06, "loss": 0.867, "step": 15415 }, { "epoch": 0.4801494628678188, "grad_norm": 3.337660074234009, "learning_rate": 2.736241764725163e-06, "loss": 0.8387, "step": 15420 }, { "epoch": 0.48030515335513, "grad_norm": 2.2537636756896973, "learning_rate": 2.73542233439313e-06, "loss": 0.8151, "step": 15425 }, { "epoch": 0.48046084384244125, "grad_norm": 1.984165906906128, "learning_rate": 2.7346029040610973e-06, "loss": 0.7861, "step": 15430 }, { "epoch": 0.48061653432975243, "grad_norm": 2.0279104709625244, "learning_rate": 2.7337834737290638e-06, "loss": 0.7643, "step": 15435 }, { "epoch": 0.48077222481706366, "grad_norm": 2.04630970954895, "learning_rate": 2.7329640433970306e-06, "loss": 0.7919, "step": 15440 }, { "epoch": 0.4809279153043749, "grad_norm": 2.250441551208496, "learning_rate": 2.7321446130649975e-06, "loss": 0.8238, "step": 15445 }, { "epoch": 0.48108360579168613, "grad_norm": 2.251389503479004, "learning_rate": 2.7313251827329644e-06, "loss": 0.7478, "step": 15450 }, { "epoch": 0.48123929627899736, "grad_norm": 2.404102325439453, "learning_rate": 2.730505752400931e-06, "loss": 0.7435, "step": 15455 }, { "epoch": 0.4813949867663086, "grad_norm": 2.069581985473633, "learning_rate": 2.729686322068898e-06, "loss": 0.7929, "step": 15460 }, { "epoch": 0.48155067725361983, "grad_norm": 2.307274341583252, "learning_rate": 2.728866891736865e-06, "loss": 0.8192, "step": 15465 }, { "epoch": 0.481706367740931, "grad_norm": 2.351080894470215, "learning_rate": 2.7280474614048314e-06, "loss": 0.7505, "step": 15470 }, { "epoch": 0.48186205822824224, "grad_norm": 2.07269024848938, "learning_rate": 2.7272280310727983e-06, "loss": 0.754, "step": 15475 }, { "epoch": 0.4820177487155535, "grad_norm": 2.0086615085601807, "learning_rate": 2.726408600740765e-06, "loss": 0.7873, "step": 15480 }, { "epoch": 0.4821734392028647, "grad_norm": 2.32079815864563, "learning_rate": 2.7255891704087325e-06, "loss": 0.7801, "step": 15485 }, { "epoch": 0.48232912969017594, "grad_norm": 2.019852638244629, "learning_rate": 2.724769740076699e-06, "loss": 0.7557, "step": 15490 }, { "epoch": 0.4824848201774872, "grad_norm": 2.0329506397247314, "learning_rate": 2.7239503097446658e-06, "loss": 0.8011, "step": 15495 }, { "epoch": 0.4826405106647984, "grad_norm": 2.964344024658203, "learning_rate": 2.7231308794126326e-06, "loss": 0.7616, "step": 15500 }, { "epoch": 0.4827962011521096, "grad_norm": 2.643594264984131, "learning_rate": 2.7223114490805995e-06, "loss": 0.8124, "step": 15505 }, { "epoch": 0.4829518916394208, "grad_norm": 1.9905946254730225, "learning_rate": 2.721492018748566e-06, "loss": 0.7385, "step": 15510 }, { "epoch": 0.48310758212673205, "grad_norm": 2.484571695327759, "learning_rate": 2.720672588416533e-06, "loss": 0.7718, "step": 15515 }, { "epoch": 0.4832632726140433, "grad_norm": 2.204389810562134, "learning_rate": 2.7198531580845e-06, "loss": 0.7363, "step": 15520 }, { "epoch": 0.4834189631013545, "grad_norm": 2.0090763568878174, "learning_rate": 2.7190337277524666e-06, "loss": 0.7347, "step": 15525 }, { "epoch": 0.48357465358866575, "grad_norm": 2.1975855827331543, "learning_rate": 2.7182142974204334e-06, "loss": 0.761, "step": 15530 }, { "epoch": 0.483730344075977, "grad_norm": 2.243488073348999, "learning_rate": 2.7173948670884003e-06, "loss": 0.832, "step": 15535 }, { "epoch": 0.48388603456328816, "grad_norm": 2.698720693588257, "learning_rate": 2.716575436756367e-06, "loss": 0.8391, "step": 15540 }, { "epoch": 0.4840417250505994, "grad_norm": 2.1802492141723633, "learning_rate": 2.7157560064243336e-06, "loss": 0.7788, "step": 15545 }, { "epoch": 0.4841974155379106, "grad_norm": 2.0664589405059814, "learning_rate": 2.714936576092301e-06, "loss": 0.762, "step": 15550 }, { "epoch": 0.48435310602522186, "grad_norm": 2.017120599746704, "learning_rate": 2.714117145760268e-06, "loss": 0.7218, "step": 15555 }, { "epoch": 0.4845087965125331, "grad_norm": 2.054292917251587, "learning_rate": 2.7132977154282347e-06, "loss": 0.7963, "step": 15560 }, { "epoch": 0.4846644869998443, "grad_norm": 2.2741031646728516, "learning_rate": 2.712478285096201e-06, "loss": 0.8563, "step": 15565 }, { "epoch": 0.48482017748715556, "grad_norm": 1.832482933998108, "learning_rate": 2.711658854764168e-06, "loss": 0.7686, "step": 15570 }, { "epoch": 0.48497586797446673, "grad_norm": 2.2079501152038574, "learning_rate": 2.7108394244321353e-06, "loss": 0.7814, "step": 15575 }, { "epoch": 0.48513155846177797, "grad_norm": 2.113436222076416, "learning_rate": 2.710019994100102e-06, "loss": 0.806, "step": 15580 }, { "epoch": 0.4852872489490892, "grad_norm": 2.4244749546051025, "learning_rate": 2.7092005637680686e-06, "loss": 0.7947, "step": 15585 }, { "epoch": 0.48544293943640043, "grad_norm": 2.0786335468292236, "learning_rate": 2.7083811334360355e-06, "loss": 0.7776, "step": 15590 }, { "epoch": 0.48559862992371167, "grad_norm": 2.031247854232788, "learning_rate": 2.7075617031040023e-06, "loss": 0.8133, "step": 15595 }, { "epoch": 0.4857543204110229, "grad_norm": 2.14579701423645, "learning_rate": 2.7067422727719688e-06, "loss": 0.7781, "step": 15600 }, { "epoch": 0.48591001089833413, "grad_norm": 2.2534236907958984, "learning_rate": 2.705922842439936e-06, "loss": 0.8065, "step": 15605 }, { "epoch": 0.4860657013856453, "grad_norm": 2.1920034885406494, "learning_rate": 2.705103412107903e-06, "loss": 0.8383, "step": 15610 }, { "epoch": 0.48622139187295654, "grad_norm": 2.1581711769104004, "learning_rate": 2.70428398177587e-06, "loss": 0.7943, "step": 15615 }, { "epoch": 0.4863770823602678, "grad_norm": 2.1577367782592773, "learning_rate": 2.7034645514438362e-06, "loss": 0.8594, "step": 15620 }, { "epoch": 0.486532772847579, "grad_norm": 2.2963218688964844, "learning_rate": 2.702645121111803e-06, "loss": 0.705, "step": 15625 }, { "epoch": 0.48668846333489024, "grad_norm": 2.455808162689209, "learning_rate": 2.70182569077977e-06, "loss": 0.8402, "step": 15630 }, { "epoch": 0.4868441538222015, "grad_norm": 2.4058573246002197, "learning_rate": 2.7010062604477373e-06, "loss": 0.8888, "step": 15635 }, { "epoch": 0.4869998443095127, "grad_norm": 1.9664491415023804, "learning_rate": 2.7001868301157037e-06, "loss": 0.7834, "step": 15640 }, { "epoch": 0.48715553479682394, "grad_norm": 2.7463221549987793, "learning_rate": 2.6993673997836706e-06, "loss": 0.8782, "step": 15645 }, { "epoch": 0.4873112252841351, "grad_norm": 2.492769479751587, "learning_rate": 2.6985479694516375e-06, "loss": 0.8, "step": 15650 }, { "epoch": 0.48746691577144635, "grad_norm": 2.4242660999298096, "learning_rate": 2.6977285391196043e-06, "loss": 0.8491, "step": 15655 }, { "epoch": 0.4876226062587576, "grad_norm": 1.7897075414657593, "learning_rate": 2.6969091087875708e-06, "loss": 0.8327, "step": 15660 }, { "epoch": 0.4877782967460688, "grad_norm": 2.1000006198883057, "learning_rate": 2.696089678455538e-06, "loss": 0.7788, "step": 15665 }, { "epoch": 0.48793398723338005, "grad_norm": 2.193622350692749, "learning_rate": 2.695270248123505e-06, "loss": 0.803, "step": 15670 }, { "epoch": 0.4880896777206913, "grad_norm": 2.1149110794067383, "learning_rate": 2.6944508177914714e-06, "loss": 0.7854, "step": 15675 }, { "epoch": 0.4882453682080025, "grad_norm": 2.0854790210723877, "learning_rate": 2.6936313874594383e-06, "loss": 0.8073, "step": 15680 }, { "epoch": 0.4884010586953137, "grad_norm": 2.154684066772461, "learning_rate": 2.692811957127405e-06, "loss": 0.7779, "step": 15685 }, { "epoch": 0.4885567491826249, "grad_norm": 2.602816104888916, "learning_rate": 2.6919925267953724e-06, "loss": 0.8169, "step": 15690 }, { "epoch": 0.48871243966993616, "grad_norm": 2.314378023147583, "learning_rate": 2.691173096463339e-06, "loss": 0.7467, "step": 15695 }, { "epoch": 0.4888681301572474, "grad_norm": 2.028359889984131, "learning_rate": 2.6903536661313057e-06, "loss": 0.8214, "step": 15700 }, { "epoch": 0.4890238206445586, "grad_norm": 2.3706352710723877, "learning_rate": 2.6895342357992726e-06, "loss": 0.8271, "step": 15705 }, { "epoch": 0.48917951113186986, "grad_norm": 2.146854877471924, "learning_rate": 2.6887148054672395e-06, "loss": 0.7289, "step": 15710 }, { "epoch": 0.4893352016191811, "grad_norm": 2.5107200145721436, "learning_rate": 2.687895375135206e-06, "loss": 0.864, "step": 15715 }, { "epoch": 0.48949089210649227, "grad_norm": 3.1229872703552246, "learning_rate": 2.6870759448031732e-06, "loss": 0.8179, "step": 15720 }, { "epoch": 0.4896465825938035, "grad_norm": 2.113809823989868, "learning_rate": 2.68625651447114e-06, "loss": 0.8361, "step": 15725 }, { "epoch": 0.48980227308111474, "grad_norm": 2.3430137634277344, "learning_rate": 2.685437084139107e-06, "loss": 0.7996, "step": 15730 }, { "epoch": 0.48995796356842597, "grad_norm": 2.286999464035034, "learning_rate": 2.6846176538070734e-06, "loss": 0.7561, "step": 15735 }, { "epoch": 0.4901136540557372, "grad_norm": 1.9535222053527832, "learning_rate": 2.6837982234750403e-06, "loss": 0.8112, "step": 15740 }, { "epoch": 0.49026934454304844, "grad_norm": 2.246321201324463, "learning_rate": 2.6829787931430076e-06, "loss": 0.7497, "step": 15745 }, { "epoch": 0.49042503503035967, "grad_norm": 2.09155011177063, "learning_rate": 2.6821593628109736e-06, "loss": 0.7535, "step": 15750 }, { "epoch": 0.49058072551767085, "grad_norm": 2.264463186264038, "learning_rate": 2.681339932478941e-06, "loss": 0.8471, "step": 15755 }, { "epoch": 0.4907364160049821, "grad_norm": 2.341024398803711, "learning_rate": 2.6805205021469077e-06, "loss": 0.8376, "step": 15760 }, { "epoch": 0.4908921064922933, "grad_norm": 1.9494167566299438, "learning_rate": 2.6797010718148746e-06, "loss": 0.8124, "step": 15765 }, { "epoch": 0.49104779697960454, "grad_norm": 2.1364123821258545, "learning_rate": 2.678881641482841e-06, "loss": 0.8039, "step": 15770 }, { "epoch": 0.4912034874669158, "grad_norm": 2.1285743713378906, "learning_rate": 2.678062211150808e-06, "loss": 0.857, "step": 15775 }, { "epoch": 0.491359177954227, "grad_norm": 2.1546056270599365, "learning_rate": 2.6772427808187752e-06, "loss": 0.8789, "step": 15780 }, { "epoch": 0.49151486844153824, "grad_norm": 2.931929111480713, "learning_rate": 2.676423350486742e-06, "loss": 0.7605, "step": 15785 }, { "epoch": 0.4916705589288494, "grad_norm": 2.339850664138794, "learning_rate": 2.6756039201547085e-06, "loss": 0.7285, "step": 15790 }, { "epoch": 0.49182624941616065, "grad_norm": 1.7098225355148315, "learning_rate": 2.6747844898226754e-06, "loss": 0.7638, "step": 15795 }, { "epoch": 0.4919819399034719, "grad_norm": 2.505789041519165, "learning_rate": 2.6739650594906423e-06, "loss": 0.8021, "step": 15800 }, { "epoch": 0.4921376303907831, "grad_norm": 2.01015305519104, "learning_rate": 2.6731456291586096e-06, "loss": 0.7976, "step": 15805 }, { "epoch": 0.49229332087809435, "grad_norm": 2.2943074703216553, "learning_rate": 2.672326198826576e-06, "loss": 0.7699, "step": 15810 }, { "epoch": 0.4924490113654056, "grad_norm": 2.0511691570281982, "learning_rate": 2.671506768494543e-06, "loss": 0.7728, "step": 15815 }, { "epoch": 0.4926047018527168, "grad_norm": 1.976211667060852, "learning_rate": 2.6706873381625098e-06, "loss": 0.8021, "step": 15820 }, { "epoch": 0.492760392340028, "grad_norm": 2.0315654277801514, "learning_rate": 2.669867907830476e-06, "loss": 0.734, "step": 15825 }, { "epoch": 0.49291608282733923, "grad_norm": 2.3114473819732666, "learning_rate": 2.669048477498443e-06, "loss": 0.7761, "step": 15830 }, { "epoch": 0.49307177331465046, "grad_norm": 1.92875075340271, "learning_rate": 2.6682290471664104e-06, "loss": 0.7667, "step": 15835 }, { "epoch": 0.4932274638019617, "grad_norm": 1.9903706312179565, "learning_rate": 2.6674096168343772e-06, "loss": 0.7656, "step": 15840 }, { "epoch": 0.49338315428927293, "grad_norm": 2.067753314971924, "learning_rate": 2.6665901865023437e-06, "loss": 0.8122, "step": 15845 }, { "epoch": 0.49353884477658416, "grad_norm": 2.7358458042144775, "learning_rate": 2.6657707561703105e-06, "loss": 0.757, "step": 15850 }, { "epoch": 0.4936945352638954, "grad_norm": 1.9306544065475464, "learning_rate": 2.6649513258382774e-06, "loss": 0.6974, "step": 15855 }, { "epoch": 0.49385022575120663, "grad_norm": 2.28680157661438, "learning_rate": 2.6641318955062447e-06, "loss": 0.8661, "step": 15860 }, { "epoch": 0.4940059162385178, "grad_norm": 2.4561195373535156, "learning_rate": 2.6633124651742107e-06, "loss": 0.729, "step": 15865 }, { "epoch": 0.49416160672582904, "grad_norm": 1.8034789562225342, "learning_rate": 2.662493034842178e-06, "loss": 0.7105, "step": 15870 }, { "epoch": 0.49431729721314027, "grad_norm": 2.060248851776123, "learning_rate": 2.661673604510145e-06, "loss": 0.7468, "step": 15875 }, { "epoch": 0.4944729877004515, "grad_norm": 2.3315603733062744, "learning_rate": 2.6608541741781118e-06, "loss": 0.8318, "step": 15880 }, { "epoch": 0.49462867818776274, "grad_norm": 2.196178913116455, "learning_rate": 2.6600347438460782e-06, "loss": 0.7812, "step": 15885 }, { "epoch": 0.49478436867507397, "grad_norm": 2.0519139766693115, "learning_rate": 2.659215313514045e-06, "loss": 0.7935, "step": 15890 }, { "epoch": 0.4949400591623852, "grad_norm": 2.116476535797119, "learning_rate": 2.6583958831820124e-06, "loss": 0.8814, "step": 15895 }, { "epoch": 0.4950957496496964, "grad_norm": 2.1091721057891846, "learning_rate": 2.657576452849979e-06, "loss": 0.7593, "step": 15900 }, { "epoch": 0.4952514401370076, "grad_norm": 2.6487836837768555, "learning_rate": 2.6567570225179457e-06, "loss": 0.8165, "step": 15905 }, { "epoch": 0.49540713062431885, "grad_norm": 2.132014751434326, "learning_rate": 2.6559375921859126e-06, "loss": 0.8075, "step": 15910 }, { "epoch": 0.4955628211116301, "grad_norm": 2.3267276287078857, "learning_rate": 2.6551181618538794e-06, "loss": 0.7397, "step": 15915 }, { "epoch": 0.4957185115989413, "grad_norm": 2.2625482082366943, "learning_rate": 2.654298731521846e-06, "loss": 0.8026, "step": 15920 }, { "epoch": 0.49587420208625255, "grad_norm": 1.891472578048706, "learning_rate": 2.653479301189813e-06, "loss": 0.7882, "step": 15925 }, { "epoch": 0.4960298925735638, "grad_norm": 1.8535560369491577, "learning_rate": 2.65265987085778e-06, "loss": 0.7875, "step": 15930 }, { "epoch": 0.49618558306087496, "grad_norm": 2.5490996837615967, "learning_rate": 2.651840440525747e-06, "loss": 0.8148, "step": 15935 }, { "epoch": 0.4963412735481862, "grad_norm": 2.103217840194702, "learning_rate": 2.6510210101937134e-06, "loss": 0.8067, "step": 15940 }, { "epoch": 0.4964969640354974, "grad_norm": 2.2225961685180664, "learning_rate": 2.6502015798616802e-06, "loss": 0.6882, "step": 15945 }, { "epoch": 0.49665265452280866, "grad_norm": 2.105349063873291, "learning_rate": 2.6493821495296475e-06, "loss": 0.7077, "step": 15950 }, { "epoch": 0.4968083450101199, "grad_norm": 2.6425509452819824, "learning_rate": 2.648562719197614e-06, "loss": 0.7992, "step": 15955 }, { "epoch": 0.4969640354974311, "grad_norm": 2.6842379570007324, "learning_rate": 2.647743288865581e-06, "loss": 0.8184, "step": 15960 }, { "epoch": 0.49711972598474236, "grad_norm": 2.0148448944091797, "learning_rate": 2.6469238585335477e-06, "loss": 0.7654, "step": 15965 }, { "epoch": 0.49727541647205353, "grad_norm": 2.1614787578582764, "learning_rate": 2.6461044282015146e-06, "loss": 0.7765, "step": 15970 }, { "epoch": 0.49743110695936477, "grad_norm": 2.2981414794921875, "learning_rate": 2.645284997869481e-06, "loss": 0.7578, "step": 15975 }, { "epoch": 0.497586797446676, "grad_norm": 2.0757405757904053, "learning_rate": 2.644465567537448e-06, "loss": 0.8133, "step": 15980 }, { "epoch": 0.49774248793398723, "grad_norm": 2.0270543098449707, "learning_rate": 2.643646137205415e-06, "loss": 0.7712, "step": 15985 }, { "epoch": 0.49789817842129847, "grad_norm": 2.4046847820281982, "learning_rate": 2.642826706873382e-06, "loss": 0.6943, "step": 15990 }, { "epoch": 0.4980538689086097, "grad_norm": 2.128756284713745, "learning_rate": 2.6420072765413485e-06, "loss": 0.7439, "step": 15995 }, { "epoch": 0.49820955939592093, "grad_norm": 2.7966887950897217, "learning_rate": 2.6411878462093154e-06, "loss": 0.7839, "step": 16000 }, { "epoch": 0.4983652498832321, "grad_norm": 2.1728575229644775, "learning_rate": 2.6403684158772822e-06, "loss": 0.8181, "step": 16005 }, { "epoch": 0.49852094037054334, "grad_norm": 2.1277432441711426, "learning_rate": 2.6395489855452495e-06, "loss": 0.7526, "step": 16010 }, { "epoch": 0.4986766308578546, "grad_norm": 2.416593313217163, "learning_rate": 2.638729555213216e-06, "loss": 0.785, "step": 16015 }, { "epoch": 0.4988323213451658, "grad_norm": 2.1967034339904785, "learning_rate": 2.637910124881183e-06, "loss": 0.8067, "step": 16020 }, { "epoch": 0.49898801183247704, "grad_norm": 2.3003385066986084, "learning_rate": 2.6370906945491497e-06, "loss": 0.8399, "step": 16025 }, { "epoch": 0.4991437023197883, "grad_norm": 2.0713789463043213, "learning_rate": 2.636271264217116e-06, "loss": 0.7985, "step": 16030 }, { "epoch": 0.4992993928070995, "grad_norm": 2.2643911838531494, "learning_rate": 2.635451833885083e-06, "loss": 0.7388, "step": 16035 }, { "epoch": 0.4994550832944107, "grad_norm": 2.0404393672943115, "learning_rate": 2.6346324035530503e-06, "loss": 0.802, "step": 16040 }, { "epoch": 0.4996107737817219, "grad_norm": 1.9348660707473755, "learning_rate": 2.633812973221017e-06, "loss": 0.7544, "step": 16045 }, { "epoch": 0.49976646426903315, "grad_norm": 1.9695121049880981, "learning_rate": 2.6329935428889836e-06, "loss": 0.7471, "step": 16050 }, { "epoch": 0.4999221547563444, "grad_norm": 1.848836064338684, "learning_rate": 2.6321741125569505e-06, "loss": 0.8171, "step": 16055 }, { "epoch": 0.5000778452436556, "grad_norm": 2.277841567993164, "learning_rate": 2.6313546822249174e-06, "loss": 0.8218, "step": 16060 }, { "epoch": 0.5002335357309668, "grad_norm": 2.8052916526794434, "learning_rate": 2.6305352518928847e-06, "loss": 0.8362, "step": 16065 }, { "epoch": 0.5003892262182781, "grad_norm": 2.4020955562591553, "learning_rate": 2.629715821560851e-06, "loss": 0.7179, "step": 16070 }, { "epoch": 0.5005449167055893, "grad_norm": 1.7330559492111206, "learning_rate": 2.628896391228818e-06, "loss": 0.7724, "step": 16075 }, { "epoch": 0.5007006071929005, "grad_norm": 2.110334634780884, "learning_rate": 2.628076960896785e-06, "loss": 0.8011, "step": 16080 }, { "epoch": 0.5008562976802118, "grad_norm": 2.0951974391937256, "learning_rate": 2.6272575305647517e-06, "loss": 0.7373, "step": 16085 }, { "epoch": 0.501011988167523, "grad_norm": 2.0380473136901855, "learning_rate": 2.626438100232718e-06, "loss": 0.7503, "step": 16090 }, { "epoch": 0.5011676786548341, "grad_norm": 2.3321926593780518, "learning_rate": 2.6256186699006855e-06, "loss": 0.6935, "step": 16095 }, { "epoch": 0.5013233691421454, "grad_norm": 2.029120683670044, "learning_rate": 2.6247992395686523e-06, "loss": 0.7149, "step": 16100 }, { "epoch": 0.5014790596294566, "grad_norm": 2.4118828773498535, "learning_rate": 2.6239798092366188e-06, "loss": 0.7957, "step": 16105 }, { "epoch": 0.5016347501167678, "grad_norm": 1.989721417427063, "learning_rate": 2.6231603789045856e-06, "loss": 0.8266, "step": 16110 }, { "epoch": 0.5017904406040791, "grad_norm": 2.110856056213379, "learning_rate": 2.6223409485725525e-06, "loss": 0.7859, "step": 16115 }, { "epoch": 0.5019461310913903, "grad_norm": 2.084652900695801, "learning_rate": 2.6215215182405194e-06, "loss": 0.7914, "step": 16120 }, { "epoch": 0.5021018215787015, "grad_norm": 2.3821122646331787, "learning_rate": 2.620702087908486e-06, "loss": 0.8345, "step": 16125 }, { "epoch": 0.5022575120660128, "grad_norm": 2.419938087463379, "learning_rate": 2.619882657576453e-06, "loss": 0.7752, "step": 16130 }, { "epoch": 0.502413202553324, "grad_norm": 1.8362089395523071, "learning_rate": 2.61906322724442e-06, "loss": 0.7979, "step": 16135 }, { "epoch": 0.5025688930406352, "grad_norm": 2.455915689468384, "learning_rate": 2.618243796912387e-06, "loss": 0.7652, "step": 16140 }, { "epoch": 0.5027245835279465, "grad_norm": 2.1292407512664795, "learning_rate": 2.6174243665803533e-06, "loss": 0.7849, "step": 16145 }, { "epoch": 0.5028802740152577, "grad_norm": 2.4319334030151367, "learning_rate": 2.61660493624832e-06, "loss": 0.8352, "step": 16150 }, { "epoch": 0.5030359645025689, "grad_norm": 2.298145055770874, "learning_rate": 2.6157855059162875e-06, "loss": 0.8303, "step": 16155 }, { "epoch": 0.5031916549898802, "grad_norm": 1.7136452198028564, "learning_rate": 2.6149660755842543e-06, "loss": 0.8107, "step": 16160 }, { "epoch": 0.5033473454771914, "grad_norm": 1.921036958694458, "learning_rate": 2.6141466452522208e-06, "loss": 0.7545, "step": 16165 }, { "epoch": 0.5035030359645025, "grad_norm": 2.1562557220458984, "learning_rate": 2.6133272149201877e-06, "loss": 0.7379, "step": 16170 }, { "epoch": 0.5036587264518138, "grad_norm": 2.516854763031006, "learning_rate": 2.6125077845881545e-06, "loss": 0.7418, "step": 16175 }, { "epoch": 0.503814416939125, "grad_norm": 2.067450523376465, "learning_rate": 2.611688354256121e-06, "loss": 0.8364, "step": 16180 }, { "epoch": 0.5039701074264362, "grad_norm": 2.0176563262939453, "learning_rate": 2.6108689239240883e-06, "loss": 0.8481, "step": 16185 }, { "epoch": 0.5041257979137475, "grad_norm": 2.3608992099761963, "learning_rate": 2.610049493592055e-06, "loss": 0.814, "step": 16190 }, { "epoch": 0.5042814884010587, "grad_norm": 2.1662065982818604, "learning_rate": 2.609230063260022e-06, "loss": 0.794, "step": 16195 }, { "epoch": 0.5044371788883699, "grad_norm": 2.4039504528045654, "learning_rate": 2.6084106329279885e-06, "loss": 0.7698, "step": 16200 }, { "epoch": 0.5045928693756812, "grad_norm": 2.942456007003784, "learning_rate": 2.6075912025959553e-06, "loss": 0.7319, "step": 16205 }, { "epoch": 0.5047485598629924, "grad_norm": 2.5124852657318115, "learning_rate": 2.6067717722639226e-06, "loss": 0.7374, "step": 16210 }, { "epoch": 0.5049042503503036, "grad_norm": 1.6765838861465454, "learning_rate": 2.6059523419318895e-06, "loss": 0.78, "step": 16215 }, { "epoch": 0.5050599408376149, "grad_norm": 1.9009848833084106, "learning_rate": 2.605132911599856e-06, "loss": 0.7438, "step": 16220 }, { "epoch": 0.5052156313249261, "grad_norm": 2.4126477241516113, "learning_rate": 2.604313481267823e-06, "loss": 0.8396, "step": 16225 }, { "epoch": 0.5053713218122373, "grad_norm": 2.672879695892334, "learning_rate": 2.6034940509357897e-06, "loss": 0.774, "step": 16230 }, { "epoch": 0.5055270122995486, "grad_norm": 1.9289636611938477, "learning_rate": 2.602674620603757e-06, "loss": 0.8685, "step": 16235 }, { "epoch": 0.5056827027868597, "grad_norm": 2.3302719593048096, "learning_rate": 2.601855190271723e-06, "loss": 0.7928, "step": 16240 }, { "epoch": 0.5058383932741709, "grad_norm": 2.2489540576934814, "learning_rate": 2.6010357599396903e-06, "loss": 0.7619, "step": 16245 }, { "epoch": 0.5059940837614821, "grad_norm": 2.311786413192749, "learning_rate": 2.600216329607657e-06, "loss": 0.7419, "step": 16250 }, { "epoch": 0.5061497742487934, "grad_norm": 2.5677671432495117, "learning_rate": 2.5993968992756236e-06, "loss": 0.818, "step": 16255 }, { "epoch": 0.5063054647361046, "grad_norm": 2.5951414108276367, "learning_rate": 2.5985774689435905e-06, "loss": 0.7748, "step": 16260 }, { "epoch": 0.5064611552234158, "grad_norm": 2.072031021118164, "learning_rate": 2.5977580386115573e-06, "loss": 0.8781, "step": 16265 }, { "epoch": 0.5066168457107271, "grad_norm": 1.9736557006835938, "learning_rate": 2.5969386082795246e-06, "loss": 0.7909, "step": 16270 }, { "epoch": 0.5067725361980383, "grad_norm": 1.9800684452056885, "learning_rate": 2.596119177947491e-06, "loss": 0.7516, "step": 16275 }, { "epoch": 0.5069282266853495, "grad_norm": 2.110379457473755, "learning_rate": 2.595299747615458e-06, "loss": 0.8551, "step": 16280 }, { "epoch": 0.5070839171726608, "grad_norm": 2.33089017868042, "learning_rate": 2.594480317283425e-06, "loss": 0.8069, "step": 16285 }, { "epoch": 0.507239607659972, "grad_norm": 1.8508813381195068, "learning_rate": 2.5936608869513917e-06, "loss": 0.8379, "step": 16290 }, { "epoch": 0.5073952981472832, "grad_norm": 1.8212255239486694, "learning_rate": 2.592841456619358e-06, "loss": 0.9044, "step": 16295 }, { "epoch": 0.5075509886345945, "grad_norm": 2.4645771980285645, "learning_rate": 2.5920220262873254e-06, "loss": 0.8318, "step": 16300 }, { "epoch": 0.5077066791219057, "grad_norm": 1.9457443952560425, "learning_rate": 2.5912025959552923e-06, "loss": 0.7746, "step": 16305 }, { "epoch": 0.5078623696092168, "grad_norm": 2.013441324234009, "learning_rate": 2.590383165623259e-06, "loss": 0.8175, "step": 16310 }, { "epoch": 0.5080180600965281, "grad_norm": 2.4700675010681152, "learning_rate": 2.5895637352912256e-06, "loss": 0.7738, "step": 16315 }, { "epoch": 0.5081737505838393, "grad_norm": 2.369208335876465, "learning_rate": 2.5887443049591925e-06, "loss": 0.8005, "step": 16320 }, { "epoch": 0.5083294410711505, "grad_norm": 1.9191855192184448, "learning_rate": 2.5879248746271598e-06, "loss": 0.7952, "step": 16325 }, { "epoch": 0.5084851315584618, "grad_norm": 1.9269551038742065, "learning_rate": 2.587105444295126e-06, "loss": 0.7956, "step": 16330 }, { "epoch": 0.508640822045773, "grad_norm": 2.325718402862549, "learning_rate": 2.586286013963093e-06, "loss": 0.7027, "step": 16335 }, { "epoch": 0.5087965125330842, "grad_norm": 2.530777931213379, "learning_rate": 2.58546658363106e-06, "loss": 0.7417, "step": 16340 }, { "epoch": 0.5089522030203955, "grad_norm": 2.2627670764923096, "learning_rate": 2.584647153299027e-06, "loss": 0.7491, "step": 16345 }, { "epoch": 0.5091078935077067, "grad_norm": 2.243849754333496, "learning_rate": 2.5838277229669933e-06, "loss": 0.7846, "step": 16350 }, { "epoch": 0.5092635839950179, "grad_norm": 1.9242048263549805, "learning_rate": 2.58300829263496e-06, "loss": 0.8251, "step": 16355 }, { "epoch": 0.5094192744823292, "grad_norm": 2.0374035835266113, "learning_rate": 2.5821888623029274e-06, "loss": 0.7654, "step": 16360 }, { "epoch": 0.5095749649696404, "grad_norm": 1.946533203125, "learning_rate": 2.5813694319708943e-06, "loss": 0.7433, "step": 16365 }, { "epoch": 0.5097306554569516, "grad_norm": 1.978732943534851, "learning_rate": 2.5805500016388607e-06, "loss": 0.6929, "step": 16370 }, { "epoch": 0.5098863459442629, "grad_norm": 1.8573046922683716, "learning_rate": 2.5797305713068276e-06, "loss": 0.7671, "step": 16375 }, { "epoch": 0.5100420364315741, "grad_norm": 1.9758877754211426, "learning_rate": 2.5789111409747945e-06, "loss": 0.7291, "step": 16380 }, { "epoch": 0.5101977269188852, "grad_norm": 1.8239091634750366, "learning_rate": 2.578091710642761e-06, "loss": 0.7995, "step": 16385 }, { "epoch": 0.5103534174061964, "grad_norm": 2.200080394744873, "learning_rate": 2.5772722803107282e-06, "loss": 0.7975, "step": 16390 }, { "epoch": 0.5105091078935077, "grad_norm": 2.0911309719085693, "learning_rate": 2.576452849978695e-06, "loss": 0.7533, "step": 16395 }, { "epoch": 0.5106647983808189, "grad_norm": 2.1646370887756348, "learning_rate": 2.575633419646662e-06, "loss": 0.8545, "step": 16400 }, { "epoch": 0.5108204888681301, "grad_norm": 2.0881288051605225, "learning_rate": 2.5748139893146284e-06, "loss": 0.8189, "step": 16405 }, { "epoch": 0.5109761793554414, "grad_norm": 1.8041021823883057, "learning_rate": 2.5739945589825953e-06, "loss": 0.698, "step": 16410 }, { "epoch": 0.5111318698427526, "grad_norm": 2.4759256839752197, "learning_rate": 2.5731751286505626e-06, "loss": 0.802, "step": 16415 }, { "epoch": 0.5112875603300638, "grad_norm": 2.0649943351745605, "learning_rate": 2.5723556983185294e-06, "loss": 0.7613, "step": 16420 }, { "epoch": 0.5114432508173751, "grad_norm": 2.4389848709106445, "learning_rate": 2.571536267986496e-06, "loss": 0.7716, "step": 16425 }, { "epoch": 0.5115989413046863, "grad_norm": 2.725058078765869, "learning_rate": 2.5707168376544628e-06, "loss": 0.7446, "step": 16430 }, { "epoch": 0.5117546317919975, "grad_norm": 2.4945480823516846, "learning_rate": 2.5698974073224296e-06, "loss": 0.6809, "step": 16435 }, { "epoch": 0.5119103222793088, "grad_norm": 1.9102966785430908, "learning_rate": 2.569077976990397e-06, "loss": 0.727, "step": 16440 }, { "epoch": 0.51206601276662, "grad_norm": 2.618016481399536, "learning_rate": 2.5682585466583634e-06, "loss": 0.7732, "step": 16445 }, { "epoch": 0.5122217032539312, "grad_norm": 1.8309634923934937, "learning_rate": 2.5674391163263302e-06, "loss": 0.794, "step": 16450 }, { "epoch": 0.5123773937412424, "grad_norm": 2.113051652908325, "learning_rate": 2.566619685994297e-06, "loss": 0.7841, "step": 16455 }, { "epoch": 0.5125330842285536, "grad_norm": 1.8729382753372192, "learning_rate": 2.5658002556622635e-06, "loss": 0.7814, "step": 16460 }, { "epoch": 0.5126887747158648, "grad_norm": 2.2339775562286377, "learning_rate": 2.5649808253302304e-06, "loss": 0.7719, "step": 16465 }, { "epoch": 0.5128444652031761, "grad_norm": 3.1834161281585693, "learning_rate": 2.5641613949981973e-06, "loss": 0.7681, "step": 16470 }, { "epoch": 0.5130001556904873, "grad_norm": 2.0509042739868164, "learning_rate": 2.5633419646661646e-06, "loss": 0.8411, "step": 16475 }, { "epoch": 0.5131558461777985, "grad_norm": 2.236114025115967, "learning_rate": 2.562522534334131e-06, "loss": 0.7488, "step": 16480 }, { "epoch": 0.5133115366651098, "grad_norm": 1.878620982170105, "learning_rate": 2.561703104002098e-06, "loss": 0.7225, "step": 16485 }, { "epoch": 0.513467227152421, "grad_norm": 1.856181263923645, "learning_rate": 2.5608836736700648e-06, "loss": 0.7639, "step": 16490 }, { "epoch": 0.5136229176397322, "grad_norm": 2.236206531524658, "learning_rate": 2.5600642433380316e-06, "loss": 0.7981, "step": 16495 }, { "epoch": 0.5137786081270435, "grad_norm": 2.096116065979004, "learning_rate": 2.559244813005998e-06, "loss": 0.8966, "step": 16500 }, { "epoch": 0.5139342986143547, "grad_norm": 1.9920424222946167, "learning_rate": 2.5584253826739654e-06, "loss": 0.7649, "step": 16505 }, { "epoch": 0.5140899891016659, "grad_norm": 1.9830145835876465, "learning_rate": 2.5576059523419322e-06, "loss": 0.7851, "step": 16510 }, { "epoch": 0.5142456795889772, "grad_norm": 2.0180859565734863, "learning_rate": 2.556786522009899e-06, "loss": 0.7996, "step": 16515 }, { "epoch": 0.5144013700762884, "grad_norm": 1.8464301824569702, "learning_rate": 2.5559670916778656e-06, "loss": 0.824, "step": 16520 }, { "epoch": 0.5145570605635995, "grad_norm": 2.144656181335449, "learning_rate": 2.5551476613458324e-06, "loss": 0.721, "step": 16525 }, { "epoch": 0.5147127510509107, "grad_norm": 2.1253180503845215, "learning_rate": 2.5543282310137997e-06, "loss": 0.7543, "step": 16530 }, { "epoch": 0.514868441538222, "grad_norm": 2.383434295654297, "learning_rate": 2.553508800681766e-06, "loss": 0.7832, "step": 16535 }, { "epoch": 0.5150241320255332, "grad_norm": 1.8511799573898315, "learning_rate": 2.552689370349733e-06, "loss": 0.7815, "step": 16540 }, { "epoch": 0.5151798225128444, "grad_norm": 1.9662878513336182, "learning_rate": 2.5518699400177e-06, "loss": 0.8029, "step": 16545 }, { "epoch": 0.5153355130001557, "grad_norm": 1.9468923807144165, "learning_rate": 2.5510505096856668e-06, "loss": 0.7603, "step": 16550 }, { "epoch": 0.5154912034874669, "grad_norm": 2.0719821453094482, "learning_rate": 2.5502310793536332e-06, "loss": 0.7798, "step": 16555 }, { "epoch": 0.5156468939747781, "grad_norm": 2.037076234817505, "learning_rate": 2.5494116490216005e-06, "loss": 0.7472, "step": 16560 }, { "epoch": 0.5158025844620894, "grad_norm": 2.1831717491149902, "learning_rate": 2.5485922186895674e-06, "loss": 0.7669, "step": 16565 }, { "epoch": 0.5159582749494006, "grad_norm": 2.5067853927612305, "learning_rate": 2.5477727883575343e-06, "loss": 0.8453, "step": 16570 }, { "epoch": 0.5161139654367118, "grad_norm": 1.8741081953048706, "learning_rate": 2.5469533580255007e-06, "loss": 0.7559, "step": 16575 }, { "epoch": 0.5162696559240231, "grad_norm": 2.1676197052001953, "learning_rate": 2.5461339276934676e-06, "loss": 0.7708, "step": 16580 }, { "epoch": 0.5164253464113343, "grad_norm": 1.8868952989578247, "learning_rate": 2.545314497361435e-06, "loss": 0.7189, "step": 16585 }, { "epoch": 0.5165810368986455, "grad_norm": 3.495553970336914, "learning_rate": 2.5444950670294017e-06, "loss": 0.7776, "step": 16590 }, { "epoch": 0.5167367273859568, "grad_norm": 2.0724992752075195, "learning_rate": 2.543675636697368e-06, "loss": 0.8127, "step": 16595 }, { "epoch": 0.5168924178732679, "grad_norm": 2.0141959190368652, "learning_rate": 2.542856206365335e-06, "loss": 0.7477, "step": 16600 }, { "epoch": 0.5170481083605791, "grad_norm": 2.180086851119995, "learning_rate": 2.542036776033302e-06, "loss": 0.7809, "step": 16605 }, { "epoch": 0.5172037988478904, "grad_norm": 1.8168797492980957, "learning_rate": 2.5412173457012684e-06, "loss": 0.7948, "step": 16610 }, { "epoch": 0.5173594893352016, "grad_norm": 1.900174856185913, "learning_rate": 2.5403979153692352e-06, "loss": 0.749, "step": 16615 }, { "epoch": 0.5175151798225128, "grad_norm": 2.9132838249206543, "learning_rate": 2.5395784850372025e-06, "loss": 0.8579, "step": 16620 }, { "epoch": 0.5176708703098241, "grad_norm": 2.1430368423461914, "learning_rate": 2.5387590547051694e-06, "loss": 0.7521, "step": 16625 }, { "epoch": 0.5178265607971353, "grad_norm": 1.8762887716293335, "learning_rate": 2.537939624373136e-06, "loss": 0.8133, "step": 16630 }, { "epoch": 0.5179822512844465, "grad_norm": 2.1714634895324707, "learning_rate": 2.5371201940411027e-06, "loss": 0.7828, "step": 16635 }, { "epoch": 0.5181379417717578, "grad_norm": 2.075443744659424, "learning_rate": 2.5363007637090696e-06, "loss": 0.7296, "step": 16640 }, { "epoch": 0.518293632259069, "grad_norm": 2.559262275695801, "learning_rate": 2.535481333377037e-06, "loss": 0.813, "step": 16645 }, { "epoch": 0.5184493227463802, "grad_norm": 1.9728927612304688, "learning_rate": 2.5346619030450033e-06, "loss": 0.8116, "step": 16650 }, { "epoch": 0.5186050132336915, "grad_norm": 2.1803648471832275, "learning_rate": 2.53384247271297e-06, "loss": 0.7477, "step": 16655 }, { "epoch": 0.5187607037210027, "grad_norm": 2.00338077545166, "learning_rate": 2.533023042380937e-06, "loss": 0.7449, "step": 16660 }, { "epoch": 0.5189163942083139, "grad_norm": 2.093920946121216, "learning_rate": 2.532203612048904e-06, "loss": 0.7184, "step": 16665 }, { "epoch": 0.519072084695625, "grad_norm": 1.894675850868225, "learning_rate": 2.5313841817168704e-06, "loss": 0.7947, "step": 16670 }, { "epoch": 0.5192277751829363, "grad_norm": 2.0088744163513184, "learning_rate": 2.5305647513848377e-06, "loss": 0.8065, "step": 16675 }, { "epoch": 0.5193834656702475, "grad_norm": 1.8931695222854614, "learning_rate": 2.5297453210528045e-06, "loss": 0.7546, "step": 16680 }, { "epoch": 0.5195391561575587, "grad_norm": 2.041273355484009, "learning_rate": 2.528925890720771e-06, "loss": 0.7439, "step": 16685 }, { "epoch": 0.51969484664487, "grad_norm": 2.137869119644165, "learning_rate": 2.528106460388738e-06, "loss": 0.7296, "step": 16690 }, { "epoch": 0.5198505371321812, "grad_norm": 1.9879751205444336, "learning_rate": 2.5272870300567047e-06, "loss": 0.8035, "step": 16695 }, { "epoch": 0.5200062276194924, "grad_norm": 2.0197343826293945, "learning_rate": 2.526467599724672e-06, "loss": 0.7957, "step": 16700 }, { "epoch": 0.5201619181068037, "grad_norm": 2.3562123775482178, "learning_rate": 2.525648169392638e-06, "loss": 0.8545, "step": 16705 }, { "epoch": 0.5203176085941149, "grad_norm": 2.262402296066284, "learning_rate": 2.5248287390606053e-06, "loss": 0.7921, "step": 16710 }, { "epoch": 0.5204732990814261, "grad_norm": 2.332491159439087, "learning_rate": 2.524009308728572e-06, "loss": 0.76, "step": 16715 }, { "epoch": 0.5206289895687374, "grad_norm": 2.1581709384918213, "learning_rate": 2.523189878396539e-06, "loss": 0.8254, "step": 16720 }, { "epoch": 0.5207846800560486, "grad_norm": 2.6859753131866455, "learning_rate": 2.5223704480645055e-06, "loss": 0.8038, "step": 16725 }, { "epoch": 0.5209403705433598, "grad_norm": 1.96988046169281, "learning_rate": 2.5215510177324724e-06, "loss": 0.681, "step": 16730 }, { "epoch": 0.5210960610306711, "grad_norm": 2.334657669067383, "learning_rate": 2.5207315874004397e-06, "loss": 0.7215, "step": 16735 }, { "epoch": 0.5212517515179822, "grad_norm": 2.096984624862671, "learning_rate": 2.519912157068406e-06, "loss": 0.7792, "step": 16740 }, { "epoch": 0.5214074420052934, "grad_norm": 1.9755287170410156, "learning_rate": 2.519092726736373e-06, "loss": 0.7487, "step": 16745 }, { "epoch": 0.5215631324926047, "grad_norm": 2.072579860687256, "learning_rate": 2.51827329640434e-06, "loss": 0.8191, "step": 16750 }, { "epoch": 0.5217188229799159, "grad_norm": 1.9561935663223267, "learning_rate": 2.5174538660723067e-06, "loss": 0.7667, "step": 16755 }, { "epoch": 0.5218745134672271, "grad_norm": 2.9609928131103516, "learning_rate": 2.516634435740273e-06, "loss": 0.8127, "step": 16760 }, { "epoch": 0.5220302039545384, "grad_norm": 1.9175664186477661, "learning_rate": 2.5158150054082405e-06, "loss": 0.7676, "step": 16765 }, { "epoch": 0.5221858944418496, "grad_norm": 1.8558366298675537, "learning_rate": 2.5149955750762073e-06, "loss": 0.8294, "step": 16770 }, { "epoch": 0.5223415849291608, "grad_norm": 1.8229347467422485, "learning_rate": 2.514176144744174e-06, "loss": 0.7652, "step": 16775 }, { "epoch": 0.5224972754164721, "grad_norm": 2.5769217014312744, "learning_rate": 2.5133567144121407e-06, "loss": 0.8053, "step": 16780 }, { "epoch": 0.5226529659037833, "grad_norm": 1.7859293222427368, "learning_rate": 2.5125372840801075e-06, "loss": 0.7721, "step": 16785 }, { "epoch": 0.5228086563910945, "grad_norm": 2.485020875930786, "learning_rate": 2.511717853748075e-06, "loss": 0.8405, "step": 16790 }, { "epoch": 0.5229643468784058, "grad_norm": 1.9641954898834229, "learning_rate": 2.5108984234160417e-06, "loss": 0.7273, "step": 16795 }, { "epoch": 0.523120037365717, "grad_norm": 1.9311751127243042, "learning_rate": 2.510078993084008e-06, "loss": 0.7373, "step": 16800 }, { "epoch": 0.5232757278530282, "grad_norm": 2.2259316444396973, "learning_rate": 2.509259562751975e-06, "loss": 0.8033, "step": 16805 }, { "epoch": 0.5234314183403395, "grad_norm": 2.2936012744903564, "learning_rate": 2.508440132419942e-06, "loss": 0.7733, "step": 16810 }, { "epoch": 0.5235871088276506, "grad_norm": 2.5052833557128906, "learning_rate": 2.5076207020879083e-06, "loss": 0.7873, "step": 16815 }, { "epoch": 0.5237427993149618, "grad_norm": 2.058236837387085, "learning_rate": 2.506801271755875e-06, "loss": 0.7594, "step": 16820 }, { "epoch": 0.523898489802273, "grad_norm": 2.745450258255005, "learning_rate": 2.5059818414238425e-06, "loss": 0.8323, "step": 16825 }, { "epoch": 0.5240541802895843, "grad_norm": 1.8075666427612305, "learning_rate": 2.5051624110918094e-06, "loss": 0.8042, "step": 16830 }, { "epoch": 0.5242098707768955, "grad_norm": 2.3051822185516357, "learning_rate": 2.504342980759776e-06, "loss": 0.8633, "step": 16835 }, { "epoch": 0.5243655612642067, "grad_norm": 1.9542845487594604, "learning_rate": 2.5035235504277427e-06, "loss": 0.7682, "step": 16840 }, { "epoch": 0.524521251751518, "grad_norm": 1.864086627960205, "learning_rate": 2.5027041200957095e-06, "loss": 0.7802, "step": 16845 }, { "epoch": 0.5246769422388292, "grad_norm": 2.1927454471588135, "learning_rate": 2.501884689763677e-06, "loss": 0.8029, "step": 16850 }, { "epoch": 0.5248326327261404, "grad_norm": 2.149155855178833, "learning_rate": 2.5010652594316433e-06, "loss": 0.7686, "step": 16855 }, { "epoch": 0.5249883232134517, "grad_norm": 2.318812131881714, "learning_rate": 2.50024582909961e-06, "loss": 0.802, "step": 16860 }, { "epoch": 0.5251440137007629, "grad_norm": 1.8764715194702148, "learning_rate": 2.499426398767577e-06, "loss": 0.7639, "step": 16865 }, { "epoch": 0.5252997041880741, "grad_norm": 2.69130802154541, "learning_rate": 2.498606968435544e-06, "loss": 0.7869, "step": 16870 }, { "epoch": 0.5254553946753854, "grad_norm": 2.014634609222412, "learning_rate": 2.4977875381035108e-06, "loss": 0.8317, "step": 16875 }, { "epoch": 0.5256110851626966, "grad_norm": 2.403972864151001, "learning_rate": 2.4969681077714776e-06, "loss": 0.8092, "step": 16880 }, { "epoch": 0.5257667756500077, "grad_norm": 2.1933717727661133, "learning_rate": 2.496148677439444e-06, "loss": 0.7183, "step": 16885 }, { "epoch": 0.525922466137319, "grad_norm": 1.9586490392684937, "learning_rate": 2.495329247107411e-06, "loss": 0.7978, "step": 16890 }, { "epoch": 0.5260781566246302, "grad_norm": 1.7511292695999146, "learning_rate": 2.494509816775378e-06, "loss": 0.8226, "step": 16895 }, { "epoch": 0.5262338471119414, "grad_norm": 2.1425914764404297, "learning_rate": 2.4936903864433447e-06, "loss": 0.7847, "step": 16900 }, { "epoch": 0.5263895375992527, "grad_norm": 1.8933117389678955, "learning_rate": 2.4928709561113115e-06, "loss": 0.7579, "step": 16905 }, { "epoch": 0.5265452280865639, "grad_norm": 2.0101938247680664, "learning_rate": 2.4920515257792784e-06, "loss": 0.8167, "step": 16910 }, { "epoch": 0.5267009185738751, "grad_norm": 2.144894599914551, "learning_rate": 2.4912320954472453e-06, "loss": 0.6984, "step": 16915 }, { "epoch": 0.5268566090611864, "grad_norm": 1.8602508306503296, "learning_rate": 2.490412665115212e-06, "loss": 0.828, "step": 16920 }, { "epoch": 0.5270122995484976, "grad_norm": 2.422740936279297, "learning_rate": 2.489593234783179e-06, "loss": 0.7631, "step": 16925 }, { "epoch": 0.5271679900358088, "grad_norm": 1.911598801612854, "learning_rate": 2.488773804451146e-06, "loss": 0.8271, "step": 16930 }, { "epoch": 0.5273236805231201, "grad_norm": 1.986070156097412, "learning_rate": 2.4879543741191128e-06, "loss": 0.747, "step": 16935 }, { "epoch": 0.5274793710104313, "grad_norm": 2.16988205909729, "learning_rate": 2.4871349437870796e-06, "loss": 0.7731, "step": 16940 }, { "epoch": 0.5276350614977425, "grad_norm": 1.8794078826904297, "learning_rate": 2.486315513455046e-06, "loss": 0.7987, "step": 16945 }, { "epoch": 0.5277907519850538, "grad_norm": 2.0417733192443848, "learning_rate": 2.4854960831230134e-06, "loss": 0.8034, "step": 16950 }, { "epoch": 0.5279464424723649, "grad_norm": 2.096442222595215, "learning_rate": 2.48467665279098e-06, "loss": 0.7829, "step": 16955 }, { "epoch": 0.5281021329596761, "grad_norm": 2.4412519931793213, "learning_rate": 2.4838572224589467e-06, "loss": 0.7641, "step": 16960 }, { "epoch": 0.5282578234469874, "grad_norm": 2.4459826946258545, "learning_rate": 2.4830377921269136e-06, "loss": 0.7941, "step": 16965 }, { "epoch": 0.5284135139342986, "grad_norm": 2.2251789569854736, "learning_rate": 2.4822183617948804e-06, "loss": 0.709, "step": 16970 }, { "epoch": 0.5285692044216098, "grad_norm": 2.182681083679199, "learning_rate": 2.4813989314628473e-06, "loss": 0.728, "step": 16975 }, { "epoch": 0.528724894908921, "grad_norm": 1.946552038192749, "learning_rate": 2.480579501130814e-06, "loss": 0.8118, "step": 16980 }, { "epoch": 0.5288805853962323, "grad_norm": 1.739470362663269, "learning_rate": 2.479760070798781e-06, "loss": 0.7859, "step": 16985 }, { "epoch": 0.5290362758835435, "grad_norm": 1.8624536991119385, "learning_rate": 2.4789406404667475e-06, "loss": 0.7685, "step": 16990 }, { "epoch": 0.5291919663708547, "grad_norm": 1.9625993967056274, "learning_rate": 2.4781212101347148e-06, "loss": 0.7807, "step": 16995 }, { "epoch": 0.529347656858166, "grad_norm": 2.1648614406585693, "learning_rate": 2.4773017798026812e-06, "loss": 0.8493, "step": 17000 }, { "epoch": 0.5295033473454772, "grad_norm": 2.1587374210357666, "learning_rate": 2.4764823494706485e-06, "loss": 0.6989, "step": 17005 }, { "epoch": 0.5296590378327884, "grad_norm": 2.645493507385254, "learning_rate": 2.475662919138615e-06, "loss": 0.7493, "step": 17010 }, { "epoch": 0.5298147283200997, "grad_norm": 2.2522919178009033, "learning_rate": 2.474843488806582e-06, "loss": 0.7978, "step": 17015 }, { "epoch": 0.5299704188074109, "grad_norm": 1.8462718725204468, "learning_rate": 2.4740240584745487e-06, "loss": 0.7981, "step": 17020 }, { "epoch": 0.5301261092947221, "grad_norm": 2.063534736633301, "learning_rate": 2.4732046281425156e-06, "loss": 0.7871, "step": 17025 }, { "epoch": 0.5302817997820333, "grad_norm": 2.2202165126800537, "learning_rate": 2.4723851978104824e-06, "loss": 0.702, "step": 17030 }, { "epoch": 0.5304374902693445, "grad_norm": 1.9018875360488892, "learning_rate": 2.471565767478449e-06, "loss": 0.7615, "step": 17035 }, { "epoch": 0.5305931807566557, "grad_norm": 2.1154308319091797, "learning_rate": 2.470746337146416e-06, "loss": 0.8677, "step": 17040 }, { "epoch": 0.530748871243967, "grad_norm": 2.2561137676239014, "learning_rate": 2.4699269068143826e-06, "loss": 0.8607, "step": 17045 }, { "epoch": 0.5309045617312782, "grad_norm": 2.1812660694122314, "learning_rate": 2.46910747648235e-06, "loss": 0.8378, "step": 17050 }, { "epoch": 0.5310602522185894, "grad_norm": 2.079467296600342, "learning_rate": 2.4682880461503164e-06, "loss": 0.7264, "step": 17055 }, { "epoch": 0.5312159427059007, "grad_norm": 2.4744691848754883, "learning_rate": 2.4674686158182832e-06, "loss": 0.8626, "step": 17060 }, { "epoch": 0.5313716331932119, "grad_norm": 2.399488687515259, "learning_rate": 2.46664918548625e-06, "loss": 0.8521, "step": 17065 }, { "epoch": 0.5315273236805231, "grad_norm": 1.654237985610962, "learning_rate": 2.465829755154217e-06, "loss": 0.7653, "step": 17070 }, { "epoch": 0.5316830141678344, "grad_norm": 2.0138185024261475, "learning_rate": 2.465010324822184e-06, "loss": 0.7726, "step": 17075 }, { "epoch": 0.5318387046551456, "grad_norm": 2.2271530628204346, "learning_rate": 2.4641908944901507e-06, "loss": 0.7326, "step": 17080 }, { "epoch": 0.5319943951424568, "grad_norm": 1.8172906637191772, "learning_rate": 2.4633714641581176e-06, "loss": 0.8056, "step": 17085 }, { "epoch": 0.5321500856297681, "grad_norm": 2.7305684089660645, "learning_rate": 2.4625520338260844e-06, "loss": 0.7659, "step": 17090 }, { "epoch": 0.5323057761170793, "grad_norm": 2.1497018337249756, "learning_rate": 2.4617326034940513e-06, "loss": 0.7384, "step": 17095 }, { "epoch": 0.5324614666043904, "grad_norm": 2.14621639251709, "learning_rate": 2.4609131731620178e-06, "loss": 0.7302, "step": 17100 }, { "epoch": 0.5326171570917017, "grad_norm": 1.7897969484329224, "learning_rate": 2.4600937428299846e-06, "loss": 0.7857, "step": 17105 }, { "epoch": 0.5327728475790129, "grad_norm": 2.38124418258667, "learning_rate": 2.4592743124979515e-06, "loss": 0.7521, "step": 17110 }, { "epoch": 0.5329285380663241, "grad_norm": 1.5985809564590454, "learning_rate": 2.4584548821659184e-06, "loss": 0.7791, "step": 17115 }, { "epoch": 0.5330842285536354, "grad_norm": 1.9510653018951416, "learning_rate": 2.4576354518338852e-06, "loss": 0.7564, "step": 17120 }, { "epoch": 0.5332399190409466, "grad_norm": 2.1844065189361572, "learning_rate": 2.456816021501852e-06, "loss": 0.7628, "step": 17125 }, { "epoch": 0.5333956095282578, "grad_norm": 2.6673572063446045, "learning_rate": 2.455996591169819e-06, "loss": 0.7987, "step": 17130 }, { "epoch": 0.533551300015569, "grad_norm": 2.3551292419433594, "learning_rate": 2.455177160837786e-06, "loss": 0.7884, "step": 17135 }, { "epoch": 0.5337069905028803, "grad_norm": 2.491591691970825, "learning_rate": 2.4543577305057527e-06, "loss": 0.793, "step": 17140 }, { "epoch": 0.5338626809901915, "grad_norm": 1.8368160724639893, "learning_rate": 2.4535383001737196e-06, "loss": 0.7734, "step": 17145 }, { "epoch": 0.5340183714775028, "grad_norm": 1.849142074584961, "learning_rate": 2.452718869841686e-06, "loss": 0.821, "step": 17150 }, { "epoch": 0.534174061964814, "grad_norm": 2.3063206672668457, "learning_rate": 2.4518994395096533e-06, "loss": 0.7389, "step": 17155 }, { "epoch": 0.5343297524521252, "grad_norm": 1.9871206283569336, "learning_rate": 2.4510800091776198e-06, "loss": 0.7657, "step": 17160 }, { "epoch": 0.5344854429394365, "grad_norm": 2.17199969291687, "learning_rate": 2.450260578845587e-06, "loss": 0.7746, "step": 17165 }, { "epoch": 0.5346411334267476, "grad_norm": 2.4361307621002197, "learning_rate": 2.4494411485135535e-06, "loss": 0.7372, "step": 17170 }, { "epoch": 0.5347968239140588, "grad_norm": 1.899578332901001, "learning_rate": 2.4486217181815204e-06, "loss": 0.7623, "step": 17175 }, { "epoch": 0.53495251440137, "grad_norm": 2.6551027297973633, "learning_rate": 2.4478022878494873e-06, "loss": 0.7628, "step": 17180 }, { "epoch": 0.5351082048886813, "grad_norm": 1.8515924215316772, "learning_rate": 2.446982857517454e-06, "loss": 0.7432, "step": 17185 }, { "epoch": 0.5352638953759925, "grad_norm": 2.0854012966156006, "learning_rate": 2.446163427185421e-06, "loss": 0.767, "step": 17190 }, { "epoch": 0.5354195858633037, "grad_norm": 1.9039541482925415, "learning_rate": 2.4453439968533874e-06, "loss": 0.6941, "step": 17195 }, { "epoch": 0.535575276350615, "grad_norm": 2.2828502655029297, "learning_rate": 2.4445245665213547e-06, "loss": 0.8084, "step": 17200 }, { "epoch": 0.5357309668379262, "grad_norm": 2.3556132316589355, "learning_rate": 2.443705136189321e-06, "loss": 0.7473, "step": 17205 }, { "epoch": 0.5358866573252374, "grad_norm": 2.386868476867676, "learning_rate": 2.4428857058572885e-06, "loss": 0.7503, "step": 17210 }, { "epoch": 0.5360423478125487, "grad_norm": 2.1082797050476074, "learning_rate": 2.442066275525255e-06, "loss": 0.7691, "step": 17215 }, { "epoch": 0.5361980382998599, "grad_norm": 2.230038642883301, "learning_rate": 2.4412468451932218e-06, "loss": 0.8001, "step": 17220 }, { "epoch": 0.5363537287871711, "grad_norm": 2.232372522354126, "learning_rate": 2.4404274148611887e-06, "loss": 0.7696, "step": 17225 }, { "epoch": 0.5365094192744824, "grad_norm": 2.4965126514434814, "learning_rate": 2.4396079845291555e-06, "loss": 0.7933, "step": 17230 }, { "epoch": 0.5366651097617936, "grad_norm": 2.0418124198913574, "learning_rate": 2.4387885541971224e-06, "loss": 0.89, "step": 17235 }, { "epoch": 0.5368208002491048, "grad_norm": 3.674294948577881, "learning_rate": 2.4379691238650893e-06, "loss": 0.7806, "step": 17240 }, { "epoch": 0.536976490736416, "grad_norm": 2.1526455879211426, "learning_rate": 2.437149693533056e-06, "loss": 0.7319, "step": 17245 }, { "epoch": 0.5371321812237272, "grad_norm": 1.8789880275726318, "learning_rate": 2.4363302632010226e-06, "loss": 0.8229, "step": 17250 }, { "epoch": 0.5372878717110384, "grad_norm": 1.7672898769378662, "learning_rate": 2.43551083286899e-06, "loss": 0.8102, "step": 17255 }, { "epoch": 0.5374435621983497, "grad_norm": 1.9583070278167725, "learning_rate": 2.4346914025369563e-06, "loss": 0.8391, "step": 17260 }, { "epoch": 0.5375992526856609, "grad_norm": 2.1684727668762207, "learning_rate": 2.433871972204923e-06, "loss": 0.7794, "step": 17265 }, { "epoch": 0.5377549431729721, "grad_norm": 2.0811381340026855, "learning_rate": 2.43305254187289e-06, "loss": 0.8096, "step": 17270 }, { "epoch": 0.5379106336602834, "grad_norm": 2.6430959701538086, "learning_rate": 2.432233111540857e-06, "loss": 0.7269, "step": 17275 }, { "epoch": 0.5380663241475946, "grad_norm": 2.0691070556640625, "learning_rate": 2.431413681208824e-06, "loss": 0.7815, "step": 17280 }, { "epoch": 0.5382220146349058, "grad_norm": 2.060082197189331, "learning_rate": 2.4305942508767907e-06, "loss": 0.7766, "step": 17285 }, { "epoch": 0.538377705122217, "grad_norm": 1.7639830112457275, "learning_rate": 2.4297748205447575e-06, "loss": 0.795, "step": 17290 }, { "epoch": 0.5385333956095283, "grad_norm": 2.0554115772247314, "learning_rate": 2.4289553902127244e-06, "loss": 0.7604, "step": 17295 }, { "epoch": 0.5386890860968395, "grad_norm": 2.047893524169922, "learning_rate": 2.4281359598806913e-06, "loss": 0.8796, "step": 17300 }, { "epoch": 0.5388447765841508, "grad_norm": 2.0224480628967285, "learning_rate": 2.427316529548658e-06, "loss": 0.7686, "step": 17305 }, { "epoch": 0.539000467071462, "grad_norm": 1.9334017038345337, "learning_rate": 2.4264970992166246e-06, "loss": 0.8147, "step": 17310 }, { "epoch": 0.5391561575587731, "grad_norm": 2.119934558868408, "learning_rate": 2.4256776688845915e-06, "loss": 0.7685, "step": 17315 }, { "epoch": 0.5393118480460843, "grad_norm": 1.9901243448257446, "learning_rate": 2.4248582385525583e-06, "loss": 0.7196, "step": 17320 }, { "epoch": 0.5394675385333956, "grad_norm": 2.287525177001953, "learning_rate": 2.424038808220525e-06, "loss": 0.7915, "step": 17325 }, { "epoch": 0.5396232290207068, "grad_norm": 2.3019046783447266, "learning_rate": 2.423219377888492e-06, "loss": 0.716, "step": 17330 }, { "epoch": 0.539778919508018, "grad_norm": 1.7849159240722656, "learning_rate": 2.422399947556459e-06, "loss": 0.7395, "step": 17335 }, { "epoch": 0.5399346099953293, "grad_norm": 2.0318989753723145, "learning_rate": 2.421580517224426e-06, "loss": 0.8088, "step": 17340 }, { "epoch": 0.5400903004826405, "grad_norm": 2.0387914180755615, "learning_rate": 2.4207610868923927e-06, "loss": 0.6936, "step": 17345 }, { "epoch": 0.5402459909699517, "grad_norm": 1.7110984325408936, "learning_rate": 2.4199416565603595e-06, "loss": 0.7419, "step": 17350 }, { "epoch": 0.540401681457263, "grad_norm": 2.444714069366455, "learning_rate": 2.4191222262283264e-06, "loss": 0.7394, "step": 17355 }, { "epoch": 0.5405573719445742, "grad_norm": 2.3936681747436523, "learning_rate": 2.4183027958962933e-06, "loss": 0.7199, "step": 17360 }, { "epoch": 0.5407130624318854, "grad_norm": 2.286801815032959, "learning_rate": 2.4174833655642597e-06, "loss": 0.8121, "step": 17365 }, { "epoch": 0.5408687529191967, "grad_norm": 1.7934516668319702, "learning_rate": 2.416663935232227e-06, "loss": 0.779, "step": 17370 }, { "epoch": 0.5410244434065079, "grad_norm": 2.1884899139404297, "learning_rate": 2.4158445049001935e-06, "loss": 0.8676, "step": 17375 }, { "epoch": 0.5411801338938191, "grad_norm": 2.111811399459839, "learning_rate": 2.4150250745681603e-06, "loss": 0.7643, "step": 17380 }, { "epoch": 0.5413358243811303, "grad_norm": 2.146803855895996, "learning_rate": 2.414205644236127e-06, "loss": 0.8552, "step": 17385 }, { "epoch": 0.5414915148684415, "grad_norm": 2.0975182056427, "learning_rate": 2.413386213904094e-06, "loss": 0.7874, "step": 17390 }, { "epoch": 0.5416472053557527, "grad_norm": 2.239408493041992, "learning_rate": 2.412566783572061e-06, "loss": 0.8023, "step": 17395 }, { "epoch": 0.541802895843064, "grad_norm": 1.9121640920639038, "learning_rate": 2.411747353240028e-06, "loss": 0.7336, "step": 17400 }, { "epoch": 0.5419585863303752, "grad_norm": 1.9009944200515747, "learning_rate": 2.4109279229079947e-06, "loss": 0.7596, "step": 17405 }, { "epoch": 0.5421142768176864, "grad_norm": 2.119605541229248, "learning_rate": 2.410108492575961e-06, "loss": 0.8515, "step": 17410 }, { "epoch": 0.5422699673049977, "grad_norm": 2.00470232963562, "learning_rate": 2.4092890622439284e-06, "loss": 0.7897, "step": 17415 }, { "epoch": 0.5424256577923089, "grad_norm": 2.075958490371704, "learning_rate": 2.408469631911895e-06, "loss": 0.739, "step": 17420 }, { "epoch": 0.5425813482796201, "grad_norm": 2.1290955543518066, "learning_rate": 2.407650201579862e-06, "loss": 0.7584, "step": 17425 }, { "epoch": 0.5427370387669314, "grad_norm": 2.3126533031463623, "learning_rate": 2.4068307712478286e-06, "loss": 0.7625, "step": 17430 }, { "epoch": 0.5428927292542426, "grad_norm": 2.3405067920684814, "learning_rate": 2.4060113409157955e-06, "loss": 0.7457, "step": 17435 }, { "epoch": 0.5430484197415538, "grad_norm": 1.8794386386871338, "learning_rate": 2.4051919105837623e-06, "loss": 0.7682, "step": 17440 }, { "epoch": 0.5432041102288651, "grad_norm": 2.141727924346924, "learning_rate": 2.4043724802517292e-06, "loss": 0.7994, "step": 17445 }, { "epoch": 0.5433598007161763, "grad_norm": 2.0009775161743164, "learning_rate": 2.403553049919696e-06, "loss": 0.7367, "step": 17450 }, { "epoch": 0.5435154912034875, "grad_norm": 2.264970064163208, "learning_rate": 2.402733619587663e-06, "loss": 0.8074, "step": 17455 }, { "epoch": 0.5436711816907986, "grad_norm": 2.077967882156372, "learning_rate": 2.40191418925563e-06, "loss": 0.6735, "step": 17460 }, { "epoch": 0.5438268721781099, "grad_norm": 3.409212589263916, "learning_rate": 2.4010947589235963e-06, "loss": 0.8213, "step": 17465 }, { "epoch": 0.5439825626654211, "grad_norm": 2.08475923538208, "learning_rate": 2.4002753285915636e-06, "loss": 0.8296, "step": 17470 }, { "epoch": 0.5441382531527323, "grad_norm": 2.0309770107269287, "learning_rate": 2.39945589825953e-06, "loss": 0.7167, "step": 17475 }, { "epoch": 0.5442939436400436, "grad_norm": 2.1417722702026367, "learning_rate": 2.398636467927497e-06, "loss": 0.7978, "step": 17480 }, { "epoch": 0.5444496341273548, "grad_norm": 2.376990556716919, "learning_rate": 2.3978170375954638e-06, "loss": 0.7534, "step": 17485 }, { "epoch": 0.544605324614666, "grad_norm": 2.8139002323150635, "learning_rate": 2.3969976072634306e-06, "loss": 0.7756, "step": 17490 }, { "epoch": 0.5447610151019773, "grad_norm": 2.1783883571624756, "learning_rate": 2.3961781769313975e-06, "loss": 0.8022, "step": 17495 }, { "epoch": 0.5449167055892885, "grad_norm": 2.1624348163604736, "learning_rate": 2.3953587465993644e-06, "loss": 0.7628, "step": 17500 }, { "epoch": 0.5450723960765997, "grad_norm": 1.9623332023620605, "learning_rate": 2.3945393162673312e-06, "loss": 0.8032, "step": 17505 }, { "epoch": 0.545228086563911, "grad_norm": 1.9206688404083252, "learning_rate": 2.393719885935298e-06, "loss": 0.7507, "step": 17510 }, { "epoch": 0.5453837770512222, "grad_norm": 1.9196281433105469, "learning_rate": 2.392900455603265e-06, "loss": 0.7689, "step": 17515 }, { "epoch": 0.5455394675385334, "grad_norm": 2.686551809310913, "learning_rate": 2.392081025271232e-06, "loss": 0.724, "step": 17520 }, { "epoch": 0.5456951580258447, "grad_norm": 2.4520554542541504, "learning_rate": 2.3912615949391983e-06, "loss": 0.7406, "step": 17525 }, { "epoch": 0.5458508485131558, "grad_norm": 2.2821309566497803, "learning_rate": 2.390442164607165e-06, "loss": 0.7685, "step": 17530 }, { "epoch": 0.546006539000467, "grad_norm": 2.246826410293579, "learning_rate": 2.389622734275132e-06, "loss": 0.831, "step": 17535 }, { "epoch": 0.5461622294877783, "grad_norm": 1.8157275915145874, "learning_rate": 2.388803303943099e-06, "loss": 0.7551, "step": 17540 }, { "epoch": 0.5463179199750895, "grad_norm": 1.9550062417984009, "learning_rate": 2.3879838736110658e-06, "loss": 0.8069, "step": 17545 }, { "epoch": 0.5464736104624007, "grad_norm": 2.052183151245117, "learning_rate": 2.3871644432790326e-06, "loss": 0.7598, "step": 17550 }, { "epoch": 0.546629300949712, "grad_norm": 2.070401906967163, "learning_rate": 2.3863450129469995e-06, "loss": 0.7672, "step": 17555 }, { "epoch": 0.5467849914370232, "grad_norm": 1.7953002452850342, "learning_rate": 2.3855255826149664e-06, "loss": 0.8015, "step": 17560 }, { "epoch": 0.5469406819243344, "grad_norm": 1.898749828338623, "learning_rate": 2.3847061522829332e-06, "loss": 0.808, "step": 17565 }, { "epoch": 0.5470963724116457, "grad_norm": 2.266695499420166, "learning_rate": 2.3838867219508997e-06, "loss": 0.8105, "step": 17570 }, { "epoch": 0.5472520628989569, "grad_norm": 2.0849926471710205, "learning_rate": 2.383067291618867e-06, "loss": 0.7951, "step": 17575 }, { "epoch": 0.5474077533862681, "grad_norm": 2.3390963077545166, "learning_rate": 2.3822478612868334e-06, "loss": 0.8103, "step": 17580 }, { "epoch": 0.5475634438735794, "grad_norm": 1.9152108430862427, "learning_rate": 2.3814284309548007e-06, "loss": 0.8186, "step": 17585 }, { "epoch": 0.5477191343608906, "grad_norm": 2.033418893814087, "learning_rate": 2.380609000622767e-06, "loss": 0.6961, "step": 17590 }, { "epoch": 0.5478748248482018, "grad_norm": 2.0237550735473633, "learning_rate": 2.379789570290734e-06, "loss": 0.7587, "step": 17595 }, { "epoch": 0.548030515335513, "grad_norm": 1.8512420654296875, "learning_rate": 2.378970139958701e-06, "loss": 0.7705, "step": 17600 }, { "epoch": 0.5481862058228242, "grad_norm": 2.343501329421997, "learning_rate": 2.3781507096266678e-06, "loss": 0.8065, "step": 17605 }, { "epoch": 0.5483418963101354, "grad_norm": 2.243131637573242, "learning_rate": 2.3773312792946346e-06, "loss": 0.7624, "step": 17610 }, { "epoch": 0.5484975867974466, "grad_norm": 2.4987952709198, "learning_rate": 2.376511848962601e-06, "loss": 0.8145, "step": 17615 }, { "epoch": 0.5486532772847579, "grad_norm": 2.321521759033203, "learning_rate": 2.3756924186305684e-06, "loss": 0.7736, "step": 17620 }, { "epoch": 0.5488089677720691, "grad_norm": 2.7177059650421143, "learning_rate": 2.374872988298535e-06, "loss": 0.7221, "step": 17625 }, { "epoch": 0.5489646582593803, "grad_norm": 1.8483600616455078, "learning_rate": 2.374053557966502e-06, "loss": 0.7508, "step": 17630 }, { "epoch": 0.5491203487466916, "grad_norm": 2.0649635791778564, "learning_rate": 2.3732341276344686e-06, "loss": 0.7265, "step": 17635 }, { "epoch": 0.5492760392340028, "grad_norm": 2.0418224334716797, "learning_rate": 2.3724146973024354e-06, "loss": 0.7557, "step": 17640 }, { "epoch": 0.549431729721314, "grad_norm": 2.359281539916992, "learning_rate": 2.3715952669704023e-06, "loss": 0.7614, "step": 17645 }, { "epoch": 0.5495874202086253, "grad_norm": 2.224519968032837, "learning_rate": 2.370775836638369e-06, "loss": 0.7928, "step": 17650 }, { "epoch": 0.5497431106959365, "grad_norm": 1.9847126007080078, "learning_rate": 2.369956406306336e-06, "loss": 0.6771, "step": 17655 }, { "epoch": 0.5498988011832477, "grad_norm": 2.00077748298645, "learning_rate": 2.369136975974303e-06, "loss": 0.721, "step": 17660 }, { "epoch": 0.550054491670559, "grad_norm": 2.3054492473602295, "learning_rate": 2.3683175456422698e-06, "loss": 0.8362, "step": 17665 }, { "epoch": 0.5502101821578702, "grad_norm": 2.01511549949646, "learning_rate": 2.3674981153102367e-06, "loss": 0.8012, "step": 17670 }, { "epoch": 0.5503658726451813, "grad_norm": 2.0694973468780518, "learning_rate": 2.3666786849782035e-06, "loss": 0.7731, "step": 17675 }, { "epoch": 0.5505215631324926, "grad_norm": 2.07845401763916, "learning_rate": 2.36585925464617e-06, "loss": 0.7773, "step": 17680 }, { "epoch": 0.5506772536198038, "grad_norm": 2.346687078475952, "learning_rate": 2.365039824314137e-06, "loss": 0.7731, "step": 17685 }, { "epoch": 0.550832944107115, "grad_norm": 2.594149112701416, "learning_rate": 2.3642203939821037e-06, "loss": 0.7476, "step": 17690 }, { "epoch": 0.5509886345944263, "grad_norm": 2.119964361190796, "learning_rate": 2.3634009636500706e-06, "loss": 0.787, "step": 17695 }, { "epoch": 0.5511443250817375, "grad_norm": 2.127048969268799, "learning_rate": 2.3625815333180374e-06, "loss": 0.7667, "step": 17700 }, { "epoch": 0.5513000155690487, "grad_norm": 2.123248338699341, "learning_rate": 2.3617621029860043e-06, "loss": 0.7752, "step": 17705 }, { "epoch": 0.55145570605636, "grad_norm": 2.249671459197998, "learning_rate": 2.360942672653971e-06, "loss": 0.763, "step": 17710 }, { "epoch": 0.5516113965436712, "grad_norm": 1.7793352603912354, "learning_rate": 2.360123242321938e-06, "loss": 0.7247, "step": 17715 }, { "epoch": 0.5517670870309824, "grad_norm": 1.811904788017273, "learning_rate": 2.359303811989905e-06, "loss": 0.7409, "step": 17720 }, { "epoch": 0.5519227775182937, "grad_norm": 2.261566162109375, "learning_rate": 2.358484381657872e-06, "loss": 0.7724, "step": 17725 }, { "epoch": 0.5520784680056049, "grad_norm": 2.41841983795166, "learning_rate": 2.3576649513258382e-06, "loss": 0.7963, "step": 17730 }, { "epoch": 0.5522341584929161, "grad_norm": 2.0442891120910645, "learning_rate": 2.3568455209938055e-06, "loss": 0.8174, "step": 17735 }, { "epoch": 0.5523898489802274, "grad_norm": 3.0963728427886963, "learning_rate": 2.356026090661772e-06, "loss": 0.7766, "step": 17740 }, { "epoch": 0.5525455394675385, "grad_norm": 1.8942079544067383, "learning_rate": 2.355206660329739e-06, "loss": 0.7703, "step": 17745 }, { "epoch": 0.5527012299548497, "grad_norm": 2.2137503623962402, "learning_rate": 2.3543872299977057e-06, "loss": 0.7625, "step": 17750 }, { "epoch": 0.552856920442161, "grad_norm": 1.9874054193496704, "learning_rate": 2.3535677996656726e-06, "loss": 0.7441, "step": 17755 }, { "epoch": 0.5530126109294722, "grad_norm": 2.0038743019104004, "learning_rate": 2.3527483693336395e-06, "loss": 0.7993, "step": 17760 }, { "epoch": 0.5531683014167834, "grad_norm": 1.8096368312835693, "learning_rate": 2.3519289390016063e-06, "loss": 0.7802, "step": 17765 }, { "epoch": 0.5533239919040946, "grad_norm": 2.524873733520508, "learning_rate": 2.351109508669573e-06, "loss": 0.7592, "step": 17770 }, { "epoch": 0.5534796823914059, "grad_norm": 2.072472095489502, "learning_rate": 2.35029007833754e-06, "loss": 0.7549, "step": 17775 }, { "epoch": 0.5536353728787171, "grad_norm": 1.828343152999878, "learning_rate": 2.349470648005507e-06, "loss": 0.7844, "step": 17780 }, { "epoch": 0.5537910633660283, "grad_norm": 2.9367241859436035, "learning_rate": 2.3486512176734734e-06, "loss": 0.7811, "step": 17785 }, { "epoch": 0.5539467538533396, "grad_norm": 3.033996820449829, "learning_rate": 2.3478317873414407e-06, "loss": 0.7109, "step": 17790 }, { "epoch": 0.5541024443406508, "grad_norm": 1.9166090488433838, "learning_rate": 2.347012357009407e-06, "loss": 0.7935, "step": 17795 }, { "epoch": 0.554258134827962, "grad_norm": 2.249969482421875, "learning_rate": 2.346192926677374e-06, "loss": 0.7587, "step": 17800 }, { "epoch": 0.5544138253152733, "grad_norm": 2.1190173625946045, "learning_rate": 2.345373496345341e-06, "loss": 0.7991, "step": 17805 }, { "epoch": 0.5545695158025845, "grad_norm": 2.200932502746582, "learning_rate": 2.3445540660133077e-06, "loss": 0.8646, "step": 17810 }, { "epoch": 0.5547252062898956, "grad_norm": 2.0762476921081543, "learning_rate": 2.3437346356812746e-06, "loss": 0.7369, "step": 17815 }, { "epoch": 0.5548808967772069, "grad_norm": 1.9358985424041748, "learning_rate": 2.3429152053492415e-06, "loss": 0.7566, "step": 17820 }, { "epoch": 0.5550365872645181, "grad_norm": 2.6626651287078857, "learning_rate": 2.3420957750172083e-06, "loss": 0.7851, "step": 17825 }, { "epoch": 0.5551922777518293, "grad_norm": 2.0878801345825195, "learning_rate": 2.3412763446851748e-06, "loss": 0.8923, "step": 17830 }, { "epoch": 0.5553479682391406, "grad_norm": 2.092021942138672, "learning_rate": 2.340456914353142e-06, "loss": 0.7503, "step": 17835 }, { "epoch": 0.5555036587264518, "grad_norm": 2.0484414100646973, "learning_rate": 2.3396374840211085e-06, "loss": 0.7377, "step": 17840 }, { "epoch": 0.555659349213763, "grad_norm": 2.0522475242614746, "learning_rate": 2.338818053689076e-06, "loss": 0.7387, "step": 17845 }, { "epoch": 0.5558150397010743, "grad_norm": 2.037569522857666, "learning_rate": 2.3379986233570423e-06, "loss": 0.7413, "step": 17850 }, { "epoch": 0.5559707301883855, "grad_norm": 2.4925339221954346, "learning_rate": 2.337179193025009e-06, "loss": 0.7473, "step": 17855 }, { "epoch": 0.5561264206756967, "grad_norm": 2.046750545501709, "learning_rate": 2.336359762692976e-06, "loss": 0.7312, "step": 17860 }, { "epoch": 0.556282111163008, "grad_norm": 2.520355463027954, "learning_rate": 2.335540332360943e-06, "loss": 0.7369, "step": 17865 }, { "epoch": 0.5564378016503192, "grad_norm": 2.3027241230010986, "learning_rate": 2.3347209020289097e-06, "loss": 0.832, "step": 17870 }, { "epoch": 0.5565934921376304, "grad_norm": 1.9767940044403076, "learning_rate": 2.3339014716968766e-06, "loss": 0.8169, "step": 17875 }, { "epoch": 0.5567491826249417, "grad_norm": 2.02260422706604, "learning_rate": 2.3330820413648435e-06, "loss": 0.8397, "step": 17880 }, { "epoch": 0.5569048731122529, "grad_norm": 2.8567798137664795, "learning_rate": 2.3322626110328103e-06, "loss": 0.92, "step": 17885 }, { "epoch": 0.557060563599564, "grad_norm": 2.0040183067321777, "learning_rate": 2.3314431807007772e-06, "loss": 0.797, "step": 17890 }, { "epoch": 0.5572162540868753, "grad_norm": 2.085700035095215, "learning_rate": 2.3306237503687437e-06, "loss": 0.7767, "step": 17895 }, { "epoch": 0.5573719445741865, "grad_norm": 2.1862540245056152, "learning_rate": 2.3298043200367105e-06, "loss": 0.7548, "step": 17900 }, { "epoch": 0.5575276350614977, "grad_norm": 2.132143497467041, "learning_rate": 2.3289848897046774e-06, "loss": 0.7169, "step": 17905 }, { "epoch": 0.557683325548809, "grad_norm": 2.336595058441162, "learning_rate": 2.3281654593726443e-06, "loss": 0.8432, "step": 17910 }, { "epoch": 0.5578390160361202, "grad_norm": 2.387266159057617, "learning_rate": 2.327346029040611e-06, "loss": 0.8422, "step": 17915 }, { "epoch": 0.5579947065234314, "grad_norm": 1.9415557384490967, "learning_rate": 2.326526598708578e-06, "loss": 0.7341, "step": 17920 }, { "epoch": 0.5581503970107426, "grad_norm": 2.333508014678955, "learning_rate": 2.325707168376545e-06, "loss": 0.7453, "step": 17925 }, { "epoch": 0.5583060874980539, "grad_norm": 1.9264235496520996, "learning_rate": 2.3248877380445117e-06, "loss": 0.8237, "step": 17930 }, { "epoch": 0.5584617779853651, "grad_norm": 2.0131781101226807, "learning_rate": 2.3240683077124786e-06, "loss": 0.7405, "step": 17935 }, { "epoch": 0.5586174684726763, "grad_norm": 1.9859415292739868, "learning_rate": 2.3232488773804455e-06, "loss": 0.7169, "step": 17940 }, { "epoch": 0.5587731589599876, "grad_norm": 1.9696294069290161, "learning_rate": 2.322429447048412e-06, "loss": 0.689, "step": 17945 }, { "epoch": 0.5589288494472988, "grad_norm": 2.251035451889038, "learning_rate": 2.3216100167163792e-06, "loss": 0.8818, "step": 17950 }, { "epoch": 0.55908453993461, "grad_norm": 1.8868262767791748, "learning_rate": 2.3207905863843457e-06, "loss": 0.7157, "step": 17955 }, { "epoch": 0.5592402304219212, "grad_norm": 2.343886137008667, "learning_rate": 2.3199711560523125e-06, "loss": 0.8186, "step": 17960 }, { "epoch": 0.5593959209092324, "grad_norm": 2.5047383308410645, "learning_rate": 2.3191517257202794e-06, "loss": 0.7073, "step": 17965 }, { "epoch": 0.5595516113965436, "grad_norm": 2.1251885890960693, "learning_rate": 2.3183322953882463e-06, "loss": 0.7559, "step": 17970 }, { "epoch": 0.5597073018838549, "grad_norm": 2.1752400398254395, "learning_rate": 2.317512865056213e-06, "loss": 0.7819, "step": 17975 }, { "epoch": 0.5598629923711661, "grad_norm": 2.1751222610473633, "learning_rate": 2.31669343472418e-06, "loss": 0.8051, "step": 17980 }, { "epoch": 0.5600186828584773, "grad_norm": 1.9513026475906372, "learning_rate": 2.315874004392147e-06, "loss": 0.7707, "step": 17985 }, { "epoch": 0.5601743733457886, "grad_norm": 2.0260891914367676, "learning_rate": 2.3150545740601133e-06, "loss": 0.7664, "step": 17990 }, { "epoch": 0.5603300638330998, "grad_norm": 1.8252527713775635, "learning_rate": 2.3142351437280806e-06, "loss": 0.66, "step": 17995 }, { "epoch": 0.560485754320411, "grad_norm": 2.1290204524993896, "learning_rate": 2.313415713396047e-06, "loss": 0.7934, "step": 18000 }, { "epoch": 0.5606414448077223, "grad_norm": 2.8389992713928223, "learning_rate": 2.3125962830640144e-06, "loss": 0.7644, "step": 18005 }, { "epoch": 0.5607971352950335, "grad_norm": 1.6939301490783691, "learning_rate": 2.311776852731981e-06, "loss": 0.754, "step": 18010 }, { "epoch": 0.5609528257823447, "grad_norm": 2.2141520977020264, "learning_rate": 2.3109574223999477e-06, "loss": 0.7772, "step": 18015 }, { "epoch": 0.561108516269656, "grad_norm": 1.9314332008361816, "learning_rate": 2.3101379920679146e-06, "loss": 0.7743, "step": 18020 }, { "epoch": 0.5612642067569672, "grad_norm": 2.3754899501800537, "learning_rate": 2.3093185617358814e-06, "loss": 0.8124, "step": 18025 }, { "epoch": 0.5614198972442783, "grad_norm": 2.114412307739258, "learning_rate": 2.3084991314038483e-06, "loss": 0.7407, "step": 18030 }, { "epoch": 0.5615755877315896, "grad_norm": 2.159449338912964, "learning_rate": 2.3076797010718147e-06, "loss": 0.7398, "step": 18035 }, { "epoch": 0.5617312782189008, "grad_norm": 2.0477709770202637, "learning_rate": 2.306860270739782e-06, "loss": 0.7245, "step": 18040 }, { "epoch": 0.561886968706212, "grad_norm": 2.286315441131592, "learning_rate": 2.3060408404077485e-06, "loss": 0.7889, "step": 18045 }, { "epoch": 0.5620426591935233, "grad_norm": 1.9560552835464478, "learning_rate": 2.3052214100757158e-06, "loss": 0.7774, "step": 18050 }, { "epoch": 0.5621983496808345, "grad_norm": 2.592207431793213, "learning_rate": 2.3044019797436822e-06, "loss": 0.7877, "step": 18055 }, { "epoch": 0.5623540401681457, "grad_norm": 2.056260347366333, "learning_rate": 2.303582549411649e-06, "loss": 0.731, "step": 18060 }, { "epoch": 0.562509730655457, "grad_norm": 2.487065553665161, "learning_rate": 2.302763119079616e-06, "loss": 0.7944, "step": 18065 }, { "epoch": 0.5626654211427682, "grad_norm": 2.208946704864502, "learning_rate": 2.301943688747583e-06, "loss": 0.7842, "step": 18070 }, { "epoch": 0.5628211116300794, "grad_norm": 2.325603485107422, "learning_rate": 2.3011242584155497e-06, "loss": 0.8275, "step": 18075 }, { "epoch": 0.5629768021173907, "grad_norm": 2.3034918308258057, "learning_rate": 2.3003048280835166e-06, "loss": 0.7539, "step": 18080 }, { "epoch": 0.5631324926047019, "grad_norm": 2.221842050552368, "learning_rate": 2.2994853977514834e-06, "loss": 0.7921, "step": 18085 }, { "epoch": 0.5632881830920131, "grad_norm": 2.1008806228637695, "learning_rate": 2.2986659674194503e-06, "loss": 0.748, "step": 18090 }, { "epoch": 0.5634438735793244, "grad_norm": 2.0547313690185547, "learning_rate": 2.297846537087417e-06, "loss": 0.7733, "step": 18095 }, { "epoch": 0.5635995640666356, "grad_norm": 2.207951307296753, "learning_rate": 2.2970271067553836e-06, "loss": 0.8563, "step": 18100 }, { "epoch": 0.5637552545539467, "grad_norm": 1.9667106866836548, "learning_rate": 2.2962076764233505e-06, "loss": 0.7987, "step": 18105 }, { "epoch": 0.5639109450412579, "grad_norm": 1.896812915802002, "learning_rate": 2.2953882460913174e-06, "loss": 0.7481, "step": 18110 }, { "epoch": 0.5640666355285692, "grad_norm": 2.0028600692749023, "learning_rate": 2.2945688157592842e-06, "loss": 0.7419, "step": 18115 }, { "epoch": 0.5642223260158804, "grad_norm": 2.2454159259796143, "learning_rate": 2.293749385427251e-06, "loss": 0.7571, "step": 18120 }, { "epoch": 0.5643780165031916, "grad_norm": 1.9386368989944458, "learning_rate": 2.292929955095218e-06, "loss": 0.7728, "step": 18125 }, { "epoch": 0.5645337069905029, "grad_norm": 2.480219602584839, "learning_rate": 2.292110524763185e-06, "loss": 0.7903, "step": 18130 }, { "epoch": 0.5646893974778141, "grad_norm": 1.647660255432129, "learning_rate": 2.2912910944311517e-06, "loss": 0.7222, "step": 18135 }, { "epoch": 0.5648450879651253, "grad_norm": 2.009638786315918, "learning_rate": 2.2904716640991186e-06, "loss": 0.7396, "step": 18140 }, { "epoch": 0.5650007784524366, "grad_norm": 2.2743682861328125, "learning_rate": 2.2896522337670854e-06, "loss": 0.8544, "step": 18145 }, { "epoch": 0.5651564689397478, "grad_norm": 1.9866573810577393, "learning_rate": 2.288832803435052e-06, "loss": 0.7518, "step": 18150 }, { "epoch": 0.565312159427059, "grad_norm": 2.185410261154175, "learning_rate": 2.288013373103019e-06, "loss": 0.7639, "step": 18155 }, { "epoch": 0.5654678499143703, "grad_norm": 2.176436424255371, "learning_rate": 2.2871939427709856e-06, "loss": 0.8017, "step": 18160 }, { "epoch": 0.5656235404016815, "grad_norm": 2.2763540744781494, "learning_rate": 2.286374512438953e-06, "loss": 0.7581, "step": 18165 }, { "epoch": 0.5657792308889927, "grad_norm": 1.8781198263168335, "learning_rate": 2.2855550821069194e-06, "loss": 0.7369, "step": 18170 }, { "epoch": 0.5659349213763039, "grad_norm": 2.1142170429229736, "learning_rate": 2.2847356517748862e-06, "loss": 0.7847, "step": 18175 }, { "epoch": 0.5660906118636151, "grad_norm": 1.9548649787902832, "learning_rate": 2.283916221442853e-06, "loss": 0.8112, "step": 18180 }, { "epoch": 0.5662463023509263, "grad_norm": 2.7153704166412354, "learning_rate": 2.28309679111082e-06, "loss": 0.7393, "step": 18185 }, { "epoch": 0.5664019928382376, "grad_norm": 2.56178617477417, "learning_rate": 2.282277360778787e-06, "loss": 0.7948, "step": 18190 }, { "epoch": 0.5665576833255488, "grad_norm": 2.7626640796661377, "learning_rate": 2.2814579304467537e-06, "loss": 0.8243, "step": 18195 }, { "epoch": 0.56671337381286, "grad_norm": 2.5358669757843018, "learning_rate": 2.2806385001147206e-06, "loss": 0.6788, "step": 18200 }, { "epoch": 0.5668690643001713, "grad_norm": 2.054068088531494, "learning_rate": 2.279819069782687e-06, "loss": 0.7968, "step": 18205 }, { "epoch": 0.5670247547874825, "grad_norm": 2.0467677116394043, "learning_rate": 2.2789996394506543e-06, "loss": 0.8462, "step": 18210 }, { "epoch": 0.5671804452747937, "grad_norm": 2.2584996223449707, "learning_rate": 2.2781802091186208e-06, "loss": 0.8196, "step": 18215 }, { "epoch": 0.567336135762105, "grad_norm": 2.136249542236328, "learning_rate": 2.2773607787865876e-06, "loss": 0.8794, "step": 18220 }, { "epoch": 0.5674918262494162, "grad_norm": 2.0559723377227783, "learning_rate": 2.2765413484545545e-06, "loss": 0.7675, "step": 18225 }, { "epoch": 0.5676475167367274, "grad_norm": 3.002230405807495, "learning_rate": 2.2757219181225214e-06, "loss": 0.848, "step": 18230 }, { "epoch": 0.5678032072240387, "grad_norm": 2.264066457748413, "learning_rate": 2.2749024877904882e-06, "loss": 0.7694, "step": 18235 }, { "epoch": 0.5679588977113499, "grad_norm": 2.5205886363983154, "learning_rate": 2.274083057458455e-06, "loss": 0.7353, "step": 18240 }, { "epoch": 0.568114588198661, "grad_norm": 1.871394395828247, "learning_rate": 2.273263627126422e-06, "loss": 0.7879, "step": 18245 }, { "epoch": 0.5682702786859722, "grad_norm": 1.9378297328948975, "learning_rate": 2.2724441967943884e-06, "loss": 0.7791, "step": 18250 }, { "epoch": 0.5684259691732835, "grad_norm": 2.0771772861480713, "learning_rate": 2.2716247664623557e-06, "loss": 0.7704, "step": 18255 }, { "epoch": 0.5685816596605947, "grad_norm": 2.1754541397094727, "learning_rate": 2.270805336130322e-06, "loss": 0.8303, "step": 18260 }, { "epoch": 0.5687373501479059, "grad_norm": 1.9765207767486572, "learning_rate": 2.2699859057982895e-06, "loss": 0.7688, "step": 18265 }, { "epoch": 0.5688930406352172, "grad_norm": 2.776031732559204, "learning_rate": 2.269166475466256e-06, "loss": 0.7917, "step": 18270 }, { "epoch": 0.5690487311225284, "grad_norm": 1.9427154064178467, "learning_rate": 2.2683470451342228e-06, "loss": 0.7828, "step": 18275 }, { "epoch": 0.5692044216098396, "grad_norm": 2.2943546772003174, "learning_rate": 2.2675276148021896e-06, "loss": 0.7663, "step": 18280 }, { "epoch": 0.5693601120971509, "grad_norm": 2.014004945755005, "learning_rate": 2.2667081844701565e-06, "loss": 0.8625, "step": 18285 }, { "epoch": 0.5695158025844621, "grad_norm": 2.012925148010254, "learning_rate": 2.2658887541381234e-06, "loss": 0.7749, "step": 18290 }, { "epoch": 0.5696714930717733, "grad_norm": 1.8352420330047607, "learning_rate": 2.2650693238060903e-06, "loss": 0.7592, "step": 18295 }, { "epoch": 0.5698271835590846, "grad_norm": 2.0084433555603027, "learning_rate": 2.264249893474057e-06, "loss": 0.7944, "step": 18300 }, { "epoch": 0.5699828740463958, "grad_norm": 2.407437324523926, "learning_rate": 2.263430463142024e-06, "loss": 0.8615, "step": 18305 }, { "epoch": 0.570138564533707, "grad_norm": 2.0084517002105713, "learning_rate": 2.262611032809991e-06, "loss": 0.8066, "step": 18310 }, { "epoch": 0.5702942550210183, "grad_norm": 2.1673147678375244, "learning_rate": 2.2617916024779573e-06, "loss": 0.7398, "step": 18315 }, { "epoch": 0.5704499455083294, "grad_norm": 1.887374758720398, "learning_rate": 2.260972172145924e-06, "loss": 0.7805, "step": 18320 }, { "epoch": 0.5706056359956406, "grad_norm": 1.9707884788513184, "learning_rate": 2.260152741813891e-06, "loss": 0.7678, "step": 18325 }, { "epoch": 0.5707613264829519, "grad_norm": 2.243584394454956, "learning_rate": 2.259333311481858e-06, "loss": 0.7021, "step": 18330 }, { "epoch": 0.5709170169702631, "grad_norm": 2.057835102081299, "learning_rate": 2.258513881149825e-06, "loss": 0.7454, "step": 18335 }, { "epoch": 0.5710727074575743, "grad_norm": 1.960133671760559, "learning_rate": 2.2576944508177917e-06, "loss": 0.7416, "step": 18340 }, { "epoch": 0.5712283979448856, "grad_norm": 2.2096686363220215, "learning_rate": 2.2568750204857585e-06, "loss": 0.7886, "step": 18345 }, { "epoch": 0.5713840884321968, "grad_norm": 2.138233184814453, "learning_rate": 2.2560555901537254e-06, "loss": 0.7745, "step": 18350 }, { "epoch": 0.571539778919508, "grad_norm": 2.2237510681152344, "learning_rate": 2.2552361598216923e-06, "loss": 0.8444, "step": 18355 }, { "epoch": 0.5716954694068193, "grad_norm": 2.5481417179107666, "learning_rate": 2.254416729489659e-06, "loss": 0.7783, "step": 18360 }, { "epoch": 0.5718511598941305, "grad_norm": 2.2299535274505615, "learning_rate": 2.2535972991576256e-06, "loss": 0.7413, "step": 18365 }, { "epoch": 0.5720068503814417, "grad_norm": 2.5706980228424072, "learning_rate": 2.252777868825593e-06, "loss": 0.7514, "step": 18370 }, { "epoch": 0.572162540868753, "grad_norm": 2.5955116748809814, "learning_rate": 2.2519584384935593e-06, "loss": 0.8406, "step": 18375 }, { "epoch": 0.5723182313560642, "grad_norm": 2.292273759841919, "learning_rate": 2.2511390081615266e-06, "loss": 0.8302, "step": 18380 }, { "epoch": 0.5724739218433754, "grad_norm": 2.020047426223755, "learning_rate": 2.250319577829493e-06, "loss": 0.7526, "step": 18385 }, { "epoch": 0.5726296123306865, "grad_norm": 1.894270658493042, "learning_rate": 2.24950014749746e-06, "loss": 0.8217, "step": 18390 }, { "epoch": 0.5727853028179978, "grad_norm": 2.531451940536499, "learning_rate": 2.248680717165427e-06, "loss": 0.8059, "step": 18395 }, { "epoch": 0.572940993305309, "grad_norm": 1.7115823030471802, "learning_rate": 2.2478612868333937e-06, "loss": 0.7357, "step": 18400 }, { "epoch": 0.5730966837926202, "grad_norm": 2.298046112060547, "learning_rate": 2.2470418565013605e-06, "loss": 0.7846, "step": 18405 }, { "epoch": 0.5732523742799315, "grad_norm": 2.1414849758148193, "learning_rate": 2.246222426169327e-06, "loss": 0.7832, "step": 18410 }, { "epoch": 0.5734080647672427, "grad_norm": 2.308016538619995, "learning_rate": 2.2454029958372943e-06, "loss": 0.7361, "step": 18415 }, { "epoch": 0.5735637552545539, "grad_norm": 2.1756327152252197, "learning_rate": 2.2445835655052607e-06, "loss": 0.7668, "step": 18420 }, { "epoch": 0.5737194457418652, "grad_norm": 2.2014517784118652, "learning_rate": 2.243764135173228e-06, "loss": 0.7806, "step": 18425 }, { "epoch": 0.5738751362291764, "grad_norm": 2.081740379333496, "learning_rate": 2.2429447048411945e-06, "loss": 0.8132, "step": 18430 }, { "epoch": 0.5740308267164876, "grad_norm": 2.5490660667419434, "learning_rate": 2.2421252745091613e-06, "loss": 0.861, "step": 18435 }, { "epoch": 0.5741865172037989, "grad_norm": 2.554025650024414, "learning_rate": 2.241305844177128e-06, "loss": 0.7893, "step": 18440 }, { "epoch": 0.5743422076911101, "grad_norm": 2.0641653537750244, "learning_rate": 2.240486413845095e-06, "loss": 0.8189, "step": 18445 }, { "epoch": 0.5744978981784213, "grad_norm": 1.9411125183105469, "learning_rate": 2.239666983513062e-06, "loss": 0.7021, "step": 18450 }, { "epoch": 0.5746535886657326, "grad_norm": 2.3388144969940186, "learning_rate": 2.238847553181029e-06, "loss": 0.7424, "step": 18455 }, { "epoch": 0.5748092791530437, "grad_norm": 2.6701548099517822, "learning_rate": 2.2380281228489957e-06, "loss": 0.7726, "step": 18460 }, { "epoch": 0.5749649696403549, "grad_norm": 2.2125496864318848, "learning_rate": 2.237208692516962e-06, "loss": 0.7671, "step": 18465 }, { "epoch": 0.5751206601276662, "grad_norm": 2.3470797538757324, "learning_rate": 2.2363892621849294e-06, "loss": 0.7047, "step": 18470 }, { "epoch": 0.5752763506149774, "grad_norm": 2.309513568878174, "learning_rate": 2.235569831852896e-06, "loss": 0.7465, "step": 18475 }, { "epoch": 0.5754320411022886, "grad_norm": 2.0783097743988037, "learning_rate": 2.2347504015208627e-06, "loss": 0.7607, "step": 18480 }, { "epoch": 0.5755877315895999, "grad_norm": 2.0020816326141357, "learning_rate": 2.2339309711888296e-06, "loss": 0.7934, "step": 18485 }, { "epoch": 0.5757434220769111, "grad_norm": 2.391005754470825, "learning_rate": 2.2331115408567965e-06, "loss": 0.7556, "step": 18490 }, { "epoch": 0.5758991125642223, "grad_norm": 2.2419865131378174, "learning_rate": 2.2322921105247633e-06, "loss": 0.7662, "step": 18495 }, { "epoch": 0.5760548030515336, "grad_norm": 1.9258484840393066, "learning_rate": 2.2314726801927302e-06, "loss": 0.7199, "step": 18500 }, { "epoch": 0.5762104935388448, "grad_norm": 2.617903709411621, "learning_rate": 2.230653249860697e-06, "loss": 0.8052, "step": 18505 }, { "epoch": 0.576366184026156, "grad_norm": 2.265629529953003, "learning_rate": 2.229833819528664e-06, "loss": 0.8287, "step": 18510 }, { "epoch": 0.5765218745134673, "grad_norm": 2.1552300453186035, "learning_rate": 2.229014389196631e-06, "loss": 0.7225, "step": 18515 }, { "epoch": 0.5766775650007785, "grad_norm": 1.9360274076461792, "learning_rate": 2.2281949588645977e-06, "loss": 0.817, "step": 18520 }, { "epoch": 0.5768332554880897, "grad_norm": 1.9363056421279907, "learning_rate": 2.227375528532564e-06, "loss": 0.7651, "step": 18525 }, { "epoch": 0.576988945975401, "grad_norm": 1.916691780090332, "learning_rate": 2.226556098200531e-06, "loss": 0.7309, "step": 18530 }, { "epoch": 0.5771446364627121, "grad_norm": 1.8275697231292725, "learning_rate": 2.225736667868498e-06, "loss": 0.7649, "step": 18535 }, { "epoch": 0.5773003269500233, "grad_norm": 2.119824171066284, "learning_rate": 2.2249172375364647e-06, "loss": 0.7601, "step": 18540 }, { "epoch": 0.5774560174373345, "grad_norm": 2.1818366050720215, "learning_rate": 2.2240978072044316e-06, "loss": 0.7588, "step": 18545 }, { "epoch": 0.5776117079246458, "grad_norm": 2.2847630977630615, "learning_rate": 2.2232783768723985e-06, "loss": 0.8352, "step": 18550 }, { "epoch": 0.577767398411957, "grad_norm": 2.5381276607513428, "learning_rate": 2.2224589465403654e-06, "loss": 0.772, "step": 18555 }, { "epoch": 0.5779230888992682, "grad_norm": 2.0083930492401123, "learning_rate": 2.2216395162083322e-06, "loss": 0.8029, "step": 18560 }, { "epoch": 0.5780787793865795, "grad_norm": 2.348170518875122, "learning_rate": 2.220820085876299e-06, "loss": 0.7329, "step": 18565 }, { "epoch": 0.5782344698738907, "grad_norm": 1.805125117301941, "learning_rate": 2.2200006555442655e-06, "loss": 0.8056, "step": 18570 }, { "epoch": 0.5783901603612019, "grad_norm": 2.208348512649536, "learning_rate": 2.219181225212233e-06, "loss": 0.7923, "step": 18575 }, { "epoch": 0.5785458508485132, "grad_norm": 2.1791837215423584, "learning_rate": 2.2183617948801993e-06, "loss": 0.8095, "step": 18580 }, { "epoch": 0.5787015413358244, "grad_norm": 2.1639020442962646, "learning_rate": 2.2175423645481666e-06, "loss": 0.8011, "step": 18585 }, { "epoch": 0.5788572318231356, "grad_norm": 2.31478214263916, "learning_rate": 2.216722934216133e-06, "loss": 0.6939, "step": 18590 }, { "epoch": 0.5790129223104469, "grad_norm": 1.894359827041626, "learning_rate": 2.2159035038841e-06, "loss": 0.782, "step": 18595 }, { "epoch": 0.5791686127977581, "grad_norm": 2.0580592155456543, "learning_rate": 2.2150840735520668e-06, "loss": 0.7868, "step": 18600 }, { "epoch": 0.5793243032850692, "grad_norm": 2.273937463760376, "learning_rate": 2.2142646432200336e-06, "loss": 0.8324, "step": 18605 }, { "epoch": 0.5794799937723805, "grad_norm": 1.8748377561569214, "learning_rate": 2.2134452128880005e-06, "loss": 0.7183, "step": 18610 }, { "epoch": 0.5796356842596917, "grad_norm": 1.9125630855560303, "learning_rate": 2.2126257825559674e-06, "loss": 0.8284, "step": 18615 }, { "epoch": 0.5797913747470029, "grad_norm": 2.301683187484741, "learning_rate": 2.2118063522239342e-06, "loss": 0.7681, "step": 18620 }, { "epoch": 0.5799470652343142, "grad_norm": 2.194020986557007, "learning_rate": 2.2109869218919007e-06, "loss": 0.8165, "step": 18625 }, { "epoch": 0.5801027557216254, "grad_norm": 1.942142367362976, "learning_rate": 2.210167491559868e-06, "loss": 0.7661, "step": 18630 }, { "epoch": 0.5802584462089366, "grad_norm": 2.2150683403015137, "learning_rate": 2.2093480612278344e-06, "loss": 0.8496, "step": 18635 }, { "epoch": 0.5804141366962479, "grad_norm": 2.341308355331421, "learning_rate": 2.2085286308958013e-06, "loss": 0.743, "step": 18640 }, { "epoch": 0.5805698271835591, "grad_norm": 1.7449891567230225, "learning_rate": 2.207709200563768e-06, "loss": 0.8342, "step": 18645 }, { "epoch": 0.5807255176708703, "grad_norm": 2.5368969440460205, "learning_rate": 2.206889770231735e-06, "loss": 0.7759, "step": 18650 }, { "epoch": 0.5808812081581816, "grad_norm": 2.0533556938171387, "learning_rate": 2.206070339899702e-06, "loss": 0.776, "step": 18655 }, { "epoch": 0.5810368986454928, "grad_norm": 2.4996840953826904, "learning_rate": 2.2052509095676688e-06, "loss": 0.7958, "step": 18660 }, { "epoch": 0.581192589132804, "grad_norm": 1.8029706478118896, "learning_rate": 2.2044314792356356e-06, "loss": 0.7385, "step": 18665 }, { "epoch": 0.5813482796201153, "grad_norm": 2.5006825923919678, "learning_rate": 2.2036120489036025e-06, "loss": 0.7746, "step": 18670 }, { "epoch": 0.5815039701074264, "grad_norm": 2.4713070392608643, "learning_rate": 2.2027926185715694e-06, "loss": 0.7744, "step": 18675 }, { "epoch": 0.5816596605947376, "grad_norm": 2.7465591430664062, "learning_rate": 2.201973188239536e-06, "loss": 0.7544, "step": 18680 }, { "epoch": 0.5818153510820488, "grad_norm": 1.9868500232696533, "learning_rate": 2.201153757907503e-06, "loss": 0.7386, "step": 18685 }, { "epoch": 0.5819710415693601, "grad_norm": 2.279245376586914, "learning_rate": 2.2003343275754696e-06, "loss": 0.7733, "step": 18690 }, { "epoch": 0.5821267320566713, "grad_norm": 1.9979875087738037, "learning_rate": 2.1995148972434364e-06, "loss": 0.8384, "step": 18695 }, { "epoch": 0.5822824225439825, "grad_norm": 2.3585329055786133, "learning_rate": 2.1986954669114033e-06, "loss": 0.8101, "step": 18700 }, { "epoch": 0.5824381130312938, "grad_norm": 2.2843849658966064, "learning_rate": 2.19787603657937e-06, "loss": 0.8308, "step": 18705 }, { "epoch": 0.582593803518605, "grad_norm": 2.263296365737915, "learning_rate": 2.197056606247337e-06, "loss": 0.8017, "step": 18710 }, { "epoch": 0.5827494940059162, "grad_norm": 1.982917070388794, "learning_rate": 2.196237175915304e-06, "loss": 0.7695, "step": 18715 }, { "epoch": 0.5829051844932275, "grad_norm": 2.3839030265808105, "learning_rate": 2.1954177455832708e-06, "loss": 0.7036, "step": 18720 }, { "epoch": 0.5830608749805387, "grad_norm": 2.873549461364746, "learning_rate": 2.1945983152512376e-06, "loss": 0.7846, "step": 18725 }, { "epoch": 0.58321656546785, "grad_norm": 2.5898211002349854, "learning_rate": 2.1937788849192045e-06, "loss": 0.8296, "step": 18730 }, { "epoch": 0.5833722559551612, "grad_norm": 2.3381235599517822, "learning_rate": 2.1929594545871714e-06, "loss": 0.7948, "step": 18735 }, { "epoch": 0.5835279464424724, "grad_norm": 1.925062656402588, "learning_rate": 2.192140024255138e-06, "loss": 0.6979, "step": 18740 }, { "epoch": 0.5836836369297836, "grad_norm": 1.97624933719635, "learning_rate": 2.1913205939231047e-06, "loss": 0.7915, "step": 18745 }, { "epoch": 0.5838393274170948, "grad_norm": 2.5977237224578857, "learning_rate": 2.1905011635910716e-06, "loss": 0.7731, "step": 18750 }, { "epoch": 0.583995017904406, "grad_norm": 1.907238245010376, "learning_rate": 2.1896817332590384e-06, "loss": 0.7601, "step": 18755 }, { "epoch": 0.5841507083917172, "grad_norm": 2.229234457015991, "learning_rate": 2.1888623029270053e-06, "loss": 0.7992, "step": 18760 }, { "epoch": 0.5843063988790285, "grad_norm": 2.156062126159668, "learning_rate": 2.188042872594972e-06, "loss": 0.743, "step": 18765 }, { "epoch": 0.5844620893663397, "grad_norm": 2.3711023330688477, "learning_rate": 2.187223442262939e-06, "loss": 0.7537, "step": 18770 }, { "epoch": 0.5846177798536509, "grad_norm": 1.8503302335739136, "learning_rate": 2.186404011930906e-06, "loss": 0.7537, "step": 18775 }, { "epoch": 0.5847734703409622, "grad_norm": 2.1845738887786865, "learning_rate": 2.185584581598873e-06, "loss": 0.7401, "step": 18780 }, { "epoch": 0.5849291608282734, "grad_norm": 2.2181551456451416, "learning_rate": 2.1847651512668392e-06, "loss": 0.7932, "step": 18785 }, { "epoch": 0.5850848513155846, "grad_norm": 1.771570086479187, "learning_rate": 2.1839457209348065e-06, "loss": 0.7823, "step": 18790 }, { "epoch": 0.5852405418028959, "grad_norm": 1.9557286500930786, "learning_rate": 2.183126290602773e-06, "loss": 0.752, "step": 18795 }, { "epoch": 0.5853962322902071, "grad_norm": 2.3584799766540527, "learning_rate": 2.1823068602707403e-06, "loss": 0.8056, "step": 18800 }, { "epoch": 0.5855519227775183, "grad_norm": 2.598912000656128, "learning_rate": 2.1814874299387067e-06, "loss": 0.8186, "step": 18805 }, { "epoch": 0.5857076132648296, "grad_norm": 2.548887014389038, "learning_rate": 2.1806679996066736e-06, "loss": 0.8043, "step": 18810 }, { "epoch": 0.5858633037521408, "grad_norm": 2.312166690826416, "learning_rate": 2.1798485692746405e-06, "loss": 0.8228, "step": 18815 }, { "epoch": 0.5860189942394519, "grad_norm": 2.0553178787231445, "learning_rate": 2.1790291389426073e-06, "loss": 0.708, "step": 18820 }, { "epoch": 0.5861746847267632, "grad_norm": 2.1874196529388428, "learning_rate": 2.178209708610574e-06, "loss": 0.7687, "step": 18825 }, { "epoch": 0.5863303752140744, "grad_norm": 1.9012784957885742, "learning_rate": 2.1773902782785406e-06, "loss": 0.8053, "step": 18830 }, { "epoch": 0.5864860657013856, "grad_norm": 2.027930736541748, "learning_rate": 2.176570847946508e-06, "loss": 0.768, "step": 18835 }, { "epoch": 0.5866417561886969, "grad_norm": 2.4307711124420166, "learning_rate": 2.1757514176144744e-06, "loss": 0.7619, "step": 18840 }, { "epoch": 0.5867974466760081, "grad_norm": 2.958613634109497, "learning_rate": 2.1749319872824417e-06, "loss": 0.8282, "step": 18845 }, { "epoch": 0.5869531371633193, "grad_norm": 2.3628923892974854, "learning_rate": 2.174112556950408e-06, "loss": 0.8155, "step": 18850 }, { "epoch": 0.5871088276506305, "grad_norm": 1.834259271621704, "learning_rate": 2.173293126618375e-06, "loss": 0.6873, "step": 18855 }, { "epoch": 0.5872645181379418, "grad_norm": 2.3220443725585938, "learning_rate": 2.172473696286342e-06, "loss": 0.8153, "step": 18860 }, { "epoch": 0.587420208625253, "grad_norm": 2.1092278957366943, "learning_rate": 2.1716542659543087e-06, "loss": 0.7839, "step": 18865 }, { "epoch": 0.5875758991125642, "grad_norm": 2.4438655376434326, "learning_rate": 2.1708348356222756e-06, "loss": 0.7375, "step": 18870 }, { "epoch": 0.5877315895998755, "grad_norm": 1.9934005737304688, "learning_rate": 2.1700154052902425e-06, "loss": 0.8083, "step": 18875 }, { "epoch": 0.5878872800871867, "grad_norm": 2.2434473037719727, "learning_rate": 2.1691959749582093e-06, "loss": 0.7552, "step": 18880 }, { "epoch": 0.588042970574498, "grad_norm": 2.091644287109375, "learning_rate": 2.168376544626176e-06, "loss": 0.7726, "step": 18885 }, { "epoch": 0.5881986610618091, "grad_norm": 2.140688180923462, "learning_rate": 2.167557114294143e-06, "loss": 0.7899, "step": 18890 }, { "epoch": 0.5883543515491203, "grad_norm": 2.2866146564483643, "learning_rate": 2.1667376839621095e-06, "loss": 0.7715, "step": 18895 }, { "epoch": 0.5885100420364315, "grad_norm": 2.063560962677002, "learning_rate": 2.1659182536300764e-06, "loss": 0.717, "step": 18900 }, { "epoch": 0.5886657325237428, "grad_norm": 2.419750928878784, "learning_rate": 2.1650988232980433e-06, "loss": 0.7505, "step": 18905 }, { "epoch": 0.588821423011054, "grad_norm": 2.82257080078125, "learning_rate": 2.16427939296601e-06, "loss": 0.8005, "step": 18910 }, { "epoch": 0.5889771134983652, "grad_norm": 2.1135451793670654, "learning_rate": 2.163459962633977e-06, "loss": 0.8203, "step": 18915 }, { "epoch": 0.5891328039856765, "grad_norm": 2.2693662643432617, "learning_rate": 2.162640532301944e-06, "loss": 0.7674, "step": 18920 }, { "epoch": 0.5892884944729877, "grad_norm": 2.3629627227783203, "learning_rate": 2.1618211019699107e-06, "loss": 0.7475, "step": 18925 }, { "epoch": 0.5894441849602989, "grad_norm": 2.012129306793213, "learning_rate": 2.1610016716378776e-06, "loss": 0.7517, "step": 18930 }, { "epoch": 0.5895998754476102, "grad_norm": 2.322115421295166, "learning_rate": 2.1601822413058445e-06, "loss": 0.7783, "step": 18935 }, { "epoch": 0.5897555659349214, "grad_norm": 2.6909713745117188, "learning_rate": 2.1593628109738113e-06, "loss": 0.8282, "step": 18940 }, { "epoch": 0.5899112564222326, "grad_norm": 2.2396483421325684, "learning_rate": 2.158543380641778e-06, "loss": 0.8396, "step": 18945 }, { "epoch": 0.5900669469095439, "grad_norm": 2.442368745803833, "learning_rate": 2.157723950309745e-06, "loss": 0.7942, "step": 18950 }, { "epoch": 0.5902226373968551, "grad_norm": 2.3830628395080566, "learning_rate": 2.1569045199777115e-06, "loss": 0.7566, "step": 18955 }, { "epoch": 0.5903783278841663, "grad_norm": 2.4114997386932373, "learning_rate": 2.1560850896456784e-06, "loss": 0.7829, "step": 18960 }, { "epoch": 0.5905340183714775, "grad_norm": 2.3444132804870605, "learning_rate": 2.1552656593136453e-06, "loss": 0.7414, "step": 18965 }, { "epoch": 0.5906897088587887, "grad_norm": 2.2385027408599854, "learning_rate": 2.154446228981612e-06, "loss": 0.8239, "step": 18970 }, { "epoch": 0.5908453993460999, "grad_norm": 2.002804756164551, "learning_rate": 2.153626798649579e-06, "loss": 0.7828, "step": 18975 }, { "epoch": 0.5910010898334112, "grad_norm": 2.0239036083221436, "learning_rate": 2.152807368317546e-06, "loss": 0.7654, "step": 18980 }, { "epoch": 0.5911567803207224, "grad_norm": 2.0033156871795654, "learning_rate": 2.1519879379855127e-06, "loss": 0.7541, "step": 18985 }, { "epoch": 0.5913124708080336, "grad_norm": 2.0476484298706055, "learning_rate": 2.151168507653479e-06, "loss": 0.797, "step": 18990 }, { "epoch": 0.5914681612953449, "grad_norm": 1.9190280437469482, "learning_rate": 2.1503490773214465e-06, "loss": 0.7412, "step": 18995 }, { "epoch": 0.5916238517826561, "grad_norm": 2.555785655975342, "learning_rate": 2.149529646989413e-06, "loss": 0.7757, "step": 19000 }, { "epoch": 0.5917795422699673, "grad_norm": 2.4786486625671387, "learning_rate": 2.1487102166573802e-06, "loss": 0.8075, "step": 19005 }, { "epoch": 0.5919352327572786, "grad_norm": 2.297720432281494, "learning_rate": 2.1478907863253467e-06, "loss": 0.7579, "step": 19010 }, { "epoch": 0.5920909232445898, "grad_norm": 2.2753875255584717, "learning_rate": 2.1470713559933135e-06, "loss": 0.7977, "step": 19015 }, { "epoch": 0.592246613731901, "grad_norm": 2.224167823791504, "learning_rate": 2.1462519256612804e-06, "loss": 0.7872, "step": 19020 }, { "epoch": 0.5924023042192123, "grad_norm": 2.209272861480713, "learning_rate": 2.1454324953292473e-06, "loss": 0.7624, "step": 19025 }, { "epoch": 0.5925579947065235, "grad_norm": 2.1116864681243896, "learning_rate": 2.144613064997214e-06, "loss": 0.7689, "step": 19030 }, { "epoch": 0.5927136851938346, "grad_norm": 2.3683836460113525, "learning_rate": 2.143793634665181e-06, "loss": 0.7233, "step": 19035 }, { "epoch": 0.5928693756811458, "grad_norm": 2.0201573371887207, "learning_rate": 2.142974204333148e-06, "loss": 0.7921, "step": 19040 }, { "epoch": 0.5930250661684571, "grad_norm": 2.6605894565582275, "learning_rate": 2.1421547740011143e-06, "loss": 0.7814, "step": 19045 }, { "epoch": 0.5931807566557683, "grad_norm": 1.9884010553359985, "learning_rate": 2.1413353436690816e-06, "loss": 0.7927, "step": 19050 }, { "epoch": 0.5933364471430795, "grad_norm": 2.207268238067627, "learning_rate": 2.140515913337048e-06, "loss": 0.7346, "step": 19055 }, { "epoch": 0.5934921376303908, "grad_norm": 1.967704176902771, "learning_rate": 2.139696483005015e-06, "loss": 0.7756, "step": 19060 }, { "epoch": 0.593647828117702, "grad_norm": 1.9877651929855347, "learning_rate": 2.138877052672982e-06, "loss": 0.6951, "step": 19065 }, { "epoch": 0.5938035186050132, "grad_norm": 2.202754259109497, "learning_rate": 2.1380576223409487e-06, "loss": 0.7899, "step": 19070 }, { "epoch": 0.5939592090923245, "grad_norm": 1.9530751705169678, "learning_rate": 2.1372381920089155e-06, "loss": 0.8105, "step": 19075 }, { "epoch": 0.5941148995796357, "grad_norm": 2.3465688228607178, "learning_rate": 2.1364187616768824e-06, "loss": 0.7813, "step": 19080 }, { "epoch": 0.5942705900669469, "grad_norm": 1.8427928686141968, "learning_rate": 2.1355993313448493e-06, "loss": 0.6213, "step": 19085 }, { "epoch": 0.5944262805542582, "grad_norm": 2.503201961517334, "learning_rate": 2.134779901012816e-06, "loss": 0.7645, "step": 19090 }, { "epoch": 0.5945819710415694, "grad_norm": 2.24407958984375, "learning_rate": 2.133960470680783e-06, "loss": 0.8316, "step": 19095 }, { "epoch": 0.5947376615288806, "grad_norm": 2.580364942550659, "learning_rate": 2.13314104034875e-06, "loss": 0.8444, "step": 19100 }, { "epoch": 0.5948933520161918, "grad_norm": 1.9228150844573975, "learning_rate": 2.1323216100167168e-06, "loss": 0.7221, "step": 19105 }, { "epoch": 0.595049042503503, "grad_norm": 2.06428599357605, "learning_rate": 2.1315021796846832e-06, "loss": 0.8519, "step": 19110 }, { "epoch": 0.5952047329908142, "grad_norm": 1.711287498474121, "learning_rate": 2.13068274935265e-06, "loss": 0.8412, "step": 19115 }, { "epoch": 0.5953604234781255, "grad_norm": 1.9877662658691406, "learning_rate": 2.129863319020617e-06, "loss": 0.7445, "step": 19120 }, { "epoch": 0.5955161139654367, "grad_norm": 2.308945894241333, "learning_rate": 2.129043888688584e-06, "loss": 0.7192, "step": 19125 }, { "epoch": 0.5956718044527479, "grad_norm": 1.9416438341140747, "learning_rate": 2.1282244583565507e-06, "loss": 0.7991, "step": 19130 }, { "epoch": 0.5958274949400592, "grad_norm": 1.8139313459396362, "learning_rate": 2.1274050280245176e-06, "loss": 0.7669, "step": 19135 }, { "epoch": 0.5959831854273704, "grad_norm": 2.506606340408325, "learning_rate": 2.1265855976924844e-06, "loss": 0.7877, "step": 19140 }, { "epoch": 0.5961388759146816, "grad_norm": 2.2998411655426025, "learning_rate": 2.1257661673604513e-06, "loss": 0.7999, "step": 19145 }, { "epoch": 0.5962945664019929, "grad_norm": 1.9685595035552979, "learning_rate": 2.124946737028418e-06, "loss": 0.7687, "step": 19150 }, { "epoch": 0.5964502568893041, "grad_norm": 2.148345470428467, "learning_rate": 2.124127306696385e-06, "loss": 0.7244, "step": 19155 }, { "epoch": 0.5966059473766153, "grad_norm": 2.4903223514556885, "learning_rate": 2.1233078763643515e-06, "loss": 0.8411, "step": 19160 }, { "epoch": 0.5967616378639266, "grad_norm": 2.3367249965667725, "learning_rate": 2.1224884460323188e-06, "loss": 0.7544, "step": 19165 }, { "epoch": 0.5969173283512378, "grad_norm": 2.112980842590332, "learning_rate": 2.1216690157002852e-06, "loss": 0.8593, "step": 19170 }, { "epoch": 0.597073018838549, "grad_norm": 2.45819091796875, "learning_rate": 2.120849585368252e-06, "loss": 0.8181, "step": 19175 }, { "epoch": 0.5972287093258601, "grad_norm": 1.982232928276062, "learning_rate": 2.120030155036219e-06, "loss": 0.7586, "step": 19180 }, { "epoch": 0.5973843998131714, "grad_norm": 2.156820297241211, "learning_rate": 2.119210724704186e-06, "loss": 0.8468, "step": 19185 }, { "epoch": 0.5975400903004826, "grad_norm": 2.2219691276550293, "learning_rate": 2.1183912943721527e-06, "loss": 0.8085, "step": 19190 }, { "epoch": 0.5976957807877938, "grad_norm": 1.9648489952087402, "learning_rate": 2.1175718640401196e-06, "loss": 0.7719, "step": 19195 }, { "epoch": 0.5978514712751051, "grad_norm": 2.059375524520874, "learning_rate": 2.1167524337080864e-06, "loss": 0.7496, "step": 19200 }, { "epoch": 0.5980071617624163, "grad_norm": 2.3310799598693848, "learning_rate": 2.115933003376053e-06, "loss": 0.7555, "step": 19205 }, { "epoch": 0.5981628522497275, "grad_norm": 2.1415724754333496, "learning_rate": 2.11511357304402e-06, "loss": 0.8792, "step": 19210 }, { "epoch": 0.5983185427370388, "grad_norm": 2.310553550720215, "learning_rate": 2.1142941427119866e-06, "loss": 0.8122, "step": 19215 }, { "epoch": 0.59847423322435, "grad_norm": 2.1101784706115723, "learning_rate": 2.113474712379954e-06, "loss": 0.861, "step": 19220 }, { "epoch": 0.5986299237116612, "grad_norm": 2.03712797164917, "learning_rate": 2.1126552820479204e-06, "loss": 0.7179, "step": 19225 }, { "epoch": 0.5987856141989725, "grad_norm": 1.9099664688110352, "learning_rate": 2.1118358517158872e-06, "loss": 0.7295, "step": 19230 }, { "epoch": 0.5989413046862837, "grad_norm": 2.293766736984253, "learning_rate": 2.111016421383854e-06, "loss": 0.8027, "step": 19235 }, { "epoch": 0.5990969951735949, "grad_norm": 2.0943188667297363, "learning_rate": 2.110196991051821e-06, "loss": 0.7578, "step": 19240 }, { "epoch": 0.5992526856609062, "grad_norm": 2.087395191192627, "learning_rate": 2.109377560719788e-06, "loss": 0.7654, "step": 19245 }, { "epoch": 0.5994083761482173, "grad_norm": 2.3308866024017334, "learning_rate": 2.1085581303877543e-06, "loss": 0.7661, "step": 19250 }, { "epoch": 0.5995640666355285, "grad_norm": 2.0412631034851074, "learning_rate": 2.1077387000557216e-06, "loss": 0.7699, "step": 19255 }, { "epoch": 0.5997197571228398, "grad_norm": 1.8601611852645874, "learning_rate": 2.106919269723688e-06, "loss": 0.7635, "step": 19260 }, { "epoch": 0.599875447610151, "grad_norm": 1.812693476676941, "learning_rate": 2.1060998393916553e-06, "loss": 0.7695, "step": 19265 }, { "epoch": 0.6000311380974622, "grad_norm": 1.7838112115859985, "learning_rate": 2.1052804090596218e-06, "loss": 0.7861, "step": 19270 }, { "epoch": 0.6001868285847735, "grad_norm": 2.1842105388641357, "learning_rate": 2.1044609787275886e-06, "loss": 0.7928, "step": 19275 }, { "epoch": 0.6003425190720847, "grad_norm": 2.1631975173950195, "learning_rate": 2.1036415483955555e-06, "loss": 0.7804, "step": 19280 }, { "epoch": 0.6004982095593959, "grad_norm": 2.2893385887145996, "learning_rate": 2.1028221180635224e-06, "loss": 0.7736, "step": 19285 }, { "epoch": 0.6006539000467072, "grad_norm": 1.8186016082763672, "learning_rate": 2.1020026877314892e-06, "loss": 0.7262, "step": 19290 }, { "epoch": 0.6008095905340184, "grad_norm": 1.7124221324920654, "learning_rate": 2.101183257399456e-06, "loss": 0.7535, "step": 19295 }, { "epoch": 0.6009652810213296, "grad_norm": 1.7088021039962769, "learning_rate": 2.100363827067423e-06, "loss": 0.8252, "step": 19300 }, { "epoch": 0.6011209715086409, "grad_norm": 1.913789987564087, "learning_rate": 2.09954439673539e-06, "loss": 0.7164, "step": 19305 }, { "epoch": 0.6012766619959521, "grad_norm": 1.9137741327285767, "learning_rate": 2.0987249664033567e-06, "loss": 0.7666, "step": 19310 }, { "epoch": 0.6014323524832633, "grad_norm": 1.953778624534607, "learning_rate": 2.0979055360713236e-06, "loss": 0.7211, "step": 19315 }, { "epoch": 0.6015880429705744, "grad_norm": 1.9983148574829102, "learning_rate": 2.09708610573929e-06, "loss": 0.7918, "step": 19320 }, { "epoch": 0.6017437334578857, "grad_norm": 2.254333019256592, "learning_rate": 2.096266675407257e-06, "loss": 0.7654, "step": 19325 }, { "epoch": 0.6018994239451969, "grad_norm": 2.769322395324707, "learning_rate": 2.0954472450752238e-06, "loss": 0.7822, "step": 19330 }, { "epoch": 0.6020551144325081, "grad_norm": 2.136601686477661, "learning_rate": 2.0946278147431906e-06, "loss": 0.7831, "step": 19335 }, { "epoch": 0.6022108049198194, "grad_norm": 2.0549442768096924, "learning_rate": 2.0938083844111575e-06, "loss": 0.8274, "step": 19340 }, { "epoch": 0.6023664954071306, "grad_norm": 2.0089831352233887, "learning_rate": 2.0929889540791244e-06, "loss": 0.7163, "step": 19345 }, { "epoch": 0.6025221858944418, "grad_norm": 2.072502851486206, "learning_rate": 2.0921695237470913e-06, "loss": 0.8418, "step": 19350 }, { "epoch": 0.6026778763817531, "grad_norm": 2.326129674911499, "learning_rate": 2.091350093415058e-06, "loss": 0.7572, "step": 19355 }, { "epoch": 0.6028335668690643, "grad_norm": 2.6728086471557617, "learning_rate": 2.090530663083025e-06, "loss": 0.8164, "step": 19360 }, { "epoch": 0.6029892573563755, "grad_norm": 2.120805025100708, "learning_rate": 2.0897112327509914e-06, "loss": 0.8504, "step": 19365 }, { "epoch": 0.6031449478436868, "grad_norm": 3.2146339416503906, "learning_rate": 2.0888918024189587e-06, "loss": 0.6971, "step": 19370 }, { "epoch": 0.603300638330998, "grad_norm": 2.341207981109619, "learning_rate": 2.088072372086925e-06, "loss": 0.7115, "step": 19375 }, { "epoch": 0.6034563288183092, "grad_norm": 2.062255382537842, "learning_rate": 2.0872529417548925e-06, "loss": 0.6355, "step": 19380 }, { "epoch": 0.6036120193056205, "grad_norm": 2.300360918045044, "learning_rate": 2.086433511422859e-06, "loss": 0.7624, "step": 19385 }, { "epoch": 0.6037677097929317, "grad_norm": 1.8310110569000244, "learning_rate": 2.0856140810908258e-06, "loss": 0.7079, "step": 19390 }, { "epoch": 0.6039234002802428, "grad_norm": 2.5874240398406982, "learning_rate": 2.0847946507587927e-06, "loss": 0.8256, "step": 19395 }, { "epoch": 0.6040790907675541, "grad_norm": 2.325965642929077, "learning_rate": 2.0839752204267595e-06, "loss": 0.764, "step": 19400 }, { "epoch": 0.6042347812548653, "grad_norm": 1.8352808952331543, "learning_rate": 2.0831557900947264e-06, "loss": 0.7905, "step": 19405 }, { "epoch": 0.6043904717421765, "grad_norm": 1.9865291118621826, "learning_rate": 2.082336359762693e-06, "loss": 0.7269, "step": 19410 }, { "epoch": 0.6045461622294878, "grad_norm": 1.9427465200424194, "learning_rate": 2.08151692943066e-06, "loss": 0.817, "step": 19415 }, { "epoch": 0.604701852716799, "grad_norm": 2.159109354019165, "learning_rate": 2.0806974990986266e-06, "loss": 0.7906, "step": 19420 }, { "epoch": 0.6048575432041102, "grad_norm": 1.8023650646209717, "learning_rate": 2.079878068766594e-06, "loss": 0.7435, "step": 19425 }, { "epoch": 0.6050132336914215, "grad_norm": 1.7214041948318481, "learning_rate": 2.0790586384345603e-06, "loss": 0.6913, "step": 19430 }, { "epoch": 0.6051689241787327, "grad_norm": 2.270540952682495, "learning_rate": 2.078239208102527e-06, "loss": 0.7986, "step": 19435 }, { "epoch": 0.6053246146660439, "grad_norm": 2.2197086811065674, "learning_rate": 2.077419777770494e-06, "loss": 0.7181, "step": 19440 }, { "epoch": 0.6054803051533552, "grad_norm": 2.3664679527282715, "learning_rate": 2.076600347438461e-06, "loss": 0.8427, "step": 19445 }, { "epoch": 0.6056359956406664, "grad_norm": 2.125919818878174, "learning_rate": 2.075780917106428e-06, "loss": 0.8472, "step": 19450 }, { "epoch": 0.6057916861279776, "grad_norm": 2.3440964221954346, "learning_rate": 2.0749614867743947e-06, "loss": 0.8003, "step": 19455 }, { "epoch": 0.6059473766152889, "grad_norm": 1.7799832820892334, "learning_rate": 2.0741420564423615e-06, "loss": 0.6624, "step": 19460 }, { "epoch": 0.6061030671026, "grad_norm": 2.1636362075805664, "learning_rate": 2.073322626110328e-06, "loss": 0.8055, "step": 19465 }, { "epoch": 0.6062587575899112, "grad_norm": 2.1030056476593018, "learning_rate": 2.0725031957782953e-06, "loss": 0.7702, "step": 19470 }, { "epoch": 0.6064144480772224, "grad_norm": 2.118715763092041, "learning_rate": 2.0716837654462617e-06, "loss": 0.8442, "step": 19475 }, { "epoch": 0.6065701385645337, "grad_norm": 1.8014941215515137, "learning_rate": 2.0708643351142286e-06, "loss": 0.7775, "step": 19480 }, { "epoch": 0.6067258290518449, "grad_norm": 2.6376442909240723, "learning_rate": 2.0700449047821955e-06, "loss": 0.8047, "step": 19485 }, { "epoch": 0.6068815195391561, "grad_norm": 2.24717116355896, "learning_rate": 2.0692254744501623e-06, "loss": 0.7644, "step": 19490 }, { "epoch": 0.6070372100264674, "grad_norm": 1.9955004453659058, "learning_rate": 2.068406044118129e-06, "loss": 0.8408, "step": 19495 }, { "epoch": 0.6071929005137786, "grad_norm": 2.2170674800872803, "learning_rate": 2.067586613786096e-06, "loss": 0.8798, "step": 19500 }, { "epoch": 0.6073485910010898, "grad_norm": 2.2156381607055664, "learning_rate": 2.066767183454063e-06, "loss": 0.8093, "step": 19505 }, { "epoch": 0.6075042814884011, "grad_norm": 2.2285287380218506, "learning_rate": 2.06594775312203e-06, "loss": 0.7389, "step": 19510 }, { "epoch": 0.6076599719757123, "grad_norm": 2.101100206375122, "learning_rate": 2.0651283227899967e-06, "loss": 0.7878, "step": 19515 }, { "epoch": 0.6078156624630235, "grad_norm": 1.8830868005752563, "learning_rate": 2.0643088924579635e-06, "loss": 0.8696, "step": 19520 }, { "epoch": 0.6079713529503348, "grad_norm": 2.7471959590911865, "learning_rate": 2.0634894621259304e-06, "loss": 0.8292, "step": 19525 }, { "epoch": 0.608127043437646, "grad_norm": 2.2349774837493896, "learning_rate": 2.0626700317938973e-06, "loss": 0.7665, "step": 19530 }, { "epoch": 0.6082827339249571, "grad_norm": 2.068577289581299, "learning_rate": 2.0618506014618637e-06, "loss": 0.7223, "step": 19535 }, { "epoch": 0.6084384244122684, "grad_norm": 2.124511241912842, "learning_rate": 2.0610311711298306e-06, "loss": 0.7366, "step": 19540 }, { "epoch": 0.6085941148995796, "grad_norm": 2.1087052822113037, "learning_rate": 2.0602117407977975e-06, "loss": 0.7602, "step": 19545 }, { "epoch": 0.6087498053868908, "grad_norm": 2.1602158546447754, "learning_rate": 2.0593923104657643e-06, "loss": 0.7628, "step": 19550 }, { "epoch": 0.6089054958742021, "grad_norm": 1.8905647993087769, "learning_rate": 2.058572880133731e-06, "loss": 0.804, "step": 19555 }, { "epoch": 0.6090611863615133, "grad_norm": 2.162766456604004, "learning_rate": 2.057753449801698e-06, "loss": 0.6719, "step": 19560 }, { "epoch": 0.6092168768488245, "grad_norm": 2.2102949619293213, "learning_rate": 2.056934019469665e-06, "loss": 0.8451, "step": 19565 }, { "epoch": 0.6093725673361358, "grad_norm": 2.1879963874816895, "learning_rate": 2.056114589137632e-06, "loss": 0.8396, "step": 19570 }, { "epoch": 0.609528257823447, "grad_norm": 1.9510548114776611, "learning_rate": 2.0552951588055987e-06, "loss": 0.7258, "step": 19575 }, { "epoch": 0.6096839483107582, "grad_norm": 1.9528515338897705, "learning_rate": 2.054475728473565e-06, "loss": 0.765, "step": 19580 }, { "epoch": 0.6098396387980695, "grad_norm": 1.702093482017517, "learning_rate": 2.0536562981415324e-06, "loss": 0.7586, "step": 19585 }, { "epoch": 0.6099953292853807, "grad_norm": 1.9392069578170776, "learning_rate": 2.052836867809499e-06, "loss": 0.7588, "step": 19590 }, { "epoch": 0.6101510197726919, "grad_norm": 2.186469316482544, "learning_rate": 2.052017437477466e-06, "loss": 0.7777, "step": 19595 }, { "epoch": 0.6103067102600032, "grad_norm": 1.9545317888259888, "learning_rate": 2.0511980071454326e-06, "loss": 0.7184, "step": 19600 }, { "epoch": 0.6104624007473144, "grad_norm": 2.3117001056671143, "learning_rate": 2.0503785768133995e-06, "loss": 0.7562, "step": 19605 }, { "epoch": 0.6106180912346255, "grad_norm": 2.177612543106079, "learning_rate": 2.0495591464813664e-06, "loss": 0.7801, "step": 19610 }, { "epoch": 0.6107737817219367, "grad_norm": 2.537139654159546, "learning_rate": 2.0487397161493332e-06, "loss": 0.7171, "step": 19615 }, { "epoch": 0.610929472209248, "grad_norm": 3.318146228790283, "learning_rate": 2.0479202858173e-06, "loss": 0.8539, "step": 19620 }, { "epoch": 0.6110851626965592, "grad_norm": 2.3129312992095947, "learning_rate": 2.0471008554852665e-06, "loss": 0.7657, "step": 19625 }, { "epoch": 0.6112408531838704, "grad_norm": 2.335657835006714, "learning_rate": 2.046281425153234e-06, "loss": 0.7439, "step": 19630 }, { "epoch": 0.6113965436711817, "grad_norm": 2.138406991958618, "learning_rate": 2.0454619948212003e-06, "loss": 0.8342, "step": 19635 }, { "epoch": 0.6115522341584929, "grad_norm": 2.020724296569824, "learning_rate": 2.0446425644891676e-06, "loss": 0.7147, "step": 19640 }, { "epoch": 0.6117079246458041, "grad_norm": 2.2063848972320557, "learning_rate": 2.043823134157134e-06, "loss": 0.7164, "step": 19645 }, { "epoch": 0.6118636151331154, "grad_norm": 2.1237738132476807, "learning_rate": 2.043003703825101e-06, "loss": 0.7546, "step": 19650 }, { "epoch": 0.6120193056204266, "grad_norm": 1.9647655487060547, "learning_rate": 2.0421842734930678e-06, "loss": 0.718, "step": 19655 }, { "epoch": 0.6121749961077378, "grad_norm": 1.8916312456130981, "learning_rate": 2.0413648431610346e-06, "loss": 0.6626, "step": 19660 }, { "epoch": 0.6123306865950491, "grad_norm": 2.584862232208252, "learning_rate": 2.0405454128290015e-06, "loss": 0.7982, "step": 19665 }, { "epoch": 0.6124863770823603, "grad_norm": 2.1974921226501465, "learning_rate": 2.0397259824969684e-06, "loss": 0.8494, "step": 19670 }, { "epoch": 0.6126420675696715, "grad_norm": 2.0097415447235107, "learning_rate": 2.0389065521649352e-06, "loss": 0.7417, "step": 19675 }, { "epoch": 0.6127977580569827, "grad_norm": 1.7974278926849365, "learning_rate": 2.0380871218329017e-06, "loss": 0.8185, "step": 19680 }, { "epoch": 0.6129534485442939, "grad_norm": 2.2807774543762207, "learning_rate": 2.037267691500869e-06, "loss": 0.7318, "step": 19685 }, { "epoch": 0.6131091390316051, "grad_norm": 2.7128825187683105, "learning_rate": 2.0364482611688354e-06, "loss": 0.776, "step": 19690 }, { "epoch": 0.6132648295189164, "grad_norm": 2.556414842605591, "learning_rate": 2.0356288308368023e-06, "loss": 0.7478, "step": 19695 }, { "epoch": 0.6134205200062276, "grad_norm": 2.454512357711792, "learning_rate": 2.034809400504769e-06, "loss": 0.7637, "step": 19700 }, { "epoch": 0.6135762104935388, "grad_norm": 2.413346767425537, "learning_rate": 2.033989970172736e-06, "loss": 0.7549, "step": 19705 }, { "epoch": 0.6137319009808501, "grad_norm": 1.961002230644226, "learning_rate": 2.033170539840703e-06, "loss": 0.7557, "step": 19710 }, { "epoch": 0.6138875914681613, "grad_norm": 2.2612409591674805, "learning_rate": 2.0323511095086698e-06, "loss": 0.7372, "step": 19715 }, { "epoch": 0.6140432819554725, "grad_norm": 1.975163459777832, "learning_rate": 2.0315316791766366e-06, "loss": 0.8064, "step": 19720 }, { "epoch": 0.6141989724427838, "grad_norm": 2.237125873565674, "learning_rate": 2.0307122488446035e-06, "loss": 0.7253, "step": 19725 }, { "epoch": 0.614354662930095, "grad_norm": 2.1256697177886963, "learning_rate": 2.0298928185125704e-06, "loss": 0.749, "step": 19730 }, { "epoch": 0.6145103534174062, "grad_norm": 2.258975028991699, "learning_rate": 2.0290733881805372e-06, "loss": 0.7746, "step": 19735 }, { "epoch": 0.6146660439047175, "grad_norm": 2.094640016555786, "learning_rate": 2.0282539578485037e-06, "loss": 0.7604, "step": 19740 }, { "epoch": 0.6148217343920287, "grad_norm": 2.2974700927734375, "learning_rate": 2.027434527516471e-06, "loss": 0.8174, "step": 19745 }, { "epoch": 0.6149774248793398, "grad_norm": 1.9124107360839844, "learning_rate": 2.0266150971844374e-06, "loss": 0.7663, "step": 19750 }, { "epoch": 0.615133115366651, "grad_norm": 2.2620913982391357, "learning_rate": 2.0257956668524043e-06, "loss": 0.7422, "step": 19755 }, { "epoch": 0.6152888058539623, "grad_norm": 1.8714638948440552, "learning_rate": 2.024976236520371e-06, "loss": 0.7704, "step": 19760 }, { "epoch": 0.6154444963412735, "grad_norm": 2.003847360610962, "learning_rate": 2.024156806188338e-06, "loss": 0.7172, "step": 19765 }, { "epoch": 0.6156001868285848, "grad_norm": 2.0365793704986572, "learning_rate": 2.023337375856305e-06, "loss": 0.7874, "step": 19770 }, { "epoch": 0.615755877315896, "grad_norm": 1.9765632152557373, "learning_rate": 2.0225179455242718e-06, "loss": 0.689, "step": 19775 }, { "epoch": 0.6159115678032072, "grad_norm": 2.0713396072387695, "learning_rate": 2.0216985151922386e-06, "loss": 0.7805, "step": 19780 }, { "epoch": 0.6160672582905184, "grad_norm": 3.6778953075408936, "learning_rate": 2.020879084860205e-06, "loss": 0.8742, "step": 19785 }, { "epoch": 0.6162229487778297, "grad_norm": 2.231182813644409, "learning_rate": 2.0200596545281724e-06, "loss": 0.8412, "step": 19790 }, { "epoch": 0.6163786392651409, "grad_norm": 2.2438130378723145, "learning_rate": 2.019240224196139e-06, "loss": 0.7549, "step": 19795 }, { "epoch": 0.6165343297524521, "grad_norm": 2.350081443786621, "learning_rate": 2.018420793864106e-06, "loss": 0.8291, "step": 19800 }, { "epoch": 0.6166900202397634, "grad_norm": 2.3764193058013916, "learning_rate": 2.0176013635320726e-06, "loss": 0.7847, "step": 19805 }, { "epoch": 0.6168457107270746, "grad_norm": 2.1806681156158447, "learning_rate": 2.0167819332000394e-06, "loss": 0.8015, "step": 19810 }, { "epoch": 0.6170014012143858, "grad_norm": 2.36745023727417, "learning_rate": 2.0159625028680063e-06, "loss": 0.7488, "step": 19815 }, { "epoch": 0.6171570917016971, "grad_norm": 1.7537782192230225, "learning_rate": 2.015143072535973e-06, "loss": 0.7548, "step": 19820 }, { "epoch": 0.6173127821890082, "grad_norm": 2.2767879962921143, "learning_rate": 2.01432364220394e-06, "loss": 0.78, "step": 19825 }, { "epoch": 0.6174684726763194, "grad_norm": 2.2105765342712402, "learning_rate": 2.0135042118719065e-06, "loss": 0.7962, "step": 19830 }, { "epoch": 0.6176241631636307, "grad_norm": 1.8358871936798096, "learning_rate": 2.0126847815398738e-06, "loss": 0.8033, "step": 19835 }, { "epoch": 0.6177798536509419, "grad_norm": 2.185662031173706, "learning_rate": 2.0118653512078402e-06, "loss": 0.7433, "step": 19840 }, { "epoch": 0.6179355441382531, "grad_norm": 2.0486769676208496, "learning_rate": 2.0110459208758075e-06, "loss": 0.7586, "step": 19845 }, { "epoch": 0.6180912346255644, "grad_norm": 1.7363197803497314, "learning_rate": 2.010226490543774e-06, "loss": 0.7717, "step": 19850 }, { "epoch": 0.6182469251128756, "grad_norm": 1.929573655128479, "learning_rate": 2.009407060211741e-06, "loss": 0.7493, "step": 19855 }, { "epoch": 0.6184026156001868, "grad_norm": 2.4110591411590576, "learning_rate": 2.0085876298797077e-06, "loss": 0.76, "step": 19860 }, { "epoch": 0.6185583060874981, "grad_norm": 2.1095361709594727, "learning_rate": 2.0077681995476746e-06, "loss": 0.7712, "step": 19865 }, { "epoch": 0.6187139965748093, "grad_norm": 2.152273416519165, "learning_rate": 2.0069487692156414e-06, "loss": 0.7912, "step": 19870 }, { "epoch": 0.6188696870621205, "grad_norm": 1.8380277156829834, "learning_rate": 2.0061293388836083e-06, "loss": 0.7302, "step": 19875 }, { "epoch": 0.6190253775494318, "grad_norm": 2.120377779006958, "learning_rate": 2.005309908551575e-06, "loss": 0.7419, "step": 19880 }, { "epoch": 0.619181068036743, "grad_norm": 2.0561633110046387, "learning_rate": 2.004490478219542e-06, "loss": 0.8513, "step": 19885 }, { "epoch": 0.6193367585240542, "grad_norm": 2.2236735820770264, "learning_rate": 2.003671047887509e-06, "loss": 0.7614, "step": 19890 }, { "epoch": 0.6194924490113654, "grad_norm": 2.097702741622925, "learning_rate": 2.0028516175554754e-06, "loss": 0.777, "step": 19895 }, { "epoch": 0.6196481394986766, "grad_norm": 1.9000107049942017, "learning_rate": 2.0020321872234422e-06, "loss": 0.8101, "step": 19900 }, { "epoch": 0.6198038299859878, "grad_norm": 2.418980598449707, "learning_rate": 2.001212756891409e-06, "loss": 0.7962, "step": 19905 }, { "epoch": 0.619959520473299, "grad_norm": 2.4891247749328613, "learning_rate": 2.000393326559376e-06, "loss": 0.7752, "step": 19910 }, { "epoch": 0.6201152109606103, "grad_norm": 2.2890212535858154, "learning_rate": 1.999573896227343e-06, "loss": 0.7596, "step": 19915 }, { "epoch": 0.6202709014479215, "grad_norm": 3.2904210090637207, "learning_rate": 1.9987544658953097e-06, "loss": 0.7859, "step": 19920 }, { "epoch": 0.6204265919352328, "grad_norm": 2.040310859680176, "learning_rate": 1.9979350355632766e-06, "loss": 0.788, "step": 19925 }, { "epoch": 0.620582282422544, "grad_norm": 2.31927227973938, "learning_rate": 1.9971156052312435e-06, "loss": 0.7302, "step": 19930 }, { "epoch": 0.6207379729098552, "grad_norm": 1.8053158521652222, "learning_rate": 1.9962961748992103e-06, "loss": 0.7809, "step": 19935 }, { "epoch": 0.6208936633971665, "grad_norm": 1.9895716905593872, "learning_rate": 1.995476744567177e-06, "loss": 0.816, "step": 19940 }, { "epoch": 0.6210493538844777, "grad_norm": 2.3810713291168213, "learning_rate": 1.994657314235144e-06, "loss": 0.7477, "step": 19945 }, { "epoch": 0.6212050443717889, "grad_norm": 1.8564612865447998, "learning_rate": 1.993837883903111e-06, "loss": 0.8513, "step": 19950 }, { "epoch": 0.6213607348591002, "grad_norm": 2.2665412425994873, "learning_rate": 1.9930184535710774e-06, "loss": 0.8159, "step": 19955 }, { "epoch": 0.6215164253464114, "grad_norm": 2.6600630283355713, "learning_rate": 1.9921990232390447e-06, "loss": 0.7667, "step": 19960 }, { "epoch": 0.6216721158337225, "grad_norm": 2.580083131790161, "learning_rate": 1.991379592907011e-06, "loss": 0.7689, "step": 19965 }, { "epoch": 0.6218278063210337, "grad_norm": 2.3615167140960693, "learning_rate": 1.990560162574978e-06, "loss": 0.8159, "step": 19970 }, { "epoch": 0.621983496808345, "grad_norm": 2.2667551040649414, "learning_rate": 1.989740732242945e-06, "loss": 0.8063, "step": 19975 }, { "epoch": 0.6221391872956562, "grad_norm": 2.485144853591919, "learning_rate": 1.9889213019109117e-06, "loss": 0.7629, "step": 19980 }, { "epoch": 0.6222948777829674, "grad_norm": 1.6582003831863403, "learning_rate": 1.9881018715788786e-06, "loss": 0.7856, "step": 19985 }, { "epoch": 0.6224505682702787, "grad_norm": 2.484271287918091, "learning_rate": 1.9872824412468455e-06, "loss": 0.7264, "step": 19990 }, { "epoch": 0.6226062587575899, "grad_norm": 2.046489715576172, "learning_rate": 1.9864630109148123e-06, "loss": 0.7877, "step": 19995 }, { "epoch": 0.6227619492449011, "grad_norm": 2.660674810409546, "learning_rate": 1.9856435805827788e-06, "loss": 0.7922, "step": 20000 }, { "epoch": 0.6229176397322124, "grad_norm": 2.11425518989563, "learning_rate": 1.984824150250746e-06, "loss": 0.8419, "step": 20005 }, { "epoch": 0.6230733302195236, "grad_norm": 1.8104478120803833, "learning_rate": 1.9840047199187125e-06, "loss": 0.7564, "step": 20010 }, { "epoch": 0.6232290207068348, "grad_norm": 2.1355319023132324, "learning_rate": 1.98318528958668e-06, "loss": 0.7605, "step": 20015 }, { "epoch": 0.6233847111941461, "grad_norm": 2.203839063644409, "learning_rate": 1.9823658592546463e-06, "loss": 0.8235, "step": 20020 }, { "epoch": 0.6235404016814573, "grad_norm": 2.6015877723693848, "learning_rate": 1.981546428922613e-06, "loss": 0.8347, "step": 20025 }, { "epoch": 0.6236960921687685, "grad_norm": 1.641438364982605, "learning_rate": 1.98072699859058e-06, "loss": 0.805, "step": 20030 }, { "epoch": 0.6238517826560798, "grad_norm": 2.5182344913482666, "learning_rate": 1.979907568258547e-06, "loss": 0.837, "step": 20035 }, { "epoch": 0.6240074731433909, "grad_norm": 2.047971487045288, "learning_rate": 1.9790881379265137e-06, "loss": 0.8082, "step": 20040 }, { "epoch": 0.6241631636307021, "grad_norm": 2.042898654937744, "learning_rate": 1.97826870759448e-06, "loss": 0.7528, "step": 20045 }, { "epoch": 0.6243188541180134, "grad_norm": 1.9642947912216187, "learning_rate": 1.9774492772624475e-06, "loss": 0.7609, "step": 20050 }, { "epoch": 0.6244745446053246, "grad_norm": 1.9314836263656616, "learning_rate": 1.976629846930414e-06, "loss": 0.8024, "step": 20055 }, { "epoch": 0.6246302350926358, "grad_norm": 2.105264902114868, "learning_rate": 1.9758104165983812e-06, "loss": 0.7532, "step": 20060 }, { "epoch": 0.624785925579947, "grad_norm": 1.9050222635269165, "learning_rate": 1.9749909862663477e-06, "loss": 0.7171, "step": 20065 }, { "epoch": 0.6249416160672583, "grad_norm": 2.300697088241577, "learning_rate": 1.9741715559343145e-06, "loss": 0.7449, "step": 20070 }, { "epoch": 0.6250973065545695, "grad_norm": 2.3424792289733887, "learning_rate": 1.9733521256022814e-06, "loss": 0.791, "step": 20075 }, { "epoch": 0.6252529970418808, "grad_norm": 2.5754523277282715, "learning_rate": 1.9725326952702483e-06, "loss": 0.8332, "step": 20080 }, { "epoch": 0.625408687529192, "grad_norm": 2.413306951522827, "learning_rate": 1.971713264938215e-06, "loss": 0.8402, "step": 20085 }, { "epoch": 0.6255643780165032, "grad_norm": 2.7308311462402344, "learning_rate": 1.970893834606182e-06, "loss": 0.7669, "step": 20090 }, { "epoch": 0.6257200685038145, "grad_norm": 2.163935661315918, "learning_rate": 1.970074404274149e-06, "loss": 0.8248, "step": 20095 }, { "epoch": 0.6258757589911257, "grad_norm": 2.4112460613250732, "learning_rate": 1.9692549739421158e-06, "loss": 0.749, "step": 20100 }, { "epoch": 0.6260314494784369, "grad_norm": 2.0716359615325928, "learning_rate": 1.9684355436100826e-06, "loss": 0.8444, "step": 20105 }, { "epoch": 0.626187139965748, "grad_norm": 1.894282341003418, "learning_rate": 1.967616113278049e-06, "loss": 0.699, "step": 20110 }, { "epoch": 0.6263428304530593, "grad_norm": 1.9615648984909058, "learning_rate": 1.966796682946016e-06, "loss": 0.7907, "step": 20115 }, { "epoch": 0.6264985209403705, "grad_norm": 2.204399585723877, "learning_rate": 1.965977252613983e-06, "loss": 0.757, "step": 20120 }, { "epoch": 0.6266542114276817, "grad_norm": 1.9542289972305298, "learning_rate": 1.9651578222819497e-06, "loss": 0.8089, "step": 20125 }, { "epoch": 0.626809901914993, "grad_norm": 2.0357542037963867, "learning_rate": 1.9643383919499165e-06, "loss": 0.8056, "step": 20130 }, { "epoch": 0.6269655924023042, "grad_norm": 2.166867733001709, "learning_rate": 1.9635189616178834e-06, "loss": 0.7196, "step": 20135 }, { "epoch": 0.6271212828896154, "grad_norm": 2.255671262741089, "learning_rate": 1.9626995312858503e-06, "loss": 0.804, "step": 20140 }, { "epoch": 0.6272769733769267, "grad_norm": 2.139672040939331, "learning_rate": 1.961880100953817e-06, "loss": 0.7396, "step": 20145 }, { "epoch": 0.6274326638642379, "grad_norm": 2.0599679946899414, "learning_rate": 1.961060670621784e-06, "loss": 0.7892, "step": 20150 }, { "epoch": 0.6275883543515491, "grad_norm": 1.8759485483169556, "learning_rate": 1.960241240289751e-06, "loss": 0.8154, "step": 20155 }, { "epoch": 0.6277440448388604, "grad_norm": 2.033463478088379, "learning_rate": 1.9594218099577173e-06, "loss": 0.7476, "step": 20160 }, { "epoch": 0.6278997353261716, "grad_norm": 2.462294816970825, "learning_rate": 1.9586023796256846e-06, "loss": 0.8555, "step": 20165 }, { "epoch": 0.6280554258134828, "grad_norm": 2.178741455078125, "learning_rate": 1.957782949293651e-06, "loss": 0.7189, "step": 20170 }, { "epoch": 0.6282111163007941, "grad_norm": 1.9883036613464355, "learning_rate": 1.9569635189616184e-06, "loss": 0.7807, "step": 20175 }, { "epoch": 0.6283668067881052, "grad_norm": 2.3831963539123535, "learning_rate": 1.956144088629585e-06, "loss": 0.7901, "step": 20180 }, { "epoch": 0.6285224972754164, "grad_norm": 2.3655972480773926, "learning_rate": 1.9553246582975517e-06, "loss": 0.7931, "step": 20185 }, { "epoch": 0.6286781877627277, "grad_norm": 2.3181416988372803, "learning_rate": 1.9545052279655186e-06, "loss": 0.8172, "step": 20190 }, { "epoch": 0.6288338782500389, "grad_norm": 2.056320905685425, "learning_rate": 1.9536857976334854e-06, "loss": 0.7509, "step": 20195 }, { "epoch": 0.6289895687373501, "grad_norm": 2.1929330825805664, "learning_rate": 1.9528663673014523e-06, "loss": 0.7667, "step": 20200 }, { "epoch": 0.6291452592246614, "grad_norm": 2.1474454402923584, "learning_rate": 1.9520469369694187e-06, "loss": 0.8221, "step": 20205 }, { "epoch": 0.6293009497119726, "grad_norm": 2.143770933151245, "learning_rate": 1.951227506637386e-06, "loss": 0.7423, "step": 20210 }, { "epoch": 0.6294566401992838, "grad_norm": 2.777338743209839, "learning_rate": 1.9504080763053525e-06, "loss": 0.7896, "step": 20215 }, { "epoch": 0.6296123306865951, "grad_norm": 2.0670721530914307, "learning_rate": 1.9495886459733198e-06, "loss": 0.8196, "step": 20220 }, { "epoch": 0.6297680211739063, "grad_norm": 2.5698599815368652, "learning_rate": 1.9487692156412862e-06, "loss": 0.7604, "step": 20225 }, { "epoch": 0.6299237116612175, "grad_norm": 2.397658586502075, "learning_rate": 1.947949785309253e-06, "loss": 0.7617, "step": 20230 }, { "epoch": 0.6300794021485288, "grad_norm": 2.0696864128112793, "learning_rate": 1.94713035497722e-06, "loss": 0.8066, "step": 20235 }, { "epoch": 0.63023509263584, "grad_norm": 2.35638427734375, "learning_rate": 1.946310924645187e-06, "loss": 0.8003, "step": 20240 }, { "epoch": 0.6303907831231512, "grad_norm": 1.9154222011566162, "learning_rate": 1.9454914943131537e-06, "loss": 0.8437, "step": 20245 }, { "epoch": 0.6305464736104625, "grad_norm": 2.0701658725738525, "learning_rate": 1.94467206398112e-06, "loss": 0.8319, "step": 20250 }, { "epoch": 0.6307021640977736, "grad_norm": 2.449718475341797, "learning_rate": 1.9438526336490874e-06, "loss": 0.7332, "step": 20255 }, { "epoch": 0.6308578545850848, "grad_norm": 2.393235445022583, "learning_rate": 1.943033203317054e-06, "loss": 0.7435, "step": 20260 }, { "epoch": 0.631013545072396, "grad_norm": 2.1347718238830566, "learning_rate": 1.942213772985021e-06, "loss": 0.8069, "step": 20265 }, { "epoch": 0.6311692355597073, "grad_norm": 1.9344520568847656, "learning_rate": 1.9413943426529876e-06, "loss": 0.799, "step": 20270 }, { "epoch": 0.6313249260470185, "grad_norm": 2.1003503799438477, "learning_rate": 1.9405749123209545e-06, "loss": 0.8524, "step": 20275 }, { "epoch": 0.6314806165343297, "grad_norm": 2.261193037033081, "learning_rate": 1.9397554819889214e-06, "loss": 0.7906, "step": 20280 }, { "epoch": 0.631636307021641, "grad_norm": 2.026007890701294, "learning_rate": 1.9389360516568882e-06, "loss": 0.7834, "step": 20285 }, { "epoch": 0.6317919975089522, "grad_norm": 2.1573002338409424, "learning_rate": 1.938116621324855e-06, "loss": 0.7721, "step": 20290 }, { "epoch": 0.6319476879962634, "grad_norm": 1.869695782661438, "learning_rate": 1.937297190992822e-06, "loss": 0.7401, "step": 20295 }, { "epoch": 0.6321033784835747, "grad_norm": 2.3848934173583984, "learning_rate": 1.936477760660789e-06, "loss": 0.7277, "step": 20300 }, { "epoch": 0.6322590689708859, "grad_norm": 2.6445796489715576, "learning_rate": 1.9356583303287557e-06, "loss": 0.8303, "step": 20305 }, { "epoch": 0.6324147594581971, "grad_norm": 1.9629884958267212, "learning_rate": 1.9348388999967226e-06, "loss": 0.7613, "step": 20310 }, { "epoch": 0.6325704499455084, "grad_norm": 1.8578521013259888, "learning_rate": 1.9340194696646894e-06, "loss": 0.8379, "step": 20315 }, { "epoch": 0.6327261404328196, "grad_norm": 1.8453683853149414, "learning_rate": 1.933200039332656e-06, "loss": 0.8108, "step": 20320 }, { "epoch": 0.6328818309201307, "grad_norm": 2.04013729095459, "learning_rate": 1.9323806090006228e-06, "loss": 0.7822, "step": 20325 }, { "epoch": 0.633037521407442, "grad_norm": 2.504053831100464, "learning_rate": 1.9315611786685896e-06, "loss": 0.7651, "step": 20330 }, { "epoch": 0.6331932118947532, "grad_norm": 1.8101863861083984, "learning_rate": 1.9307417483365565e-06, "loss": 0.6848, "step": 20335 }, { "epoch": 0.6333489023820644, "grad_norm": 2.6950738430023193, "learning_rate": 1.9299223180045234e-06, "loss": 0.7895, "step": 20340 }, { "epoch": 0.6335045928693757, "grad_norm": 2.5444347858428955, "learning_rate": 1.9291028876724902e-06, "loss": 0.7691, "step": 20345 }, { "epoch": 0.6336602833566869, "grad_norm": 2.6104531288146973, "learning_rate": 1.928283457340457e-06, "loss": 0.809, "step": 20350 }, { "epoch": 0.6338159738439981, "grad_norm": 2.243536949157715, "learning_rate": 1.927464027008424e-06, "loss": 0.7777, "step": 20355 }, { "epoch": 0.6339716643313094, "grad_norm": 2.1787171363830566, "learning_rate": 1.926644596676391e-06, "loss": 0.8757, "step": 20360 }, { "epoch": 0.6341273548186206, "grad_norm": 2.3395156860351562, "learning_rate": 1.9258251663443577e-06, "loss": 0.8203, "step": 20365 }, { "epoch": 0.6342830453059318, "grad_norm": 1.9218134880065918, "learning_rate": 1.9250057360123246e-06, "loss": 0.7957, "step": 20370 }, { "epoch": 0.6344387357932431, "grad_norm": 2.2298057079315186, "learning_rate": 1.924186305680291e-06, "loss": 0.8217, "step": 20375 }, { "epoch": 0.6345944262805543, "grad_norm": 2.128697395324707, "learning_rate": 1.9233668753482583e-06, "loss": 0.7602, "step": 20380 }, { "epoch": 0.6347501167678655, "grad_norm": 1.871226191520691, "learning_rate": 1.9225474450162248e-06, "loss": 0.7515, "step": 20385 }, { "epoch": 0.6349058072551768, "grad_norm": 2.246948719024658, "learning_rate": 1.9217280146841916e-06, "loss": 0.7641, "step": 20390 }, { "epoch": 0.6350614977424879, "grad_norm": 1.7512924671173096, "learning_rate": 1.9209085843521585e-06, "loss": 0.7891, "step": 20395 }, { "epoch": 0.6352171882297991, "grad_norm": 2.082447052001953, "learning_rate": 1.9200891540201254e-06, "loss": 0.7708, "step": 20400 }, { "epoch": 0.6353728787171103, "grad_norm": 1.9894074201583862, "learning_rate": 1.9192697236880923e-06, "loss": 0.7734, "step": 20405 }, { "epoch": 0.6355285692044216, "grad_norm": 2.0193042755126953, "learning_rate": 1.918450293356059e-06, "loss": 0.7363, "step": 20410 }, { "epoch": 0.6356842596917328, "grad_norm": 2.1250181198120117, "learning_rate": 1.917630863024026e-06, "loss": 0.7507, "step": 20415 }, { "epoch": 0.635839950179044, "grad_norm": 3.1550631523132324, "learning_rate": 1.9168114326919924e-06, "loss": 0.8049, "step": 20420 }, { "epoch": 0.6359956406663553, "grad_norm": 2.8026812076568604, "learning_rate": 1.9159920023599597e-06, "loss": 0.8556, "step": 20425 }, { "epoch": 0.6361513311536665, "grad_norm": 2.3353898525238037, "learning_rate": 1.915172572027926e-06, "loss": 0.71, "step": 20430 }, { "epoch": 0.6363070216409777, "grad_norm": 2.1017091274261475, "learning_rate": 1.9143531416958935e-06, "loss": 0.8083, "step": 20435 }, { "epoch": 0.636462712128289, "grad_norm": 2.2943708896636963, "learning_rate": 1.91353371136386e-06, "loss": 0.7182, "step": 20440 }, { "epoch": 0.6366184026156002, "grad_norm": 1.925365924835205, "learning_rate": 1.9127142810318268e-06, "loss": 0.7714, "step": 20445 }, { "epoch": 0.6367740931029114, "grad_norm": 2.153467893600464, "learning_rate": 1.9118948506997937e-06, "loss": 0.7929, "step": 20450 }, { "epoch": 0.6369297835902227, "grad_norm": 2.0811996459960938, "learning_rate": 1.9110754203677605e-06, "loss": 0.7592, "step": 20455 }, { "epoch": 0.6370854740775339, "grad_norm": 1.8688781261444092, "learning_rate": 1.9102559900357274e-06, "loss": 0.7926, "step": 20460 }, { "epoch": 0.6372411645648451, "grad_norm": 1.9808093309402466, "learning_rate": 1.909436559703694e-06, "loss": 0.7059, "step": 20465 }, { "epoch": 0.6373968550521563, "grad_norm": 2.114333391189575, "learning_rate": 1.908617129371661e-06, "loss": 0.7975, "step": 20470 }, { "epoch": 0.6375525455394675, "grad_norm": 2.2433016300201416, "learning_rate": 1.9077976990396276e-06, "loss": 0.8131, "step": 20475 }, { "epoch": 0.6377082360267787, "grad_norm": 2.6233675479888916, "learning_rate": 1.9069782687075947e-06, "loss": 0.7688, "step": 20480 }, { "epoch": 0.63786392651409, "grad_norm": 2.2052276134490967, "learning_rate": 1.9061588383755613e-06, "loss": 0.8252, "step": 20485 }, { "epoch": 0.6380196170014012, "grad_norm": 1.9947870969772339, "learning_rate": 1.9053394080435284e-06, "loss": 0.7966, "step": 20490 }, { "epoch": 0.6381753074887124, "grad_norm": 2.3993613719940186, "learning_rate": 1.904519977711495e-06, "loss": 0.8117, "step": 20495 }, { "epoch": 0.6383309979760237, "grad_norm": 2.7150392532348633, "learning_rate": 1.903700547379462e-06, "loss": 0.7554, "step": 20500 }, { "epoch": 0.6384866884633349, "grad_norm": 2.033679962158203, "learning_rate": 1.9028811170474288e-06, "loss": 0.7343, "step": 20505 }, { "epoch": 0.6386423789506461, "grad_norm": 2.1363706588745117, "learning_rate": 1.9020616867153957e-06, "loss": 0.7733, "step": 20510 }, { "epoch": 0.6387980694379574, "grad_norm": 1.8100236654281616, "learning_rate": 1.9012422563833623e-06, "loss": 0.8712, "step": 20515 }, { "epoch": 0.6389537599252686, "grad_norm": 2.163236379623413, "learning_rate": 1.9004228260513294e-06, "loss": 0.7329, "step": 20520 }, { "epoch": 0.6391094504125798, "grad_norm": 2.227510452270508, "learning_rate": 1.899603395719296e-06, "loss": 0.8326, "step": 20525 }, { "epoch": 0.6392651408998911, "grad_norm": 1.8495802879333496, "learning_rate": 1.8987839653872631e-06, "loss": 0.7081, "step": 20530 }, { "epoch": 0.6394208313872023, "grad_norm": 2.6094143390655518, "learning_rate": 1.8979645350552298e-06, "loss": 0.7844, "step": 20535 }, { "epoch": 0.6395765218745134, "grad_norm": 2.1167008876800537, "learning_rate": 1.8971451047231965e-06, "loss": 0.7314, "step": 20540 }, { "epoch": 0.6397322123618246, "grad_norm": 2.0201690196990967, "learning_rate": 1.8963256743911635e-06, "loss": 0.7478, "step": 20545 }, { "epoch": 0.6398879028491359, "grad_norm": 2.0158884525299072, "learning_rate": 1.8955062440591302e-06, "loss": 0.7142, "step": 20550 }, { "epoch": 0.6400435933364471, "grad_norm": 2.0145015716552734, "learning_rate": 1.894686813727097e-06, "loss": 0.7863, "step": 20555 }, { "epoch": 0.6401992838237583, "grad_norm": 2.314267873764038, "learning_rate": 1.8938673833950637e-06, "loss": 0.7532, "step": 20560 }, { "epoch": 0.6403549743110696, "grad_norm": 2.0483717918395996, "learning_rate": 1.8930479530630308e-06, "loss": 0.7319, "step": 20565 }, { "epoch": 0.6405106647983808, "grad_norm": 2.59797739982605, "learning_rate": 1.8922285227309975e-06, "loss": 0.7057, "step": 20570 }, { "epoch": 0.640666355285692, "grad_norm": 2.656752109527588, "learning_rate": 1.8914090923989645e-06, "loss": 0.8484, "step": 20575 }, { "epoch": 0.6408220457730033, "grad_norm": 1.8870888948440552, "learning_rate": 1.8905896620669312e-06, "loss": 0.8236, "step": 20580 }, { "epoch": 0.6409777362603145, "grad_norm": 1.8753002882003784, "learning_rate": 1.889770231734898e-06, "loss": 0.8328, "step": 20585 }, { "epoch": 0.6411334267476257, "grad_norm": 1.9832983016967773, "learning_rate": 1.888950801402865e-06, "loss": 0.8088, "step": 20590 }, { "epoch": 0.641289117234937, "grad_norm": 2.274418592453003, "learning_rate": 1.8881313710708318e-06, "loss": 0.745, "step": 20595 }, { "epoch": 0.6414448077222482, "grad_norm": 2.173912525177002, "learning_rate": 1.8873119407387985e-06, "loss": 0.8126, "step": 20600 }, { "epoch": 0.6416004982095594, "grad_norm": 2.011747360229492, "learning_rate": 1.8864925104067655e-06, "loss": 0.7407, "step": 20605 }, { "epoch": 0.6417561886968706, "grad_norm": 2.206580400466919, "learning_rate": 1.8856730800747322e-06, "loss": 0.8125, "step": 20610 }, { "epoch": 0.6419118791841818, "grad_norm": 2.016062021255493, "learning_rate": 1.8848536497426989e-06, "loss": 0.6653, "step": 20615 }, { "epoch": 0.642067569671493, "grad_norm": 2.062458038330078, "learning_rate": 1.884034219410666e-06, "loss": 0.7948, "step": 20620 }, { "epoch": 0.6422232601588043, "grad_norm": 2.151603937149048, "learning_rate": 1.8832147890786326e-06, "loss": 0.7912, "step": 20625 }, { "epoch": 0.6423789506461155, "grad_norm": 1.8556064367294312, "learning_rate": 1.8823953587465995e-06, "loss": 0.7287, "step": 20630 }, { "epoch": 0.6425346411334267, "grad_norm": 2.550429582595825, "learning_rate": 1.8815759284145663e-06, "loss": 0.7534, "step": 20635 }, { "epoch": 0.642690331620738, "grad_norm": 2.01680064201355, "learning_rate": 1.8807564980825332e-06, "loss": 0.7754, "step": 20640 }, { "epoch": 0.6428460221080492, "grad_norm": 2.256234884262085, "learning_rate": 1.8799370677504999e-06, "loss": 0.7123, "step": 20645 }, { "epoch": 0.6430017125953604, "grad_norm": 2.364692449569702, "learning_rate": 1.879117637418467e-06, "loss": 0.7821, "step": 20650 }, { "epoch": 0.6431574030826717, "grad_norm": 2.2692158222198486, "learning_rate": 1.8782982070864336e-06, "loss": 0.7941, "step": 20655 }, { "epoch": 0.6433130935699829, "grad_norm": 1.818105697631836, "learning_rate": 1.8774787767544007e-06, "loss": 0.7392, "step": 20660 }, { "epoch": 0.6434687840572941, "grad_norm": 1.9356249570846558, "learning_rate": 1.8766593464223673e-06, "loss": 0.7781, "step": 20665 }, { "epoch": 0.6436244745446054, "grad_norm": 2.0769598484039307, "learning_rate": 1.8758399160903342e-06, "loss": 0.7853, "step": 20670 }, { "epoch": 0.6437801650319166, "grad_norm": 2.1779186725616455, "learning_rate": 1.8750204857583009e-06, "loss": 0.8307, "step": 20675 }, { "epoch": 0.6439358555192278, "grad_norm": 2.461486339569092, "learning_rate": 1.8742010554262677e-06, "loss": 0.696, "step": 20680 }, { "epoch": 0.644091546006539, "grad_norm": 2.135791301727295, "learning_rate": 1.8733816250942346e-06, "loss": 0.7713, "step": 20685 }, { "epoch": 0.6442472364938502, "grad_norm": 1.8204352855682373, "learning_rate": 1.8725621947622013e-06, "loss": 0.7623, "step": 20690 }, { "epoch": 0.6444029269811614, "grad_norm": 1.9952384233474731, "learning_rate": 1.8717427644301684e-06, "loss": 0.8012, "step": 20695 }, { "epoch": 0.6445586174684726, "grad_norm": 2.098153591156006, "learning_rate": 1.870923334098135e-06, "loss": 0.7576, "step": 20700 }, { "epoch": 0.6447143079557839, "grad_norm": 1.881112813949585, "learning_rate": 1.870103903766102e-06, "loss": 0.7617, "step": 20705 }, { "epoch": 0.6448699984430951, "grad_norm": 2.45613956451416, "learning_rate": 1.8692844734340688e-06, "loss": 0.8776, "step": 20710 }, { "epoch": 0.6450256889304063, "grad_norm": 2.347116231918335, "learning_rate": 1.8684650431020356e-06, "loss": 0.825, "step": 20715 }, { "epoch": 0.6451813794177176, "grad_norm": 1.9725703001022339, "learning_rate": 1.8676456127700025e-06, "loss": 0.7326, "step": 20720 }, { "epoch": 0.6453370699050288, "grad_norm": 2.35730242729187, "learning_rate": 1.8668261824379694e-06, "loss": 0.7925, "step": 20725 }, { "epoch": 0.64549276039234, "grad_norm": 1.9624384641647339, "learning_rate": 1.866006752105936e-06, "loss": 0.7585, "step": 20730 }, { "epoch": 0.6456484508796513, "grad_norm": 2.271331548690796, "learning_rate": 1.865187321773903e-06, "loss": 0.8134, "step": 20735 }, { "epoch": 0.6458041413669625, "grad_norm": 2.1292166709899902, "learning_rate": 1.8643678914418698e-06, "loss": 0.7819, "step": 20740 }, { "epoch": 0.6459598318542737, "grad_norm": 2.2292211055755615, "learning_rate": 1.8635484611098366e-06, "loss": 0.7549, "step": 20745 }, { "epoch": 0.646115522341585, "grad_norm": 2.1295580863952637, "learning_rate": 1.8627290307778035e-06, "loss": 0.8089, "step": 20750 }, { "epoch": 0.6462712128288961, "grad_norm": 3.336226463317871, "learning_rate": 1.8619096004457702e-06, "loss": 0.8043, "step": 20755 }, { "epoch": 0.6464269033162073, "grad_norm": 1.994968295097351, "learning_rate": 1.861090170113737e-06, "loss": 0.7935, "step": 20760 }, { "epoch": 0.6465825938035186, "grad_norm": 2.186086654663086, "learning_rate": 1.8602707397817039e-06, "loss": 0.7845, "step": 20765 }, { "epoch": 0.6467382842908298, "grad_norm": 2.350602865219116, "learning_rate": 1.8594513094496708e-06, "loss": 0.7858, "step": 20770 }, { "epoch": 0.646893974778141, "grad_norm": 1.8066281080245972, "learning_rate": 1.8586318791176374e-06, "loss": 0.7465, "step": 20775 }, { "epoch": 0.6470496652654523, "grad_norm": 1.9700322151184082, "learning_rate": 1.8578124487856045e-06, "loss": 0.821, "step": 20780 }, { "epoch": 0.6472053557527635, "grad_norm": 1.9949204921722412, "learning_rate": 1.8569930184535712e-06, "loss": 0.7006, "step": 20785 }, { "epoch": 0.6473610462400747, "grad_norm": 2.058410167694092, "learning_rate": 1.8561735881215382e-06, "loss": 0.7654, "step": 20790 }, { "epoch": 0.647516736727386, "grad_norm": 2.279958963394165, "learning_rate": 1.855354157789505e-06, "loss": 0.7615, "step": 20795 }, { "epoch": 0.6476724272146972, "grad_norm": 2.6878769397735596, "learning_rate": 1.8545347274574718e-06, "loss": 0.8315, "step": 20800 }, { "epoch": 0.6478281177020084, "grad_norm": 2.4711825847625732, "learning_rate": 1.8537152971254384e-06, "loss": 0.7932, "step": 20805 }, { "epoch": 0.6479838081893197, "grad_norm": 2.110569715499878, "learning_rate": 1.8528958667934055e-06, "loss": 0.7724, "step": 20810 }, { "epoch": 0.6481394986766309, "grad_norm": 1.92970609664917, "learning_rate": 1.8520764364613722e-06, "loss": 0.7651, "step": 20815 }, { "epoch": 0.6482951891639421, "grad_norm": 2.043712854385376, "learning_rate": 1.8512570061293392e-06, "loss": 0.7981, "step": 20820 }, { "epoch": 0.6484508796512533, "grad_norm": 2.2034921646118164, "learning_rate": 1.850437575797306e-06, "loss": 0.7966, "step": 20825 }, { "epoch": 0.6486065701385645, "grad_norm": 2.1345090866088867, "learning_rate": 1.8496181454652726e-06, "loss": 0.7226, "step": 20830 }, { "epoch": 0.6487622606258757, "grad_norm": 1.8348995447158813, "learning_rate": 1.8487987151332396e-06, "loss": 0.764, "step": 20835 }, { "epoch": 0.648917951113187, "grad_norm": 1.9232274293899536, "learning_rate": 1.8479792848012063e-06, "loss": 0.8455, "step": 20840 }, { "epoch": 0.6490736416004982, "grad_norm": 2.029500961303711, "learning_rate": 1.8471598544691732e-06, "loss": 0.791, "step": 20845 }, { "epoch": 0.6492293320878094, "grad_norm": 2.1273679733276367, "learning_rate": 1.8463404241371398e-06, "loss": 0.8847, "step": 20850 }, { "epoch": 0.6493850225751207, "grad_norm": 1.822477102279663, "learning_rate": 1.845520993805107e-06, "loss": 0.7447, "step": 20855 }, { "epoch": 0.6495407130624319, "grad_norm": 2.370924234390259, "learning_rate": 1.8447015634730736e-06, "loss": 0.7895, "step": 20860 }, { "epoch": 0.6496964035497431, "grad_norm": 1.9028542041778564, "learning_rate": 1.8438821331410406e-06, "loss": 0.8109, "step": 20865 }, { "epoch": 0.6498520940370544, "grad_norm": 1.7252790927886963, "learning_rate": 1.8430627028090073e-06, "loss": 0.7853, "step": 20870 }, { "epoch": 0.6500077845243656, "grad_norm": 2.04158616065979, "learning_rate": 1.8422432724769742e-06, "loss": 0.7566, "step": 20875 }, { "epoch": 0.6501634750116768, "grad_norm": 2.386958360671997, "learning_rate": 1.841423842144941e-06, "loss": 0.689, "step": 20880 }, { "epoch": 0.650319165498988, "grad_norm": 2.249821186065674, "learning_rate": 1.840604411812908e-06, "loss": 0.7461, "step": 20885 }, { "epoch": 0.6504748559862993, "grad_norm": 2.2294647693634033, "learning_rate": 1.8397849814808746e-06, "loss": 0.8044, "step": 20890 }, { "epoch": 0.6506305464736105, "grad_norm": 2.2385075092315674, "learning_rate": 1.8389655511488414e-06, "loss": 0.723, "step": 20895 }, { "epoch": 0.6507862369609216, "grad_norm": 1.878367304801941, "learning_rate": 1.8381461208168083e-06, "loss": 0.6981, "step": 20900 }, { "epoch": 0.6509419274482329, "grad_norm": 1.9642198085784912, "learning_rate": 1.837326690484775e-06, "loss": 0.8392, "step": 20905 }, { "epoch": 0.6510976179355441, "grad_norm": 2.1621110439300537, "learning_rate": 1.836507260152742e-06, "loss": 0.7331, "step": 20910 }, { "epoch": 0.6512533084228553, "grad_norm": 2.285874366760254, "learning_rate": 1.8356878298207087e-06, "loss": 0.8216, "step": 20915 }, { "epoch": 0.6514089989101666, "grad_norm": 2.2952041625976562, "learning_rate": 1.8348683994886756e-06, "loss": 0.7354, "step": 20920 }, { "epoch": 0.6515646893974778, "grad_norm": 2.602569103240967, "learning_rate": 1.8340489691566424e-06, "loss": 0.8464, "step": 20925 }, { "epoch": 0.651720379884789, "grad_norm": 1.938300371170044, "learning_rate": 1.8332295388246093e-06, "loss": 0.7221, "step": 20930 }, { "epoch": 0.6518760703721003, "grad_norm": 1.9490407705307007, "learning_rate": 1.832410108492576e-06, "loss": 0.8458, "step": 20935 }, { "epoch": 0.6520317608594115, "grad_norm": 2.2114715576171875, "learning_rate": 1.831590678160543e-06, "loss": 0.8035, "step": 20940 }, { "epoch": 0.6521874513467227, "grad_norm": 2.1918392181396484, "learning_rate": 1.8307712478285097e-06, "loss": 0.8069, "step": 20945 }, { "epoch": 0.652343141834034, "grad_norm": 1.949981451034546, "learning_rate": 1.8299518174964768e-06, "loss": 0.7877, "step": 20950 }, { "epoch": 0.6524988323213452, "grad_norm": 2.0065276622772217, "learning_rate": 1.8291323871644435e-06, "loss": 0.7578, "step": 20955 }, { "epoch": 0.6526545228086564, "grad_norm": 2.489047050476074, "learning_rate": 1.8283129568324103e-06, "loss": 0.7368, "step": 20960 }, { "epoch": 0.6528102132959677, "grad_norm": 2.091144561767578, "learning_rate": 1.8274935265003772e-06, "loss": 0.7145, "step": 20965 }, { "epoch": 0.6529659037832788, "grad_norm": 2.052809000015259, "learning_rate": 1.8266740961683438e-06, "loss": 0.7975, "step": 20970 }, { "epoch": 0.65312159427059, "grad_norm": 2.604004383087158, "learning_rate": 1.8258546658363107e-06, "loss": 0.8431, "step": 20975 }, { "epoch": 0.6532772847579013, "grad_norm": 2.0382113456726074, "learning_rate": 1.8250352355042774e-06, "loss": 0.806, "step": 20980 }, { "epoch": 0.6534329752452125, "grad_norm": 3.048069477081299, "learning_rate": 1.8242158051722445e-06, "loss": 0.8343, "step": 20985 }, { "epoch": 0.6535886657325237, "grad_norm": 2.3349010944366455, "learning_rate": 1.8233963748402111e-06, "loss": 0.8282, "step": 20990 }, { "epoch": 0.653744356219835, "grad_norm": 2.025155782699585, "learning_rate": 1.8225769445081782e-06, "loss": 0.783, "step": 20995 }, { "epoch": 0.6539000467071462, "grad_norm": 2.282545328140259, "learning_rate": 1.8217575141761449e-06, "loss": 0.7512, "step": 21000 }, { "epoch": 0.6540557371944574, "grad_norm": 2.2009999752044678, "learning_rate": 1.8209380838441117e-06, "loss": 0.7918, "step": 21005 }, { "epoch": 0.6542114276817687, "grad_norm": 2.1375226974487305, "learning_rate": 1.8201186535120786e-06, "loss": 0.7997, "step": 21010 }, { "epoch": 0.6543671181690799, "grad_norm": 2.061211347579956, "learning_rate": 1.8192992231800455e-06, "loss": 0.8084, "step": 21015 }, { "epoch": 0.6545228086563911, "grad_norm": 2.071061611175537, "learning_rate": 1.8184797928480121e-06, "loss": 0.782, "step": 21020 }, { "epoch": 0.6546784991437024, "grad_norm": 2.426750421524048, "learning_rate": 1.8176603625159792e-06, "loss": 0.7968, "step": 21025 }, { "epoch": 0.6548341896310136, "grad_norm": 3.385232925415039, "learning_rate": 1.8168409321839459e-06, "loss": 0.7342, "step": 21030 }, { "epoch": 0.6549898801183248, "grad_norm": 2.0076792240142822, "learning_rate": 1.8160215018519125e-06, "loss": 0.7541, "step": 21035 }, { "epoch": 0.6551455706056359, "grad_norm": 2.02264404296875, "learning_rate": 1.8152020715198796e-06, "loss": 0.7343, "step": 21040 }, { "epoch": 0.6553012610929472, "grad_norm": 2.042160749435425, "learning_rate": 1.8143826411878463e-06, "loss": 0.829, "step": 21045 }, { "epoch": 0.6554569515802584, "grad_norm": 1.6891255378723145, "learning_rate": 1.8135632108558131e-06, "loss": 0.7353, "step": 21050 }, { "epoch": 0.6556126420675696, "grad_norm": 2.234734535217285, "learning_rate": 1.81274378052378e-06, "loss": 0.8754, "step": 21055 }, { "epoch": 0.6557683325548809, "grad_norm": 1.7004896402359009, "learning_rate": 1.8119243501917469e-06, "loss": 0.8375, "step": 21060 }, { "epoch": 0.6559240230421921, "grad_norm": 2.072457790374756, "learning_rate": 1.8111049198597135e-06, "loss": 0.7745, "step": 21065 }, { "epoch": 0.6560797135295033, "grad_norm": 1.8674736022949219, "learning_rate": 1.8102854895276806e-06, "loss": 0.7616, "step": 21070 }, { "epoch": 0.6562354040168146, "grad_norm": 2.1452865600585938, "learning_rate": 1.8094660591956473e-06, "loss": 0.7686, "step": 21075 }, { "epoch": 0.6563910945041258, "grad_norm": 2.2541232109069824, "learning_rate": 1.8086466288636143e-06, "loss": 0.7695, "step": 21080 }, { "epoch": 0.656546784991437, "grad_norm": 2.0637879371643066, "learning_rate": 1.807827198531581e-06, "loss": 0.7723, "step": 21085 }, { "epoch": 0.6567024754787483, "grad_norm": 2.2772433757781982, "learning_rate": 1.8070077681995479e-06, "loss": 0.804, "step": 21090 }, { "epoch": 0.6568581659660595, "grad_norm": 2.2880685329437256, "learning_rate": 1.8061883378675145e-06, "loss": 0.7521, "step": 21095 }, { "epoch": 0.6570138564533707, "grad_norm": 1.9726535081863403, "learning_rate": 1.8053689075354816e-06, "loss": 0.7694, "step": 21100 }, { "epoch": 0.657169546940682, "grad_norm": 2.500349998474121, "learning_rate": 1.8045494772034483e-06, "loss": 0.8127, "step": 21105 }, { "epoch": 0.6573252374279932, "grad_norm": 1.911129355430603, "learning_rate": 1.803730046871415e-06, "loss": 0.7325, "step": 21110 }, { "epoch": 0.6574809279153043, "grad_norm": 1.9749832153320312, "learning_rate": 1.802910616539382e-06, "loss": 0.7789, "step": 21115 }, { "epoch": 0.6576366184026156, "grad_norm": 2.6607234477996826, "learning_rate": 1.8020911862073487e-06, "loss": 0.7529, "step": 21120 }, { "epoch": 0.6577923088899268, "grad_norm": 3.0099802017211914, "learning_rate": 1.8012717558753157e-06, "loss": 0.7425, "step": 21125 }, { "epoch": 0.657947999377238, "grad_norm": 1.9561679363250732, "learning_rate": 1.8004523255432824e-06, "loss": 0.7915, "step": 21130 }, { "epoch": 0.6581036898645493, "grad_norm": 2.5881261825561523, "learning_rate": 1.7996328952112493e-06, "loss": 0.7962, "step": 21135 }, { "epoch": 0.6582593803518605, "grad_norm": 2.2659082412719727, "learning_rate": 1.7988134648792161e-06, "loss": 0.8181, "step": 21140 }, { "epoch": 0.6584150708391717, "grad_norm": 2.409221887588501, "learning_rate": 1.797994034547183e-06, "loss": 0.867, "step": 21145 }, { "epoch": 0.658570761326483, "grad_norm": 2.164188861846924, "learning_rate": 1.7971746042151497e-06, "loss": 0.8589, "step": 21150 }, { "epoch": 0.6587264518137942, "grad_norm": 2.4032177925109863, "learning_rate": 1.7963551738831167e-06, "loss": 0.8283, "step": 21155 }, { "epoch": 0.6588821423011054, "grad_norm": 2.0370852947235107, "learning_rate": 1.7955357435510834e-06, "loss": 0.8091, "step": 21160 }, { "epoch": 0.6590378327884167, "grad_norm": 2.080592632293701, "learning_rate": 1.7947163132190503e-06, "loss": 0.7183, "step": 21165 }, { "epoch": 0.6591935232757279, "grad_norm": 2.0713765621185303, "learning_rate": 1.7938968828870171e-06, "loss": 0.7798, "step": 21170 }, { "epoch": 0.6593492137630391, "grad_norm": 1.894504427909851, "learning_rate": 1.793077452554984e-06, "loss": 0.7098, "step": 21175 }, { "epoch": 0.6595049042503504, "grad_norm": 2.126812219619751, "learning_rate": 1.7922580222229507e-06, "loss": 0.6869, "step": 21180 }, { "epoch": 0.6596605947376615, "grad_norm": 1.8510977029800415, "learning_rate": 1.7914385918909175e-06, "loss": 0.7099, "step": 21185 }, { "epoch": 0.6598162852249727, "grad_norm": 1.8006062507629395, "learning_rate": 1.7906191615588844e-06, "loss": 0.762, "step": 21190 }, { "epoch": 0.6599719757122839, "grad_norm": 2.3651351928710938, "learning_rate": 1.789799731226851e-06, "loss": 0.7767, "step": 21195 }, { "epoch": 0.6601276661995952, "grad_norm": 2.1931819915771484, "learning_rate": 1.7889803008948182e-06, "loss": 0.7879, "step": 21200 }, { "epoch": 0.6602833566869064, "grad_norm": 2.126699924468994, "learning_rate": 1.7881608705627848e-06, "loss": 0.8322, "step": 21205 }, { "epoch": 0.6604390471742176, "grad_norm": 1.9057810306549072, "learning_rate": 1.7873414402307519e-06, "loss": 0.6861, "step": 21210 }, { "epoch": 0.6605947376615289, "grad_norm": 2.000302314758301, "learning_rate": 1.7865220098987185e-06, "loss": 0.7904, "step": 21215 }, { "epoch": 0.6607504281488401, "grad_norm": 2.5141215324401855, "learning_rate": 1.7857025795666854e-06, "loss": 0.7559, "step": 21220 }, { "epoch": 0.6609061186361513, "grad_norm": 1.9458051919937134, "learning_rate": 1.784883149234652e-06, "loss": 0.7292, "step": 21225 }, { "epoch": 0.6610618091234626, "grad_norm": 2.514429807662964, "learning_rate": 1.7840637189026192e-06, "loss": 0.7663, "step": 21230 }, { "epoch": 0.6612174996107738, "grad_norm": 2.0094282627105713, "learning_rate": 1.7832442885705858e-06, "loss": 0.7972, "step": 21235 }, { "epoch": 0.661373190098085, "grad_norm": 1.9813587665557861, "learning_rate": 1.782424858238553e-06, "loss": 0.7996, "step": 21240 }, { "epoch": 0.6615288805853963, "grad_norm": 2.2376816272735596, "learning_rate": 1.7816054279065196e-06, "loss": 0.7514, "step": 21245 }, { "epoch": 0.6616845710727075, "grad_norm": 2.322601795196533, "learning_rate": 1.7807859975744862e-06, "loss": 0.7224, "step": 21250 }, { "epoch": 0.6618402615600186, "grad_norm": 2.1858010292053223, "learning_rate": 1.7799665672424533e-06, "loss": 0.8105, "step": 21255 }, { "epoch": 0.6619959520473299, "grad_norm": 2.0824482440948486, "learning_rate": 1.77914713691042e-06, "loss": 0.7861, "step": 21260 }, { "epoch": 0.6621516425346411, "grad_norm": 2.0685417652130127, "learning_rate": 1.7783277065783868e-06, "loss": 0.733, "step": 21265 }, { "epoch": 0.6623073330219523, "grad_norm": 2.4341824054718018, "learning_rate": 1.7775082762463535e-06, "loss": 0.6929, "step": 21270 }, { "epoch": 0.6624630235092636, "grad_norm": 2.0680439472198486, "learning_rate": 1.7766888459143206e-06, "loss": 0.8197, "step": 21275 }, { "epoch": 0.6626187139965748, "grad_norm": 1.8493363857269287, "learning_rate": 1.7758694155822872e-06, "loss": 0.8103, "step": 21280 }, { "epoch": 0.662774404483886, "grad_norm": 2.228877305984497, "learning_rate": 1.7750499852502543e-06, "loss": 0.7735, "step": 21285 }, { "epoch": 0.6629300949711973, "grad_norm": 1.733608603477478, "learning_rate": 1.774230554918221e-06, "loss": 0.7845, "step": 21290 }, { "epoch": 0.6630857854585085, "grad_norm": 2.9238572120666504, "learning_rate": 1.7734111245861878e-06, "loss": 0.8344, "step": 21295 }, { "epoch": 0.6632414759458197, "grad_norm": 2.368586540222168, "learning_rate": 1.7725916942541547e-06, "loss": 0.7285, "step": 21300 }, { "epoch": 0.663397166433131, "grad_norm": 2.5205631256103516, "learning_rate": 1.7717722639221216e-06, "loss": 0.7886, "step": 21305 }, { "epoch": 0.6635528569204422, "grad_norm": 2.0538110733032227, "learning_rate": 1.7709528335900882e-06, "loss": 0.667, "step": 21310 }, { "epoch": 0.6637085474077534, "grad_norm": 2.247929573059082, "learning_rate": 1.7701334032580553e-06, "loss": 0.7028, "step": 21315 }, { "epoch": 0.6638642378950647, "grad_norm": 2.1306142807006836, "learning_rate": 1.769313972926022e-06, "loss": 0.7524, "step": 21320 }, { "epoch": 0.6640199283823759, "grad_norm": 2.8951971530914307, "learning_rate": 1.7684945425939886e-06, "loss": 0.7469, "step": 21325 }, { "epoch": 0.664175618869687, "grad_norm": 2.6283681392669678, "learning_rate": 1.7676751122619557e-06, "loss": 0.7293, "step": 21330 }, { "epoch": 0.6643313093569982, "grad_norm": 2.6411941051483154, "learning_rate": 1.7668556819299224e-06, "loss": 0.6879, "step": 21335 }, { "epoch": 0.6644869998443095, "grad_norm": 1.7993347644805908, "learning_rate": 1.7660362515978892e-06, "loss": 0.7137, "step": 21340 }, { "epoch": 0.6646426903316207, "grad_norm": 2.0070273876190186, "learning_rate": 1.765216821265856e-06, "loss": 0.8091, "step": 21345 }, { "epoch": 0.6647983808189319, "grad_norm": 2.155611276626587, "learning_rate": 1.764397390933823e-06, "loss": 0.7284, "step": 21350 }, { "epoch": 0.6649540713062432, "grad_norm": 2.0710208415985107, "learning_rate": 1.7635779606017896e-06, "loss": 0.7202, "step": 21355 }, { "epoch": 0.6651097617935544, "grad_norm": 2.280179738998413, "learning_rate": 1.7627585302697567e-06, "loss": 0.795, "step": 21360 }, { "epoch": 0.6652654522808656, "grad_norm": 2.4907450675964355, "learning_rate": 1.7619390999377234e-06, "loss": 0.777, "step": 21365 }, { "epoch": 0.6654211427681769, "grad_norm": 2.467884063720703, "learning_rate": 1.7611196696056904e-06, "loss": 0.7444, "step": 21370 }, { "epoch": 0.6655768332554881, "grad_norm": 2.3781323432922363, "learning_rate": 1.760300239273657e-06, "loss": 0.7595, "step": 21375 }, { "epoch": 0.6657325237427993, "grad_norm": 2.283165693283081, "learning_rate": 1.759480808941624e-06, "loss": 0.8493, "step": 21380 }, { "epoch": 0.6658882142301106, "grad_norm": 2.064472198486328, "learning_rate": 1.7586613786095908e-06, "loss": 0.8028, "step": 21385 }, { "epoch": 0.6660439047174218, "grad_norm": 2.1887400150299072, "learning_rate": 1.7578419482775577e-06, "loss": 0.7741, "step": 21390 }, { "epoch": 0.666199595204733, "grad_norm": 1.697095513343811, "learning_rate": 1.7570225179455244e-06, "loss": 0.7675, "step": 21395 }, { "epoch": 0.6663552856920442, "grad_norm": 2.086308717727661, "learning_rate": 1.756203087613491e-06, "loss": 0.7933, "step": 21400 }, { "epoch": 0.6665109761793554, "grad_norm": 2.9416205883026123, "learning_rate": 1.7553836572814581e-06, "loss": 0.7463, "step": 21405 }, { "epoch": 0.6666666666666666, "grad_norm": 2.276568651199341, "learning_rate": 1.7545642269494248e-06, "loss": 0.7354, "step": 21410 }, { "epoch": 0.6668223571539779, "grad_norm": 2.2630159854888916, "learning_rate": 1.7537447966173918e-06, "loss": 0.7763, "step": 21415 }, { "epoch": 0.6669780476412891, "grad_norm": 2.091865301132202, "learning_rate": 1.7529253662853585e-06, "loss": 0.7599, "step": 21420 }, { "epoch": 0.6671337381286003, "grad_norm": 1.7338024377822876, "learning_rate": 1.7521059359533254e-06, "loss": 0.7833, "step": 21425 }, { "epoch": 0.6672894286159116, "grad_norm": 2.693013906478882, "learning_rate": 1.7512865056212922e-06, "loss": 0.7791, "step": 21430 }, { "epoch": 0.6674451191032228, "grad_norm": 2.6205742359161377, "learning_rate": 1.7504670752892591e-06, "loss": 0.8344, "step": 21435 }, { "epoch": 0.667600809590534, "grad_norm": 3.879530668258667, "learning_rate": 1.7496476449572258e-06, "loss": 0.8506, "step": 21440 }, { "epoch": 0.6677565000778453, "grad_norm": 2.1740684509277344, "learning_rate": 1.7488282146251929e-06, "loss": 0.6606, "step": 21445 }, { "epoch": 0.6679121905651565, "grad_norm": 1.9849692583084106, "learning_rate": 1.7480087842931595e-06, "loss": 0.8123, "step": 21450 }, { "epoch": 0.6680678810524677, "grad_norm": 1.7692893743515015, "learning_rate": 1.7471893539611266e-06, "loss": 0.7718, "step": 21455 }, { "epoch": 0.668223571539779, "grad_norm": 1.989561676979065, "learning_rate": 1.7463699236290932e-06, "loss": 0.7644, "step": 21460 }, { "epoch": 0.6683792620270902, "grad_norm": 1.8976020812988281, "learning_rate": 1.74555049329706e-06, "loss": 0.7651, "step": 21465 }, { "epoch": 0.6685349525144013, "grad_norm": 2.5636441707611084, "learning_rate": 1.7447310629650268e-06, "loss": 0.7507, "step": 21470 }, { "epoch": 0.6686906430017125, "grad_norm": 2.2278473377227783, "learning_rate": 1.7439116326329936e-06, "loss": 0.7481, "step": 21475 }, { "epoch": 0.6688463334890238, "grad_norm": 2.0079596042633057, "learning_rate": 1.7430922023009605e-06, "loss": 0.7351, "step": 21480 }, { "epoch": 0.669002023976335, "grad_norm": 2.1932761669158936, "learning_rate": 1.7422727719689272e-06, "loss": 0.8135, "step": 21485 }, { "epoch": 0.6691577144636462, "grad_norm": 1.8114433288574219, "learning_rate": 1.7414533416368943e-06, "loss": 0.733, "step": 21490 }, { "epoch": 0.6693134049509575, "grad_norm": 2.1971635818481445, "learning_rate": 1.740633911304861e-06, "loss": 0.8139, "step": 21495 }, { "epoch": 0.6694690954382687, "grad_norm": 2.1678521633148193, "learning_rate": 1.739814480972828e-06, "loss": 0.7742, "step": 21500 }, { "epoch": 0.66962478592558, "grad_norm": 2.1966845989227295, "learning_rate": 1.7389950506407947e-06, "loss": 0.7835, "step": 21505 }, { "epoch": 0.6697804764128912, "grad_norm": 2.2176599502563477, "learning_rate": 1.7381756203087615e-06, "loss": 0.7624, "step": 21510 }, { "epoch": 0.6699361669002024, "grad_norm": 2.23469877243042, "learning_rate": 1.7373561899767282e-06, "loss": 0.7108, "step": 21515 }, { "epoch": 0.6700918573875136, "grad_norm": 2.1815412044525146, "learning_rate": 1.7365367596446953e-06, "loss": 0.8279, "step": 21520 }, { "epoch": 0.6702475478748249, "grad_norm": 2.04500150680542, "learning_rate": 1.735717329312662e-06, "loss": 0.7996, "step": 21525 }, { "epoch": 0.6704032383621361, "grad_norm": 1.9920397996902466, "learning_rate": 1.734897898980629e-06, "loss": 0.7598, "step": 21530 }, { "epoch": 0.6705589288494473, "grad_norm": 2.091552495956421, "learning_rate": 1.7340784686485957e-06, "loss": 0.755, "step": 21535 }, { "epoch": 0.6707146193367586, "grad_norm": 2.0804624557495117, "learning_rate": 1.7332590383165623e-06, "loss": 0.8725, "step": 21540 }, { "epoch": 0.6708703098240697, "grad_norm": 1.9514987468719482, "learning_rate": 1.7324396079845294e-06, "loss": 0.7662, "step": 21545 }, { "epoch": 0.6710260003113809, "grad_norm": 2.368666648864746, "learning_rate": 1.731620177652496e-06, "loss": 0.7821, "step": 21550 }, { "epoch": 0.6711816907986922, "grad_norm": 1.8808660507202148, "learning_rate": 1.730800747320463e-06, "loss": 0.7611, "step": 21555 }, { "epoch": 0.6713373812860034, "grad_norm": 2.0058887004852295, "learning_rate": 1.7299813169884298e-06, "loss": 0.715, "step": 21560 }, { "epoch": 0.6714930717733146, "grad_norm": 2.2949094772338867, "learning_rate": 1.7291618866563967e-06, "loss": 0.6946, "step": 21565 }, { "epoch": 0.6716487622606259, "grad_norm": 1.863627552986145, "learning_rate": 1.7283424563243633e-06, "loss": 0.6967, "step": 21570 }, { "epoch": 0.6718044527479371, "grad_norm": 2.1631064414978027, "learning_rate": 1.7275230259923304e-06, "loss": 0.8243, "step": 21575 }, { "epoch": 0.6719601432352483, "grad_norm": 2.018033027648926, "learning_rate": 1.726703595660297e-06, "loss": 0.6874, "step": 21580 }, { "epoch": 0.6721158337225596, "grad_norm": 2.277320146560669, "learning_rate": 1.725884165328264e-06, "loss": 0.8308, "step": 21585 }, { "epoch": 0.6722715242098708, "grad_norm": 2.002505302429199, "learning_rate": 1.7250647349962308e-06, "loss": 0.8169, "step": 21590 }, { "epoch": 0.672427214697182, "grad_norm": 1.7622758150100708, "learning_rate": 1.7242453046641977e-06, "loss": 0.626, "step": 21595 }, { "epoch": 0.6725829051844933, "grad_norm": 2.08516788482666, "learning_rate": 1.7234258743321643e-06, "loss": 0.8344, "step": 21600 }, { "epoch": 0.6727385956718045, "grad_norm": 2.0563461780548096, "learning_rate": 1.7226064440001314e-06, "loss": 0.7788, "step": 21605 }, { "epoch": 0.6728942861591157, "grad_norm": 1.9916375875473022, "learning_rate": 1.721787013668098e-06, "loss": 0.7454, "step": 21610 }, { "epoch": 0.6730499766464269, "grad_norm": 2.1752707958221436, "learning_rate": 1.7209675833360647e-06, "loss": 0.8252, "step": 21615 }, { "epoch": 0.6732056671337381, "grad_norm": 2.226426839828491, "learning_rate": 1.7201481530040318e-06, "loss": 0.7538, "step": 21620 }, { "epoch": 0.6733613576210493, "grad_norm": 2.0260000228881836, "learning_rate": 1.7193287226719985e-06, "loss": 0.7496, "step": 21625 }, { "epoch": 0.6735170481083605, "grad_norm": 1.7699408531188965, "learning_rate": 1.7185092923399655e-06, "loss": 0.7331, "step": 21630 }, { "epoch": 0.6736727385956718, "grad_norm": 2.2085254192352295, "learning_rate": 1.7176898620079322e-06, "loss": 0.7808, "step": 21635 }, { "epoch": 0.673828429082983, "grad_norm": 2.0665221214294434, "learning_rate": 1.716870431675899e-06, "loss": 0.7663, "step": 21640 }, { "epoch": 0.6739841195702942, "grad_norm": 2.0246517658233643, "learning_rate": 1.7160510013438657e-06, "loss": 0.8261, "step": 21645 }, { "epoch": 0.6741398100576055, "grad_norm": 2.151172399520874, "learning_rate": 1.7152315710118328e-06, "loss": 0.6984, "step": 21650 }, { "epoch": 0.6742955005449167, "grad_norm": 1.7726284265518188, "learning_rate": 1.7144121406797995e-06, "loss": 0.7537, "step": 21655 }, { "epoch": 0.674451191032228, "grad_norm": 2.1905109882354736, "learning_rate": 1.7135927103477665e-06, "loss": 0.8959, "step": 21660 }, { "epoch": 0.6746068815195392, "grad_norm": 2.156251907348633, "learning_rate": 1.7127732800157332e-06, "loss": 0.7278, "step": 21665 }, { "epoch": 0.6747625720068504, "grad_norm": 2.564474582672119, "learning_rate": 1.7119538496837e-06, "loss": 0.7278, "step": 21670 }, { "epoch": 0.6749182624941616, "grad_norm": 1.8734186887741089, "learning_rate": 1.711134419351667e-06, "loss": 0.7528, "step": 21675 }, { "epoch": 0.6750739529814729, "grad_norm": 1.8707109689712524, "learning_rate": 1.7103149890196336e-06, "loss": 0.7643, "step": 21680 }, { "epoch": 0.675229643468784, "grad_norm": 2.1265950202941895, "learning_rate": 1.7094955586876005e-06, "loss": 0.8488, "step": 21685 }, { "epoch": 0.6753853339560952, "grad_norm": 2.2032885551452637, "learning_rate": 1.7086761283555671e-06, "loss": 0.7511, "step": 21690 }, { "epoch": 0.6755410244434065, "grad_norm": 1.8227512836456299, "learning_rate": 1.7078566980235342e-06, "loss": 0.722, "step": 21695 }, { "epoch": 0.6756967149307177, "grad_norm": 1.975993037223816, "learning_rate": 1.7070372676915009e-06, "loss": 0.7469, "step": 21700 }, { "epoch": 0.6758524054180289, "grad_norm": 2.2670090198516846, "learning_rate": 1.706217837359468e-06, "loss": 0.7749, "step": 21705 }, { "epoch": 0.6760080959053402, "grad_norm": 1.9439136981964111, "learning_rate": 1.7053984070274346e-06, "loss": 0.8275, "step": 21710 }, { "epoch": 0.6761637863926514, "grad_norm": 2.0190939903259277, "learning_rate": 1.7045789766954015e-06, "loss": 0.7969, "step": 21715 }, { "epoch": 0.6763194768799626, "grad_norm": 2.1502199172973633, "learning_rate": 1.7037595463633683e-06, "loss": 0.7565, "step": 21720 }, { "epoch": 0.6764751673672739, "grad_norm": 2.6855196952819824, "learning_rate": 1.7029401160313352e-06, "loss": 0.7452, "step": 21725 }, { "epoch": 0.6766308578545851, "grad_norm": 2.326036214828491, "learning_rate": 1.7021206856993019e-06, "loss": 0.7372, "step": 21730 }, { "epoch": 0.6767865483418963, "grad_norm": 2.2045273780822754, "learning_rate": 1.701301255367269e-06, "loss": 0.8011, "step": 21735 }, { "epoch": 0.6769422388292076, "grad_norm": 2.0680174827575684, "learning_rate": 1.7004818250352356e-06, "loss": 0.6844, "step": 21740 }, { "epoch": 0.6770979293165188, "grad_norm": 2.414030075073242, "learning_rate": 1.6996623947032027e-06, "loss": 0.7513, "step": 21745 }, { "epoch": 0.67725361980383, "grad_norm": 2.5578835010528564, "learning_rate": 1.6988429643711694e-06, "loss": 0.7749, "step": 21750 }, { "epoch": 0.6774093102911413, "grad_norm": 1.7215702533721924, "learning_rate": 1.698023534039136e-06, "loss": 0.8041, "step": 21755 }, { "epoch": 0.6775650007784524, "grad_norm": 2.1822805404663086, "learning_rate": 1.6972041037071029e-06, "loss": 0.7839, "step": 21760 }, { "epoch": 0.6777206912657636, "grad_norm": 1.9559168815612793, "learning_rate": 1.6963846733750697e-06, "loss": 0.7511, "step": 21765 }, { "epoch": 0.6778763817530749, "grad_norm": 1.9223579168319702, "learning_rate": 1.6955652430430366e-06, "loss": 0.7444, "step": 21770 }, { "epoch": 0.6780320722403861, "grad_norm": 2.2814810276031494, "learning_rate": 1.6947458127110033e-06, "loss": 0.77, "step": 21775 }, { "epoch": 0.6781877627276973, "grad_norm": 1.9693135023117065, "learning_rate": 1.6939263823789704e-06, "loss": 0.7994, "step": 21780 }, { "epoch": 0.6783434532150086, "grad_norm": 2.047347068786621, "learning_rate": 1.693106952046937e-06, "loss": 0.7863, "step": 21785 }, { "epoch": 0.6784991437023198, "grad_norm": 2.227815628051758, "learning_rate": 1.692287521714904e-06, "loss": 0.789, "step": 21790 }, { "epoch": 0.678654834189631, "grad_norm": 2.288775682449341, "learning_rate": 1.6914680913828708e-06, "loss": 0.7615, "step": 21795 }, { "epoch": 0.6788105246769423, "grad_norm": 2.1645114421844482, "learning_rate": 1.6906486610508376e-06, "loss": 0.811, "step": 21800 }, { "epoch": 0.6789662151642535, "grad_norm": 1.9872020483016968, "learning_rate": 1.6898292307188045e-06, "loss": 0.7856, "step": 21805 }, { "epoch": 0.6791219056515647, "grad_norm": 2.3257734775543213, "learning_rate": 1.6890098003867714e-06, "loss": 0.7298, "step": 21810 }, { "epoch": 0.679277596138876, "grad_norm": 2.268890619277954, "learning_rate": 1.688190370054738e-06, "loss": 0.7397, "step": 21815 }, { "epoch": 0.6794332866261872, "grad_norm": 2.4554524421691895, "learning_rate": 1.687370939722705e-06, "loss": 0.8035, "step": 21820 }, { "epoch": 0.6795889771134984, "grad_norm": 2.1434977054595947, "learning_rate": 1.6865515093906718e-06, "loss": 0.7236, "step": 21825 }, { "epoch": 0.6797446676008095, "grad_norm": 2.175175666809082, "learning_rate": 1.6857320790586384e-06, "loss": 0.8041, "step": 21830 }, { "epoch": 0.6799003580881208, "grad_norm": 2.058112382888794, "learning_rate": 1.6849126487266055e-06, "loss": 0.7306, "step": 21835 }, { "epoch": 0.680056048575432, "grad_norm": 2.1310205459594727, "learning_rate": 1.6840932183945722e-06, "loss": 0.7695, "step": 21840 }, { "epoch": 0.6802117390627432, "grad_norm": 1.9134553670883179, "learning_rate": 1.683273788062539e-06, "loss": 0.8094, "step": 21845 }, { "epoch": 0.6803674295500545, "grad_norm": 2.2008628845214844, "learning_rate": 1.6824543577305059e-06, "loss": 0.7377, "step": 21850 }, { "epoch": 0.6805231200373657, "grad_norm": 2.2845239639282227, "learning_rate": 1.6816349273984728e-06, "loss": 0.7837, "step": 21855 }, { "epoch": 0.6806788105246769, "grad_norm": 1.8724173307418823, "learning_rate": 1.6808154970664394e-06, "loss": 0.7936, "step": 21860 }, { "epoch": 0.6808345010119882, "grad_norm": 2.0293662548065186, "learning_rate": 1.6799960667344065e-06, "loss": 0.8171, "step": 21865 }, { "epoch": 0.6809901914992994, "grad_norm": 2.0733375549316406, "learning_rate": 1.6791766364023732e-06, "loss": 0.7214, "step": 21870 }, { "epoch": 0.6811458819866106, "grad_norm": 2.350444793701172, "learning_rate": 1.6783572060703402e-06, "loss": 0.7831, "step": 21875 }, { "epoch": 0.6813015724739219, "grad_norm": 2.170469045639038, "learning_rate": 1.677537775738307e-06, "loss": 0.7922, "step": 21880 }, { "epoch": 0.6814572629612331, "grad_norm": 2.0769259929656982, "learning_rate": 1.6767183454062738e-06, "loss": 0.7506, "step": 21885 }, { "epoch": 0.6816129534485443, "grad_norm": 1.9988007545471191, "learning_rate": 1.6758989150742404e-06, "loss": 0.8087, "step": 21890 }, { "epoch": 0.6817686439358556, "grad_norm": 2.580167770385742, "learning_rate": 1.6750794847422073e-06, "loss": 0.8029, "step": 21895 }, { "epoch": 0.6819243344231667, "grad_norm": 2.3649144172668457, "learning_rate": 1.6742600544101742e-06, "loss": 0.8711, "step": 21900 }, { "epoch": 0.6820800249104779, "grad_norm": 2.115144729614258, "learning_rate": 1.6734406240781408e-06, "loss": 0.782, "step": 21905 }, { "epoch": 0.6822357153977892, "grad_norm": 2.1815147399902344, "learning_rate": 1.672621193746108e-06, "loss": 0.7417, "step": 21910 }, { "epoch": 0.6823914058851004, "grad_norm": 2.0244085788726807, "learning_rate": 1.6718017634140746e-06, "loss": 0.8207, "step": 21915 }, { "epoch": 0.6825470963724116, "grad_norm": 2.159862518310547, "learning_rate": 1.6709823330820416e-06, "loss": 0.7183, "step": 21920 }, { "epoch": 0.6827027868597229, "grad_norm": 2.04364275932312, "learning_rate": 1.6701629027500083e-06, "loss": 0.8055, "step": 21925 }, { "epoch": 0.6828584773470341, "grad_norm": 1.8819822072982788, "learning_rate": 1.6693434724179752e-06, "loss": 0.7188, "step": 21930 }, { "epoch": 0.6830141678343453, "grad_norm": 2.7841763496398926, "learning_rate": 1.6685240420859418e-06, "loss": 0.7096, "step": 21935 }, { "epoch": 0.6831698583216566, "grad_norm": 2.208387613296509, "learning_rate": 1.667704611753909e-06, "loss": 0.7329, "step": 21940 }, { "epoch": 0.6833255488089678, "grad_norm": 2.3321948051452637, "learning_rate": 1.6668851814218756e-06, "loss": 0.6648, "step": 21945 }, { "epoch": 0.683481239296279, "grad_norm": 2.067223310470581, "learning_rate": 1.6660657510898426e-06, "loss": 0.7949, "step": 21950 }, { "epoch": 0.6836369297835903, "grad_norm": 2.2627930641174316, "learning_rate": 1.6652463207578093e-06, "loss": 0.7047, "step": 21955 }, { "epoch": 0.6837926202709015, "grad_norm": 2.409686326980591, "learning_rate": 1.6644268904257762e-06, "loss": 0.8156, "step": 21960 }, { "epoch": 0.6839483107582127, "grad_norm": 2.492239236831665, "learning_rate": 1.663607460093743e-06, "loss": 0.8082, "step": 21965 }, { "epoch": 0.684104001245524, "grad_norm": 2.2008461952209473, "learning_rate": 1.6627880297617097e-06, "loss": 0.7684, "step": 21970 }, { "epoch": 0.6842596917328351, "grad_norm": 2.0945920944213867, "learning_rate": 1.6619685994296766e-06, "loss": 0.7806, "step": 21975 }, { "epoch": 0.6844153822201463, "grad_norm": 2.353506326675415, "learning_rate": 1.6611491690976434e-06, "loss": 0.7559, "step": 21980 }, { "epoch": 0.6845710727074575, "grad_norm": 1.982800841331482, "learning_rate": 1.6603297387656103e-06, "loss": 0.7301, "step": 21985 }, { "epoch": 0.6847267631947688, "grad_norm": 2.135887861251831, "learning_rate": 1.659510308433577e-06, "loss": 0.8493, "step": 21990 }, { "epoch": 0.68488245368208, "grad_norm": 2.1970932483673096, "learning_rate": 1.658690878101544e-06, "loss": 0.8049, "step": 21995 }, { "epoch": 0.6850381441693912, "grad_norm": 2.6308560371398926, "learning_rate": 1.6578714477695107e-06, "loss": 0.7007, "step": 22000 }, { "epoch": 0.6851938346567025, "grad_norm": 1.7066528797149658, "learning_rate": 1.6570520174374776e-06, "loss": 0.7483, "step": 22005 }, { "epoch": 0.6853495251440137, "grad_norm": 1.8309991359710693, "learning_rate": 1.6562325871054444e-06, "loss": 0.755, "step": 22010 }, { "epoch": 0.6855052156313249, "grad_norm": 2.6211531162261963, "learning_rate": 1.6554131567734113e-06, "loss": 0.8043, "step": 22015 }, { "epoch": 0.6856609061186362, "grad_norm": 1.9622349739074707, "learning_rate": 1.654593726441378e-06, "loss": 0.7388, "step": 22020 }, { "epoch": 0.6858165966059474, "grad_norm": 2.2436418533325195, "learning_rate": 1.653774296109345e-06, "loss": 0.7612, "step": 22025 }, { "epoch": 0.6859722870932586, "grad_norm": 1.936234474182129, "learning_rate": 1.6529548657773117e-06, "loss": 0.708, "step": 22030 }, { "epoch": 0.6861279775805699, "grad_norm": 1.8589829206466675, "learning_rate": 1.6521354354452788e-06, "loss": 0.6995, "step": 22035 }, { "epoch": 0.6862836680678811, "grad_norm": 1.8703851699829102, "learning_rate": 1.6513160051132455e-06, "loss": 0.7538, "step": 22040 }, { "epoch": 0.6864393585551922, "grad_norm": 2.3488965034484863, "learning_rate": 1.6504965747812121e-06, "loss": 0.7733, "step": 22045 }, { "epoch": 0.6865950490425035, "grad_norm": 2.1073341369628906, "learning_rate": 1.6496771444491792e-06, "loss": 0.769, "step": 22050 }, { "epoch": 0.6867507395298147, "grad_norm": 2.0388364791870117, "learning_rate": 1.6488577141171458e-06, "loss": 0.7462, "step": 22055 }, { "epoch": 0.6869064300171259, "grad_norm": 2.3727855682373047, "learning_rate": 1.6480382837851127e-06, "loss": 0.804, "step": 22060 }, { "epoch": 0.6870621205044372, "grad_norm": 2.0156071186065674, "learning_rate": 1.6472188534530794e-06, "loss": 0.7623, "step": 22065 }, { "epoch": 0.6872178109917484, "grad_norm": 2.2097010612487793, "learning_rate": 1.6463994231210465e-06, "loss": 0.7716, "step": 22070 }, { "epoch": 0.6873735014790596, "grad_norm": 2.3841419219970703, "learning_rate": 1.6455799927890131e-06, "loss": 0.7095, "step": 22075 }, { "epoch": 0.6875291919663709, "grad_norm": 2.5340349674224854, "learning_rate": 1.6447605624569802e-06, "loss": 0.7429, "step": 22080 }, { "epoch": 0.6876848824536821, "grad_norm": 2.1460208892822266, "learning_rate": 1.6439411321249469e-06, "loss": 0.8091, "step": 22085 }, { "epoch": 0.6878405729409933, "grad_norm": 2.27191162109375, "learning_rate": 1.6431217017929137e-06, "loss": 0.7007, "step": 22090 }, { "epoch": 0.6879962634283046, "grad_norm": 2.197087526321411, "learning_rate": 1.6423022714608806e-06, "loss": 0.6766, "step": 22095 }, { "epoch": 0.6881519539156158, "grad_norm": 1.8580678701400757, "learning_rate": 1.6414828411288475e-06, "loss": 0.7715, "step": 22100 }, { "epoch": 0.688307644402927, "grad_norm": 2.116844415664673, "learning_rate": 1.6406634107968141e-06, "loss": 0.7574, "step": 22105 }, { "epoch": 0.6884633348902383, "grad_norm": 2.226611614227295, "learning_rate": 1.6398439804647808e-06, "loss": 0.7324, "step": 22110 }, { "epoch": 0.6886190253775494, "grad_norm": 2.0244548320770264, "learning_rate": 1.6390245501327479e-06, "loss": 0.8107, "step": 22115 }, { "epoch": 0.6887747158648606, "grad_norm": 1.9304416179656982, "learning_rate": 1.6382051198007145e-06, "loss": 0.8137, "step": 22120 }, { "epoch": 0.6889304063521718, "grad_norm": 2.170426607131958, "learning_rate": 1.6373856894686816e-06, "loss": 0.6934, "step": 22125 }, { "epoch": 0.6890860968394831, "grad_norm": 3.304036855697632, "learning_rate": 1.6365662591366483e-06, "loss": 0.7228, "step": 22130 }, { "epoch": 0.6892417873267943, "grad_norm": 2.2494003772735596, "learning_rate": 1.6357468288046151e-06, "loss": 0.7478, "step": 22135 }, { "epoch": 0.6893974778141055, "grad_norm": 1.84719979763031, "learning_rate": 1.634927398472582e-06, "loss": 0.7606, "step": 22140 }, { "epoch": 0.6895531683014168, "grad_norm": 2.03438138961792, "learning_rate": 1.6341079681405489e-06, "loss": 0.7589, "step": 22145 }, { "epoch": 0.689708858788728, "grad_norm": 2.1062748432159424, "learning_rate": 1.6332885378085155e-06, "loss": 0.6751, "step": 22150 }, { "epoch": 0.6898645492760392, "grad_norm": 1.6935914754867554, "learning_rate": 1.6324691074764826e-06, "loss": 0.795, "step": 22155 }, { "epoch": 0.6900202397633505, "grad_norm": 2.247971534729004, "learning_rate": 1.6316496771444493e-06, "loss": 0.7344, "step": 22160 }, { "epoch": 0.6901759302506617, "grad_norm": 2.162978172302246, "learning_rate": 1.6308302468124163e-06, "loss": 0.7806, "step": 22165 }, { "epoch": 0.6903316207379729, "grad_norm": 2.078561305999756, "learning_rate": 1.630010816480383e-06, "loss": 0.7904, "step": 22170 }, { "epoch": 0.6904873112252842, "grad_norm": 2.1824891567230225, "learning_rate": 1.6291913861483499e-06, "loss": 0.8033, "step": 22175 }, { "epoch": 0.6906430017125954, "grad_norm": 2.131819248199463, "learning_rate": 1.6283719558163165e-06, "loss": 0.7355, "step": 22180 }, { "epoch": 0.6907986921999066, "grad_norm": 2.4931254386901855, "learning_rate": 1.6275525254842834e-06, "loss": 0.8151, "step": 22185 }, { "epoch": 0.6909543826872178, "grad_norm": 2.0214755535125732, "learning_rate": 1.6267330951522503e-06, "loss": 0.7565, "step": 22190 }, { "epoch": 0.691110073174529, "grad_norm": 2.135085344314575, "learning_rate": 1.625913664820217e-06, "loss": 0.7958, "step": 22195 }, { "epoch": 0.6912657636618402, "grad_norm": 2.084434747695923, "learning_rate": 1.625094234488184e-06, "loss": 0.7548, "step": 22200 }, { "epoch": 0.6914214541491515, "grad_norm": 1.8155308961868286, "learning_rate": 1.6242748041561507e-06, "loss": 0.6799, "step": 22205 }, { "epoch": 0.6915771446364627, "grad_norm": 2.538611888885498, "learning_rate": 1.6234553738241177e-06, "loss": 0.7214, "step": 22210 }, { "epoch": 0.6917328351237739, "grad_norm": 1.9210067987442017, "learning_rate": 1.6226359434920844e-06, "loss": 0.7429, "step": 22215 }, { "epoch": 0.6918885256110852, "grad_norm": 4.242439270019531, "learning_rate": 1.6218165131600513e-06, "loss": 0.752, "step": 22220 }, { "epoch": 0.6920442160983964, "grad_norm": 2.1793932914733887, "learning_rate": 1.6209970828280181e-06, "loss": 0.7924, "step": 22225 }, { "epoch": 0.6921999065857076, "grad_norm": 2.1164517402648926, "learning_rate": 1.620177652495985e-06, "loss": 0.7644, "step": 22230 }, { "epoch": 0.6923555970730189, "grad_norm": 1.603344202041626, "learning_rate": 1.6193582221639517e-06, "loss": 0.8265, "step": 22235 }, { "epoch": 0.6925112875603301, "grad_norm": 2.4220948219299316, "learning_rate": 1.6185387918319188e-06, "loss": 0.7516, "step": 22240 }, { "epoch": 0.6926669780476413, "grad_norm": 2.7588889598846436, "learning_rate": 1.6177193614998854e-06, "loss": 0.8287, "step": 22245 }, { "epoch": 0.6928226685349526, "grad_norm": 2.227727174758911, "learning_rate": 1.6168999311678523e-06, "loss": 0.7138, "step": 22250 }, { "epoch": 0.6929783590222638, "grad_norm": 2.6251778602600098, "learning_rate": 1.6160805008358191e-06, "loss": 0.8001, "step": 22255 }, { "epoch": 0.6931340495095749, "grad_norm": 2.403721332550049, "learning_rate": 1.6152610705037858e-06, "loss": 0.8391, "step": 22260 }, { "epoch": 0.6932897399968861, "grad_norm": 1.9556914567947388, "learning_rate": 1.6144416401717527e-06, "loss": 0.731, "step": 22265 }, { "epoch": 0.6934454304841974, "grad_norm": 2.0407590866088867, "learning_rate": 1.6136222098397195e-06, "loss": 0.7186, "step": 22270 }, { "epoch": 0.6936011209715086, "grad_norm": 2.198071002960205, "learning_rate": 1.6128027795076864e-06, "loss": 0.7822, "step": 22275 }, { "epoch": 0.6937568114588198, "grad_norm": 1.8996111154556274, "learning_rate": 1.611983349175653e-06, "loss": 0.7628, "step": 22280 }, { "epoch": 0.6939125019461311, "grad_norm": 2.4710142612457275, "learning_rate": 1.6111639188436202e-06, "loss": 0.7575, "step": 22285 }, { "epoch": 0.6940681924334423, "grad_norm": 2.018345594406128, "learning_rate": 1.6103444885115868e-06, "loss": 0.7485, "step": 22290 }, { "epoch": 0.6942238829207535, "grad_norm": 2.3436710834503174, "learning_rate": 1.6095250581795539e-06, "loss": 0.8304, "step": 22295 }, { "epoch": 0.6943795734080648, "grad_norm": 1.9997920989990234, "learning_rate": 1.6087056278475205e-06, "loss": 0.6903, "step": 22300 }, { "epoch": 0.694535263895376, "grad_norm": 2.6311123371124268, "learning_rate": 1.6078861975154874e-06, "loss": 0.7808, "step": 22305 }, { "epoch": 0.6946909543826872, "grad_norm": 2.610124111175537, "learning_rate": 1.607066767183454e-06, "loss": 0.8555, "step": 22310 }, { "epoch": 0.6948466448699985, "grad_norm": 2.2311511039733887, "learning_rate": 1.6062473368514212e-06, "loss": 0.6725, "step": 22315 }, { "epoch": 0.6950023353573097, "grad_norm": 2.2989730834960938, "learning_rate": 1.6054279065193878e-06, "loss": 0.8431, "step": 22320 }, { "epoch": 0.6951580258446209, "grad_norm": 2.052368402481079, "learning_rate": 1.6046084761873545e-06, "loss": 0.7344, "step": 22325 }, { "epoch": 0.6953137163319321, "grad_norm": 2.424527406692505, "learning_rate": 1.6037890458553216e-06, "loss": 0.7852, "step": 22330 }, { "epoch": 0.6954694068192433, "grad_norm": 1.868094801902771, "learning_rate": 1.6029696155232882e-06, "loss": 0.8482, "step": 22335 }, { "epoch": 0.6956250973065545, "grad_norm": 2.291426420211792, "learning_rate": 1.6021501851912553e-06, "loss": 0.7405, "step": 22340 }, { "epoch": 0.6957807877938658, "grad_norm": 1.835871696472168, "learning_rate": 1.601330754859222e-06, "loss": 0.7292, "step": 22345 }, { "epoch": 0.695936478281177, "grad_norm": 2.0458247661590576, "learning_rate": 1.6005113245271888e-06, "loss": 0.7863, "step": 22350 }, { "epoch": 0.6960921687684882, "grad_norm": 2.037752628326416, "learning_rate": 1.5996918941951555e-06, "loss": 0.7144, "step": 22355 }, { "epoch": 0.6962478592557995, "grad_norm": 1.95692777633667, "learning_rate": 1.5988724638631226e-06, "loss": 0.733, "step": 22360 }, { "epoch": 0.6964035497431107, "grad_norm": 2.074341058731079, "learning_rate": 1.5980530335310892e-06, "loss": 0.801, "step": 22365 }, { "epoch": 0.6965592402304219, "grad_norm": 2.0403854846954346, "learning_rate": 1.5972336031990563e-06, "loss": 0.8, "step": 22370 }, { "epoch": 0.6967149307177332, "grad_norm": 2.4371907711029053, "learning_rate": 1.596414172867023e-06, "loss": 0.773, "step": 22375 }, { "epoch": 0.6968706212050444, "grad_norm": 2.6470701694488525, "learning_rate": 1.5955947425349898e-06, "loss": 0.7254, "step": 22380 }, { "epoch": 0.6970263116923556, "grad_norm": 2.2775230407714844, "learning_rate": 1.5947753122029567e-06, "loss": 0.7993, "step": 22385 }, { "epoch": 0.6971820021796669, "grad_norm": 2.213381767272949, "learning_rate": 1.5939558818709236e-06, "loss": 0.8138, "step": 22390 }, { "epoch": 0.6973376926669781, "grad_norm": 1.736997127532959, "learning_rate": 1.5931364515388902e-06, "loss": 0.7397, "step": 22395 }, { "epoch": 0.6974933831542893, "grad_norm": 1.9919102191925049, "learning_rate": 1.592317021206857e-06, "loss": 0.7194, "step": 22400 }, { "epoch": 0.6976490736416004, "grad_norm": 1.862502098083496, "learning_rate": 1.591497590874824e-06, "loss": 0.8403, "step": 22405 }, { "epoch": 0.6978047641289117, "grad_norm": 2.1086745262145996, "learning_rate": 1.5906781605427906e-06, "loss": 0.8591, "step": 22410 }, { "epoch": 0.6979604546162229, "grad_norm": 2.666325807571411, "learning_rate": 1.5898587302107577e-06, "loss": 0.8012, "step": 22415 }, { "epoch": 0.6981161451035341, "grad_norm": 2.09818696975708, "learning_rate": 1.5890392998787244e-06, "loss": 0.7292, "step": 22420 }, { "epoch": 0.6982718355908454, "grad_norm": 2.0157124996185303, "learning_rate": 1.5882198695466912e-06, "loss": 0.7798, "step": 22425 }, { "epoch": 0.6984275260781566, "grad_norm": 1.7618626356124878, "learning_rate": 1.587400439214658e-06, "loss": 0.7045, "step": 22430 }, { "epoch": 0.6985832165654678, "grad_norm": 2.416489362716675, "learning_rate": 1.586581008882625e-06, "loss": 0.7221, "step": 22435 }, { "epoch": 0.6987389070527791, "grad_norm": 3.226562738418579, "learning_rate": 1.5857615785505916e-06, "loss": 0.8161, "step": 22440 }, { "epoch": 0.6988945975400903, "grad_norm": 2.5005013942718506, "learning_rate": 1.5849421482185587e-06, "loss": 0.8032, "step": 22445 }, { "epoch": 0.6990502880274015, "grad_norm": 1.8937482833862305, "learning_rate": 1.5841227178865254e-06, "loss": 0.7484, "step": 22450 }, { "epoch": 0.6992059785147128, "grad_norm": 2.103405237197876, "learning_rate": 1.5833032875544924e-06, "loss": 0.7579, "step": 22455 }, { "epoch": 0.699361669002024, "grad_norm": 2.1781651973724365, "learning_rate": 1.582483857222459e-06, "loss": 0.7528, "step": 22460 }, { "epoch": 0.6995173594893352, "grad_norm": 1.8176007270812988, "learning_rate": 1.581664426890426e-06, "loss": 0.7238, "step": 22465 }, { "epoch": 0.6996730499766465, "grad_norm": 2.4899814128875732, "learning_rate": 1.5808449965583928e-06, "loss": 0.7428, "step": 22470 }, { "epoch": 0.6998287404639576, "grad_norm": 2.271855592727661, "learning_rate": 1.5800255662263595e-06, "loss": 0.684, "step": 22475 }, { "epoch": 0.6999844309512688, "grad_norm": 2.110650062561035, "learning_rate": 1.5792061358943264e-06, "loss": 0.7771, "step": 22480 }, { "epoch": 0.7001401214385801, "grad_norm": 1.8891230821609497, "learning_rate": 1.578386705562293e-06, "loss": 0.8447, "step": 22485 }, { "epoch": 0.7002958119258913, "grad_norm": 2.4731523990631104, "learning_rate": 1.5775672752302601e-06, "loss": 0.7299, "step": 22490 }, { "epoch": 0.7004515024132025, "grad_norm": 2.059035301208496, "learning_rate": 1.5767478448982268e-06, "loss": 0.7941, "step": 22495 }, { "epoch": 0.7006071929005138, "grad_norm": 2.0505640506744385, "learning_rate": 1.5759284145661938e-06, "loss": 0.8347, "step": 22500 }, { "epoch": 0.700762883387825, "grad_norm": 2.3863000869750977, "learning_rate": 1.5751089842341605e-06, "loss": 0.7745, "step": 22505 }, { "epoch": 0.7009185738751362, "grad_norm": 1.8277666568756104, "learning_rate": 1.5742895539021274e-06, "loss": 0.7357, "step": 22510 }, { "epoch": 0.7010742643624475, "grad_norm": 2.4199633598327637, "learning_rate": 1.5734701235700942e-06, "loss": 0.8586, "step": 22515 }, { "epoch": 0.7012299548497587, "grad_norm": 1.9461265802383423, "learning_rate": 1.5726506932380611e-06, "loss": 0.7885, "step": 22520 }, { "epoch": 0.7013856453370699, "grad_norm": 2.6215415000915527, "learning_rate": 1.5718312629060278e-06, "loss": 0.7705, "step": 22525 }, { "epoch": 0.7015413358243812, "grad_norm": 1.9839482307434082, "learning_rate": 1.5710118325739949e-06, "loss": 0.7504, "step": 22530 }, { "epoch": 0.7016970263116924, "grad_norm": 1.820220708847046, "learning_rate": 1.5701924022419615e-06, "loss": 0.7373, "step": 22535 }, { "epoch": 0.7018527167990036, "grad_norm": 2.9127936363220215, "learning_rate": 1.5693729719099282e-06, "loss": 0.8405, "step": 22540 }, { "epoch": 0.7020084072863149, "grad_norm": 2.0071115493774414, "learning_rate": 1.5685535415778952e-06, "loss": 0.7831, "step": 22545 }, { "epoch": 0.702164097773626, "grad_norm": 2.176785469055176, "learning_rate": 1.567734111245862e-06, "loss": 0.7433, "step": 22550 }, { "epoch": 0.7023197882609372, "grad_norm": 2.684664726257324, "learning_rate": 1.5669146809138288e-06, "loss": 0.7258, "step": 22555 }, { "epoch": 0.7024754787482484, "grad_norm": 1.5935478210449219, "learning_rate": 1.5660952505817956e-06, "loss": 0.7438, "step": 22560 }, { "epoch": 0.7026311692355597, "grad_norm": 1.924401879310608, "learning_rate": 1.5652758202497625e-06, "loss": 0.7704, "step": 22565 }, { "epoch": 0.7027868597228709, "grad_norm": 2.5062782764434814, "learning_rate": 1.5644563899177292e-06, "loss": 0.7418, "step": 22570 }, { "epoch": 0.7029425502101821, "grad_norm": 2.147850751876831, "learning_rate": 1.5636369595856963e-06, "loss": 0.7899, "step": 22575 }, { "epoch": 0.7030982406974934, "grad_norm": 2.647132396697998, "learning_rate": 1.562817529253663e-06, "loss": 0.759, "step": 22580 }, { "epoch": 0.7032539311848046, "grad_norm": 1.9654390811920166, "learning_rate": 1.56199809892163e-06, "loss": 0.7218, "step": 22585 }, { "epoch": 0.7034096216721158, "grad_norm": 2.2665529251098633, "learning_rate": 1.5611786685895967e-06, "loss": 0.7547, "step": 22590 }, { "epoch": 0.7035653121594271, "grad_norm": 2.799625873565674, "learning_rate": 1.5603592382575635e-06, "loss": 0.7577, "step": 22595 }, { "epoch": 0.7037210026467383, "grad_norm": 2.3102667331695557, "learning_rate": 1.5595398079255302e-06, "loss": 0.7669, "step": 22600 }, { "epoch": 0.7038766931340495, "grad_norm": 2.120204448699951, "learning_rate": 1.5587203775934973e-06, "loss": 0.744, "step": 22605 }, { "epoch": 0.7040323836213608, "grad_norm": 2.148007392883301, "learning_rate": 1.557900947261464e-06, "loss": 0.8118, "step": 22610 }, { "epoch": 0.704188074108672, "grad_norm": 2.2827117443084717, "learning_rate": 1.5570815169294306e-06, "loss": 0.759, "step": 22615 }, { "epoch": 0.7043437645959831, "grad_norm": 2.616726875305176, "learning_rate": 1.5562620865973977e-06, "loss": 0.8176, "step": 22620 }, { "epoch": 0.7044994550832944, "grad_norm": 2.273772716522217, "learning_rate": 1.5554426562653643e-06, "loss": 0.653, "step": 22625 }, { "epoch": 0.7046551455706056, "grad_norm": 2.116854667663574, "learning_rate": 1.5546232259333314e-06, "loss": 0.7545, "step": 22630 }, { "epoch": 0.7048108360579168, "grad_norm": 3.1968178749084473, "learning_rate": 1.553803795601298e-06, "loss": 0.7769, "step": 22635 }, { "epoch": 0.7049665265452281, "grad_norm": 3.3028669357299805, "learning_rate": 1.552984365269265e-06, "loss": 0.8134, "step": 22640 }, { "epoch": 0.7051222170325393, "grad_norm": 2.2711470127105713, "learning_rate": 1.5521649349372318e-06, "loss": 0.7472, "step": 22645 }, { "epoch": 0.7052779075198505, "grad_norm": 2.2000069618225098, "learning_rate": 1.5513455046051987e-06, "loss": 0.7531, "step": 22650 }, { "epoch": 0.7054335980071618, "grad_norm": 2.541963577270508, "learning_rate": 1.5505260742731653e-06, "loss": 0.7818, "step": 22655 }, { "epoch": 0.705589288494473, "grad_norm": 2.012676477432251, "learning_rate": 1.5497066439411324e-06, "loss": 0.7371, "step": 22660 }, { "epoch": 0.7057449789817842, "grad_norm": 2.3548500537872314, "learning_rate": 1.548887213609099e-06, "loss": 0.7632, "step": 22665 }, { "epoch": 0.7059006694690955, "grad_norm": 2.013436794281006, "learning_rate": 1.548067783277066e-06, "loss": 0.7682, "step": 22670 }, { "epoch": 0.7060563599564067, "grad_norm": 2.3321468830108643, "learning_rate": 1.5472483529450328e-06, "loss": 0.8013, "step": 22675 }, { "epoch": 0.7062120504437179, "grad_norm": 2.0287160873413086, "learning_rate": 1.5464289226129997e-06, "loss": 0.7302, "step": 22680 }, { "epoch": 0.7063677409310292, "grad_norm": 1.9536596536636353, "learning_rate": 1.5456094922809663e-06, "loss": 0.8233, "step": 22685 }, { "epoch": 0.7065234314183403, "grad_norm": 1.9600247144699097, "learning_rate": 1.5447900619489332e-06, "loss": 0.7466, "step": 22690 }, { "epoch": 0.7066791219056515, "grad_norm": 2.5772593021392822, "learning_rate": 1.5439706316169e-06, "loss": 0.7828, "step": 22695 }, { "epoch": 0.7068348123929628, "grad_norm": 2.327056407928467, "learning_rate": 1.5431512012848667e-06, "loss": 0.7896, "step": 22700 }, { "epoch": 0.706990502880274, "grad_norm": 2.0190367698669434, "learning_rate": 1.5423317709528338e-06, "loss": 0.7948, "step": 22705 }, { "epoch": 0.7071461933675852, "grad_norm": 2.0049197673797607, "learning_rate": 1.5415123406208005e-06, "loss": 0.738, "step": 22710 }, { "epoch": 0.7073018838548965, "grad_norm": 2.04587984085083, "learning_rate": 1.5406929102887675e-06, "loss": 0.6832, "step": 22715 }, { "epoch": 0.7074575743422077, "grad_norm": 2.1597018241882324, "learning_rate": 1.5398734799567342e-06, "loss": 0.7826, "step": 22720 }, { "epoch": 0.7076132648295189, "grad_norm": 2.2809786796569824, "learning_rate": 1.539054049624701e-06, "loss": 0.765, "step": 22725 }, { "epoch": 0.7077689553168302, "grad_norm": 1.986648440361023, "learning_rate": 1.5382346192926677e-06, "loss": 0.846, "step": 22730 }, { "epoch": 0.7079246458041414, "grad_norm": 1.8041763305664062, "learning_rate": 1.5374151889606348e-06, "loss": 0.7027, "step": 22735 }, { "epoch": 0.7080803362914526, "grad_norm": 1.7106902599334717, "learning_rate": 1.5365957586286015e-06, "loss": 0.7358, "step": 22740 }, { "epoch": 0.7082360267787638, "grad_norm": 2.1502175331115723, "learning_rate": 1.5357763282965685e-06, "loss": 0.8772, "step": 22745 }, { "epoch": 0.7083917172660751, "grad_norm": 1.976296067237854, "learning_rate": 1.5349568979645352e-06, "loss": 0.7621, "step": 22750 }, { "epoch": 0.7085474077533863, "grad_norm": 1.9826463460922241, "learning_rate": 1.5341374676325019e-06, "loss": 0.7509, "step": 22755 }, { "epoch": 0.7087030982406975, "grad_norm": 1.7985954284667969, "learning_rate": 1.533318037300469e-06, "loss": 0.7876, "step": 22760 }, { "epoch": 0.7088587887280087, "grad_norm": 1.831363320350647, "learning_rate": 1.5324986069684356e-06, "loss": 0.7357, "step": 22765 }, { "epoch": 0.7090144792153199, "grad_norm": 1.8717870712280273, "learning_rate": 1.5316791766364025e-06, "loss": 0.796, "step": 22770 }, { "epoch": 0.7091701697026311, "grad_norm": 2.286304473876953, "learning_rate": 1.5308597463043691e-06, "loss": 0.69, "step": 22775 }, { "epoch": 0.7093258601899424, "grad_norm": 2.2279176712036133, "learning_rate": 1.5300403159723362e-06, "loss": 0.8607, "step": 22780 }, { "epoch": 0.7094815506772536, "grad_norm": 2.2362191677093506, "learning_rate": 1.5292208856403029e-06, "loss": 0.7728, "step": 22785 }, { "epoch": 0.7096372411645648, "grad_norm": 2.153897523880005, "learning_rate": 1.52840145530827e-06, "loss": 0.7834, "step": 22790 }, { "epoch": 0.7097929316518761, "grad_norm": 2.275094747543335, "learning_rate": 1.5275820249762366e-06, "loss": 0.7944, "step": 22795 }, { "epoch": 0.7099486221391873, "grad_norm": 2.258880853652954, "learning_rate": 1.5267625946442035e-06, "loss": 0.7592, "step": 22800 }, { "epoch": 0.7101043126264985, "grad_norm": 1.959037184715271, "learning_rate": 1.5259431643121703e-06, "loss": 0.7387, "step": 22805 }, { "epoch": 0.7102600031138098, "grad_norm": 2.18493390083313, "learning_rate": 1.5251237339801372e-06, "loss": 0.7612, "step": 22810 }, { "epoch": 0.710415693601121, "grad_norm": 1.7390012741088867, "learning_rate": 1.5243043036481039e-06, "loss": 0.6919, "step": 22815 }, { "epoch": 0.7105713840884322, "grad_norm": 2.222139358520508, "learning_rate": 1.523484873316071e-06, "loss": 0.7329, "step": 22820 }, { "epoch": 0.7107270745757435, "grad_norm": 2.2960078716278076, "learning_rate": 1.5226654429840376e-06, "loss": 0.761, "step": 22825 }, { "epoch": 0.7108827650630547, "grad_norm": 1.947702169418335, "learning_rate": 1.5218460126520043e-06, "loss": 0.7625, "step": 22830 }, { "epoch": 0.7110384555503658, "grad_norm": 2.050490379333496, "learning_rate": 1.5210265823199714e-06, "loss": 0.7239, "step": 22835 }, { "epoch": 0.711194146037677, "grad_norm": 2.032142400741577, "learning_rate": 1.520207151987938e-06, "loss": 0.7402, "step": 22840 }, { "epoch": 0.7113498365249883, "grad_norm": 3.1439414024353027, "learning_rate": 1.5193877216559049e-06, "loss": 0.794, "step": 22845 }, { "epoch": 0.7115055270122995, "grad_norm": 1.906662106513977, "learning_rate": 1.5185682913238717e-06, "loss": 0.7685, "step": 22850 }, { "epoch": 0.7116612174996108, "grad_norm": 1.9883944988250732, "learning_rate": 1.5177488609918386e-06, "loss": 0.768, "step": 22855 }, { "epoch": 0.711816907986922, "grad_norm": 2.2934486865997314, "learning_rate": 1.5169294306598053e-06, "loss": 0.6973, "step": 22860 }, { "epoch": 0.7119725984742332, "grad_norm": 1.9637491703033447, "learning_rate": 1.5161100003277724e-06, "loss": 0.7815, "step": 22865 }, { "epoch": 0.7121282889615445, "grad_norm": 1.7595820426940918, "learning_rate": 1.515290569995739e-06, "loss": 0.7288, "step": 22870 }, { "epoch": 0.7122839794488557, "grad_norm": 1.9576926231384277, "learning_rate": 1.514471139663706e-06, "loss": 0.7188, "step": 22875 }, { "epoch": 0.7124396699361669, "grad_norm": 2.2898333072662354, "learning_rate": 1.5136517093316728e-06, "loss": 0.8152, "step": 22880 }, { "epoch": 0.7125953604234782, "grad_norm": 2.219350576400757, "learning_rate": 1.5128322789996396e-06, "loss": 0.7587, "step": 22885 }, { "epoch": 0.7127510509107894, "grad_norm": 1.9146366119384766, "learning_rate": 1.5120128486676065e-06, "loss": 0.7261, "step": 22890 }, { "epoch": 0.7129067413981006, "grad_norm": 2.175062894821167, "learning_rate": 1.5111934183355734e-06, "loss": 0.803, "step": 22895 }, { "epoch": 0.7130624318854119, "grad_norm": 1.9083362817764282, "learning_rate": 1.51037398800354e-06, "loss": 0.8529, "step": 22900 }, { "epoch": 0.713218122372723, "grad_norm": 2.509827136993408, "learning_rate": 1.5095545576715067e-06, "loss": 0.7277, "step": 22905 }, { "epoch": 0.7133738128600342, "grad_norm": 2.025475263595581, "learning_rate": 1.5087351273394738e-06, "loss": 0.7698, "step": 22910 }, { "epoch": 0.7135295033473454, "grad_norm": 1.9501464366912842, "learning_rate": 1.5079156970074404e-06, "loss": 0.7749, "step": 22915 }, { "epoch": 0.7136851938346567, "grad_norm": 1.8455657958984375, "learning_rate": 1.5070962666754075e-06, "loss": 0.7444, "step": 22920 }, { "epoch": 0.7138408843219679, "grad_norm": 2.1000964641571045, "learning_rate": 1.5062768363433742e-06, "loss": 0.7683, "step": 22925 }, { "epoch": 0.7139965748092791, "grad_norm": 2.2846131324768066, "learning_rate": 1.505457406011341e-06, "loss": 0.7861, "step": 22930 }, { "epoch": 0.7141522652965904, "grad_norm": 1.959779977798462, "learning_rate": 1.504637975679308e-06, "loss": 0.6653, "step": 22935 }, { "epoch": 0.7143079557839016, "grad_norm": 4.716142177581787, "learning_rate": 1.5038185453472748e-06, "loss": 0.7754, "step": 22940 }, { "epoch": 0.7144636462712128, "grad_norm": 2.051635503768921, "learning_rate": 1.5029991150152414e-06, "loss": 0.7569, "step": 22945 }, { "epoch": 0.7146193367585241, "grad_norm": 2.2106575965881348, "learning_rate": 1.5021796846832085e-06, "loss": 0.7445, "step": 22950 }, { "epoch": 0.7147750272458353, "grad_norm": 2.1215128898620605, "learning_rate": 1.5013602543511752e-06, "loss": 0.8126, "step": 22955 }, { "epoch": 0.7149307177331465, "grad_norm": 2.17376971244812, "learning_rate": 1.5005408240191422e-06, "loss": 0.8228, "step": 22960 }, { "epoch": 0.7150864082204578, "grad_norm": 2.0919296741485596, "learning_rate": 1.499721393687109e-06, "loss": 0.8227, "step": 22965 }, { "epoch": 0.715242098707769, "grad_norm": 1.958869457244873, "learning_rate": 1.4989019633550756e-06, "loss": 0.7683, "step": 22970 }, { "epoch": 0.7153977891950802, "grad_norm": 2.363881826400757, "learning_rate": 1.4980825330230424e-06, "loss": 0.7992, "step": 22975 }, { "epoch": 0.7155534796823914, "grad_norm": 1.9550174474716187, "learning_rate": 1.4972631026910093e-06, "loss": 0.7101, "step": 22980 }, { "epoch": 0.7157091701697026, "grad_norm": 2.1948862075805664, "learning_rate": 1.4964436723589762e-06, "loss": 0.7108, "step": 22985 }, { "epoch": 0.7158648606570138, "grad_norm": 2.5427472591400146, "learning_rate": 1.4956242420269428e-06, "loss": 0.8605, "step": 22990 }, { "epoch": 0.7160205511443251, "grad_norm": 2.209642171859741, "learning_rate": 1.49480481169491e-06, "loss": 0.8112, "step": 22995 }, { "epoch": 0.7161762416316363, "grad_norm": 2.1253111362457275, "learning_rate": 1.4939853813628766e-06, "loss": 0.7699, "step": 23000 }, { "epoch": 0.7163319321189475, "grad_norm": 1.9660248756408691, "learning_rate": 1.4931659510308436e-06, "loss": 0.7417, "step": 23005 }, { "epoch": 0.7164876226062588, "grad_norm": 2.407477378845215, "learning_rate": 1.4923465206988103e-06, "loss": 0.6714, "step": 23010 }, { "epoch": 0.71664331309357, "grad_norm": 2.0113415718078613, "learning_rate": 1.4915270903667772e-06, "loss": 0.7908, "step": 23015 }, { "epoch": 0.7167990035808812, "grad_norm": 1.8280099630355835, "learning_rate": 1.4907076600347438e-06, "loss": 0.767, "step": 23020 }, { "epoch": 0.7169546940681925, "grad_norm": 2.23718523979187, "learning_rate": 1.489888229702711e-06, "loss": 0.7399, "step": 23025 }, { "epoch": 0.7171103845555037, "grad_norm": 2.099968194961548, "learning_rate": 1.4890687993706776e-06, "loss": 0.6794, "step": 23030 }, { "epoch": 0.7172660750428149, "grad_norm": 2.463977336883545, "learning_rate": 1.4882493690386447e-06, "loss": 0.7857, "step": 23035 }, { "epoch": 0.7174217655301262, "grad_norm": 2.1412835121154785, "learning_rate": 1.4874299387066113e-06, "loss": 0.7854, "step": 23040 }, { "epoch": 0.7175774560174374, "grad_norm": 1.903649926185608, "learning_rate": 1.486610508374578e-06, "loss": 0.7399, "step": 23045 }, { "epoch": 0.7177331465047485, "grad_norm": 2.044362783432007, "learning_rate": 1.485791078042545e-06, "loss": 0.7565, "step": 23050 }, { "epoch": 0.7178888369920597, "grad_norm": 1.8406767845153809, "learning_rate": 1.4849716477105117e-06, "loss": 0.7759, "step": 23055 }, { "epoch": 0.718044527479371, "grad_norm": 2.494641065597534, "learning_rate": 1.4841522173784786e-06, "loss": 0.8568, "step": 23060 }, { "epoch": 0.7182002179666822, "grad_norm": 2.2922651767730713, "learning_rate": 1.4833327870464454e-06, "loss": 0.7607, "step": 23065 }, { "epoch": 0.7183559084539934, "grad_norm": 2.2379636764526367, "learning_rate": 1.4825133567144123e-06, "loss": 0.7583, "step": 23070 }, { "epoch": 0.7185115989413047, "grad_norm": 2.0212857723236084, "learning_rate": 1.481693926382379e-06, "loss": 0.7862, "step": 23075 }, { "epoch": 0.7186672894286159, "grad_norm": 2.1508595943450928, "learning_rate": 1.480874496050346e-06, "loss": 0.7634, "step": 23080 }, { "epoch": 0.7188229799159271, "grad_norm": 2.593498706817627, "learning_rate": 1.4800550657183127e-06, "loss": 0.7783, "step": 23085 }, { "epoch": 0.7189786704032384, "grad_norm": 2.3670895099639893, "learning_rate": 1.4792356353862796e-06, "loss": 0.7912, "step": 23090 }, { "epoch": 0.7191343608905496, "grad_norm": 2.593085289001465, "learning_rate": 1.4784162050542464e-06, "loss": 0.6945, "step": 23095 }, { "epoch": 0.7192900513778608, "grad_norm": 1.9727157354354858, "learning_rate": 1.4775967747222133e-06, "loss": 0.7979, "step": 23100 }, { "epoch": 0.7194457418651721, "grad_norm": 2.1127705574035645, "learning_rate": 1.47677734439018e-06, "loss": 0.7387, "step": 23105 }, { "epoch": 0.7196014323524833, "grad_norm": 2.2584359645843506, "learning_rate": 1.475957914058147e-06, "loss": 0.7544, "step": 23110 }, { "epoch": 0.7197571228397945, "grad_norm": 2.288999557495117, "learning_rate": 1.4751384837261137e-06, "loss": 0.7878, "step": 23115 }, { "epoch": 0.7199128133271057, "grad_norm": 2.5810182094573975, "learning_rate": 1.4743190533940804e-06, "loss": 0.7405, "step": 23120 }, { "epoch": 0.7200685038144169, "grad_norm": 2.002899169921875, "learning_rate": 1.4734996230620475e-06, "loss": 0.7808, "step": 23125 }, { "epoch": 0.7202241943017281, "grad_norm": 2.2438652515411377, "learning_rate": 1.4726801927300141e-06, "loss": 0.7693, "step": 23130 }, { "epoch": 0.7203798847890394, "grad_norm": 2.2800910472869873, "learning_rate": 1.4718607623979812e-06, "loss": 0.8268, "step": 23135 }, { "epoch": 0.7205355752763506, "grad_norm": 3.367141008377075, "learning_rate": 1.4710413320659479e-06, "loss": 0.8078, "step": 23140 }, { "epoch": 0.7206912657636618, "grad_norm": 2.0718913078308105, "learning_rate": 1.4702219017339147e-06, "loss": 0.8163, "step": 23145 }, { "epoch": 0.7208469562509731, "grad_norm": 2.236626148223877, "learning_rate": 1.4694024714018814e-06, "loss": 0.7987, "step": 23150 }, { "epoch": 0.7210026467382843, "grad_norm": 2.2369418144226074, "learning_rate": 1.4685830410698485e-06, "loss": 0.7328, "step": 23155 }, { "epoch": 0.7211583372255955, "grad_norm": 2.0830554962158203, "learning_rate": 1.4677636107378151e-06, "loss": 0.7891, "step": 23160 }, { "epoch": 0.7213140277129068, "grad_norm": 1.9829784631729126, "learning_rate": 1.4669441804057822e-06, "loss": 0.7352, "step": 23165 }, { "epoch": 0.721469718200218, "grad_norm": 2.4272334575653076, "learning_rate": 1.4661247500737489e-06, "loss": 0.8532, "step": 23170 }, { "epoch": 0.7216254086875292, "grad_norm": 1.9259552955627441, "learning_rate": 1.4653053197417157e-06, "loss": 0.7295, "step": 23175 }, { "epoch": 0.7217810991748405, "grad_norm": 2.086594343185425, "learning_rate": 1.4644858894096826e-06, "loss": 0.7747, "step": 23180 }, { "epoch": 0.7219367896621517, "grad_norm": 2.1909477710723877, "learning_rate": 1.4636664590776493e-06, "loss": 0.743, "step": 23185 }, { "epoch": 0.7220924801494629, "grad_norm": 2.081031560897827, "learning_rate": 1.4628470287456161e-06, "loss": 0.6958, "step": 23190 }, { "epoch": 0.722248170636774, "grad_norm": 2.1245760917663574, "learning_rate": 1.4620275984135828e-06, "loss": 0.6936, "step": 23195 }, { "epoch": 0.7224038611240853, "grad_norm": 1.89699387550354, "learning_rate": 1.4612081680815499e-06, "loss": 0.8196, "step": 23200 }, { "epoch": 0.7225595516113965, "grad_norm": 2.472557544708252, "learning_rate": 1.4603887377495165e-06, "loss": 0.75, "step": 23205 }, { "epoch": 0.7227152420987077, "grad_norm": 2.1417369842529297, "learning_rate": 1.4595693074174836e-06, "loss": 0.7645, "step": 23210 }, { "epoch": 0.722870932586019, "grad_norm": 2.5523219108581543, "learning_rate": 1.4587498770854503e-06, "loss": 0.7849, "step": 23215 }, { "epoch": 0.7230266230733302, "grad_norm": 2.3507208824157715, "learning_rate": 1.4579304467534171e-06, "loss": 0.732, "step": 23220 }, { "epoch": 0.7231823135606414, "grad_norm": 2.132498264312744, "learning_rate": 1.457111016421384e-06, "loss": 0.7242, "step": 23225 }, { "epoch": 0.7233380040479527, "grad_norm": 2.3660128116607666, "learning_rate": 1.4562915860893509e-06, "loss": 0.7718, "step": 23230 }, { "epoch": 0.7234936945352639, "grad_norm": 2.2995028495788574, "learning_rate": 1.4554721557573175e-06, "loss": 0.763, "step": 23235 }, { "epoch": 0.7236493850225751, "grad_norm": 2.0800392627716064, "learning_rate": 1.4546527254252846e-06, "loss": 0.7978, "step": 23240 }, { "epoch": 0.7238050755098864, "grad_norm": 2.6101338863372803, "learning_rate": 1.4538332950932513e-06, "loss": 0.7984, "step": 23245 }, { "epoch": 0.7239607659971976, "grad_norm": 2.0415351390838623, "learning_rate": 1.4530138647612183e-06, "loss": 0.777, "step": 23250 }, { "epoch": 0.7241164564845088, "grad_norm": 2.127861261367798, "learning_rate": 1.452194434429185e-06, "loss": 0.7639, "step": 23255 }, { "epoch": 0.7242721469718201, "grad_norm": 1.7568347454071045, "learning_rate": 1.4513750040971517e-06, "loss": 0.763, "step": 23260 }, { "epoch": 0.7244278374591312, "grad_norm": 2.248600959777832, "learning_rate": 1.4505555737651185e-06, "loss": 0.7719, "step": 23265 }, { "epoch": 0.7245835279464424, "grad_norm": 1.9461621046066284, "learning_rate": 1.4497361434330854e-06, "loss": 0.7657, "step": 23270 }, { "epoch": 0.7247392184337537, "grad_norm": 2.195108652114868, "learning_rate": 1.4489167131010523e-06, "loss": 0.7618, "step": 23275 }, { "epoch": 0.7248949089210649, "grad_norm": 1.8599978685379028, "learning_rate": 1.448097282769019e-06, "loss": 0.8043, "step": 23280 }, { "epoch": 0.7250505994083761, "grad_norm": 1.8544600009918213, "learning_rate": 1.447277852436986e-06, "loss": 0.7986, "step": 23285 }, { "epoch": 0.7252062898956874, "grad_norm": 2.294715642929077, "learning_rate": 1.4464584221049527e-06, "loss": 0.7669, "step": 23290 }, { "epoch": 0.7253619803829986, "grad_norm": 2.004873037338257, "learning_rate": 1.4456389917729197e-06, "loss": 0.7474, "step": 23295 }, { "epoch": 0.7255176708703098, "grad_norm": 1.9475913047790527, "learning_rate": 1.4448195614408864e-06, "loss": 0.7418, "step": 23300 }, { "epoch": 0.7256733613576211, "grad_norm": 1.8865145444869995, "learning_rate": 1.4440001311088533e-06, "loss": 0.697, "step": 23305 }, { "epoch": 0.7258290518449323, "grad_norm": 2.540836811065674, "learning_rate": 1.4431807007768201e-06, "loss": 0.7769, "step": 23310 }, { "epoch": 0.7259847423322435, "grad_norm": 1.8977853059768677, "learning_rate": 1.442361270444787e-06, "loss": 0.7964, "step": 23315 }, { "epoch": 0.7261404328195548, "grad_norm": 2.8590197563171387, "learning_rate": 1.4415418401127537e-06, "loss": 0.7886, "step": 23320 }, { "epoch": 0.726296123306866, "grad_norm": 2.042191505432129, "learning_rate": 1.4407224097807208e-06, "loss": 0.7597, "step": 23325 }, { "epoch": 0.7264518137941772, "grad_norm": 2.0453436374664307, "learning_rate": 1.4399029794486874e-06, "loss": 0.8454, "step": 23330 }, { "epoch": 0.7266075042814883, "grad_norm": 2.618574857711792, "learning_rate": 1.439083549116654e-06, "loss": 0.7911, "step": 23335 }, { "epoch": 0.7267631947687996, "grad_norm": 2.2858173847198486, "learning_rate": 1.4382641187846211e-06, "loss": 0.7377, "step": 23340 }, { "epoch": 0.7269188852561108, "grad_norm": 2.1062726974487305, "learning_rate": 1.4374446884525878e-06, "loss": 0.8194, "step": 23345 }, { "epoch": 0.727074575743422, "grad_norm": 2.3617939949035645, "learning_rate": 1.4366252581205547e-06, "loss": 0.6904, "step": 23350 }, { "epoch": 0.7272302662307333, "grad_norm": 2.0813207626342773, "learning_rate": 1.4358058277885215e-06, "loss": 0.7935, "step": 23355 }, { "epoch": 0.7273859567180445, "grad_norm": 1.9991111755371094, "learning_rate": 1.4349863974564884e-06, "loss": 0.7722, "step": 23360 }, { "epoch": 0.7275416472053557, "grad_norm": 1.9705467224121094, "learning_rate": 1.434166967124455e-06, "loss": 0.7422, "step": 23365 }, { "epoch": 0.727697337692667, "grad_norm": 2.2450437545776367, "learning_rate": 1.4333475367924222e-06, "loss": 0.7831, "step": 23370 }, { "epoch": 0.7278530281799782, "grad_norm": 2.1938815116882324, "learning_rate": 1.4325281064603888e-06, "loss": 0.8087, "step": 23375 }, { "epoch": 0.7280087186672894, "grad_norm": 1.8581569194793701, "learning_rate": 1.4317086761283559e-06, "loss": 0.6952, "step": 23380 }, { "epoch": 0.7281644091546007, "grad_norm": 2.276519536972046, "learning_rate": 1.4308892457963226e-06, "loss": 0.7295, "step": 23385 }, { "epoch": 0.7283200996419119, "grad_norm": 2.01389217376709, "learning_rate": 1.4300698154642894e-06, "loss": 0.839, "step": 23390 }, { "epoch": 0.7284757901292231, "grad_norm": 1.867721438407898, "learning_rate": 1.429250385132256e-06, "loss": 0.7548, "step": 23395 }, { "epoch": 0.7286314806165344, "grad_norm": 2.2123498916625977, "learning_rate": 1.428430954800223e-06, "loss": 0.6664, "step": 23400 }, { "epoch": 0.7287871711038456, "grad_norm": 1.9997905492782593, "learning_rate": 1.4276115244681898e-06, "loss": 0.7688, "step": 23405 }, { "epoch": 0.7289428615911567, "grad_norm": 2.0491747856140137, "learning_rate": 1.4267920941361565e-06, "loss": 0.799, "step": 23410 }, { "epoch": 0.729098552078468, "grad_norm": 2.6973257064819336, "learning_rate": 1.4259726638041236e-06, "loss": 0.7568, "step": 23415 }, { "epoch": 0.7292542425657792, "grad_norm": 2.149735689163208, "learning_rate": 1.4251532334720902e-06, "loss": 0.7459, "step": 23420 }, { "epoch": 0.7294099330530904, "grad_norm": 2.127955675125122, "learning_rate": 1.4243338031400573e-06, "loss": 0.7651, "step": 23425 }, { "epoch": 0.7295656235404017, "grad_norm": 1.8852124214172363, "learning_rate": 1.423514372808024e-06, "loss": 0.7522, "step": 23430 }, { "epoch": 0.7297213140277129, "grad_norm": 2.1648354530334473, "learning_rate": 1.4226949424759908e-06, "loss": 0.768, "step": 23435 }, { "epoch": 0.7298770045150241, "grad_norm": 2.146517515182495, "learning_rate": 1.4218755121439575e-06, "loss": 0.7398, "step": 23440 }, { "epoch": 0.7300326950023354, "grad_norm": 2.8255186080932617, "learning_rate": 1.4210560818119246e-06, "loss": 0.8276, "step": 23445 }, { "epoch": 0.7301883854896466, "grad_norm": 2.8724286556243896, "learning_rate": 1.4202366514798912e-06, "loss": 0.7484, "step": 23450 }, { "epoch": 0.7303440759769578, "grad_norm": 2.221461296081543, "learning_rate": 1.4194172211478583e-06, "loss": 0.7865, "step": 23455 }, { "epoch": 0.7304997664642691, "grad_norm": 2.192796230316162, "learning_rate": 1.418597790815825e-06, "loss": 0.7343, "step": 23460 }, { "epoch": 0.7306554569515803, "grad_norm": 2.1087779998779297, "learning_rate": 1.4177783604837918e-06, "loss": 0.7434, "step": 23465 }, { "epoch": 0.7308111474388915, "grad_norm": 1.9960298538208008, "learning_rate": 1.4169589301517587e-06, "loss": 0.7438, "step": 23470 }, { "epoch": 0.7309668379262028, "grad_norm": 2.5724363327026367, "learning_rate": 1.4161394998197254e-06, "loss": 0.6972, "step": 23475 }, { "epoch": 0.7311225284135139, "grad_norm": 2.522322177886963, "learning_rate": 1.4153200694876922e-06, "loss": 0.7334, "step": 23480 }, { "epoch": 0.7312782189008251, "grad_norm": 2.3749654293060303, "learning_rate": 1.414500639155659e-06, "loss": 0.7804, "step": 23485 }, { "epoch": 0.7314339093881363, "grad_norm": 2.0705370903015137, "learning_rate": 1.413681208823626e-06, "loss": 0.7627, "step": 23490 }, { "epoch": 0.7315895998754476, "grad_norm": 2.041713237762451, "learning_rate": 1.4128617784915926e-06, "loss": 0.7018, "step": 23495 }, { "epoch": 0.7317452903627588, "grad_norm": 2.0800273418426514, "learning_rate": 1.4120423481595597e-06, "loss": 0.6984, "step": 23500 }, { "epoch": 0.73190098085007, "grad_norm": 2.0245299339294434, "learning_rate": 1.4112229178275264e-06, "loss": 0.8236, "step": 23505 }, { "epoch": 0.7320566713373813, "grad_norm": 2.9616639614105225, "learning_rate": 1.4104034874954932e-06, "loss": 0.8094, "step": 23510 }, { "epoch": 0.7322123618246925, "grad_norm": 2.0625412464141846, "learning_rate": 1.40958405716346e-06, "loss": 0.8278, "step": 23515 }, { "epoch": 0.7323680523120037, "grad_norm": 2.597604751586914, "learning_rate": 1.408764626831427e-06, "loss": 0.8229, "step": 23520 }, { "epoch": 0.732523742799315, "grad_norm": 1.9627217054367065, "learning_rate": 1.4079451964993936e-06, "loss": 0.7287, "step": 23525 }, { "epoch": 0.7326794332866262, "grad_norm": 2.6869895458221436, "learning_rate": 1.4071257661673607e-06, "loss": 0.7755, "step": 23530 }, { "epoch": 0.7328351237739374, "grad_norm": 2.268056869506836, "learning_rate": 1.4063063358353274e-06, "loss": 0.8534, "step": 23535 }, { "epoch": 0.7329908142612487, "grad_norm": 2.349794864654541, "learning_rate": 1.4054869055032944e-06, "loss": 0.8094, "step": 23540 }, { "epoch": 0.7331465047485599, "grad_norm": 2.813750743865967, "learning_rate": 1.404667475171261e-06, "loss": 0.8552, "step": 23545 }, { "epoch": 0.733302195235871, "grad_norm": 2.1028358936309814, "learning_rate": 1.4038480448392278e-06, "loss": 0.7602, "step": 23550 }, { "epoch": 0.7334578857231823, "grad_norm": 2.702423334121704, "learning_rate": 1.4030286145071948e-06, "loss": 0.76, "step": 23555 }, { "epoch": 0.7336135762104935, "grad_norm": 1.9875847101211548, "learning_rate": 1.4022091841751615e-06, "loss": 0.7436, "step": 23560 }, { "epoch": 0.7337692666978047, "grad_norm": 2.0769336223602295, "learning_rate": 1.4013897538431284e-06, "loss": 0.7665, "step": 23565 }, { "epoch": 0.733924957185116, "grad_norm": 2.18072772026062, "learning_rate": 1.400570323511095e-06, "loss": 0.7997, "step": 23570 }, { "epoch": 0.7340806476724272, "grad_norm": 2.4999232292175293, "learning_rate": 1.3997508931790621e-06, "loss": 0.8082, "step": 23575 }, { "epoch": 0.7342363381597384, "grad_norm": 1.7715930938720703, "learning_rate": 1.3989314628470288e-06, "loss": 0.8016, "step": 23580 }, { "epoch": 0.7343920286470497, "grad_norm": 2.3119239807128906, "learning_rate": 1.3981120325149958e-06, "loss": 0.7882, "step": 23585 }, { "epoch": 0.7345477191343609, "grad_norm": 2.176257848739624, "learning_rate": 1.3972926021829625e-06, "loss": 0.8297, "step": 23590 }, { "epoch": 0.7347034096216721, "grad_norm": 2.2803843021392822, "learning_rate": 1.3964731718509294e-06, "loss": 0.7644, "step": 23595 }, { "epoch": 0.7348591001089834, "grad_norm": 2.190598726272583, "learning_rate": 1.3956537415188962e-06, "loss": 0.8119, "step": 23600 }, { "epoch": 0.7350147905962946, "grad_norm": 2.5574588775634766, "learning_rate": 1.3948343111868631e-06, "loss": 0.7389, "step": 23605 }, { "epoch": 0.7351704810836058, "grad_norm": 1.9881041049957275, "learning_rate": 1.3940148808548298e-06, "loss": 0.7657, "step": 23610 }, { "epoch": 0.7353261715709171, "grad_norm": 2.496886968612671, "learning_rate": 1.3931954505227964e-06, "loss": 0.7743, "step": 23615 }, { "epoch": 0.7354818620582283, "grad_norm": 2.299560546875, "learning_rate": 1.3923760201907635e-06, "loss": 0.7852, "step": 23620 }, { "epoch": 0.7356375525455394, "grad_norm": 2.016683578491211, "learning_rate": 1.3915565898587302e-06, "loss": 0.7639, "step": 23625 }, { "epoch": 0.7357932430328507, "grad_norm": 2.069159507751465, "learning_rate": 1.3907371595266973e-06, "loss": 0.775, "step": 23630 }, { "epoch": 0.7359489335201619, "grad_norm": 2.268223762512207, "learning_rate": 1.389917729194664e-06, "loss": 0.7875, "step": 23635 }, { "epoch": 0.7361046240074731, "grad_norm": 2.694751262664795, "learning_rate": 1.3890982988626308e-06, "loss": 0.7596, "step": 23640 }, { "epoch": 0.7362603144947844, "grad_norm": 1.9890743494033813, "learning_rate": 1.3882788685305976e-06, "loss": 0.7084, "step": 23645 }, { "epoch": 0.7364160049820956, "grad_norm": 2.0869994163513184, "learning_rate": 1.3874594381985645e-06, "loss": 0.7413, "step": 23650 }, { "epoch": 0.7365716954694068, "grad_norm": 2.217127799987793, "learning_rate": 1.3866400078665312e-06, "loss": 0.7957, "step": 23655 }, { "epoch": 0.736727385956718, "grad_norm": 2.4125235080718994, "learning_rate": 1.3858205775344983e-06, "loss": 0.7554, "step": 23660 }, { "epoch": 0.7368830764440293, "grad_norm": 2.29229736328125, "learning_rate": 1.385001147202465e-06, "loss": 0.7982, "step": 23665 }, { "epoch": 0.7370387669313405, "grad_norm": 1.8904507160186768, "learning_rate": 1.384181716870432e-06, "loss": 0.7241, "step": 23670 }, { "epoch": 0.7371944574186517, "grad_norm": 1.9993422031402588, "learning_rate": 1.3833622865383987e-06, "loss": 0.7122, "step": 23675 }, { "epoch": 0.737350147905963, "grad_norm": 1.9087220430374146, "learning_rate": 1.3825428562063655e-06, "loss": 0.6953, "step": 23680 }, { "epoch": 0.7375058383932742, "grad_norm": 2.0579380989074707, "learning_rate": 1.3817234258743322e-06, "loss": 0.7677, "step": 23685 }, { "epoch": 0.7376615288805854, "grad_norm": 2.087205171585083, "learning_rate": 1.380903995542299e-06, "loss": 0.7904, "step": 23690 }, { "epoch": 0.7378172193678966, "grad_norm": 2.2889695167541504, "learning_rate": 1.380084565210266e-06, "loss": 0.7524, "step": 23695 }, { "epoch": 0.7379729098552078, "grad_norm": 2.7318930625915527, "learning_rate": 1.3792651348782326e-06, "loss": 0.7842, "step": 23700 }, { "epoch": 0.738128600342519, "grad_norm": 2.630206346511841, "learning_rate": 1.3784457045461997e-06, "loss": 0.7849, "step": 23705 }, { "epoch": 0.7382842908298303, "grad_norm": 2.018859624862671, "learning_rate": 1.3776262742141663e-06, "loss": 0.7449, "step": 23710 }, { "epoch": 0.7384399813171415, "grad_norm": 2.468822956085205, "learning_rate": 1.3768068438821334e-06, "loss": 0.7943, "step": 23715 }, { "epoch": 0.7385956718044527, "grad_norm": 2.5430185794830322, "learning_rate": 1.3759874135501e-06, "loss": 0.7867, "step": 23720 }, { "epoch": 0.738751362291764, "grad_norm": 1.981002688407898, "learning_rate": 1.375167983218067e-06, "loss": 0.7589, "step": 23725 }, { "epoch": 0.7389070527790752, "grad_norm": 2.318878650665283, "learning_rate": 1.3743485528860338e-06, "loss": 0.7471, "step": 23730 }, { "epoch": 0.7390627432663864, "grad_norm": 2.200309991836548, "learning_rate": 1.3735291225540007e-06, "loss": 0.7498, "step": 23735 }, { "epoch": 0.7392184337536977, "grad_norm": 2.006500005722046, "learning_rate": 1.3727096922219673e-06, "loss": 0.874, "step": 23740 }, { "epoch": 0.7393741242410089, "grad_norm": 1.9237982034683228, "learning_rate": 1.3718902618899344e-06, "loss": 0.7396, "step": 23745 }, { "epoch": 0.7395298147283201, "grad_norm": 1.8448625802993774, "learning_rate": 1.371070831557901e-06, "loss": 0.7707, "step": 23750 }, { "epoch": 0.7396855052156314, "grad_norm": 2.0543291568756104, "learning_rate": 1.370251401225868e-06, "loss": 0.7936, "step": 23755 }, { "epoch": 0.7398411957029426, "grad_norm": 2.2434325218200684, "learning_rate": 1.3694319708938348e-06, "loss": 0.7626, "step": 23760 }, { "epoch": 0.7399968861902537, "grad_norm": 2.092104911804199, "learning_rate": 1.3686125405618015e-06, "loss": 0.7867, "step": 23765 }, { "epoch": 0.740152576677565, "grad_norm": 2.246424913406372, "learning_rate": 1.3677931102297683e-06, "loss": 0.7621, "step": 23770 }, { "epoch": 0.7403082671648762, "grad_norm": 2.1196296215057373, "learning_rate": 1.3669736798977352e-06, "loss": 0.8137, "step": 23775 }, { "epoch": 0.7404639576521874, "grad_norm": 1.6469721794128418, "learning_rate": 1.366154249565702e-06, "loss": 0.8184, "step": 23780 }, { "epoch": 0.7406196481394987, "grad_norm": 2.348670721054077, "learning_rate": 1.3653348192336687e-06, "loss": 0.8217, "step": 23785 }, { "epoch": 0.7407753386268099, "grad_norm": 1.7347056865692139, "learning_rate": 1.3645153889016358e-06, "loss": 0.7807, "step": 23790 }, { "epoch": 0.7409310291141211, "grad_norm": 2.086909294128418, "learning_rate": 1.3636959585696025e-06, "loss": 0.778, "step": 23795 }, { "epoch": 0.7410867196014324, "grad_norm": 2.5548691749572754, "learning_rate": 1.3628765282375695e-06, "loss": 0.731, "step": 23800 }, { "epoch": 0.7412424100887436, "grad_norm": 1.9755239486694336, "learning_rate": 1.3620570979055362e-06, "loss": 0.8735, "step": 23805 }, { "epoch": 0.7413981005760548, "grad_norm": 2.435182809829712, "learning_rate": 1.361237667573503e-06, "loss": 0.7576, "step": 23810 }, { "epoch": 0.741553791063366, "grad_norm": 2.1040220260620117, "learning_rate": 1.3604182372414697e-06, "loss": 0.7965, "step": 23815 }, { "epoch": 0.7417094815506773, "grad_norm": 1.8825864791870117, "learning_rate": 1.3595988069094368e-06, "loss": 0.7574, "step": 23820 }, { "epoch": 0.7418651720379885, "grad_norm": 2.074542999267578, "learning_rate": 1.3587793765774035e-06, "loss": 0.8137, "step": 23825 }, { "epoch": 0.7420208625252998, "grad_norm": 2.593616247177124, "learning_rate": 1.3579599462453701e-06, "loss": 0.7328, "step": 23830 }, { "epoch": 0.742176553012611, "grad_norm": 1.8467086553573608, "learning_rate": 1.3571405159133372e-06, "loss": 0.7269, "step": 23835 }, { "epoch": 0.7423322434999221, "grad_norm": 2.119600772857666, "learning_rate": 1.3563210855813039e-06, "loss": 0.7063, "step": 23840 }, { "epoch": 0.7424879339872333, "grad_norm": 2.3114047050476074, "learning_rate": 1.355501655249271e-06, "loss": 0.7645, "step": 23845 }, { "epoch": 0.7426436244745446, "grad_norm": 1.8865644931793213, "learning_rate": 1.3546822249172376e-06, "loss": 0.739, "step": 23850 }, { "epoch": 0.7427993149618558, "grad_norm": 2.35172176361084, "learning_rate": 1.3538627945852045e-06, "loss": 0.865, "step": 23855 }, { "epoch": 0.742955005449167, "grad_norm": 2.3330271244049072, "learning_rate": 1.3530433642531711e-06, "loss": 0.7739, "step": 23860 }, { "epoch": 0.7431106959364783, "grad_norm": 2.0058724880218506, "learning_rate": 1.3522239339211382e-06, "loss": 0.8056, "step": 23865 }, { "epoch": 0.7432663864237895, "grad_norm": 1.8171463012695312, "learning_rate": 1.3514045035891049e-06, "loss": 0.789, "step": 23870 }, { "epoch": 0.7434220769111007, "grad_norm": 2.366041421890259, "learning_rate": 1.350585073257072e-06, "loss": 0.8636, "step": 23875 }, { "epoch": 0.743577767398412, "grad_norm": 2.4156792163848877, "learning_rate": 1.3497656429250386e-06, "loss": 0.8669, "step": 23880 }, { "epoch": 0.7437334578857232, "grad_norm": 1.9660730361938477, "learning_rate": 1.3489462125930055e-06, "loss": 0.7824, "step": 23885 }, { "epoch": 0.7438891483730344, "grad_norm": 2.105194568634033, "learning_rate": 1.3481267822609723e-06, "loss": 0.8369, "step": 23890 }, { "epoch": 0.7440448388603457, "grad_norm": 2.624530792236328, "learning_rate": 1.3473073519289392e-06, "loss": 0.7792, "step": 23895 }, { "epoch": 0.7442005293476569, "grad_norm": 1.9294768571853638, "learning_rate": 1.3464879215969059e-06, "loss": 0.742, "step": 23900 }, { "epoch": 0.7443562198349681, "grad_norm": 1.826426386833191, "learning_rate": 1.3456684912648727e-06, "loss": 0.7145, "step": 23905 }, { "epoch": 0.7445119103222793, "grad_norm": 1.8949620723724365, "learning_rate": 1.3448490609328396e-06, "loss": 0.7547, "step": 23910 }, { "epoch": 0.7446676008095905, "grad_norm": 2.351224184036255, "learning_rate": 1.3440296306008063e-06, "loss": 0.8197, "step": 23915 }, { "epoch": 0.7448232912969017, "grad_norm": 2.0270936489105225, "learning_rate": 1.3432102002687734e-06, "loss": 0.7828, "step": 23920 }, { "epoch": 0.744978981784213, "grad_norm": 1.9846470355987549, "learning_rate": 1.34239076993674e-06, "loss": 0.6851, "step": 23925 }, { "epoch": 0.7451346722715242, "grad_norm": 2.6257309913635254, "learning_rate": 1.3415713396047069e-06, "loss": 0.7422, "step": 23930 }, { "epoch": 0.7452903627588354, "grad_norm": 2.2041544914245605, "learning_rate": 1.3407519092726738e-06, "loss": 0.7833, "step": 23935 }, { "epoch": 0.7454460532461467, "grad_norm": 2.1402034759521484, "learning_rate": 1.3399324789406406e-06, "loss": 0.8306, "step": 23940 }, { "epoch": 0.7456017437334579, "grad_norm": 1.9869707822799683, "learning_rate": 1.3391130486086073e-06, "loss": 0.8447, "step": 23945 }, { "epoch": 0.7457574342207691, "grad_norm": 1.9826719760894775, "learning_rate": 1.3382936182765744e-06, "loss": 0.8001, "step": 23950 }, { "epoch": 0.7459131247080804, "grad_norm": 2.1617469787597656, "learning_rate": 1.337474187944541e-06, "loss": 0.7477, "step": 23955 }, { "epoch": 0.7460688151953916, "grad_norm": 1.8092095851898193, "learning_rate": 1.336654757612508e-06, "loss": 0.7721, "step": 23960 }, { "epoch": 0.7462245056827028, "grad_norm": 2.306631565093994, "learning_rate": 1.3358353272804748e-06, "loss": 0.7675, "step": 23965 }, { "epoch": 0.746380196170014, "grad_norm": 1.9366158246994019, "learning_rate": 1.3350158969484414e-06, "loss": 0.7568, "step": 23970 }, { "epoch": 0.7465358866573253, "grad_norm": 2.084508180618286, "learning_rate": 1.3341964666164085e-06, "loss": 0.8125, "step": 23975 }, { "epoch": 0.7466915771446364, "grad_norm": 1.9624346494674683, "learning_rate": 1.3333770362843752e-06, "loss": 0.7749, "step": 23980 }, { "epoch": 0.7468472676319476, "grad_norm": 2.307450294494629, "learning_rate": 1.332557605952342e-06, "loss": 0.769, "step": 23985 }, { "epoch": 0.7470029581192589, "grad_norm": 2.610717296600342, "learning_rate": 1.3317381756203087e-06, "loss": 0.7947, "step": 23990 }, { "epoch": 0.7471586486065701, "grad_norm": 2.268523693084717, "learning_rate": 1.3309187452882758e-06, "loss": 0.7299, "step": 23995 }, { "epoch": 0.7473143390938813, "grad_norm": 1.996543526649475, "learning_rate": 1.3300993149562424e-06, "loss": 0.8351, "step": 24000 }, { "epoch": 0.7474700295811926, "grad_norm": 2.149266242980957, "learning_rate": 1.3292798846242095e-06, "loss": 0.7949, "step": 24005 }, { "epoch": 0.7476257200685038, "grad_norm": 1.9942280054092407, "learning_rate": 1.3284604542921762e-06, "loss": 0.7529, "step": 24010 }, { "epoch": 0.747781410555815, "grad_norm": 2.161073923110962, "learning_rate": 1.327641023960143e-06, "loss": 0.625, "step": 24015 }, { "epoch": 0.7479371010431263, "grad_norm": 2.116220235824585, "learning_rate": 1.32682159362811e-06, "loss": 0.7201, "step": 24020 }, { "epoch": 0.7480927915304375, "grad_norm": 1.8621883392333984, "learning_rate": 1.3260021632960768e-06, "loss": 0.8273, "step": 24025 }, { "epoch": 0.7482484820177487, "grad_norm": 1.785091757774353, "learning_rate": 1.3251827329640434e-06, "loss": 0.7362, "step": 24030 }, { "epoch": 0.74840417250506, "grad_norm": 2.45954966545105, "learning_rate": 1.3243633026320105e-06, "loss": 0.7734, "step": 24035 }, { "epoch": 0.7485598629923712, "grad_norm": 2.188462018966675, "learning_rate": 1.3235438722999772e-06, "loss": 0.7684, "step": 24040 }, { "epoch": 0.7487155534796824, "grad_norm": 2.225998878479004, "learning_rate": 1.3227244419679438e-06, "loss": 0.7121, "step": 24045 }, { "epoch": 0.7488712439669937, "grad_norm": 2.605164051055908, "learning_rate": 1.321905011635911e-06, "loss": 0.7442, "step": 24050 }, { "epoch": 0.7490269344543048, "grad_norm": 3.040111780166626, "learning_rate": 1.3210855813038776e-06, "loss": 0.7702, "step": 24055 }, { "epoch": 0.749182624941616, "grad_norm": 2.4901907444000244, "learning_rate": 1.3202661509718444e-06, "loss": 0.8032, "step": 24060 }, { "epoch": 0.7493383154289273, "grad_norm": 2.3155484199523926, "learning_rate": 1.3194467206398113e-06, "loss": 0.8295, "step": 24065 }, { "epoch": 0.7494940059162385, "grad_norm": 2.386164665222168, "learning_rate": 1.3186272903077782e-06, "loss": 0.779, "step": 24070 }, { "epoch": 0.7496496964035497, "grad_norm": 2.35429048538208, "learning_rate": 1.3178078599757448e-06, "loss": 0.8054, "step": 24075 }, { "epoch": 0.749805386890861, "grad_norm": 2.285951852798462, "learning_rate": 1.316988429643712e-06, "loss": 0.8318, "step": 24080 }, { "epoch": 0.7499610773781722, "grad_norm": 1.8328032493591309, "learning_rate": 1.3161689993116786e-06, "loss": 0.7546, "step": 24085 }, { "epoch": 0.7501167678654834, "grad_norm": 2.074450731277466, "learning_rate": 1.3153495689796456e-06, "loss": 0.7348, "step": 24090 }, { "epoch": 0.7502724583527947, "grad_norm": 2.2004354000091553, "learning_rate": 1.3145301386476123e-06, "loss": 0.7634, "step": 24095 }, { "epoch": 0.7504281488401059, "grad_norm": 1.96489679813385, "learning_rate": 1.3137107083155792e-06, "loss": 0.7444, "step": 24100 }, { "epoch": 0.7505838393274171, "grad_norm": 1.8920323848724365, "learning_rate": 1.3128912779835458e-06, "loss": 0.7566, "step": 24105 }, { "epoch": 0.7507395298147284, "grad_norm": 2.075137138366699, "learning_rate": 1.312071847651513e-06, "loss": 0.7644, "step": 24110 }, { "epoch": 0.7508952203020396, "grad_norm": 2.1364150047302246, "learning_rate": 1.3112524173194796e-06, "loss": 0.8159, "step": 24115 }, { "epoch": 0.7510509107893508, "grad_norm": 1.9278441667556763, "learning_rate": 1.3104329869874462e-06, "loss": 0.7329, "step": 24120 }, { "epoch": 0.751206601276662, "grad_norm": 2.2520408630371094, "learning_rate": 1.3096135566554133e-06, "loss": 0.8, "step": 24125 }, { "epoch": 0.7513622917639732, "grad_norm": 2.1794135570526123, "learning_rate": 1.30879412632338e-06, "loss": 0.7599, "step": 24130 }, { "epoch": 0.7515179822512844, "grad_norm": 2.5001697540283203, "learning_rate": 1.307974695991347e-06, "loss": 0.7972, "step": 24135 }, { "epoch": 0.7516736727385956, "grad_norm": 2.1277759075164795, "learning_rate": 1.3071552656593137e-06, "loss": 0.7253, "step": 24140 }, { "epoch": 0.7518293632259069, "grad_norm": 2.4406261444091797, "learning_rate": 1.3063358353272806e-06, "loss": 0.7663, "step": 24145 }, { "epoch": 0.7519850537132181, "grad_norm": 2.1366281509399414, "learning_rate": 1.3055164049952474e-06, "loss": 0.7986, "step": 24150 }, { "epoch": 0.7521407442005293, "grad_norm": 2.311892032623291, "learning_rate": 1.3046969746632143e-06, "loss": 0.8177, "step": 24155 }, { "epoch": 0.7522964346878406, "grad_norm": 2.0301835536956787, "learning_rate": 1.303877544331181e-06, "loss": 0.7727, "step": 24160 }, { "epoch": 0.7524521251751518, "grad_norm": 1.734013319015503, "learning_rate": 1.303058113999148e-06, "loss": 0.8212, "step": 24165 }, { "epoch": 0.752607815662463, "grad_norm": 2.6098244190216064, "learning_rate": 1.3022386836671147e-06, "loss": 0.8035, "step": 24170 }, { "epoch": 0.7527635061497743, "grad_norm": 2.3460140228271484, "learning_rate": 1.3014192533350816e-06, "loss": 0.772, "step": 24175 }, { "epoch": 0.7529191966370855, "grad_norm": 2.086164951324463, "learning_rate": 1.3005998230030485e-06, "loss": 0.7851, "step": 24180 }, { "epoch": 0.7530748871243967, "grad_norm": 2.4747722148895264, "learning_rate": 1.2997803926710151e-06, "loss": 0.7804, "step": 24185 }, { "epoch": 0.753230577611708, "grad_norm": 2.042313575744629, "learning_rate": 1.298960962338982e-06, "loss": 0.7504, "step": 24190 }, { "epoch": 0.7533862680990191, "grad_norm": 2.3958282470703125, "learning_rate": 1.2981415320069488e-06, "loss": 0.7632, "step": 24195 }, { "epoch": 0.7535419585863303, "grad_norm": 2.4429523944854736, "learning_rate": 1.2973221016749157e-06, "loss": 0.7231, "step": 24200 }, { "epoch": 0.7536976490736416, "grad_norm": 1.9735982418060303, "learning_rate": 1.2965026713428824e-06, "loss": 0.7557, "step": 24205 }, { "epoch": 0.7538533395609528, "grad_norm": 2.899205446243286, "learning_rate": 1.2956832410108495e-06, "loss": 0.771, "step": 24210 }, { "epoch": 0.754009030048264, "grad_norm": 2.381190061569214, "learning_rate": 1.2948638106788161e-06, "loss": 0.8002, "step": 24215 }, { "epoch": 0.7541647205355753, "grad_norm": 1.940488576889038, "learning_rate": 1.2940443803467832e-06, "loss": 0.742, "step": 24220 }, { "epoch": 0.7543204110228865, "grad_norm": 1.9416089057922363, "learning_rate": 1.2932249500147499e-06, "loss": 0.7292, "step": 24225 }, { "epoch": 0.7544761015101977, "grad_norm": 2.0659844875335693, "learning_rate": 1.2924055196827167e-06, "loss": 0.7342, "step": 24230 }, { "epoch": 0.754631791997509, "grad_norm": 1.8715860843658447, "learning_rate": 1.2915860893506834e-06, "loss": 0.7401, "step": 24235 }, { "epoch": 0.7547874824848202, "grad_norm": 2.5200774669647217, "learning_rate": 1.2907666590186505e-06, "loss": 0.7665, "step": 24240 }, { "epoch": 0.7549431729721314, "grad_norm": 2.0399322509765625, "learning_rate": 1.2899472286866171e-06, "loss": 0.807, "step": 24245 }, { "epoch": 0.7550988634594427, "grad_norm": 2.4277596473693848, "learning_rate": 1.2891277983545842e-06, "loss": 0.8001, "step": 24250 }, { "epoch": 0.7552545539467539, "grad_norm": 2.421984910964966, "learning_rate": 1.2883083680225509e-06, "loss": 0.7671, "step": 24255 }, { "epoch": 0.7554102444340651, "grad_norm": 2.199690341949463, "learning_rate": 1.2874889376905175e-06, "loss": 0.7831, "step": 24260 }, { "epoch": 0.7555659349213764, "grad_norm": 1.9614354372024536, "learning_rate": 1.2866695073584846e-06, "loss": 0.8052, "step": 24265 }, { "epoch": 0.7557216254086875, "grad_norm": 2.779249906539917, "learning_rate": 1.2858500770264513e-06, "loss": 0.6995, "step": 24270 }, { "epoch": 0.7558773158959987, "grad_norm": 2.0827908515930176, "learning_rate": 1.2850306466944181e-06, "loss": 0.7421, "step": 24275 }, { "epoch": 0.75603300638331, "grad_norm": 2.444277763366699, "learning_rate": 1.2842112163623848e-06, "loss": 0.7285, "step": 24280 }, { "epoch": 0.7561886968706212, "grad_norm": 1.919100046157837, "learning_rate": 1.2833917860303519e-06, "loss": 0.7846, "step": 24285 }, { "epoch": 0.7563443873579324, "grad_norm": 2.3632707595825195, "learning_rate": 1.2825723556983185e-06, "loss": 0.8024, "step": 24290 }, { "epoch": 0.7565000778452436, "grad_norm": 2.096055030822754, "learning_rate": 1.2817529253662856e-06, "loss": 0.729, "step": 24295 }, { "epoch": 0.7566557683325549, "grad_norm": 2.083235263824463, "learning_rate": 1.2809334950342523e-06, "loss": 0.6959, "step": 24300 }, { "epoch": 0.7568114588198661, "grad_norm": 2.3623292446136475, "learning_rate": 1.2801140647022191e-06, "loss": 0.7937, "step": 24305 }, { "epoch": 0.7569671493071773, "grad_norm": 3.1586923599243164, "learning_rate": 1.279294634370186e-06, "loss": 0.7752, "step": 24310 }, { "epoch": 0.7571228397944886, "grad_norm": 2.1972031593322754, "learning_rate": 1.2784752040381529e-06, "loss": 0.7812, "step": 24315 }, { "epoch": 0.7572785302817998, "grad_norm": 2.2256906032562256, "learning_rate": 1.2776557737061195e-06, "loss": 0.7306, "step": 24320 }, { "epoch": 0.757434220769111, "grad_norm": 1.962486982345581, "learning_rate": 1.2768363433740866e-06, "loss": 0.873, "step": 24325 }, { "epoch": 0.7575899112564223, "grad_norm": 2.035784959793091, "learning_rate": 1.2760169130420533e-06, "loss": 0.6871, "step": 24330 }, { "epoch": 0.7577456017437335, "grad_norm": 2.221372604370117, "learning_rate": 1.27519748271002e-06, "loss": 0.8367, "step": 24335 }, { "epoch": 0.7579012922310446, "grad_norm": 2.1849849224090576, "learning_rate": 1.274378052377987e-06, "loss": 0.8048, "step": 24340 }, { "epoch": 0.7580569827183559, "grad_norm": 2.051551342010498, "learning_rate": 1.2735586220459537e-06, "loss": 0.769, "step": 24345 }, { "epoch": 0.7582126732056671, "grad_norm": 2.411658763885498, "learning_rate": 1.2727391917139205e-06, "loss": 0.7837, "step": 24350 }, { "epoch": 0.7583683636929783, "grad_norm": 2.889045476913452, "learning_rate": 1.2719197613818874e-06, "loss": 0.7662, "step": 24355 }, { "epoch": 0.7585240541802896, "grad_norm": 1.9870195388793945, "learning_rate": 1.2711003310498543e-06, "loss": 0.7003, "step": 24360 }, { "epoch": 0.7586797446676008, "grad_norm": 2.1862235069274902, "learning_rate": 1.270280900717821e-06, "loss": 0.7317, "step": 24365 }, { "epoch": 0.758835435154912, "grad_norm": 2.051239013671875, "learning_rate": 1.269461470385788e-06, "loss": 0.7587, "step": 24370 }, { "epoch": 0.7589911256422233, "grad_norm": 1.9621102809906006, "learning_rate": 1.2686420400537547e-06, "loss": 0.6403, "step": 24375 }, { "epoch": 0.7591468161295345, "grad_norm": 1.97218918800354, "learning_rate": 1.2678226097217217e-06, "loss": 0.7865, "step": 24380 }, { "epoch": 0.7593025066168457, "grad_norm": 2.389556407928467, "learning_rate": 1.2670031793896884e-06, "loss": 0.779, "step": 24385 }, { "epoch": 0.759458197104157, "grad_norm": 2.292320728302002, "learning_rate": 1.2661837490576553e-06, "loss": 0.8118, "step": 24390 }, { "epoch": 0.7596138875914682, "grad_norm": 2.4176316261291504, "learning_rate": 1.2653643187256221e-06, "loss": 0.7524, "step": 24395 }, { "epoch": 0.7597695780787794, "grad_norm": 1.985213041305542, "learning_rate": 1.2645448883935888e-06, "loss": 0.6502, "step": 24400 }, { "epoch": 0.7599252685660907, "grad_norm": 1.769061803817749, "learning_rate": 1.2637254580615557e-06, "loss": 0.7409, "step": 24405 }, { "epoch": 0.7600809590534018, "grad_norm": 2.6454732418060303, "learning_rate": 1.2629060277295223e-06, "loss": 0.8145, "step": 24410 }, { "epoch": 0.760236649540713, "grad_norm": 2.2538228034973145, "learning_rate": 1.2620865973974894e-06, "loss": 0.7993, "step": 24415 }, { "epoch": 0.7603923400280242, "grad_norm": 2.213263750076294, "learning_rate": 1.261267167065456e-06, "loss": 0.8621, "step": 24420 }, { "epoch": 0.7605480305153355, "grad_norm": 2.1974661350250244, "learning_rate": 1.2604477367334232e-06, "loss": 0.7673, "step": 24425 }, { "epoch": 0.7607037210026467, "grad_norm": 1.8887462615966797, "learning_rate": 1.2596283064013898e-06, "loss": 0.8196, "step": 24430 }, { "epoch": 0.760859411489958, "grad_norm": 2.034111261367798, "learning_rate": 1.2588088760693567e-06, "loss": 0.7826, "step": 24435 }, { "epoch": 0.7610151019772692, "grad_norm": 1.9649512767791748, "learning_rate": 1.2579894457373235e-06, "loss": 0.7561, "step": 24440 }, { "epoch": 0.7611707924645804, "grad_norm": 2.0396509170532227, "learning_rate": 1.2571700154052904e-06, "loss": 0.8096, "step": 24445 }, { "epoch": 0.7613264829518916, "grad_norm": 2.08884859085083, "learning_rate": 1.256350585073257e-06, "loss": 0.7659, "step": 24450 }, { "epoch": 0.7614821734392029, "grad_norm": 2.3527884483337402, "learning_rate": 1.2555311547412242e-06, "loss": 0.9075, "step": 24455 }, { "epoch": 0.7616378639265141, "grad_norm": 1.8118613958358765, "learning_rate": 1.2547117244091908e-06, "loss": 0.7493, "step": 24460 }, { "epoch": 0.7617935544138253, "grad_norm": 2.1888840198516846, "learning_rate": 1.253892294077158e-06, "loss": 0.7344, "step": 24465 }, { "epoch": 0.7619492449011366, "grad_norm": 2.5121192932128906, "learning_rate": 1.2530728637451246e-06, "loss": 0.7453, "step": 24470 }, { "epoch": 0.7621049353884478, "grad_norm": 2.0267250537872314, "learning_rate": 1.2522534334130912e-06, "loss": 0.7797, "step": 24475 }, { "epoch": 0.762260625875759, "grad_norm": 2.1526918411254883, "learning_rate": 1.251434003081058e-06, "loss": 0.7564, "step": 24480 }, { "epoch": 0.7624163163630702, "grad_norm": 2.7484962940216064, "learning_rate": 1.250614572749025e-06, "loss": 0.7129, "step": 24485 }, { "epoch": 0.7625720068503814, "grad_norm": 2.0644171237945557, "learning_rate": 1.2497951424169918e-06, "loss": 0.7143, "step": 24490 }, { "epoch": 0.7627276973376926, "grad_norm": 2.1761560440063477, "learning_rate": 1.2489757120849587e-06, "loss": 0.7561, "step": 24495 }, { "epoch": 0.7628833878250039, "grad_norm": 1.9322327375411987, "learning_rate": 1.2481562817529256e-06, "loss": 0.7649, "step": 24500 }, { "epoch": 0.7630390783123151, "grad_norm": 2.109086036682129, "learning_rate": 1.2473368514208924e-06, "loss": 0.7298, "step": 24505 }, { "epoch": 0.7631947687996263, "grad_norm": 2.2743706703186035, "learning_rate": 1.246517421088859e-06, "loss": 0.7081, "step": 24510 }, { "epoch": 0.7633504592869376, "grad_norm": 1.9020622968673706, "learning_rate": 1.245697990756826e-06, "loss": 0.7612, "step": 24515 }, { "epoch": 0.7635061497742488, "grad_norm": 2.0501418113708496, "learning_rate": 1.2448785604247928e-06, "loss": 0.694, "step": 24520 }, { "epoch": 0.76366184026156, "grad_norm": 2.2453696727752686, "learning_rate": 1.2440591300927595e-06, "loss": 0.8053, "step": 24525 }, { "epoch": 0.7638175307488713, "grad_norm": 2.2556405067443848, "learning_rate": 1.2432396997607264e-06, "loss": 0.7097, "step": 24530 }, { "epoch": 0.7639732212361825, "grad_norm": 2.217836380004883, "learning_rate": 1.2424202694286932e-06, "loss": 0.8036, "step": 24535 }, { "epoch": 0.7641289117234937, "grad_norm": 1.944861888885498, "learning_rate": 1.24160083909666e-06, "loss": 0.7646, "step": 24540 }, { "epoch": 0.764284602210805, "grad_norm": 2.648405075073242, "learning_rate": 1.240781408764627e-06, "loss": 0.8314, "step": 24545 }, { "epoch": 0.7644402926981162, "grad_norm": 1.9660141468048096, "learning_rate": 1.2399619784325938e-06, "loss": 0.6709, "step": 24550 }, { "epoch": 0.7645959831854273, "grad_norm": 2.6424412727355957, "learning_rate": 1.2391425481005607e-06, "loss": 0.7289, "step": 24555 }, { "epoch": 0.7647516736727386, "grad_norm": 2.1454129219055176, "learning_rate": 1.2383231177685274e-06, "loss": 0.7296, "step": 24560 }, { "epoch": 0.7649073641600498, "grad_norm": 2.2440950870513916, "learning_rate": 1.2375036874364942e-06, "loss": 0.7214, "step": 24565 }, { "epoch": 0.765063054647361, "grad_norm": 2.3501980304718018, "learning_rate": 1.236684257104461e-06, "loss": 0.7864, "step": 24570 }, { "epoch": 0.7652187451346723, "grad_norm": 2.0936532020568848, "learning_rate": 1.235864826772428e-06, "loss": 0.7578, "step": 24575 }, { "epoch": 0.7653744356219835, "grad_norm": 2.1713461875915527, "learning_rate": 1.2350453964403946e-06, "loss": 0.6759, "step": 24580 }, { "epoch": 0.7655301261092947, "grad_norm": 2.23896861076355, "learning_rate": 1.2342259661083615e-06, "loss": 0.7844, "step": 24585 }, { "epoch": 0.765685816596606, "grad_norm": 1.7935137748718262, "learning_rate": 1.2334065357763284e-06, "loss": 0.7298, "step": 24590 }, { "epoch": 0.7658415070839172, "grad_norm": 2.1239097118377686, "learning_rate": 1.2325871054442952e-06, "loss": 0.7879, "step": 24595 }, { "epoch": 0.7659971975712284, "grad_norm": 2.1983180046081543, "learning_rate": 1.231767675112262e-06, "loss": 0.7287, "step": 24600 }, { "epoch": 0.7661528880585396, "grad_norm": 2.4417271614074707, "learning_rate": 1.230948244780229e-06, "loss": 0.7953, "step": 24605 }, { "epoch": 0.7663085785458509, "grad_norm": 2.2065048217773438, "learning_rate": 1.2301288144481956e-06, "loss": 0.7707, "step": 24610 }, { "epoch": 0.7664642690331621, "grad_norm": 2.102077007293701, "learning_rate": 1.2293093841161625e-06, "loss": 0.7263, "step": 24615 }, { "epoch": 0.7666199595204733, "grad_norm": 2.5540356636047363, "learning_rate": 1.2284899537841294e-06, "loss": 0.7931, "step": 24620 }, { "epoch": 0.7667756500077845, "grad_norm": 2.3732070922851562, "learning_rate": 1.2276705234520962e-06, "loss": 0.7799, "step": 24625 }, { "epoch": 0.7669313404950957, "grad_norm": 2.6939053535461426, "learning_rate": 1.2268510931200631e-06, "loss": 0.7578, "step": 24630 }, { "epoch": 0.7670870309824069, "grad_norm": 2.2108089923858643, "learning_rate": 1.22603166278803e-06, "loss": 0.7251, "step": 24635 }, { "epoch": 0.7672427214697182, "grad_norm": 2.2845053672790527, "learning_rate": 1.2252122324559968e-06, "loss": 0.8295, "step": 24640 }, { "epoch": 0.7673984119570294, "grad_norm": 2.2608516216278076, "learning_rate": 1.2243928021239635e-06, "loss": 0.8348, "step": 24645 }, { "epoch": 0.7675541024443406, "grad_norm": 2.336761713027954, "learning_rate": 1.2235733717919304e-06, "loss": 0.7911, "step": 24650 }, { "epoch": 0.7677097929316519, "grad_norm": 2.0426886081695557, "learning_rate": 1.222753941459897e-06, "loss": 0.7499, "step": 24655 }, { "epoch": 0.7678654834189631, "grad_norm": 1.9887946844100952, "learning_rate": 1.221934511127864e-06, "loss": 0.7891, "step": 24660 }, { "epoch": 0.7680211739062743, "grad_norm": 1.9226901531219482, "learning_rate": 1.2211150807958308e-06, "loss": 0.7339, "step": 24665 }, { "epoch": 0.7681768643935856, "grad_norm": 2.220038890838623, "learning_rate": 1.2202956504637976e-06, "loss": 0.6936, "step": 24670 }, { "epoch": 0.7683325548808968, "grad_norm": 2.0648930072784424, "learning_rate": 1.2194762201317645e-06, "loss": 0.7013, "step": 24675 }, { "epoch": 0.768488245368208, "grad_norm": 2.48162841796875, "learning_rate": 1.2186567897997314e-06, "loss": 0.7622, "step": 24680 }, { "epoch": 0.7686439358555193, "grad_norm": 2.168151378631592, "learning_rate": 1.2178373594676982e-06, "loss": 0.8046, "step": 24685 }, { "epoch": 0.7687996263428305, "grad_norm": 2.4209866523742676, "learning_rate": 1.217017929135665e-06, "loss": 0.7566, "step": 24690 }, { "epoch": 0.7689553168301417, "grad_norm": 2.1127431392669678, "learning_rate": 1.2161984988036318e-06, "loss": 0.7345, "step": 24695 }, { "epoch": 0.7691110073174529, "grad_norm": 1.8550344705581665, "learning_rate": 1.2153790684715986e-06, "loss": 0.7826, "step": 24700 }, { "epoch": 0.7692666978047641, "grad_norm": 1.8592369556427002, "learning_rate": 1.2145596381395655e-06, "loss": 0.7377, "step": 24705 }, { "epoch": 0.7694223882920753, "grad_norm": 2.2104806900024414, "learning_rate": 1.2137402078075324e-06, "loss": 0.8167, "step": 24710 }, { "epoch": 0.7695780787793866, "grad_norm": 1.8939646482467651, "learning_rate": 1.2129207774754993e-06, "loss": 0.7413, "step": 24715 }, { "epoch": 0.7697337692666978, "grad_norm": 1.9822419881820679, "learning_rate": 1.2121013471434661e-06, "loss": 0.7632, "step": 24720 }, { "epoch": 0.769889459754009, "grad_norm": 2.074369430541992, "learning_rate": 1.2112819168114328e-06, "loss": 0.8028, "step": 24725 }, { "epoch": 0.7700451502413203, "grad_norm": 1.8641250133514404, "learning_rate": 1.2104624864793997e-06, "loss": 0.7573, "step": 24730 }, { "epoch": 0.7702008407286315, "grad_norm": 2.0157055854797363, "learning_rate": 1.2096430561473663e-06, "loss": 0.6668, "step": 24735 }, { "epoch": 0.7703565312159427, "grad_norm": 2.0619027614593506, "learning_rate": 1.2088236258153332e-06, "loss": 0.7259, "step": 24740 }, { "epoch": 0.770512221703254, "grad_norm": 1.9149048328399658, "learning_rate": 1.2080041954833e-06, "loss": 0.7194, "step": 24745 }, { "epoch": 0.7706679121905652, "grad_norm": 2.6651322841644287, "learning_rate": 1.207184765151267e-06, "loss": 0.7085, "step": 24750 }, { "epoch": 0.7708236026778764, "grad_norm": 2.3176586627960205, "learning_rate": 1.2063653348192338e-06, "loss": 0.7923, "step": 24755 }, { "epoch": 0.7709792931651877, "grad_norm": 2.173673152923584, "learning_rate": 1.2055459044872007e-06, "loss": 0.7566, "step": 24760 }, { "epoch": 0.7711349836524989, "grad_norm": 1.8525222539901733, "learning_rate": 1.2047264741551675e-06, "loss": 0.8087, "step": 24765 }, { "epoch": 0.77129067413981, "grad_norm": 2.0925700664520264, "learning_rate": 1.2039070438231342e-06, "loss": 0.7154, "step": 24770 }, { "epoch": 0.7714463646271212, "grad_norm": 2.113227367401123, "learning_rate": 1.203087613491101e-06, "loss": 0.7727, "step": 24775 }, { "epoch": 0.7716020551144325, "grad_norm": 1.8018239736557007, "learning_rate": 1.202268183159068e-06, "loss": 0.7563, "step": 24780 }, { "epoch": 0.7717577456017437, "grad_norm": 2.175976514816284, "learning_rate": 1.2014487528270348e-06, "loss": 0.748, "step": 24785 }, { "epoch": 0.7719134360890549, "grad_norm": 2.245457649230957, "learning_rate": 1.2006293224950017e-06, "loss": 0.7646, "step": 24790 }, { "epoch": 0.7720691265763662, "grad_norm": 2.3253495693206787, "learning_rate": 1.1998098921629683e-06, "loss": 0.7939, "step": 24795 }, { "epoch": 0.7722248170636774, "grad_norm": 2.1454806327819824, "learning_rate": 1.1989904618309352e-06, "loss": 0.8874, "step": 24800 }, { "epoch": 0.7723805075509886, "grad_norm": 2.151207685470581, "learning_rate": 1.198171031498902e-06, "loss": 0.7893, "step": 24805 }, { "epoch": 0.7725361980382999, "grad_norm": 1.9717472791671753, "learning_rate": 1.197351601166869e-06, "loss": 0.7906, "step": 24810 }, { "epoch": 0.7726918885256111, "grad_norm": 2.1225695610046387, "learning_rate": 1.1965321708348358e-06, "loss": 0.7708, "step": 24815 }, { "epoch": 0.7728475790129223, "grad_norm": 2.1728711128234863, "learning_rate": 1.1957127405028025e-06, "loss": 0.7393, "step": 24820 }, { "epoch": 0.7730032695002336, "grad_norm": 2.5243585109710693, "learning_rate": 1.1948933101707693e-06, "loss": 0.7398, "step": 24825 }, { "epoch": 0.7731589599875448, "grad_norm": 2.0446135997772217, "learning_rate": 1.1940738798387362e-06, "loss": 0.7568, "step": 24830 }, { "epoch": 0.773314650474856, "grad_norm": 2.11271333694458, "learning_rate": 1.193254449506703e-06, "loss": 0.7776, "step": 24835 }, { "epoch": 0.7734703409621672, "grad_norm": 2.0342328548431396, "learning_rate": 1.19243501917467e-06, "loss": 0.66, "step": 24840 }, { "epoch": 0.7736260314494784, "grad_norm": 1.737362027168274, "learning_rate": 1.1916155888426368e-06, "loss": 0.7195, "step": 24845 }, { "epoch": 0.7737817219367896, "grad_norm": 1.8930506706237793, "learning_rate": 1.1907961585106037e-06, "loss": 0.7518, "step": 24850 }, { "epoch": 0.7739374124241009, "grad_norm": 3.7263503074645996, "learning_rate": 1.1899767281785703e-06, "loss": 0.7991, "step": 24855 }, { "epoch": 0.7740931029114121, "grad_norm": 1.8885776996612549, "learning_rate": 1.1891572978465372e-06, "loss": 0.7707, "step": 24860 }, { "epoch": 0.7742487933987233, "grad_norm": 2.1878390312194824, "learning_rate": 1.1883378675145039e-06, "loss": 0.7762, "step": 24865 }, { "epoch": 0.7744044838860346, "grad_norm": 2.350189208984375, "learning_rate": 1.1875184371824707e-06, "loss": 0.7679, "step": 24870 }, { "epoch": 0.7745601743733458, "grad_norm": 2.4853944778442383, "learning_rate": 1.1866990068504376e-06, "loss": 0.6973, "step": 24875 }, { "epoch": 0.774715864860657, "grad_norm": 2.689178705215454, "learning_rate": 1.1858795765184045e-06, "loss": 0.7566, "step": 24880 }, { "epoch": 0.7748715553479683, "grad_norm": 2.100430488586426, "learning_rate": 1.1850601461863713e-06, "loss": 0.785, "step": 24885 }, { "epoch": 0.7750272458352795, "grad_norm": 2.088157892227173, "learning_rate": 1.1842407158543382e-06, "loss": 0.7465, "step": 24890 }, { "epoch": 0.7751829363225907, "grad_norm": 2.267212152481079, "learning_rate": 1.183421285522305e-06, "loss": 0.7175, "step": 24895 }, { "epoch": 0.775338626809902, "grad_norm": 2.010072708129883, "learning_rate": 1.1826018551902717e-06, "loss": 0.7811, "step": 24900 }, { "epoch": 0.7754943172972132, "grad_norm": 2.183570623397827, "learning_rate": 1.1817824248582386e-06, "loss": 0.7018, "step": 24905 }, { "epoch": 0.7756500077845244, "grad_norm": 2.141541004180908, "learning_rate": 1.1809629945262055e-06, "loss": 0.7455, "step": 24910 }, { "epoch": 0.7758056982718355, "grad_norm": 2.2234444618225098, "learning_rate": 1.1801435641941723e-06, "loss": 0.7595, "step": 24915 }, { "epoch": 0.7759613887591468, "grad_norm": 2.4056551456451416, "learning_rate": 1.1793241338621392e-06, "loss": 0.7693, "step": 24920 }, { "epoch": 0.776117079246458, "grad_norm": 2.1266844272613525, "learning_rate": 1.178504703530106e-06, "loss": 0.7794, "step": 24925 }, { "epoch": 0.7762727697337692, "grad_norm": 1.917706847190857, "learning_rate": 1.177685273198073e-06, "loss": 0.7657, "step": 24930 }, { "epoch": 0.7764284602210805, "grad_norm": 2.1540071964263916, "learning_rate": 1.1768658428660396e-06, "loss": 0.7984, "step": 24935 }, { "epoch": 0.7765841507083917, "grad_norm": 1.8737391233444214, "learning_rate": 1.1760464125340065e-06, "loss": 0.8153, "step": 24940 }, { "epoch": 0.7767398411957029, "grad_norm": 1.896170973777771, "learning_rate": 1.1752269822019731e-06, "loss": 0.7212, "step": 24945 }, { "epoch": 0.7768955316830142, "grad_norm": 2.1503665447235107, "learning_rate": 1.17440755186994e-06, "loss": 0.7534, "step": 24950 }, { "epoch": 0.7770512221703254, "grad_norm": 2.03535532951355, "learning_rate": 1.1735881215379069e-06, "loss": 0.8556, "step": 24955 }, { "epoch": 0.7772069126576366, "grad_norm": 2.1977405548095703, "learning_rate": 1.1727686912058737e-06, "loss": 0.7699, "step": 24960 }, { "epoch": 0.7773626031449479, "grad_norm": 2.8264451026916504, "learning_rate": 1.1719492608738406e-06, "loss": 0.7411, "step": 24965 }, { "epoch": 0.7775182936322591, "grad_norm": 2.0204832553863525, "learning_rate": 1.1711298305418075e-06, "loss": 0.7239, "step": 24970 }, { "epoch": 0.7776739841195703, "grad_norm": 2.3005785942077637, "learning_rate": 1.1703104002097744e-06, "loss": 0.8188, "step": 24975 }, { "epoch": 0.7778296746068816, "grad_norm": 2.1790060997009277, "learning_rate": 1.169490969877741e-06, "loss": 0.744, "step": 24980 }, { "epoch": 0.7779853650941927, "grad_norm": 2.223829746246338, "learning_rate": 1.1686715395457079e-06, "loss": 0.7487, "step": 24985 }, { "epoch": 0.7781410555815039, "grad_norm": 2.259202718734741, "learning_rate": 1.1678521092136747e-06, "loss": 0.7486, "step": 24990 }, { "epoch": 0.7782967460688152, "grad_norm": 2.046321153640747, "learning_rate": 1.1670326788816416e-06, "loss": 0.683, "step": 24995 }, { "epoch": 0.7784524365561264, "grad_norm": 2.2317540645599365, "learning_rate": 1.1662132485496085e-06, "loss": 0.7729, "step": 25000 }, { "epoch": 0.7786081270434376, "grad_norm": 2.156005620956421, "learning_rate": 1.1653938182175754e-06, "loss": 0.7102, "step": 25005 }, { "epoch": 0.7787638175307489, "grad_norm": 2.3559060096740723, "learning_rate": 1.164574387885542e-06, "loss": 0.7273, "step": 25010 }, { "epoch": 0.7789195080180601, "grad_norm": 4.039304256439209, "learning_rate": 1.1637549575535089e-06, "loss": 0.6892, "step": 25015 }, { "epoch": 0.7790751985053713, "grad_norm": 2.3363003730773926, "learning_rate": 1.1629355272214758e-06, "loss": 0.8068, "step": 25020 }, { "epoch": 0.7792308889926826, "grad_norm": 2.0565476417541504, "learning_rate": 1.1621160968894426e-06, "loss": 0.7037, "step": 25025 }, { "epoch": 0.7793865794799938, "grad_norm": 2.198218822479248, "learning_rate": 1.1612966665574093e-06, "loss": 0.7358, "step": 25030 }, { "epoch": 0.779542269967305, "grad_norm": 2.4001519680023193, "learning_rate": 1.1604772362253761e-06, "loss": 0.8328, "step": 25035 }, { "epoch": 0.7796979604546163, "grad_norm": 2.061537981033325, "learning_rate": 1.159657805893343e-06, "loss": 0.7033, "step": 25040 }, { "epoch": 0.7798536509419275, "grad_norm": 2.169238567352295, "learning_rate": 1.1588383755613099e-06, "loss": 0.7452, "step": 25045 }, { "epoch": 0.7800093414292387, "grad_norm": 2.098144292831421, "learning_rate": 1.1580189452292768e-06, "loss": 0.7617, "step": 25050 }, { "epoch": 0.7801650319165498, "grad_norm": 2.035663366317749, "learning_rate": 1.1571995148972436e-06, "loss": 0.8226, "step": 25055 }, { "epoch": 0.7803207224038611, "grad_norm": 2.398002862930298, "learning_rate": 1.1563800845652105e-06, "loss": 0.7282, "step": 25060 }, { "epoch": 0.7804764128911723, "grad_norm": 2.288264036178589, "learning_rate": 1.1555606542331772e-06, "loss": 0.7917, "step": 25065 }, { "epoch": 0.7806321033784835, "grad_norm": 2.038195848464966, "learning_rate": 1.154741223901144e-06, "loss": 0.7461, "step": 25070 }, { "epoch": 0.7807877938657948, "grad_norm": 2.250082492828369, "learning_rate": 1.1539217935691109e-06, "loss": 0.7177, "step": 25075 }, { "epoch": 0.780943484353106, "grad_norm": 1.9260239601135254, "learning_rate": 1.1531023632370776e-06, "loss": 0.7482, "step": 25080 }, { "epoch": 0.7810991748404172, "grad_norm": 2.2102468013763428, "learning_rate": 1.1522829329050444e-06, "loss": 0.7637, "step": 25085 }, { "epoch": 0.7812548653277285, "grad_norm": 2.3604462146759033, "learning_rate": 1.1514635025730113e-06, "loss": 0.8, "step": 25090 }, { "epoch": 0.7814105558150397, "grad_norm": 2.1707873344421387, "learning_rate": 1.1506440722409782e-06, "loss": 0.8083, "step": 25095 }, { "epoch": 0.7815662463023509, "grad_norm": 2.5515408515930176, "learning_rate": 1.149824641908945e-06, "loss": 0.7528, "step": 25100 }, { "epoch": 0.7817219367896622, "grad_norm": 1.9798678159713745, "learning_rate": 1.149005211576912e-06, "loss": 0.7395, "step": 25105 }, { "epoch": 0.7818776272769734, "grad_norm": 2.6227643489837646, "learning_rate": 1.1481857812448786e-06, "loss": 0.7583, "step": 25110 }, { "epoch": 0.7820333177642846, "grad_norm": 1.9039134979248047, "learning_rate": 1.1473663509128454e-06, "loss": 0.7081, "step": 25115 }, { "epoch": 0.7821890082515959, "grad_norm": 2.3206732273101807, "learning_rate": 1.1465469205808123e-06, "loss": 0.8457, "step": 25120 }, { "epoch": 0.7823446987389071, "grad_norm": 1.8649848699569702, "learning_rate": 1.1457274902487792e-06, "loss": 0.7935, "step": 25125 }, { "epoch": 0.7825003892262182, "grad_norm": 2.884464740753174, "learning_rate": 1.144908059916746e-06, "loss": 0.7994, "step": 25130 }, { "epoch": 0.7826560797135295, "grad_norm": 2.190215826034546, "learning_rate": 1.144088629584713e-06, "loss": 0.7456, "step": 25135 }, { "epoch": 0.7828117702008407, "grad_norm": 2.286506175994873, "learning_rate": 1.1432691992526798e-06, "loss": 0.7022, "step": 25140 }, { "epoch": 0.7829674606881519, "grad_norm": 2.2474091053009033, "learning_rate": 1.1424497689206464e-06, "loss": 0.7877, "step": 25145 }, { "epoch": 0.7831231511754632, "grad_norm": 2.9385828971862793, "learning_rate": 1.1416303385886133e-06, "loss": 0.8373, "step": 25150 }, { "epoch": 0.7832788416627744, "grad_norm": 2.4689972400665283, "learning_rate": 1.14081090825658e-06, "loss": 0.7474, "step": 25155 }, { "epoch": 0.7834345321500856, "grad_norm": 2.1728477478027344, "learning_rate": 1.1399914779245468e-06, "loss": 0.8001, "step": 25160 }, { "epoch": 0.7835902226373969, "grad_norm": 1.955625057220459, "learning_rate": 1.1391720475925137e-06, "loss": 0.7139, "step": 25165 }, { "epoch": 0.7837459131247081, "grad_norm": 2.188425302505493, "learning_rate": 1.1383526172604806e-06, "loss": 0.7377, "step": 25170 }, { "epoch": 0.7839016036120193, "grad_norm": 1.9455126523971558, "learning_rate": 1.1375331869284474e-06, "loss": 0.7077, "step": 25175 }, { "epoch": 0.7840572940993306, "grad_norm": 2.5352461338043213, "learning_rate": 1.1367137565964143e-06, "loss": 0.8173, "step": 25180 }, { "epoch": 0.7842129845866418, "grad_norm": 2.330394744873047, "learning_rate": 1.1358943262643812e-06, "loss": 0.8681, "step": 25185 }, { "epoch": 0.784368675073953, "grad_norm": 2.946267604827881, "learning_rate": 1.1350748959323478e-06, "loss": 0.75, "step": 25190 }, { "epoch": 0.7845243655612643, "grad_norm": 2.173346757888794, "learning_rate": 1.1342554656003147e-06, "loss": 0.7261, "step": 25195 }, { "epoch": 0.7846800560485754, "grad_norm": 1.9081878662109375, "learning_rate": 1.1334360352682816e-06, "loss": 0.7445, "step": 25200 }, { "epoch": 0.7848357465358866, "grad_norm": 2.386991024017334, "learning_rate": 1.1326166049362484e-06, "loss": 0.806, "step": 25205 }, { "epoch": 0.7849914370231978, "grad_norm": 2.4282758235931396, "learning_rate": 1.1317971746042153e-06, "loss": 0.7027, "step": 25210 }, { "epoch": 0.7851471275105091, "grad_norm": 1.8846113681793213, "learning_rate": 1.1309777442721822e-06, "loss": 0.7286, "step": 25215 }, { "epoch": 0.7853028179978203, "grad_norm": 2.4248316287994385, "learning_rate": 1.130158313940149e-06, "loss": 0.8579, "step": 25220 }, { "epoch": 0.7854585084851315, "grad_norm": 1.992447018623352, "learning_rate": 1.1293388836081157e-06, "loss": 0.7424, "step": 25225 }, { "epoch": 0.7856141989724428, "grad_norm": 2.394639015197754, "learning_rate": 1.1285194532760826e-06, "loss": 0.7161, "step": 25230 }, { "epoch": 0.785769889459754, "grad_norm": 2.187108039855957, "learning_rate": 1.1277000229440494e-06, "loss": 0.7451, "step": 25235 }, { "epoch": 0.7859255799470652, "grad_norm": 2.0059428215026855, "learning_rate": 1.126880592612016e-06, "loss": 0.7803, "step": 25240 }, { "epoch": 0.7860812704343765, "grad_norm": 2.0332462787628174, "learning_rate": 1.126061162279983e-06, "loss": 0.7479, "step": 25245 }, { "epoch": 0.7862369609216877, "grad_norm": 1.7668589353561401, "learning_rate": 1.1252417319479498e-06, "loss": 0.7189, "step": 25250 }, { "epoch": 0.7863926514089989, "grad_norm": 2.3856163024902344, "learning_rate": 1.1244223016159167e-06, "loss": 0.8148, "step": 25255 }, { "epoch": 0.7865483418963102, "grad_norm": 2.2613041400909424, "learning_rate": 1.1236028712838836e-06, "loss": 0.7516, "step": 25260 }, { "epoch": 0.7867040323836214, "grad_norm": 2.1053552627563477, "learning_rate": 1.1227834409518505e-06, "loss": 0.8345, "step": 25265 }, { "epoch": 0.7868597228709325, "grad_norm": 2.190009832382202, "learning_rate": 1.1219640106198173e-06, "loss": 0.82, "step": 25270 }, { "epoch": 0.7870154133582438, "grad_norm": 2.114866018295288, "learning_rate": 1.121144580287784e-06, "loss": 0.6975, "step": 25275 }, { "epoch": 0.787171103845555, "grad_norm": 2.2616071701049805, "learning_rate": 1.1203251499557508e-06, "loss": 0.7747, "step": 25280 }, { "epoch": 0.7873267943328662, "grad_norm": 2.1347639560699463, "learning_rate": 1.1195057196237177e-06, "loss": 0.7378, "step": 25285 }, { "epoch": 0.7874824848201775, "grad_norm": 2.169664144515991, "learning_rate": 1.1186862892916846e-06, "loss": 0.725, "step": 25290 }, { "epoch": 0.7876381753074887, "grad_norm": 2.6486880779266357, "learning_rate": 1.1178668589596512e-06, "loss": 0.7383, "step": 25295 }, { "epoch": 0.7877938657947999, "grad_norm": 2.1310815811157227, "learning_rate": 1.1170474286276181e-06, "loss": 0.7246, "step": 25300 }, { "epoch": 0.7879495562821112, "grad_norm": 1.9520118236541748, "learning_rate": 1.116227998295585e-06, "loss": 0.8046, "step": 25305 }, { "epoch": 0.7881052467694224, "grad_norm": 2.0879905223846436, "learning_rate": 1.1154085679635519e-06, "loss": 0.7382, "step": 25310 }, { "epoch": 0.7882609372567336, "grad_norm": 1.9844329357147217, "learning_rate": 1.1145891376315187e-06, "loss": 0.7666, "step": 25315 }, { "epoch": 0.7884166277440449, "grad_norm": 2.959773063659668, "learning_rate": 1.1137697072994854e-06, "loss": 0.718, "step": 25320 }, { "epoch": 0.7885723182313561, "grad_norm": 1.9265375137329102, "learning_rate": 1.1129502769674523e-06, "loss": 0.8612, "step": 25325 }, { "epoch": 0.7887280087186673, "grad_norm": 2.127161741256714, "learning_rate": 1.1121308466354191e-06, "loss": 0.7971, "step": 25330 }, { "epoch": 0.7888836992059786, "grad_norm": 2.865696907043457, "learning_rate": 1.111311416303386e-06, "loss": 0.6984, "step": 25335 }, { "epoch": 0.7890393896932898, "grad_norm": 2.1826274394989014, "learning_rate": 1.1104919859713529e-06, "loss": 0.7524, "step": 25340 }, { "epoch": 0.7891950801806009, "grad_norm": 2.5624821186065674, "learning_rate": 1.1096725556393197e-06, "loss": 0.7054, "step": 25345 }, { "epoch": 0.7893507706679121, "grad_norm": 2.130908966064453, "learning_rate": 1.1088531253072866e-06, "loss": 0.7242, "step": 25350 }, { "epoch": 0.7895064611552234, "grad_norm": 2.154087543487549, "learning_rate": 1.1080336949752533e-06, "loss": 0.7852, "step": 25355 }, { "epoch": 0.7896621516425346, "grad_norm": 1.712929606437683, "learning_rate": 1.1072142646432201e-06, "loss": 0.8125, "step": 25360 }, { "epoch": 0.7898178421298458, "grad_norm": 2.0223910808563232, "learning_rate": 1.106394834311187e-06, "loss": 0.7925, "step": 25365 }, { "epoch": 0.7899735326171571, "grad_norm": 2.246011972427368, "learning_rate": 1.1055754039791537e-06, "loss": 0.7388, "step": 25370 }, { "epoch": 0.7901292231044683, "grad_norm": 2.340243101119995, "learning_rate": 1.1047559736471205e-06, "loss": 0.7314, "step": 25375 }, { "epoch": 0.7902849135917795, "grad_norm": 1.964539885520935, "learning_rate": 1.1039365433150874e-06, "loss": 0.801, "step": 25380 }, { "epoch": 0.7904406040790908, "grad_norm": 2.0446536540985107, "learning_rate": 1.1031171129830543e-06, "loss": 0.7583, "step": 25385 }, { "epoch": 0.790596294566402, "grad_norm": 2.2394049167633057, "learning_rate": 1.1022976826510211e-06, "loss": 0.7939, "step": 25390 }, { "epoch": 0.7907519850537132, "grad_norm": 2.1951305866241455, "learning_rate": 1.101478252318988e-06, "loss": 0.8094, "step": 25395 }, { "epoch": 0.7909076755410245, "grad_norm": 2.4285268783569336, "learning_rate": 1.1006588219869547e-06, "loss": 0.7468, "step": 25400 }, { "epoch": 0.7910633660283357, "grad_norm": 2.303948402404785, "learning_rate": 1.0998393916549215e-06, "loss": 0.7924, "step": 25405 }, { "epoch": 0.791219056515647, "grad_norm": 5.142179489135742, "learning_rate": 1.0990199613228884e-06, "loss": 0.8587, "step": 25410 }, { "epoch": 0.7913747470029581, "grad_norm": 2.0697686672210693, "learning_rate": 1.0982005309908553e-06, "loss": 0.7117, "step": 25415 }, { "epoch": 0.7915304374902693, "grad_norm": 2.47698974609375, "learning_rate": 1.0973811006588221e-06, "loss": 0.77, "step": 25420 }, { "epoch": 0.7916861279775805, "grad_norm": 2.045783042907715, "learning_rate": 1.096561670326789e-06, "loss": 0.7738, "step": 25425 }, { "epoch": 0.7918418184648918, "grad_norm": 2.5020666122436523, "learning_rate": 1.0957422399947559e-06, "loss": 0.7656, "step": 25430 }, { "epoch": 0.791997508952203, "grad_norm": 2.705404043197632, "learning_rate": 1.0949228096627225e-06, "loss": 0.7187, "step": 25435 }, { "epoch": 0.7921531994395142, "grad_norm": 2.249326705932617, "learning_rate": 1.0941033793306894e-06, "loss": 0.8556, "step": 25440 }, { "epoch": 0.7923088899268255, "grad_norm": 2.6243555545806885, "learning_rate": 1.0932839489986563e-06, "loss": 0.7191, "step": 25445 }, { "epoch": 0.7924645804141367, "grad_norm": 2.1849610805511475, "learning_rate": 1.092464518666623e-06, "loss": 0.8216, "step": 25450 }, { "epoch": 0.7926202709014479, "grad_norm": 1.9494149684906006, "learning_rate": 1.0916450883345898e-06, "loss": 0.7885, "step": 25455 }, { "epoch": 0.7927759613887592, "grad_norm": 2.538789749145508, "learning_rate": 1.0908256580025567e-06, "loss": 0.7415, "step": 25460 }, { "epoch": 0.7929316518760704, "grad_norm": 1.9305381774902344, "learning_rate": 1.0900062276705235e-06, "loss": 0.7295, "step": 25465 }, { "epoch": 0.7930873423633816, "grad_norm": 1.9877495765686035, "learning_rate": 1.0891867973384904e-06, "loss": 0.6803, "step": 25470 }, { "epoch": 0.7932430328506929, "grad_norm": 2.2018942832946777, "learning_rate": 1.0883673670064573e-06, "loss": 0.8046, "step": 25475 }, { "epoch": 0.7933987233380041, "grad_norm": 2.250878095626831, "learning_rate": 1.0875479366744241e-06, "loss": 0.8251, "step": 25480 }, { "epoch": 0.7935544138253152, "grad_norm": 2.923096179962158, "learning_rate": 1.0867285063423908e-06, "loss": 0.7656, "step": 25485 }, { "epoch": 0.7937101043126265, "grad_norm": 2.2337357997894287, "learning_rate": 1.0859090760103577e-06, "loss": 0.7491, "step": 25490 }, { "epoch": 0.7938657947999377, "grad_norm": 2.1219494342803955, "learning_rate": 1.0850896456783245e-06, "loss": 0.7192, "step": 25495 }, { "epoch": 0.7940214852872489, "grad_norm": 1.8511955738067627, "learning_rate": 1.0842702153462914e-06, "loss": 0.8064, "step": 25500 }, { "epoch": 0.7941771757745602, "grad_norm": 2.3123373985290527, "learning_rate": 1.0834507850142583e-06, "loss": 0.728, "step": 25505 }, { "epoch": 0.7943328662618714, "grad_norm": 2.424091100692749, "learning_rate": 1.082631354682225e-06, "loss": 0.803, "step": 25510 }, { "epoch": 0.7944885567491826, "grad_norm": 2.0988128185272217, "learning_rate": 1.0818119243501918e-06, "loss": 0.7835, "step": 25515 }, { "epoch": 0.7946442472364939, "grad_norm": 2.2923121452331543, "learning_rate": 1.0809924940181587e-06, "loss": 0.7196, "step": 25520 }, { "epoch": 0.7947999377238051, "grad_norm": 1.7035146951675415, "learning_rate": 1.0801730636861255e-06, "loss": 0.7073, "step": 25525 }, { "epoch": 0.7949556282111163, "grad_norm": 2.1326897144317627, "learning_rate": 1.0793536333540922e-06, "loss": 0.7777, "step": 25530 }, { "epoch": 0.7951113186984275, "grad_norm": 2.4509217739105225, "learning_rate": 1.078534203022059e-06, "loss": 0.8034, "step": 25535 }, { "epoch": 0.7952670091857388, "grad_norm": 2.0656275749206543, "learning_rate": 1.077714772690026e-06, "loss": 0.8036, "step": 25540 }, { "epoch": 0.79542269967305, "grad_norm": 2.1549670696258545, "learning_rate": 1.0768953423579928e-06, "loss": 0.7113, "step": 25545 }, { "epoch": 0.7955783901603612, "grad_norm": 2.592311143875122, "learning_rate": 1.0760759120259597e-06, "loss": 0.7357, "step": 25550 }, { "epoch": 0.7957340806476725, "grad_norm": 2.092241048812866, "learning_rate": 1.0752564816939266e-06, "loss": 0.7463, "step": 25555 }, { "epoch": 0.7958897711349836, "grad_norm": 2.4288718700408936, "learning_rate": 1.0744370513618934e-06, "loss": 0.7991, "step": 25560 }, { "epoch": 0.7960454616222948, "grad_norm": 1.9506771564483643, "learning_rate": 1.07361762102986e-06, "loss": 0.7679, "step": 25565 }, { "epoch": 0.7962011521096061, "grad_norm": 1.8941705226898193, "learning_rate": 1.072798190697827e-06, "loss": 0.7753, "step": 25570 }, { "epoch": 0.7963568425969173, "grad_norm": 1.7595833539962769, "learning_rate": 1.0719787603657938e-06, "loss": 0.766, "step": 25575 }, { "epoch": 0.7965125330842285, "grad_norm": 2.1007285118103027, "learning_rate": 1.0711593300337607e-06, "loss": 0.7668, "step": 25580 }, { "epoch": 0.7966682235715398, "grad_norm": 2.2177906036376953, "learning_rate": 1.0703398997017273e-06, "loss": 0.788, "step": 25585 }, { "epoch": 0.796823914058851, "grad_norm": 2.057922124862671, "learning_rate": 1.0695204693696942e-06, "loss": 0.7031, "step": 25590 }, { "epoch": 0.7969796045461622, "grad_norm": 2.295093297958374, "learning_rate": 1.068701039037661e-06, "loss": 0.742, "step": 25595 }, { "epoch": 0.7971352950334735, "grad_norm": 2.7235164642333984, "learning_rate": 1.067881608705628e-06, "loss": 0.7975, "step": 25600 }, { "epoch": 0.7972909855207847, "grad_norm": 2.3424129486083984, "learning_rate": 1.0670621783735948e-06, "loss": 0.7875, "step": 25605 }, { "epoch": 0.7974466760080959, "grad_norm": 2.1101694107055664, "learning_rate": 1.0662427480415615e-06, "loss": 0.818, "step": 25610 }, { "epoch": 0.7976023664954072, "grad_norm": 2.205754280090332, "learning_rate": 1.0654233177095284e-06, "loss": 0.7693, "step": 25615 }, { "epoch": 0.7977580569827184, "grad_norm": 1.9726340770721436, "learning_rate": 1.0646038873774952e-06, "loss": 0.8423, "step": 25620 }, { "epoch": 0.7979137474700296, "grad_norm": 6.3675031661987305, "learning_rate": 1.063784457045462e-06, "loss": 0.7444, "step": 25625 }, { "epoch": 0.7980694379573408, "grad_norm": 2.312943458557129, "learning_rate": 1.062965026713429e-06, "loss": 0.7835, "step": 25630 }, { "epoch": 0.798225128444652, "grad_norm": 2.2381389141082764, "learning_rate": 1.0621455963813958e-06, "loss": 0.6687, "step": 25635 }, { "epoch": 0.7983808189319632, "grad_norm": 1.9630309343338013, "learning_rate": 1.0613261660493627e-06, "loss": 0.8099, "step": 25640 }, { "epoch": 0.7985365094192745, "grad_norm": 1.6646565198898315, "learning_rate": 1.0605067357173294e-06, "loss": 0.728, "step": 25645 }, { "epoch": 0.7986921999065857, "grad_norm": 2.1776163578033447, "learning_rate": 1.0596873053852962e-06, "loss": 0.7866, "step": 25650 }, { "epoch": 0.7988478903938969, "grad_norm": 1.9856407642364502, "learning_rate": 1.058867875053263e-06, "loss": 0.7572, "step": 25655 }, { "epoch": 0.7990035808812082, "grad_norm": 2.6216869354248047, "learning_rate": 1.0580484447212298e-06, "loss": 0.7438, "step": 25660 }, { "epoch": 0.7991592713685194, "grad_norm": 2.0423014163970947, "learning_rate": 1.0572290143891966e-06, "loss": 0.8144, "step": 25665 }, { "epoch": 0.7993149618558306, "grad_norm": 2.417344093322754, "learning_rate": 1.0564095840571635e-06, "loss": 0.8175, "step": 25670 }, { "epoch": 0.7994706523431419, "grad_norm": 1.929391860961914, "learning_rate": 1.0555901537251304e-06, "loss": 0.7315, "step": 25675 }, { "epoch": 0.7996263428304531, "grad_norm": 2.126965284347534, "learning_rate": 1.0547707233930972e-06, "loss": 0.7505, "step": 25680 }, { "epoch": 0.7997820333177643, "grad_norm": 1.8986790180206299, "learning_rate": 1.053951293061064e-06, "loss": 0.7632, "step": 25685 }, { "epoch": 0.7999377238050756, "grad_norm": 2.53434681892395, "learning_rate": 1.053131862729031e-06, "loss": 0.7837, "step": 25690 }, { "epoch": 0.8000934142923868, "grad_norm": 2.515329599380493, "learning_rate": 1.0523124323969976e-06, "loss": 0.809, "step": 25695 }, { "epoch": 0.8002491047796979, "grad_norm": 2.2181038856506348, "learning_rate": 1.0514930020649645e-06, "loss": 0.7682, "step": 25700 }, { "epoch": 0.8004047952670091, "grad_norm": 2.2206497192382812, "learning_rate": 1.0506735717329314e-06, "loss": 0.7411, "step": 25705 }, { "epoch": 0.8005604857543204, "grad_norm": 3.1494126319885254, "learning_rate": 1.0498541414008982e-06, "loss": 0.7713, "step": 25710 }, { "epoch": 0.8007161762416316, "grad_norm": 2.5727384090423584, "learning_rate": 1.0490347110688651e-06, "loss": 0.742, "step": 25715 }, { "epoch": 0.8008718667289428, "grad_norm": 2.121555805206299, "learning_rate": 1.048215280736832e-06, "loss": 0.8188, "step": 25720 }, { "epoch": 0.8010275572162541, "grad_norm": 2.1218302249908447, "learning_rate": 1.0473958504047986e-06, "loss": 0.7692, "step": 25725 }, { "epoch": 0.8011832477035653, "grad_norm": 2.20402455329895, "learning_rate": 1.0465764200727655e-06, "loss": 0.8351, "step": 25730 }, { "epoch": 0.8013389381908765, "grad_norm": 2.3809523582458496, "learning_rate": 1.0457569897407324e-06, "loss": 0.7521, "step": 25735 }, { "epoch": 0.8014946286781878, "grad_norm": 2.3011581897735596, "learning_rate": 1.044937559408699e-06, "loss": 0.7289, "step": 25740 }, { "epoch": 0.801650319165499, "grad_norm": 2.342151641845703, "learning_rate": 1.044118129076666e-06, "loss": 0.6901, "step": 25745 }, { "epoch": 0.8018060096528102, "grad_norm": 2.0236711502075195, "learning_rate": 1.0432986987446328e-06, "loss": 0.7699, "step": 25750 }, { "epoch": 0.8019617001401215, "grad_norm": 2.1257967948913574, "learning_rate": 1.0424792684125996e-06, "loss": 0.8221, "step": 25755 }, { "epoch": 0.8021173906274327, "grad_norm": 2.5789718627929688, "learning_rate": 1.0416598380805665e-06, "loss": 0.7747, "step": 25760 }, { "epoch": 0.8022730811147439, "grad_norm": 2.007265329360962, "learning_rate": 1.0408404077485334e-06, "loss": 0.7477, "step": 25765 }, { "epoch": 0.8024287716020552, "grad_norm": 2.014956474304199, "learning_rate": 1.0400209774165003e-06, "loss": 0.7242, "step": 25770 }, { "epoch": 0.8025844620893663, "grad_norm": 1.959507942199707, "learning_rate": 1.039201547084467e-06, "loss": 0.8369, "step": 25775 }, { "epoch": 0.8027401525766775, "grad_norm": 1.9143354892730713, "learning_rate": 1.0383821167524338e-06, "loss": 0.8259, "step": 25780 }, { "epoch": 0.8028958430639888, "grad_norm": 3.200979232788086, "learning_rate": 1.0375626864204006e-06, "loss": 0.7524, "step": 25785 }, { "epoch": 0.8030515335513, "grad_norm": 2.6325523853302, "learning_rate": 1.0367432560883675e-06, "loss": 0.7799, "step": 25790 }, { "epoch": 0.8032072240386112, "grad_norm": 3.5807456970214844, "learning_rate": 1.0359238257563344e-06, "loss": 0.778, "step": 25795 }, { "epoch": 0.8033629145259225, "grad_norm": 1.8701460361480713, "learning_rate": 1.035104395424301e-06, "loss": 0.7758, "step": 25800 }, { "epoch": 0.8035186050132337, "grad_norm": 2.8727521896362305, "learning_rate": 1.034284965092268e-06, "loss": 0.7806, "step": 25805 }, { "epoch": 0.8036742955005449, "grad_norm": 2.009977102279663, "learning_rate": 1.0334655347602348e-06, "loss": 0.67, "step": 25810 }, { "epoch": 0.8038299859878562, "grad_norm": 2.3575894832611084, "learning_rate": 1.0326461044282017e-06, "loss": 0.7732, "step": 25815 }, { "epoch": 0.8039856764751674, "grad_norm": 1.7611322402954102, "learning_rate": 1.0318266740961683e-06, "loss": 0.7228, "step": 25820 }, { "epoch": 0.8041413669624786, "grad_norm": 2.5127944946289062, "learning_rate": 1.0310072437641352e-06, "loss": 0.6932, "step": 25825 }, { "epoch": 0.8042970574497899, "grad_norm": 2.0232861042022705, "learning_rate": 1.030187813432102e-06, "loss": 0.8357, "step": 25830 }, { "epoch": 0.8044527479371011, "grad_norm": 2.2065935134887695, "learning_rate": 1.029368383100069e-06, "loss": 0.6751, "step": 25835 }, { "epoch": 0.8046084384244123, "grad_norm": 1.9709302186965942, "learning_rate": 1.0285489527680358e-06, "loss": 0.7829, "step": 25840 }, { "epoch": 0.8047641289117234, "grad_norm": 1.8576622009277344, "learning_rate": 1.0277295224360027e-06, "loss": 0.7399, "step": 25845 }, { "epoch": 0.8049198193990347, "grad_norm": 1.9996638298034668, "learning_rate": 1.0269100921039695e-06, "loss": 0.7176, "step": 25850 }, { "epoch": 0.8050755098863459, "grad_norm": 1.881115436553955, "learning_rate": 1.0260906617719362e-06, "loss": 0.6455, "step": 25855 }, { "epoch": 0.8052312003736571, "grad_norm": 2.192204713821411, "learning_rate": 1.025271231439903e-06, "loss": 0.784, "step": 25860 }, { "epoch": 0.8053868908609684, "grad_norm": 2.1954150199890137, "learning_rate": 1.02445180110787e-06, "loss": 0.8322, "step": 25865 }, { "epoch": 0.8055425813482796, "grad_norm": 2.296856641769409, "learning_rate": 1.0236323707758366e-06, "loss": 0.7588, "step": 25870 }, { "epoch": 0.8056982718355908, "grad_norm": 2.2025516033172607, "learning_rate": 1.0228129404438035e-06, "loss": 0.7605, "step": 25875 }, { "epoch": 0.8058539623229021, "grad_norm": 1.981283187866211, "learning_rate": 1.0219935101117703e-06, "loss": 0.7333, "step": 25880 }, { "epoch": 0.8060096528102133, "grad_norm": 2.14044189453125, "learning_rate": 1.0211740797797372e-06, "loss": 0.7891, "step": 25885 }, { "epoch": 0.8061653432975245, "grad_norm": 3.0097031593322754, "learning_rate": 1.020354649447704e-06, "loss": 0.7611, "step": 25890 }, { "epoch": 0.8063210337848358, "grad_norm": 2.4470572471618652, "learning_rate": 1.019535219115671e-06, "loss": 0.7602, "step": 25895 }, { "epoch": 0.806476724272147, "grad_norm": 2.177830219268799, "learning_rate": 1.0187157887836378e-06, "loss": 0.6994, "step": 25900 }, { "epoch": 0.8066324147594582, "grad_norm": 2.146712064743042, "learning_rate": 1.0178963584516045e-06, "loss": 0.7771, "step": 25905 }, { "epoch": 0.8067881052467695, "grad_norm": 2.3480961322784424, "learning_rate": 1.0170769281195713e-06, "loss": 0.7809, "step": 25910 }, { "epoch": 0.8069437957340806, "grad_norm": 1.9943315982818604, "learning_rate": 1.0162574977875382e-06, "loss": 0.7326, "step": 25915 }, { "epoch": 0.8070994862213918, "grad_norm": 2.2738771438598633, "learning_rate": 1.015438067455505e-06, "loss": 0.7799, "step": 25920 }, { "epoch": 0.8072551767087031, "grad_norm": 2.2590341567993164, "learning_rate": 1.014618637123472e-06, "loss": 0.7733, "step": 25925 }, { "epoch": 0.8074108671960143, "grad_norm": 2.3402369022369385, "learning_rate": 1.0137992067914388e-06, "loss": 0.7575, "step": 25930 }, { "epoch": 0.8075665576833255, "grad_norm": 2.2896976470947266, "learning_rate": 1.0129797764594055e-06, "loss": 0.8409, "step": 25935 }, { "epoch": 0.8077222481706368, "grad_norm": 2.0568344593048096, "learning_rate": 1.0121603461273723e-06, "loss": 0.7769, "step": 25940 }, { "epoch": 0.807877938657948, "grad_norm": 2.472515821456909, "learning_rate": 1.0113409157953392e-06, "loss": 0.786, "step": 25945 }, { "epoch": 0.8080336291452592, "grad_norm": 2.336329936981201, "learning_rate": 1.0105214854633059e-06, "loss": 0.7906, "step": 25950 }, { "epoch": 0.8081893196325705, "grad_norm": 2.3923919200897217, "learning_rate": 1.0097020551312727e-06, "loss": 0.7502, "step": 25955 }, { "epoch": 0.8083450101198817, "grad_norm": 2.1368627548217773, "learning_rate": 1.0088826247992396e-06, "loss": 0.7609, "step": 25960 }, { "epoch": 0.8085007006071929, "grad_norm": 1.9406920671463013, "learning_rate": 1.0080631944672065e-06, "loss": 0.708, "step": 25965 }, { "epoch": 0.8086563910945042, "grad_norm": 1.972440242767334, "learning_rate": 1.0072437641351733e-06, "loss": 0.7823, "step": 25970 }, { "epoch": 0.8088120815818154, "grad_norm": 2.2418465614318848, "learning_rate": 1.0064243338031402e-06, "loss": 0.7214, "step": 25975 }, { "epoch": 0.8089677720691266, "grad_norm": 1.8697375059127808, "learning_rate": 1.005604903471107e-06, "loss": 0.8442, "step": 25980 }, { "epoch": 0.8091234625564379, "grad_norm": 2.2707083225250244, "learning_rate": 1.0047854731390737e-06, "loss": 0.8029, "step": 25985 }, { "epoch": 0.809279153043749, "grad_norm": 2.5368728637695312, "learning_rate": 1.0039660428070406e-06, "loss": 0.7755, "step": 25990 }, { "epoch": 0.8094348435310602, "grad_norm": 2.1029844284057617, "learning_rate": 1.0031466124750075e-06, "loss": 0.6887, "step": 25995 }, { "epoch": 0.8095905340183714, "grad_norm": 2.1459505558013916, "learning_rate": 1.0023271821429743e-06, "loss": 0.8233, "step": 26000 }, { "epoch": 0.8097462245056827, "grad_norm": 2.1104776859283447, "learning_rate": 1.0015077518109412e-06, "loss": 0.795, "step": 26005 }, { "epoch": 0.8099019149929939, "grad_norm": 2.2224769592285156, "learning_rate": 1.0006883214789079e-06, "loss": 0.7626, "step": 26010 }, { "epoch": 0.8100576054803051, "grad_norm": 1.9640429019927979, "learning_rate": 9.998688911468747e-07, "loss": 0.7675, "step": 26015 }, { "epoch": 0.8102132959676164, "grad_norm": 2.6207706928253174, "learning_rate": 9.990494608148416e-07, "loss": 0.6988, "step": 26020 }, { "epoch": 0.8103689864549276, "grad_norm": 1.846863865852356, "learning_rate": 9.982300304828085e-07, "loss": 0.6869, "step": 26025 }, { "epoch": 0.8105246769422388, "grad_norm": 1.8285090923309326, "learning_rate": 9.974106001507751e-07, "loss": 0.7598, "step": 26030 }, { "epoch": 0.8106803674295501, "grad_norm": 2.29421067237854, "learning_rate": 9.96591169818742e-07, "loss": 0.7189, "step": 26035 }, { "epoch": 0.8108360579168613, "grad_norm": 2.032777786254883, "learning_rate": 9.957717394867089e-07, "loss": 0.7594, "step": 26040 }, { "epoch": 0.8109917484041725, "grad_norm": 2.371638774871826, "learning_rate": 9.949523091546757e-07, "loss": 0.7964, "step": 26045 }, { "epoch": 0.8111474388914838, "grad_norm": 2.0382332801818848, "learning_rate": 9.941328788226426e-07, "loss": 0.7438, "step": 26050 }, { "epoch": 0.811303129378795, "grad_norm": 2.2776331901550293, "learning_rate": 9.933134484906095e-07, "loss": 0.7609, "step": 26055 }, { "epoch": 0.8114588198661061, "grad_norm": 2.154581069946289, "learning_rate": 9.924940181585764e-07, "loss": 0.778, "step": 26060 }, { "epoch": 0.8116145103534174, "grad_norm": 2.116457462310791, "learning_rate": 9.91674587826543e-07, "loss": 0.7402, "step": 26065 }, { "epoch": 0.8117702008407286, "grad_norm": 2.191681146621704, "learning_rate": 9.908551574945099e-07, "loss": 0.7618, "step": 26070 }, { "epoch": 0.8119258913280398, "grad_norm": 1.77736496925354, "learning_rate": 9.900357271624767e-07, "loss": 0.7346, "step": 26075 }, { "epoch": 0.8120815818153511, "grad_norm": 2.227728843688965, "learning_rate": 9.892162968304436e-07, "loss": 0.6983, "step": 26080 }, { "epoch": 0.8122372723026623, "grad_norm": 2.0087804794311523, "learning_rate": 9.883968664984103e-07, "loss": 0.8069, "step": 26085 }, { "epoch": 0.8123929627899735, "grad_norm": 2.1397624015808105, "learning_rate": 9.875774361663771e-07, "loss": 0.7487, "step": 26090 }, { "epoch": 0.8125486532772848, "grad_norm": 2.087974786758423, "learning_rate": 9.86758005834344e-07, "loss": 0.676, "step": 26095 }, { "epoch": 0.812704343764596, "grad_norm": 2.251613140106201, "learning_rate": 9.859385755023109e-07, "loss": 0.759, "step": 26100 }, { "epoch": 0.8128600342519072, "grad_norm": 1.995153546333313, "learning_rate": 9.851191451702778e-07, "loss": 0.7887, "step": 26105 }, { "epoch": 0.8130157247392185, "grad_norm": 1.8927668333053589, "learning_rate": 9.842997148382446e-07, "loss": 0.714, "step": 26110 }, { "epoch": 0.8131714152265297, "grad_norm": 2.1668665409088135, "learning_rate": 9.834802845062113e-07, "loss": 0.6995, "step": 26115 }, { "epoch": 0.8133271057138409, "grad_norm": 2.173227548599243, "learning_rate": 9.826608541741782e-07, "loss": 0.774, "step": 26120 }, { "epoch": 0.8134827962011522, "grad_norm": 2.8788177967071533, "learning_rate": 9.81841423842145e-07, "loss": 0.6896, "step": 26125 }, { "epoch": 0.8136384866884633, "grad_norm": 1.714202642440796, "learning_rate": 9.810219935101119e-07, "loss": 0.8344, "step": 26130 }, { "epoch": 0.8137941771757745, "grad_norm": 1.8634637594223022, "learning_rate": 9.802025631780788e-07, "loss": 0.759, "step": 26135 }, { "epoch": 0.8139498676630857, "grad_norm": 2.20399808883667, "learning_rate": 9.793831328460456e-07, "loss": 0.7446, "step": 26140 }, { "epoch": 0.814105558150397, "grad_norm": 2.160611391067505, "learning_rate": 9.785637025140123e-07, "loss": 0.7689, "step": 26145 }, { "epoch": 0.8142612486377082, "grad_norm": 2.1439220905303955, "learning_rate": 9.777442721819792e-07, "loss": 0.6891, "step": 26150 }, { "epoch": 0.8144169391250194, "grad_norm": 2.020920753479004, "learning_rate": 9.76924841849946e-07, "loss": 0.7349, "step": 26155 }, { "epoch": 0.8145726296123307, "grad_norm": 2.235262870788574, "learning_rate": 9.761054115179127e-07, "loss": 0.7255, "step": 26160 }, { "epoch": 0.8147283200996419, "grad_norm": 1.936610221862793, "learning_rate": 9.752859811858796e-07, "loss": 0.8047, "step": 26165 }, { "epoch": 0.8148840105869531, "grad_norm": 2.6849803924560547, "learning_rate": 9.744665508538464e-07, "loss": 0.7649, "step": 26170 }, { "epoch": 0.8150397010742644, "grad_norm": 2.3608007431030273, "learning_rate": 9.736471205218133e-07, "loss": 0.8175, "step": 26175 }, { "epoch": 0.8151953915615756, "grad_norm": 2.205418348312378, "learning_rate": 9.728276901897802e-07, "loss": 0.7661, "step": 26180 }, { "epoch": 0.8153510820488868, "grad_norm": 2.0344667434692383, "learning_rate": 9.72008259857747e-07, "loss": 0.7974, "step": 26185 }, { "epoch": 0.8155067725361981, "grad_norm": 4.768444538116455, "learning_rate": 9.71188829525714e-07, "loss": 0.7731, "step": 26190 }, { "epoch": 0.8156624630235093, "grad_norm": 2.0617926120758057, "learning_rate": 9.703693991936806e-07, "loss": 0.7739, "step": 26195 }, { "epoch": 0.8158181535108205, "grad_norm": 2.1619327068328857, "learning_rate": 9.695499688616474e-07, "loss": 0.8281, "step": 26200 }, { "epoch": 0.8159738439981317, "grad_norm": 2.1789324283599854, "learning_rate": 9.687305385296143e-07, "loss": 0.7987, "step": 26205 }, { "epoch": 0.8161295344854429, "grad_norm": 2.2692174911499023, "learning_rate": 9.679111081975812e-07, "loss": 0.7367, "step": 26210 }, { "epoch": 0.8162852249727541, "grad_norm": 2.4915878772735596, "learning_rate": 9.67091677865548e-07, "loss": 0.7326, "step": 26215 }, { "epoch": 0.8164409154600654, "grad_norm": 1.9818333387374878, "learning_rate": 9.66272247533515e-07, "loss": 0.8201, "step": 26220 }, { "epoch": 0.8165966059473766, "grad_norm": 2.057283639907837, "learning_rate": 9.654528172014816e-07, "loss": 0.7077, "step": 26225 }, { "epoch": 0.8167522964346878, "grad_norm": 2.4982666969299316, "learning_rate": 9.646333868694484e-07, "loss": 0.8093, "step": 26230 }, { "epoch": 0.8169079869219991, "grad_norm": 1.7728463411331177, "learning_rate": 9.638139565374153e-07, "loss": 0.8035, "step": 26235 }, { "epoch": 0.8170636774093103, "grad_norm": 1.9995286464691162, "learning_rate": 9.62994526205382e-07, "loss": 0.747, "step": 26240 }, { "epoch": 0.8172193678966215, "grad_norm": 1.7422456741333008, "learning_rate": 9.621750958733488e-07, "loss": 0.781, "step": 26245 }, { "epoch": 0.8173750583839328, "grad_norm": 2.17854380607605, "learning_rate": 9.613556655413157e-07, "loss": 0.7666, "step": 26250 }, { "epoch": 0.817530748871244, "grad_norm": 1.8393608331680298, "learning_rate": 9.605362352092826e-07, "loss": 0.7374, "step": 26255 }, { "epoch": 0.8176864393585552, "grad_norm": 2.0660290718078613, "learning_rate": 9.597168048772494e-07, "loss": 0.7947, "step": 26260 }, { "epoch": 0.8178421298458665, "grad_norm": 2.6689555644989014, "learning_rate": 9.588973745452163e-07, "loss": 0.8061, "step": 26265 }, { "epoch": 0.8179978203331777, "grad_norm": 2.213235855102539, "learning_rate": 9.580779442131832e-07, "loss": 0.7641, "step": 26270 }, { "epoch": 0.8181535108204888, "grad_norm": 2.214754581451416, "learning_rate": 9.572585138811498e-07, "loss": 0.7464, "step": 26275 }, { "epoch": 0.8183092013078, "grad_norm": 2.3477373123168945, "learning_rate": 9.564390835491167e-07, "loss": 0.823, "step": 26280 }, { "epoch": 0.8184648917951113, "grad_norm": 2.2980947494506836, "learning_rate": 9.556196532170836e-07, "loss": 0.7662, "step": 26285 }, { "epoch": 0.8186205822824225, "grad_norm": 1.9231650829315186, "learning_rate": 9.548002228850504e-07, "loss": 0.6744, "step": 26290 }, { "epoch": 0.8187762727697337, "grad_norm": 2.509333372116089, "learning_rate": 9.539807925530173e-07, "loss": 0.8299, "step": 26295 }, { "epoch": 0.818931963257045, "grad_norm": 2.207043409347534, "learning_rate": 9.53161362220984e-07, "loss": 0.7633, "step": 26300 }, { "epoch": 0.8190876537443562, "grad_norm": 2.3046860694885254, "learning_rate": 9.523419318889508e-07, "loss": 0.8074, "step": 26305 }, { "epoch": 0.8192433442316674, "grad_norm": 2.239795446395874, "learning_rate": 9.515225015569177e-07, "loss": 0.6966, "step": 26310 }, { "epoch": 0.8193990347189787, "grad_norm": 2.04103684425354, "learning_rate": 9.507030712248845e-07, "loss": 0.755, "step": 26315 }, { "epoch": 0.8195547252062899, "grad_norm": 2.2946088314056396, "learning_rate": 9.498836408928513e-07, "loss": 0.7378, "step": 26320 }, { "epoch": 0.8197104156936011, "grad_norm": 2.287757396697998, "learning_rate": 9.490642105608182e-07, "loss": 0.8012, "step": 26325 }, { "epoch": 0.8198661061809124, "grad_norm": 3.1999943256378174, "learning_rate": 9.48244780228785e-07, "loss": 0.8385, "step": 26330 }, { "epoch": 0.8200217966682236, "grad_norm": 2.138862133026123, "learning_rate": 9.474253498967518e-07, "loss": 0.6858, "step": 26335 }, { "epoch": 0.8201774871555348, "grad_norm": 2.091738224029541, "learning_rate": 9.466059195647187e-07, "loss": 0.827, "step": 26340 }, { "epoch": 0.820333177642846, "grad_norm": 2.9151811599731445, "learning_rate": 9.457864892326856e-07, "loss": 0.7973, "step": 26345 }, { "epoch": 0.8204888681301572, "grad_norm": 2.41434383392334, "learning_rate": 9.449670589006523e-07, "loss": 0.7829, "step": 26350 }, { "epoch": 0.8206445586174684, "grad_norm": 2.1718876361846924, "learning_rate": 9.441476285686192e-07, "loss": 0.761, "step": 26355 }, { "epoch": 0.8208002491047797, "grad_norm": 2.3561160564422607, "learning_rate": 9.433281982365861e-07, "loss": 0.8114, "step": 26360 }, { "epoch": 0.8209559395920909, "grad_norm": 2.0430727005004883, "learning_rate": 9.425087679045529e-07, "loss": 0.7304, "step": 26365 }, { "epoch": 0.8211116300794021, "grad_norm": 2.69498610496521, "learning_rate": 9.416893375725196e-07, "loss": 0.8173, "step": 26370 }, { "epoch": 0.8212673205667134, "grad_norm": 2.3804585933685303, "learning_rate": 9.408699072404865e-07, "loss": 0.718, "step": 26375 }, { "epoch": 0.8214230110540246, "grad_norm": 2.135157346725464, "learning_rate": 9.400504769084532e-07, "loss": 0.7667, "step": 26380 }, { "epoch": 0.8215787015413358, "grad_norm": 2.050194501876831, "learning_rate": 9.392310465764201e-07, "loss": 0.7954, "step": 26385 }, { "epoch": 0.8217343920286471, "grad_norm": 2.363369941711426, "learning_rate": 9.38411616244387e-07, "loss": 0.7765, "step": 26390 }, { "epoch": 0.8218900825159583, "grad_norm": 2.0716428756713867, "learning_rate": 9.375921859123538e-07, "loss": 0.7499, "step": 26395 }, { "epoch": 0.8220457730032695, "grad_norm": 1.9038609266281128, "learning_rate": 9.367727555803206e-07, "loss": 0.7148, "step": 26400 }, { "epoch": 0.8222014634905808, "grad_norm": 2.3439271450042725, "learning_rate": 9.359533252482875e-07, "loss": 0.8016, "step": 26405 }, { "epoch": 0.822357153977892, "grad_norm": 2.023158311843872, "learning_rate": 9.351338949162544e-07, "loss": 0.7692, "step": 26410 }, { "epoch": 0.8225128444652032, "grad_norm": 2.3755247592926025, "learning_rate": 9.343144645842211e-07, "loss": 0.8295, "step": 26415 }, { "epoch": 0.8226685349525144, "grad_norm": 1.973608374595642, "learning_rate": 9.33495034252188e-07, "loss": 0.7968, "step": 26420 }, { "epoch": 0.8228242254398256, "grad_norm": 2.151156187057495, "learning_rate": 9.326756039201549e-07, "loss": 0.7736, "step": 26425 }, { "epoch": 0.8229799159271368, "grad_norm": 2.091081380844116, "learning_rate": 9.318561735881216e-07, "loss": 0.7197, "step": 26430 }, { "epoch": 0.823135606414448, "grad_norm": 2.135293483734131, "learning_rate": 9.310367432560885e-07, "loss": 0.7878, "step": 26435 }, { "epoch": 0.8232912969017593, "grad_norm": 1.9665604829788208, "learning_rate": 9.302173129240552e-07, "loss": 0.7499, "step": 26440 }, { "epoch": 0.8234469873890705, "grad_norm": 2.051650047302246, "learning_rate": 9.29397882592022e-07, "loss": 0.7436, "step": 26445 }, { "epoch": 0.8236026778763818, "grad_norm": 2.5744988918304443, "learning_rate": 9.285784522599889e-07, "loss": 0.7177, "step": 26450 }, { "epoch": 0.823758368363693, "grad_norm": 1.950975775718689, "learning_rate": 9.277590219279558e-07, "loss": 0.8094, "step": 26455 }, { "epoch": 0.8239140588510042, "grad_norm": 1.8937424421310425, "learning_rate": 9.269395915959225e-07, "loss": 0.7749, "step": 26460 }, { "epoch": 0.8240697493383154, "grad_norm": 2.2445015907287598, "learning_rate": 9.261201612638894e-07, "loss": 0.7691, "step": 26465 }, { "epoch": 0.8242254398256267, "grad_norm": 2.2288296222686768, "learning_rate": 9.253007309318563e-07, "loss": 0.7735, "step": 26470 }, { "epoch": 0.8243811303129379, "grad_norm": 2.1937780380249023, "learning_rate": 9.24481300599823e-07, "loss": 0.7891, "step": 26475 }, { "epoch": 0.8245368208002491, "grad_norm": 1.9325820207595825, "learning_rate": 9.236618702677899e-07, "loss": 0.7144, "step": 26480 }, { "epoch": 0.8246925112875604, "grad_norm": 2.385801076889038, "learning_rate": 9.228424399357568e-07, "loss": 0.7371, "step": 26485 }, { "epoch": 0.8248482017748715, "grad_norm": 2.0418214797973633, "learning_rate": 9.220230096037236e-07, "loss": 0.8603, "step": 26490 }, { "epoch": 0.8250038922621827, "grad_norm": 2.220381498336792, "learning_rate": 9.212035792716904e-07, "loss": 0.7702, "step": 26495 }, { "epoch": 0.825159582749494, "grad_norm": 3.00784969329834, "learning_rate": 9.203841489396573e-07, "loss": 0.8198, "step": 26500 }, { "epoch": 0.8253152732368052, "grad_norm": 2.7064006328582764, "learning_rate": 9.195647186076241e-07, "loss": 0.734, "step": 26505 }, { "epoch": 0.8254709637241164, "grad_norm": 2.342724084854126, "learning_rate": 9.187452882755909e-07, "loss": 0.7452, "step": 26510 }, { "epoch": 0.8256266542114277, "grad_norm": 2.0162785053253174, "learning_rate": 9.179258579435577e-07, "loss": 0.7961, "step": 26515 }, { "epoch": 0.8257823446987389, "grad_norm": 2.1385650634765625, "learning_rate": 9.171064276115245e-07, "loss": 0.7542, "step": 26520 }, { "epoch": 0.8259380351860501, "grad_norm": 2.2224841117858887, "learning_rate": 9.162869972794913e-07, "loss": 0.7776, "step": 26525 }, { "epoch": 0.8260937256733614, "grad_norm": 2.436129570007324, "learning_rate": 9.154675669474582e-07, "loss": 0.6841, "step": 26530 }, { "epoch": 0.8262494161606726, "grad_norm": 1.9100204706192017, "learning_rate": 9.14648136615425e-07, "loss": 0.8241, "step": 26535 }, { "epoch": 0.8264051066479838, "grad_norm": 3.6621670722961426, "learning_rate": 9.138287062833918e-07, "loss": 0.8065, "step": 26540 }, { "epoch": 0.8265607971352951, "grad_norm": 2.0523648262023926, "learning_rate": 9.130092759513587e-07, "loss": 0.7669, "step": 26545 }, { "epoch": 0.8267164876226063, "grad_norm": 2.023582935333252, "learning_rate": 9.121898456193255e-07, "loss": 0.7634, "step": 26550 }, { "epoch": 0.8268721781099175, "grad_norm": 2.1697351932525635, "learning_rate": 9.113704152872924e-07, "loss": 0.7535, "step": 26555 }, { "epoch": 0.8270278685972287, "grad_norm": 1.975532054901123, "learning_rate": 9.105509849552592e-07, "loss": 0.7435, "step": 26560 }, { "epoch": 0.8271835590845399, "grad_norm": 2.743006944656372, "learning_rate": 9.09731554623226e-07, "loss": 0.7214, "step": 26565 }, { "epoch": 0.8273392495718511, "grad_norm": 2.269986152648926, "learning_rate": 9.089121242911929e-07, "loss": 0.7815, "step": 26570 }, { "epoch": 0.8274949400591624, "grad_norm": 2.1681549549102783, "learning_rate": 9.080926939591597e-07, "loss": 0.7727, "step": 26575 }, { "epoch": 0.8276506305464736, "grad_norm": 2.188749313354492, "learning_rate": 9.072732636271265e-07, "loss": 0.708, "step": 26580 }, { "epoch": 0.8278063210337848, "grad_norm": 1.9846745729446411, "learning_rate": 9.064538332950933e-07, "loss": 0.6913, "step": 26585 }, { "epoch": 0.827962011521096, "grad_norm": 1.9287415742874146, "learning_rate": 9.056344029630601e-07, "loss": 0.7214, "step": 26590 }, { "epoch": 0.8281177020084073, "grad_norm": 2.142470121383667, "learning_rate": 9.048149726310269e-07, "loss": 0.8483, "step": 26595 }, { "epoch": 0.8282733924957185, "grad_norm": 1.9769644737243652, "learning_rate": 9.039955422989938e-07, "loss": 0.7562, "step": 26600 }, { "epoch": 0.8284290829830298, "grad_norm": 2.2699148654937744, "learning_rate": 9.031761119669606e-07, "loss": 0.813, "step": 26605 }, { "epoch": 0.828584773470341, "grad_norm": 2.008286714553833, "learning_rate": 9.023566816349274e-07, "loss": 0.7464, "step": 26610 }, { "epoch": 0.8287404639576522, "grad_norm": 1.9979358911514282, "learning_rate": 9.015372513028943e-07, "loss": 0.8252, "step": 26615 }, { "epoch": 0.8288961544449635, "grad_norm": 2.429065465927124, "learning_rate": 9.007178209708612e-07, "loss": 0.7031, "step": 26620 }, { "epoch": 0.8290518449322747, "grad_norm": 2.5820722579956055, "learning_rate": 8.99898390638828e-07, "loss": 0.741, "step": 26625 }, { "epoch": 0.8292075354195859, "grad_norm": 2.2543094158172607, "learning_rate": 8.990789603067948e-07, "loss": 0.7075, "step": 26630 }, { "epoch": 0.829363225906897, "grad_norm": 2.1303975582122803, "learning_rate": 8.982595299747617e-07, "loss": 0.7426, "step": 26635 }, { "epoch": 0.8295189163942083, "grad_norm": 2.4971976280212402, "learning_rate": 8.974400996427285e-07, "loss": 0.7991, "step": 26640 }, { "epoch": 0.8296746068815195, "grad_norm": 2.229865312576294, "learning_rate": 8.966206693106953e-07, "loss": 0.7607, "step": 26645 }, { "epoch": 0.8298302973688307, "grad_norm": 2.1556899547576904, "learning_rate": 8.958012389786622e-07, "loss": 0.7882, "step": 26650 }, { "epoch": 0.829985987856142, "grad_norm": 2.2198398113250732, "learning_rate": 8.949818086466288e-07, "loss": 0.7149, "step": 26655 }, { "epoch": 0.8301416783434532, "grad_norm": 2.5981948375701904, "learning_rate": 8.941623783145957e-07, "loss": 0.7824, "step": 26660 }, { "epoch": 0.8302973688307644, "grad_norm": 1.88547945022583, "learning_rate": 8.933429479825626e-07, "loss": 0.7522, "step": 26665 }, { "epoch": 0.8304530593180757, "grad_norm": 1.8825998306274414, "learning_rate": 8.925235176505294e-07, "loss": 0.8471, "step": 26670 }, { "epoch": 0.8306087498053869, "grad_norm": 2.215773344039917, "learning_rate": 8.917040873184962e-07, "loss": 0.7543, "step": 26675 }, { "epoch": 0.8307644402926981, "grad_norm": 2.4234700202941895, "learning_rate": 8.908846569864631e-07, "loss": 0.7, "step": 26680 }, { "epoch": 0.8309201307800094, "grad_norm": 1.8289817571640015, "learning_rate": 8.900652266544299e-07, "loss": 0.728, "step": 26685 }, { "epoch": 0.8310758212673206, "grad_norm": 2.785400629043579, "learning_rate": 8.892457963223967e-07, "loss": 0.7863, "step": 26690 }, { "epoch": 0.8312315117546318, "grad_norm": 1.7767024040222168, "learning_rate": 8.884263659903636e-07, "loss": 0.7584, "step": 26695 }, { "epoch": 0.8313872022419431, "grad_norm": 1.9354057312011719, "learning_rate": 8.876069356583305e-07, "loss": 0.7887, "step": 26700 }, { "epoch": 0.8315428927292542, "grad_norm": 2.313711643218994, "learning_rate": 8.867875053262972e-07, "loss": 0.7893, "step": 26705 }, { "epoch": 0.8316985832165654, "grad_norm": 2.040217161178589, "learning_rate": 8.859680749942641e-07, "loss": 0.7808, "step": 26710 }, { "epoch": 0.8318542737038767, "grad_norm": 2.0364725589752197, "learning_rate": 8.85148644662231e-07, "loss": 0.7525, "step": 26715 }, { "epoch": 0.8320099641911879, "grad_norm": 2.545581340789795, "learning_rate": 8.843292143301977e-07, "loss": 0.7817, "step": 26720 }, { "epoch": 0.8321656546784991, "grad_norm": 3.416593313217163, "learning_rate": 8.835097839981646e-07, "loss": 0.8072, "step": 26725 }, { "epoch": 0.8323213451658104, "grad_norm": 1.7585866451263428, "learning_rate": 8.826903536661314e-07, "loss": 0.7991, "step": 26730 }, { "epoch": 0.8324770356531216, "grad_norm": 1.8035821914672852, "learning_rate": 8.818709233340981e-07, "loss": 0.7188, "step": 26735 }, { "epoch": 0.8326327261404328, "grad_norm": 1.9120725393295288, "learning_rate": 8.81051493002065e-07, "loss": 0.7338, "step": 26740 }, { "epoch": 0.832788416627744, "grad_norm": 2.3484795093536377, "learning_rate": 8.802320626700319e-07, "loss": 0.7854, "step": 26745 }, { "epoch": 0.8329441071150553, "grad_norm": 2.949213981628418, "learning_rate": 8.794126323379986e-07, "loss": 0.8022, "step": 26750 }, { "epoch": 0.8330997976023665, "grad_norm": 2.5386414527893066, "learning_rate": 8.785932020059655e-07, "loss": 0.7585, "step": 26755 }, { "epoch": 0.8332554880896778, "grad_norm": 2.3325161933898926, "learning_rate": 8.777737716739324e-07, "loss": 0.8384, "step": 26760 }, { "epoch": 0.833411178576989, "grad_norm": 2.16542387008667, "learning_rate": 8.769543413418992e-07, "loss": 0.7853, "step": 26765 }, { "epoch": 0.8335668690643002, "grad_norm": 2.325028657913208, "learning_rate": 8.76134911009866e-07, "loss": 0.6745, "step": 26770 }, { "epoch": 0.8337225595516113, "grad_norm": 2.243008613586426, "learning_rate": 8.753154806778329e-07, "loss": 0.7386, "step": 26775 }, { "epoch": 0.8338782500389226, "grad_norm": 1.818547248840332, "learning_rate": 8.744960503457997e-07, "loss": 0.7869, "step": 26780 }, { "epoch": 0.8340339405262338, "grad_norm": 2.047219753265381, "learning_rate": 8.736766200137665e-07, "loss": 0.7227, "step": 26785 }, { "epoch": 0.834189631013545, "grad_norm": 2.4566946029663086, "learning_rate": 8.728571896817334e-07, "loss": 0.7734, "step": 26790 }, { "epoch": 0.8343453215008563, "grad_norm": 2.321601629257202, "learning_rate": 8.720377593497002e-07, "loss": 0.8031, "step": 26795 }, { "epoch": 0.8345010119881675, "grad_norm": 2.144247531890869, "learning_rate": 8.712183290176669e-07, "loss": 0.851, "step": 26800 }, { "epoch": 0.8346567024754787, "grad_norm": 1.8075222969055176, "learning_rate": 8.703988986856338e-07, "loss": 0.8207, "step": 26805 }, { "epoch": 0.83481239296279, "grad_norm": 1.794143557548523, "learning_rate": 8.695794683536006e-07, "loss": 0.8146, "step": 26810 }, { "epoch": 0.8349680834501012, "grad_norm": 2.0541980266571045, "learning_rate": 8.687600380215674e-07, "loss": 0.7915, "step": 26815 }, { "epoch": 0.8351237739374124, "grad_norm": 1.852239966392517, "learning_rate": 8.679406076895343e-07, "loss": 0.7373, "step": 26820 }, { "epoch": 0.8352794644247237, "grad_norm": 2.5159850120544434, "learning_rate": 8.671211773575011e-07, "loss": 0.8214, "step": 26825 }, { "epoch": 0.8354351549120349, "grad_norm": 2.0085508823394775, "learning_rate": 8.66301747025468e-07, "loss": 0.6895, "step": 26830 }, { "epoch": 0.8355908453993461, "grad_norm": 2.1273462772369385, "learning_rate": 8.654823166934348e-07, "loss": 0.8021, "step": 26835 }, { "epoch": 0.8357465358866574, "grad_norm": 1.8658360242843628, "learning_rate": 8.646628863614016e-07, "loss": 0.8082, "step": 26840 }, { "epoch": 0.8359022263739686, "grad_norm": 2.438211679458618, "learning_rate": 8.638434560293685e-07, "loss": 0.7572, "step": 26845 }, { "epoch": 0.8360579168612797, "grad_norm": 2.411372184753418, "learning_rate": 8.630240256973353e-07, "loss": 0.8198, "step": 26850 }, { "epoch": 0.836213607348591, "grad_norm": 2.4395811557769775, "learning_rate": 8.622045953653021e-07, "loss": 0.7463, "step": 26855 }, { "epoch": 0.8363692978359022, "grad_norm": 2.06900691986084, "learning_rate": 8.61385165033269e-07, "loss": 0.7576, "step": 26860 }, { "epoch": 0.8365249883232134, "grad_norm": 1.9512985944747925, "learning_rate": 8.605657347012359e-07, "loss": 0.7668, "step": 26865 }, { "epoch": 0.8366806788105247, "grad_norm": 2.301832675933838, "learning_rate": 8.597463043692025e-07, "loss": 0.7026, "step": 26870 }, { "epoch": 0.8368363692978359, "grad_norm": 2.0488061904907227, "learning_rate": 8.589268740371694e-07, "loss": 0.7691, "step": 26875 }, { "epoch": 0.8369920597851471, "grad_norm": 2.2643346786499023, "learning_rate": 8.581074437051362e-07, "loss": 0.6993, "step": 26880 }, { "epoch": 0.8371477502724584, "grad_norm": 2.0628087520599365, "learning_rate": 8.57288013373103e-07, "loss": 0.7158, "step": 26885 }, { "epoch": 0.8373034407597696, "grad_norm": 2.2797980308532715, "learning_rate": 8.564685830410699e-07, "loss": 0.7885, "step": 26890 }, { "epoch": 0.8374591312470808, "grad_norm": 1.9536315202713013, "learning_rate": 8.556491527090367e-07, "loss": 0.8059, "step": 26895 }, { "epoch": 0.8376148217343921, "grad_norm": 1.9663214683532715, "learning_rate": 8.548297223770035e-07, "loss": 0.8249, "step": 26900 }, { "epoch": 0.8377705122217033, "grad_norm": 2.357137680053711, "learning_rate": 8.540102920449704e-07, "loss": 0.749, "step": 26905 }, { "epoch": 0.8379262027090145, "grad_norm": 2.332812786102295, "learning_rate": 8.531908617129373e-07, "loss": 0.8071, "step": 26910 }, { "epoch": 0.8380818931963258, "grad_norm": 2.3659348487854004, "learning_rate": 8.52371431380904e-07, "loss": 0.8322, "step": 26915 }, { "epoch": 0.8382375836836369, "grad_norm": 2.194225549697876, "learning_rate": 8.515520010488709e-07, "loss": 0.7637, "step": 26920 }, { "epoch": 0.8383932741709481, "grad_norm": 2.442631483078003, "learning_rate": 8.507325707168378e-07, "loss": 0.745, "step": 26925 }, { "epoch": 0.8385489646582593, "grad_norm": 1.7978342771530151, "learning_rate": 8.499131403848046e-07, "loss": 0.7641, "step": 26930 }, { "epoch": 0.8387046551455706, "grad_norm": 1.915193796157837, "learning_rate": 8.490937100527714e-07, "loss": 0.7195, "step": 26935 }, { "epoch": 0.8388603456328818, "grad_norm": 2.5853030681610107, "learning_rate": 8.482742797207383e-07, "loss": 0.758, "step": 26940 }, { "epoch": 0.839016036120193, "grad_norm": 2.2270615100860596, "learning_rate": 8.47454849388705e-07, "loss": 0.7198, "step": 26945 }, { "epoch": 0.8391717266075043, "grad_norm": 2.290198564529419, "learning_rate": 8.466354190566718e-07, "loss": 0.7282, "step": 26950 }, { "epoch": 0.8393274170948155, "grad_norm": 1.9831926822662354, "learning_rate": 8.458159887246387e-07, "loss": 0.7225, "step": 26955 }, { "epoch": 0.8394831075821267, "grad_norm": 1.8943300247192383, "learning_rate": 8.449965583926055e-07, "loss": 0.7473, "step": 26960 }, { "epoch": 0.839638798069438, "grad_norm": 2.044649124145508, "learning_rate": 8.441771280605723e-07, "loss": 0.7681, "step": 26965 }, { "epoch": 0.8397944885567492, "grad_norm": 1.76748788356781, "learning_rate": 8.433576977285392e-07, "loss": 0.8355, "step": 26970 }, { "epoch": 0.8399501790440604, "grad_norm": 2.1516692638397217, "learning_rate": 8.425382673965061e-07, "loss": 0.8129, "step": 26975 }, { "epoch": 0.8401058695313717, "grad_norm": 1.8874638080596924, "learning_rate": 8.417188370644728e-07, "loss": 0.7082, "step": 26980 }, { "epoch": 0.8402615600186829, "grad_norm": 2.2084429264068604, "learning_rate": 8.408994067324397e-07, "loss": 0.7097, "step": 26985 }, { "epoch": 0.840417250505994, "grad_norm": 2.0293147563934326, "learning_rate": 8.400799764004066e-07, "loss": 0.7691, "step": 26990 }, { "epoch": 0.8405729409933053, "grad_norm": 2.160452127456665, "learning_rate": 8.392605460683733e-07, "loss": 0.7458, "step": 26995 }, { "epoch": 0.8407286314806165, "grad_norm": 2.155660629272461, "learning_rate": 8.384411157363402e-07, "loss": 0.7241, "step": 27000 }, { "epoch": 0.8408843219679277, "grad_norm": 2.0681285858154297, "learning_rate": 8.376216854043071e-07, "loss": 0.7884, "step": 27005 }, { "epoch": 0.841040012455239, "grad_norm": 2.1804251670837402, "learning_rate": 8.368022550722739e-07, "loss": 0.8277, "step": 27010 }, { "epoch": 0.8411957029425502, "grad_norm": 2.3699796199798584, "learning_rate": 8.359828247402406e-07, "loss": 0.7611, "step": 27015 }, { "epoch": 0.8413513934298614, "grad_norm": 2.206935167312622, "learning_rate": 8.351633944082075e-07, "loss": 0.7312, "step": 27020 }, { "epoch": 0.8415070839171727, "grad_norm": 1.9490996599197388, "learning_rate": 8.343439640761742e-07, "loss": 0.7547, "step": 27025 }, { "epoch": 0.8416627744044839, "grad_norm": 2.2800159454345703, "learning_rate": 8.335245337441411e-07, "loss": 0.7273, "step": 27030 }, { "epoch": 0.8418184648917951, "grad_norm": 2.359151840209961, "learning_rate": 8.32705103412108e-07, "loss": 0.7732, "step": 27035 }, { "epoch": 0.8419741553791064, "grad_norm": 1.7327138185501099, "learning_rate": 8.318856730800748e-07, "loss": 0.7318, "step": 27040 }, { "epoch": 0.8421298458664176, "grad_norm": 2.5118768215179443, "learning_rate": 8.310662427480416e-07, "loss": 0.7137, "step": 27045 }, { "epoch": 0.8422855363537288, "grad_norm": 2.0703046321868896, "learning_rate": 8.302468124160085e-07, "loss": 0.7258, "step": 27050 }, { "epoch": 0.8424412268410401, "grad_norm": 2.9356772899627686, "learning_rate": 8.294273820839753e-07, "loss": 0.6726, "step": 27055 }, { "epoch": 0.8425969173283513, "grad_norm": 2.355348587036133, "learning_rate": 8.286079517519421e-07, "loss": 0.8198, "step": 27060 }, { "epoch": 0.8427526078156624, "grad_norm": 2.260244607925415, "learning_rate": 8.27788521419909e-07, "loss": 0.78, "step": 27065 }, { "epoch": 0.8429082983029736, "grad_norm": 1.9534636735916138, "learning_rate": 8.269690910878758e-07, "loss": 0.7906, "step": 27070 }, { "epoch": 0.8430639887902849, "grad_norm": 1.9608858823776245, "learning_rate": 8.261496607558427e-07, "loss": 0.7358, "step": 27075 }, { "epoch": 0.8432196792775961, "grad_norm": 2.1504316329956055, "learning_rate": 8.253302304238095e-07, "loss": 0.7991, "step": 27080 }, { "epoch": 0.8433753697649073, "grad_norm": 1.7418909072875977, "learning_rate": 8.245108000917762e-07, "loss": 0.6805, "step": 27085 }, { "epoch": 0.8435310602522186, "grad_norm": 2.3683254718780518, "learning_rate": 8.23691369759743e-07, "loss": 0.8022, "step": 27090 }, { "epoch": 0.8436867507395298, "grad_norm": 2.204089879989624, "learning_rate": 8.228719394277099e-07, "loss": 0.7289, "step": 27095 }, { "epoch": 0.843842441226841, "grad_norm": 2.294771671295166, "learning_rate": 8.220525090956767e-07, "loss": 0.7011, "step": 27100 }, { "epoch": 0.8439981317141523, "grad_norm": 2.5825881958007812, "learning_rate": 8.212330787636435e-07, "loss": 0.836, "step": 27105 }, { "epoch": 0.8441538222014635, "grad_norm": 2.1410093307495117, "learning_rate": 8.204136484316104e-07, "loss": 0.7959, "step": 27110 }, { "epoch": 0.8443095126887747, "grad_norm": 2.094064474105835, "learning_rate": 8.195942180995772e-07, "loss": 0.763, "step": 27115 }, { "epoch": 0.844465203176086, "grad_norm": 2.463285446166992, "learning_rate": 8.187747877675441e-07, "loss": 0.7999, "step": 27120 }, { "epoch": 0.8446208936633972, "grad_norm": 3.1888537406921387, "learning_rate": 8.179553574355109e-07, "loss": 0.7737, "step": 27125 }, { "epoch": 0.8447765841507084, "grad_norm": 1.9086107015609741, "learning_rate": 8.171359271034777e-07, "loss": 0.711, "step": 27130 }, { "epoch": 0.8449322746380196, "grad_norm": 2.197488784790039, "learning_rate": 8.163164967714446e-07, "loss": 0.7038, "step": 27135 }, { "epoch": 0.8450879651253308, "grad_norm": 2.094247341156006, "learning_rate": 8.154970664394114e-07, "loss": 0.7614, "step": 27140 }, { "epoch": 0.845243655612642, "grad_norm": 2.637784957885742, "learning_rate": 8.146776361073782e-07, "loss": 0.7658, "step": 27145 }, { "epoch": 0.8453993460999533, "grad_norm": 2.0083093643188477, "learning_rate": 8.138582057753451e-07, "loss": 0.7874, "step": 27150 }, { "epoch": 0.8455550365872645, "grad_norm": 2.005587339401245, "learning_rate": 8.13038775443312e-07, "loss": 0.7735, "step": 27155 }, { "epoch": 0.8457107270745757, "grad_norm": 1.862343668937683, "learning_rate": 8.122193451112786e-07, "loss": 0.6234, "step": 27160 }, { "epoch": 0.845866417561887, "grad_norm": 2.116786479949951, "learning_rate": 8.113999147792455e-07, "loss": 0.7538, "step": 27165 }, { "epoch": 0.8460221080491982, "grad_norm": 2.142010450363159, "learning_rate": 8.105804844472123e-07, "loss": 0.7056, "step": 27170 }, { "epoch": 0.8461777985365094, "grad_norm": 1.944631814956665, "learning_rate": 8.097610541151791e-07, "loss": 0.7676, "step": 27175 }, { "epoch": 0.8463334890238207, "grad_norm": 1.9323102235794067, "learning_rate": 8.08941623783146e-07, "loss": 0.6562, "step": 27180 }, { "epoch": 0.8464891795111319, "grad_norm": 1.9948360919952393, "learning_rate": 8.081221934511129e-07, "loss": 0.6824, "step": 27185 }, { "epoch": 0.8466448699984431, "grad_norm": 2.2791695594787598, "learning_rate": 8.073027631190797e-07, "loss": 0.8256, "step": 27190 }, { "epoch": 0.8468005604857544, "grad_norm": 2.0616283416748047, "learning_rate": 8.064833327870465e-07, "loss": 0.8409, "step": 27195 }, { "epoch": 0.8469562509730656, "grad_norm": 2.319610595703125, "learning_rate": 8.056639024550134e-07, "loss": 0.7236, "step": 27200 }, { "epoch": 0.8471119414603767, "grad_norm": 2.199228048324585, "learning_rate": 8.048444721229802e-07, "loss": 0.7183, "step": 27205 }, { "epoch": 0.847267631947688, "grad_norm": 2.081796646118164, "learning_rate": 8.04025041790947e-07, "loss": 0.7105, "step": 27210 }, { "epoch": 0.8474233224349992, "grad_norm": 2.0692598819732666, "learning_rate": 8.032056114589139e-07, "loss": 0.6984, "step": 27215 }, { "epoch": 0.8475790129223104, "grad_norm": 2.20157790184021, "learning_rate": 8.023861811268808e-07, "loss": 0.7471, "step": 27220 }, { "epoch": 0.8477347034096216, "grad_norm": 2.301093578338623, "learning_rate": 8.015667507948475e-07, "loss": 0.7449, "step": 27225 }, { "epoch": 0.8478903938969329, "grad_norm": 1.8811557292938232, "learning_rate": 8.007473204628143e-07, "loss": 0.7595, "step": 27230 }, { "epoch": 0.8480460843842441, "grad_norm": 2.1396288871765137, "learning_rate": 7.99927890130781e-07, "loss": 0.7061, "step": 27235 }, { "epoch": 0.8482017748715553, "grad_norm": 1.965766191482544, "learning_rate": 7.991084597987479e-07, "loss": 0.7326, "step": 27240 }, { "epoch": 0.8483574653588666, "grad_norm": 2.1390461921691895, "learning_rate": 7.982890294667148e-07, "loss": 0.6745, "step": 27245 }, { "epoch": 0.8485131558461778, "grad_norm": 1.889851450920105, "learning_rate": 7.974695991346817e-07, "loss": 0.7627, "step": 27250 }, { "epoch": 0.848668846333489, "grad_norm": 2.017320156097412, "learning_rate": 7.966501688026484e-07, "loss": 0.858, "step": 27255 }, { "epoch": 0.8488245368208003, "grad_norm": 1.8407968282699585, "learning_rate": 7.958307384706153e-07, "loss": 0.6979, "step": 27260 }, { "epoch": 0.8489802273081115, "grad_norm": 2.449042320251465, "learning_rate": 7.950113081385822e-07, "loss": 0.754, "step": 27265 }, { "epoch": 0.8491359177954227, "grad_norm": 2.310065507888794, "learning_rate": 7.941918778065489e-07, "loss": 0.6862, "step": 27270 }, { "epoch": 0.849291608282734, "grad_norm": 1.9391497373580933, "learning_rate": 7.933724474745158e-07, "loss": 0.7086, "step": 27275 }, { "epoch": 0.8494472987700451, "grad_norm": 2.1041390895843506, "learning_rate": 7.925530171424827e-07, "loss": 0.7481, "step": 27280 }, { "epoch": 0.8496029892573563, "grad_norm": 1.899114966392517, "learning_rate": 7.917335868104495e-07, "loss": 0.764, "step": 27285 }, { "epoch": 0.8497586797446676, "grad_norm": 1.898888111114502, "learning_rate": 7.909141564784163e-07, "loss": 0.7008, "step": 27290 }, { "epoch": 0.8499143702319788, "grad_norm": 2.0878522396087646, "learning_rate": 7.900947261463832e-07, "loss": 0.786, "step": 27295 }, { "epoch": 0.85007006071929, "grad_norm": 2.2609646320343018, "learning_rate": 7.892752958143498e-07, "loss": 0.7837, "step": 27300 }, { "epoch": 0.8502257512066013, "grad_norm": 2.2271411418914795, "learning_rate": 7.884558654823167e-07, "loss": 0.719, "step": 27305 }, { "epoch": 0.8503814416939125, "grad_norm": 2.2931602001190186, "learning_rate": 7.876364351502836e-07, "loss": 0.7248, "step": 27310 }, { "epoch": 0.8505371321812237, "grad_norm": 2.1410443782806396, "learning_rate": 7.868170048182503e-07, "loss": 0.7892, "step": 27315 }, { "epoch": 0.850692822668535, "grad_norm": 1.9844213724136353, "learning_rate": 7.859975744862172e-07, "loss": 0.6997, "step": 27320 }, { "epoch": 0.8508485131558462, "grad_norm": 2.1950316429138184, "learning_rate": 7.851781441541841e-07, "loss": 0.7471, "step": 27325 }, { "epoch": 0.8510042036431574, "grad_norm": 1.7485780715942383, "learning_rate": 7.843587138221509e-07, "loss": 0.8094, "step": 27330 }, { "epoch": 0.8511598941304687, "grad_norm": 2.610045909881592, "learning_rate": 7.835392834901177e-07, "loss": 0.7865, "step": 27335 }, { "epoch": 0.8513155846177799, "grad_norm": 2.248159885406494, "learning_rate": 7.827198531580846e-07, "loss": 0.714, "step": 27340 }, { "epoch": 0.8514712751050911, "grad_norm": 2.6675362586975098, "learning_rate": 7.819004228260514e-07, "loss": 0.6836, "step": 27345 }, { "epoch": 0.8516269655924023, "grad_norm": 2.2747514247894287, "learning_rate": 7.810809924940182e-07, "loss": 0.7316, "step": 27350 }, { "epoch": 0.8517826560797135, "grad_norm": 1.956618070602417, "learning_rate": 7.802615621619851e-07, "loss": 0.7727, "step": 27355 }, { "epoch": 0.8519383465670247, "grad_norm": 1.786985158920288, "learning_rate": 7.794421318299519e-07, "loss": 0.7452, "step": 27360 }, { "epoch": 0.852094037054336, "grad_norm": 1.8522921800613403, "learning_rate": 7.786227014979188e-07, "loss": 0.7438, "step": 27365 }, { "epoch": 0.8522497275416472, "grad_norm": 2.1129114627838135, "learning_rate": 7.778032711658855e-07, "loss": 0.7231, "step": 27370 }, { "epoch": 0.8524054180289584, "grad_norm": 2.0524988174438477, "learning_rate": 7.769838408338523e-07, "loss": 0.747, "step": 27375 }, { "epoch": 0.8525611085162697, "grad_norm": 2.017876148223877, "learning_rate": 7.761644105018191e-07, "loss": 0.6853, "step": 27380 }, { "epoch": 0.8527167990035809, "grad_norm": 1.996972680091858, "learning_rate": 7.75344980169786e-07, "loss": 0.7464, "step": 27385 }, { "epoch": 0.8528724894908921, "grad_norm": 2.1294493675231934, "learning_rate": 7.745255498377528e-07, "loss": 0.7579, "step": 27390 }, { "epoch": 0.8530281799782033, "grad_norm": 1.948084831237793, "learning_rate": 7.737061195057197e-07, "loss": 0.6695, "step": 27395 }, { "epoch": 0.8531838704655146, "grad_norm": 1.9824435710906982, "learning_rate": 7.728866891736865e-07, "loss": 0.703, "step": 27400 }, { "epoch": 0.8533395609528258, "grad_norm": 2.3073954582214355, "learning_rate": 7.720672588416533e-07, "loss": 0.7182, "step": 27405 }, { "epoch": 0.853495251440137, "grad_norm": 2.3147799968719482, "learning_rate": 7.712478285096202e-07, "loss": 0.7254, "step": 27410 }, { "epoch": 0.8536509419274483, "grad_norm": 2.082709312438965, "learning_rate": 7.70428398177587e-07, "loss": 0.8338, "step": 27415 }, { "epoch": 0.8538066324147594, "grad_norm": 1.970061182975769, "learning_rate": 7.696089678455538e-07, "loss": 0.8145, "step": 27420 }, { "epoch": 0.8539623229020706, "grad_norm": 2.1123878955841064, "learning_rate": 7.687895375135207e-07, "loss": 0.7317, "step": 27425 }, { "epoch": 0.8541180133893819, "grad_norm": 2.014777660369873, "learning_rate": 7.679701071814876e-07, "loss": 0.8003, "step": 27430 }, { "epoch": 0.8542737038766931, "grad_norm": 2.3453071117401123, "learning_rate": 7.671506768494544e-07, "loss": 0.7363, "step": 27435 }, { "epoch": 0.8544293943640043, "grad_norm": 2.1334643363952637, "learning_rate": 7.663312465174212e-07, "loss": 0.7727, "step": 27440 }, { "epoch": 0.8545850848513156, "grad_norm": 1.9030894041061401, "learning_rate": 7.655118161853879e-07, "loss": 0.7204, "step": 27445 }, { "epoch": 0.8547407753386268, "grad_norm": 2.2379939556121826, "learning_rate": 7.646923858533547e-07, "loss": 0.7159, "step": 27450 }, { "epoch": 0.854896465825938, "grad_norm": 2.5861570835113525, "learning_rate": 7.638729555213216e-07, "loss": 0.7246, "step": 27455 }, { "epoch": 0.8550521563132493, "grad_norm": 3.0398380756378174, "learning_rate": 7.630535251892885e-07, "loss": 0.7372, "step": 27460 }, { "epoch": 0.8552078468005605, "grad_norm": 2.4543728828430176, "learning_rate": 7.622340948572553e-07, "loss": 0.7478, "step": 27465 }, { "epoch": 0.8553635372878717, "grad_norm": 2.2310543060302734, "learning_rate": 7.614146645252221e-07, "loss": 0.8208, "step": 27470 }, { "epoch": 0.855519227775183, "grad_norm": 2.089406967163086, "learning_rate": 7.60595234193189e-07, "loss": 0.8302, "step": 27475 }, { "epoch": 0.8556749182624942, "grad_norm": 2.0127670764923096, "learning_rate": 7.597758038611558e-07, "loss": 0.7842, "step": 27480 }, { "epoch": 0.8558306087498054, "grad_norm": 2.5086002349853516, "learning_rate": 7.589563735291226e-07, "loss": 0.7925, "step": 27485 }, { "epoch": 0.8559862992371167, "grad_norm": 2.1801748275756836, "learning_rate": 7.581369431970895e-07, "loss": 0.7705, "step": 27490 }, { "epoch": 0.8561419897244278, "grad_norm": 2.589085340499878, "learning_rate": 7.573175128650564e-07, "loss": 0.7391, "step": 27495 }, { "epoch": 0.856297680211739, "grad_norm": 2.3018882274627686, "learning_rate": 7.564980825330231e-07, "loss": 0.7039, "step": 27500 }, { "epoch": 0.8564533706990503, "grad_norm": 1.8020083904266357, "learning_rate": 7.5567865220099e-07, "loss": 0.7295, "step": 27505 }, { "epoch": 0.8566090611863615, "grad_norm": 1.8108768463134766, "learning_rate": 7.548592218689569e-07, "loss": 0.7108, "step": 27510 }, { "epoch": 0.8567647516736727, "grad_norm": 2.6535773277282715, "learning_rate": 7.540397915369235e-07, "loss": 0.7297, "step": 27515 }, { "epoch": 0.856920442160984, "grad_norm": 1.935793399810791, "learning_rate": 7.532203612048904e-07, "loss": 0.7642, "step": 27520 }, { "epoch": 0.8570761326482952, "grad_norm": 1.9213069677352905, "learning_rate": 7.524009308728572e-07, "loss": 0.7266, "step": 27525 }, { "epoch": 0.8572318231356064, "grad_norm": 1.9309275150299072, "learning_rate": 7.51581500540824e-07, "loss": 0.7318, "step": 27530 }, { "epoch": 0.8573875136229177, "grad_norm": 1.9322694540023804, "learning_rate": 7.507620702087909e-07, "loss": 0.7688, "step": 27535 }, { "epoch": 0.8575432041102289, "grad_norm": 1.855457067489624, "learning_rate": 7.499426398767578e-07, "loss": 0.6844, "step": 27540 }, { "epoch": 0.8576988945975401, "grad_norm": 2.4463632106781006, "learning_rate": 7.491232095447245e-07, "loss": 0.8459, "step": 27545 }, { "epoch": 0.8578545850848514, "grad_norm": 2.6442558765411377, "learning_rate": 7.483037792126914e-07, "loss": 0.7665, "step": 27550 }, { "epoch": 0.8580102755721626, "grad_norm": 1.890368103981018, "learning_rate": 7.474843488806583e-07, "loss": 0.6992, "step": 27555 }, { "epoch": 0.8581659660594738, "grad_norm": 2.174144744873047, "learning_rate": 7.46664918548625e-07, "loss": 0.764, "step": 27560 }, { "epoch": 0.8583216565467849, "grad_norm": 2.5192315578460693, "learning_rate": 7.458454882165919e-07, "loss": 0.749, "step": 27565 }, { "epoch": 0.8584773470340962, "grad_norm": 1.761802077293396, "learning_rate": 7.450260578845588e-07, "loss": 0.7484, "step": 27570 }, { "epoch": 0.8586330375214074, "grad_norm": 1.9789153337478638, "learning_rate": 7.442066275525256e-07, "loss": 0.6805, "step": 27575 }, { "epoch": 0.8587887280087186, "grad_norm": 2.115776300430298, "learning_rate": 7.433871972204924e-07, "loss": 0.7573, "step": 27580 }, { "epoch": 0.8589444184960299, "grad_norm": 2.514043092727661, "learning_rate": 7.425677668884592e-07, "loss": 0.7856, "step": 27585 }, { "epoch": 0.8591001089833411, "grad_norm": 2.4604716300964355, "learning_rate": 7.417483365564259e-07, "loss": 0.7706, "step": 27590 }, { "epoch": 0.8592557994706523, "grad_norm": 2.031846284866333, "learning_rate": 7.409289062243928e-07, "loss": 0.7348, "step": 27595 }, { "epoch": 0.8594114899579636, "grad_norm": 1.908942461013794, "learning_rate": 7.401094758923597e-07, "loss": 0.6919, "step": 27600 }, { "epoch": 0.8595671804452748, "grad_norm": 2.639979600906372, "learning_rate": 7.392900455603265e-07, "loss": 0.7991, "step": 27605 }, { "epoch": 0.859722870932586, "grad_norm": 3.441936492919922, "learning_rate": 7.384706152282933e-07, "loss": 0.7808, "step": 27610 }, { "epoch": 0.8598785614198973, "grad_norm": 2.1810314655303955, "learning_rate": 7.376511848962602e-07, "loss": 0.7296, "step": 27615 }, { "epoch": 0.8600342519072085, "grad_norm": 1.7242283821105957, "learning_rate": 7.36831754564227e-07, "loss": 0.6796, "step": 27620 }, { "epoch": 0.8601899423945197, "grad_norm": 2.426236867904663, "learning_rate": 7.360123242321938e-07, "loss": 0.768, "step": 27625 }, { "epoch": 0.860345632881831, "grad_norm": 2.2513742446899414, "learning_rate": 7.351928939001607e-07, "loss": 0.841, "step": 27630 }, { "epoch": 0.8605013233691421, "grad_norm": 2.5888938903808594, "learning_rate": 7.343734635681275e-07, "loss": 0.8783, "step": 27635 }, { "epoch": 0.8606570138564533, "grad_norm": 2.0162160396575928, "learning_rate": 7.335540332360944e-07, "loss": 0.7845, "step": 27640 }, { "epoch": 0.8608127043437646, "grad_norm": 2.1355807781219482, "learning_rate": 7.327346029040612e-07, "loss": 0.784, "step": 27645 }, { "epoch": 0.8609683948310758, "grad_norm": 2.6932053565979004, "learning_rate": 7.31915172572028e-07, "loss": 0.77, "step": 27650 }, { "epoch": 0.861124085318387, "grad_norm": 1.997666835784912, "learning_rate": 7.310957422399949e-07, "loss": 0.8638, "step": 27655 }, { "epoch": 0.8612797758056983, "grad_norm": 2.2188563346862793, "learning_rate": 7.302763119079616e-07, "loss": 0.796, "step": 27660 }, { "epoch": 0.8614354662930095, "grad_norm": 2.1790664196014404, "learning_rate": 7.294568815759284e-07, "loss": 0.8408, "step": 27665 }, { "epoch": 0.8615911567803207, "grad_norm": 2.0661330223083496, "learning_rate": 7.286374512438953e-07, "loss": 0.7816, "step": 27670 }, { "epoch": 0.861746847267632, "grad_norm": 2.1554200649261475, "learning_rate": 7.278180209118621e-07, "loss": 0.8145, "step": 27675 }, { "epoch": 0.8619025377549432, "grad_norm": 2.542992115020752, "learning_rate": 7.269985905798289e-07, "loss": 0.7741, "step": 27680 }, { "epoch": 0.8620582282422544, "grad_norm": 1.8988538980484009, "learning_rate": 7.261791602477958e-07, "loss": 0.7647, "step": 27685 }, { "epoch": 0.8622139187295657, "grad_norm": 3.4485421180725098, "learning_rate": 7.253597299157626e-07, "loss": 0.7561, "step": 27690 }, { "epoch": 0.8623696092168769, "grad_norm": 2.2016172409057617, "learning_rate": 7.245402995837294e-07, "loss": 0.8229, "step": 27695 }, { "epoch": 0.8625252997041881, "grad_norm": 2.402132034301758, "learning_rate": 7.237208692516963e-07, "loss": 0.7822, "step": 27700 }, { "epoch": 0.8626809901914994, "grad_norm": 2.430607795715332, "learning_rate": 7.229014389196632e-07, "loss": 0.7617, "step": 27705 }, { "epoch": 0.8628366806788105, "grad_norm": 2.0135538578033447, "learning_rate": 7.2208200858763e-07, "loss": 0.7708, "step": 27710 }, { "epoch": 0.8629923711661217, "grad_norm": 2.4280529022216797, "learning_rate": 7.212625782555968e-07, "loss": 0.827, "step": 27715 }, { "epoch": 0.8631480616534329, "grad_norm": 2.0867130756378174, "learning_rate": 7.204431479235637e-07, "loss": 0.7635, "step": 27720 }, { "epoch": 0.8633037521407442, "grad_norm": 2.5034892559051514, "learning_rate": 7.196237175915305e-07, "loss": 0.8041, "step": 27725 }, { "epoch": 0.8634594426280554, "grad_norm": 2.0732169151306152, "learning_rate": 7.188042872594972e-07, "loss": 0.7558, "step": 27730 }, { "epoch": 0.8636151331153666, "grad_norm": 2.1734445095062256, "learning_rate": 7.17984856927464e-07, "loss": 0.676, "step": 27735 }, { "epoch": 0.8637708236026779, "grad_norm": 2.093636989593506, "learning_rate": 7.171654265954308e-07, "loss": 0.7595, "step": 27740 }, { "epoch": 0.8639265140899891, "grad_norm": 2.6332859992980957, "learning_rate": 7.163459962633977e-07, "loss": 0.7653, "step": 27745 }, { "epoch": 0.8640822045773003, "grad_norm": 2.584913492202759, "learning_rate": 7.155265659313646e-07, "loss": 0.8312, "step": 27750 }, { "epoch": 0.8642378950646116, "grad_norm": 2.0988001823425293, "learning_rate": 7.147071355993314e-07, "loss": 0.7983, "step": 27755 }, { "epoch": 0.8643935855519228, "grad_norm": 2.695615768432617, "learning_rate": 7.138877052672982e-07, "loss": 0.7247, "step": 27760 }, { "epoch": 0.864549276039234, "grad_norm": 2.6453442573547363, "learning_rate": 7.130682749352651e-07, "loss": 0.7093, "step": 27765 }, { "epoch": 0.8647049665265453, "grad_norm": 2.19476056098938, "learning_rate": 7.122488446032319e-07, "loss": 0.709, "step": 27770 }, { "epoch": 0.8648606570138565, "grad_norm": 2.142770290374756, "learning_rate": 7.114294142711987e-07, "loss": 0.777, "step": 27775 }, { "epoch": 0.8650163475011676, "grad_norm": 2.5381336212158203, "learning_rate": 7.106099839391656e-07, "loss": 0.7112, "step": 27780 }, { "epoch": 0.8651720379884789, "grad_norm": 2.398123025894165, "learning_rate": 7.097905536071325e-07, "loss": 0.7988, "step": 27785 }, { "epoch": 0.8653277284757901, "grad_norm": 2.041093587875366, "learning_rate": 7.089711232750992e-07, "loss": 0.7894, "step": 27790 }, { "epoch": 0.8654834189631013, "grad_norm": 2.3092081546783447, "learning_rate": 7.081516929430661e-07, "loss": 0.722, "step": 27795 }, { "epoch": 0.8656391094504126, "grad_norm": 2.0394673347473145, "learning_rate": 7.073322626110328e-07, "loss": 0.6816, "step": 27800 }, { "epoch": 0.8657947999377238, "grad_norm": 2.038907527923584, "learning_rate": 7.065128322789996e-07, "loss": 0.7722, "step": 27805 }, { "epoch": 0.865950490425035, "grad_norm": 2.0161690711975098, "learning_rate": 7.056934019469665e-07, "loss": 0.7672, "step": 27810 }, { "epoch": 0.8661061809123463, "grad_norm": 1.7007617950439453, "learning_rate": 7.048739716149334e-07, "loss": 0.8257, "step": 27815 }, { "epoch": 0.8662618713996575, "grad_norm": 2.4569778442382812, "learning_rate": 7.040545412829001e-07, "loss": 0.719, "step": 27820 }, { "epoch": 0.8664175618869687, "grad_norm": 1.8159617185592651, "learning_rate": 7.03235110950867e-07, "loss": 0.7199, "step": 27825 }, { "epoch": 0.86657325237428, "grad_norm": 2.072765350341797, "learning_rate": 7.024156806188339e-07, "loss": 0.7995, "step": 27830 }, { "epoch": 0.8667289428615912, "grad_norm": 2.0289556980133057, "learning_rate": 7.015962502868006e-07, "loss": 0.7942, "step": 27835 }, { "epoch": 0.8668846333489024, "grad_norm": 2.3411309719085693, "learning_rate": 7.007768199547675e-07, "loss": 0.7175, "step": 27840 }, { "epoch": 0.8670403238362137, "grad_norm": 2.2608134746551514, "learning_rate": 6.999573896227344e-07, "loss": 0.6813, "step": 27845 }, { "epoch": 0.8671960143235248, "grad_norm": 2.002155065536499, "learning_rate": 6.991379592907012e-07, "loss": 0.7304, "step": 27850 }, { "epoch": 0.867351704810836, "grad_norm": 2.757969379425049, "learning_rate": 6.98318528958668e-07, "loss": 0.6931, "step": 27855 }, { "epoch": 0.8675073952981472, "grad_norm": 2.081735372543335, "learning_rate": 6.974990986266349e-07, "loss": 0.7908, "step": 27860 }, { "epoch": 0.8676630857854585, "grad_norm": 2.324571371078491, "learning_rate": 6.966796682946017e-07, "loss": 0.777, "step": 27865 }, { "epoch": 0.8678187762727697, "grad_norm": 2.404081344604492, "learning_rate": 6.958602379625685e-07, "loss": 0.803, "step": 27870 }, { "epoch": 0.8679744667600809, "grad_norm": 2.04707932472229, "learning_rate": 6.950408076305353e-07, "loss": 0.7873, "step": 27875 }, { "epoch": 0.8681301572473922, "grad_norm": 2.5107011795043945, "learning_rate": 6.942213772985021e-07, "loss": 0.8475, "step": 27880 }, { "epoch": 0.8682858477347034, "grad_norm": 2.154693365097046, "learning_rate": 6.934019469664689e-07, "loss": 0.7781, "step": 27885 }, { "epoch": 0.8684415382220146, "grad_norm": 1.8885470628738403, "learning_rate": 6.925825166344358e-07, "loss": 0.746, "step": 27890 }, { "epoch": 0.8685972287093259, "grad_norm": 1.755670428276062, "learning_rate": 6.917630863024026e-07, "loss": 0.7341, "step": 27895 }, { "epoch": 0.8687529191966371, "grad_norm": 2.0810441970825195, "learning_rate": 6.909436559703694e-07, "loss": 0.6961, "step": 27900 }, { "epoch": 0.8689086096839483, "grad_norm": 1.8299089670181274, "learning_rate": 6.901242256383363e-07, "loss": 0.6914, "step": 27905 }, { "epoch": 0.8690643001712596, "grad_norm": 1.833265781402588, "learning_rate": 6.893047953063031e-07, "loss": 0.6845, "step": 27910 }, { "epoch": 0.8692199906585708, "grad_norm": 2.4595134258270264, "learning_rate": 6.8848536497427e-07, "loss": 0.8043, "step": 27915 }, { "epoch": 0.869375681145882, "grad_norm": 2.016918897628784, "learning_rate": 6.876659346422368e-07, "loss": 0.7616, "step": 27920 }, { "epoch": 0.8695313716331932, "grad_norm": 2.328577995300293, "learning_rate": 6.868465043102036e-07, "loss": 0.7135, "step": 27925 }, { "epoch": 0.8696870621205044, "grad_norm": 2.0844979286193848, "learning_rate": 6.860270739781705e-07, "loss": 0.6978, "step": 27930 }, { "epoch": 0.8698427526078156, "grad_norm": 2.2197928428649902, "learning_rate": 6.852076436461373e-07, "loss": 0.7828, "step": 27935 }, { "epoch": 0.8699984430951269, "grad_norm": 2.808755874633789, "learning_rate": 6.843882133141041e-07, "loss": 0.7865, "step": 27940 }, { "epoch": 0.8701541335824381, "grad_norm": 2.2513067722320557, "learning_rate": 6.835687829820708e-07, "loss": 0.7186, "step": 27945 }, { "epoch": 0.8703098240697493, "grad_norm": 2.16239070892334, "learning_rate": 6.827493526500377e-07, "loss": 0.7974, "step": 27950 }, { "epoch": 0.8704655145570606, "grad_norm": 2.676884889602661, "learning_rate": 6.819299223180045e-07, "loss": 0.8457, "step": 27955 }, { "epoch": 0.8706212050443718, "grad_norm": 1.9573380947113037, "learning_rate": 6.811104919859714e-07, "loss": 0.7165, "step": 27960 }, { "epoch": 0.870776895531683, "grad_norm": 2.2064638137817383, "learning_rate": 6.802910616539382e-07, "loss": 0.7499, "step": 27965 }, { "epoch": 0.8709325860189943, "grad_norm": 2.188939094543457, "learning_rate": 6.79471631321905e-07, "loss": 0.7878, "step": 27970 }, { "epoch": 0.8710882765063055, "grad_norm": 2.342494487762451, "learning_rate": 6.786522009898719e-07, "loss": 0.78, "step": 27975 }, { "epoch": 0.8712439669936167, "grad_norm": 2.0526466369628906, "learning_rate": 6.778327706578387e-07, "loss": 0.7504, "step": 27980 }, { "epoch": 0.871399657480928, "grad_norm": 2.2124693393707275, "learning_rate": 6.770133403258056e-07, "loss": 0.7445, "step": 27985 }, { "epoch": 0.8715553479682392, "grad_norm": 2.163411855697632, "learning_rate": 6.761939099937724e-07, "loss": 0.7773, "step": 27990 }, { "epoch": 0.8717110384555503, "grad_norm": 1.913795828819275, "learning_rate": 6.753744796617393e-07, "loss": 0.7562, "step": 27995 }, { "epoch": 0.8718667289428615, "grad_norm": 2.273775100708008, "learning_rate": 6.74555049329706e-07, "loss": 0.7374, "step": 28000 }, { "epoch": 0.8720224194301728, "grad_norm": 2.0069682598114014, "learning_rate": 6.737356189976729e-07, "loss": 0.7135, "step": 28005 }, { "epoch": 0.872178109917484, "grad_norm": 1.9320743083953857, "learning_rate": 6.729161886656398e-07, "loss": 0.7765, "step": 28010 }, { "epoch": 0.8723338004047952, "grad_norm": 3.111701488494873, "learning_rate": 6.720967583336064e-07, "loss": 0.816, "step": 28015 }, { "epoch": 0.8724894908921065, "grad_norm": 2.5724148750305176, "learning_rate": 6.712773280015733e-07, "loss": 0.8379, "step": 28020 }, { "epoch": 0.8726451813794177, "grad_norm": 2.222137689590454, "learning_rate": 6.704578976695402e-07, "loss": 0.7081, "step": 28025 }, { "epoch": 0.872800871866729, "grad_norm": 2.3790810108184814, "learning_rate": 6.69638467337507e-07, "loss": 0.8032, "step": 28030 }, { "epoch": 0.8729565623540402, "grad_norm": 2.147426128387451, "learning_rate": 6.688190370054738e-07, "loss": 0.7116, "step": 28035 }, { "epoch": 0.8731122528413514, "grad_norm": 2.137680768966675, "learning_rate": 6.679996066734407e-07, "loss": 0.7676, "step": 28040 }, { "epoch": 0.8732679433286626, "grad_norm": 2.6216304302215576, "learning_rate": 6.671801763414075e-07, "loss": 0.7886, "step": 28045 }, { "epoch": 0.8734236338159739, "grad_norm": 1.991968035697937, "learning_rate": 6.663607460093743e-07, "loss": 0.6857, "step": 28050 }, { "epoch": 0.8735793243032851, "grad_norm": 2.029012441635132, "learning_rate": 6.655413156773412e-07, "loss": 0.7019, "step": 28055 }, { "epoch": 0.8737350147905963, "grad_norm": 1.9998794794082642, "learning_rate": 6.647218853453081e-07, "loss": 0.7089, "step": 28060 }, { "epoch": 0.8738907052779075, "grad_norm": 1.965273380279541, "learning_rate": 6.639024550132748e-07, "loss": 0.7555, "step": 28065 }, { "epoch": 0.8740463957652187, "grad_norm": 2.050766706466675, "learning_rate": 6.630830246812417e-07, "loss": 0.7896, "step": 28070 }, { "epoch": 0.8742020862525299, "grad_norm": 2.2634339332580566, "learning_rate": 6.622635943492086e-07, "loss": 0.7447, "step": 28075 }, { "epoch": 0.8743577767398412, "grad_norm": 2.509300947189331, "learning_rate": 6.614441640171753e-07, "loss": 0.7431, "step": 28080 }, { "epoch": 0.8745134672271524, "grad_norm": 1.9825340509414673, "learning_rate": 6.606247336851422e-07, "loss": 0.7304, "step": 28085 }, { "epoch": 0.8746691577144636, "grad_norm": 2.1827263832092285, "learning_rate": 6.59805303353109e-07, "loss": 0.8618, "step": 28090 }, { "epoch": 0.8748248482017749, "grad_norm": 2.285727024078369, "learning_rate": 6.589858730210757e-07, "loss": 0.7369, "step": 28095 }, { "epoch": 0.8749805386890861, "grad_norm": 2.2228994369506836, "learning_rate": 6.581664426890426e-07, "loss": 0.6963, "step": 28100 }, { "epoch": 0.8751362291763973, "grad_norm": 2.246927261352539, "learning_rate": 6.573470123570095e-07, "loss": 0.7597, "step": 28105 }, { "epoch": 0.8752919196637086, "grad_norm": 1.7369906902313232, "learning_rate": 6.565275820249762e-07, "loss": 0.8981, "step": 28110 }, { "epoch": 0.8754476101510198, "grad_norm": 2.5153274536132812, "learning_rate": 6.557081516929431e-07, "loss": 0.7132, "step": 28115 }, { "epoch": 0.875603300638331, "grad_norm": 2.1826815605163574, "learning_rate": 6.5488872136091e-07, "loss": 0.7269, "step": 28120 }, { "epoch": 0.8757589911256423, "grad_norm": 2.221750259399414, "learning_rate": 6.540692910288768e-07, "loss": 0.7326, "step": 28125 }, { "epoch": 0.8759146816129535, "grad_norm": 2.312065839767456, "learning_rate": 6.532498606968436e-07, "loss": 0.7463, "step": 28130 }, { "epoch": 0.8760703721002647, "grad_norm": 2.4147331714630127, "learning_rate": 6.524304303648105e-07, "loss": 0.7629, "step": 28135 }, { "epoch": 0.8762260625875758, "grad_norm": 1.912164330482483, "learning_rate": 6.516110000327773e-07, "loss": 0.777, "step": 28140 }, { "epoch": 0.8763817530748871, "grad_norm": 1.918577790260315, "learning_rate": 6.507915697007441e-07, "loss": 0.7553, "step": 28145 }, { "epoch": 0.8765374435621983, "grad_norm": 2.2841460704803467, "learning_rate": 6.49972139368711e-07, "loss": 0.7701, "step": 28150 }, { "epoch": 0.8766931340495095, "grad_norm": 2.0000505447387695, "learning_rate": 6.491527090366778e-07, "loss": 0.7549, "step": 28155 }, { "epoch": 0.8768488245368208, "grad_norm": 2.4908981323242188, "learning_rate": 6.483332787046445e-07, "loss": 0.8285, "step": 28160 }, { "epoch": 0.877004515024132, "grad_norm": 1.9001529216766357, "learning_rate": 6.475138483726114e-07, "loss": 0.7307, "step": 28165 }, { "epoch": 0.8771602055114432, "grad_norm": 2.092463731765747, "learning_rate": 6.466944180405782e-07, "loss": 0.77, "step": 28170 }, { "epoch": 0.8773158959987545, "grad_norm": 2.2490897178649902, "learning_rate": 6.45874987708545e-07, "loss": 0.8098, "step": 28175 }, { "epoch": 0.8774715864860657, "grad_norm": 1.9904266595840454, "learning_rate": 6.450555573765119e-07, "loss": 0.7726, "step": 28180 }, { "epoch": 0.877627276973377, "grad_norm": 2.043896198272705, "learning_rate": 6.442361270444787e-07, "loss": 0.7251, "step": 28185 }, { "epoch": 0.8777829674606882, "grad_norm": 1.7550828456878662, "learning_rate": 6.434166967124455e-07, "loss": 0.6636, "step": 28190 }, { "epoch": 0.8779386579479994, "grad_norm": 2.7957448959350586, "learning_rate": 6.425972663804124e-07, "loss": 0.7228, "step": 28195 }, { "epoch": 0.8780943484353106, "grad_norm": 2.072230100631714, "learning_rate": 6.417778360483792e-07, "loss": 0.7529, "step": 28200 }, { "epoch": 0.8782500389226219, "grad_norm": 2.107036590576172, "learning_rate": 6.409584057163461e-07, "loss": 0.6775, "step": 28205 }, { "epoch": 0.878405729409933, "grad_norm": 2.6227900981903076, "learning_rate": 6.401389753843129e-07, "loss": 0.8062, "step": 28210 }, { "epoch": 0.8785614198972442, "grad_norm": 1.9556478261947632, "learning_rate": 6.393195450522797e-07, "loss": 0.7183, "step": 28215 }, { "epoch": 0.8787171103845555, "grad_norm": 2.1872880458831787, "learning_rate": 6.385001147202466e-07, "loss": 0.721, "step": 28220 }, { "epoch": 0.8788728008718667, "grad_norm": 3.046926259994507, "learning_rate": 6.376806843882134e-07, "loss": 0.7464, "step": 28225 }, { "epoch": 0.8790284913591779, "grad_norm": 2.223435163497925, "learning_rate": 6.368612540561801e-07, "loss": 0.7839, "step": 28230 }, { "epoch": 0.8791841818464892, "grad_norm": 2.306084156036377, "learning_rate": 6.36041823724147e-07, "loss": 0.8014, "step": 28235 }, { "epoch": 0.8793398723338004, "grad_norm": 2.2290821075439453, "learning_rate": 6.352223933921138e-07, "loss": 0.7441, "step": 28240 }, { "epoch": 0.8794955628211116, "grad_norm": 2.7005910873413086, "learning_rate": 6.344029630600806e-07, "loss": 0.7706, "step": 28245 }, { "epoch": 0.8796512533084229, "grad_norm": 5.522416114807129, "learning_rate": 6.335835327280475e-07, "loss": 0.7884, "step": 28250 }, { "epoch": 0.8798069437957341, "grad_norm": 2.7419252395629883, "learning_rate": 6.327641023960143e-07, "loss": 0.7482, "step": 28255 }, { "epoch": 0.8799626342830453, "grad_norm": 2.115407943725586, "learning_rate": 6.319446720639811e-07, "loss": 0.7719, "step": 28260 }, { "epoch": 0.8801183247703566, "grad_norm": 1.8213675022125244, "learning_rate": 6.31125241731948e-07, "loss": 0.7865, "step": 28265 }, { "epoch": 0.8802740152576678, "grad_norm": 3.067092180252075, "learning_rate": 6.303058113999149e-07, "loss": 0.8749, "step": 28270 }, { "epoch": 0.880429705744979, "grad_norm": 2.414777994155884, "learning_rate": 6.294863810678817e-07, "loss": 0.7924, "step": 28275 }, { "epoch": 0.8805853962322902, "grad_norm": 2.105997085571289, "learning_rate": 6.286669507358485e-07, "loss": 0.7279, "step": 28280 }, { "epoch": 0.8807410867196014, "grad_norm": 3.4410977363586426, "learning_rate": 6.278475204038154e-07, "loss": 0.7773, "step": 28285 }, { "epoch": 0.8808967772069126, "grad_norm": 1.9202121496200562, "learning_rate": 6.270280900717822e-07, "loss": 0.7481, "step": 28290 }, { "epoch": 0.8810524676942239, "grad_norm": 2.020784854888916, "learning_rate": 6.26208659739749e-07, "loss": 0.7415, "step": 28295 }, { "epoch": 0.8812081581815351, "grad_norm": 3.0437328815460205, "learning_rate": 6.253892294077159e-07, "loss": 0.7794, "step": 28300 }, { "epoch": 0.8813638486688463, "grad_norm": 1.8251572847366333, "learning_rate": 6.245697990756827e-07, "loss": 0.7113, "step": 28305 }, { "epoch": 0.8815195391561575, "grad_norm": 2.3450348377227783, "learning_rate": 6.237503687436495e-07, "loss": 0.8056, "step": 28310 }, { "epoch": 0.8816752296434688, "grad_norm": 2.043926477432251, "learning_rate": 6.229309384116163e-07, "loss": 0.7597, "step": 28315 }, { "epoch": 0.88183092013078, "grad_norm": 1.8953485488891602, "learning_rate": 6.221115080795831e-07, "loss": 0.7097, "step": 28320 }, { "epoch": 0.8819866106180912, "grad_norm": 2.5477514266967773, "learning_rate": 6.212920777475499e-07, "loss": 0.7597, "step": 28325 }, { "epoch": 0.8821423011054025, "grad_norm": 2.166219472885132, "learning_rate": 6.204726474155168e-07, "loss": 0.7528, "step": 28330 }, { "epoch": 0.8822979915927137, "grad_norm": 2.1362533569335938, "learning_rate": 6.196532170834837e-07, "loss": 0.8334, "step": 28335 }, { "epoch": 0.882453682080025, "grad_norm": 1.975771427154541, "learning_rate": 6.188337867514504e-07, "loss": 0.6594, "step": 28340 }, { "epoch": 0.8826093725673362, "grad_norm": 2.189589738845825, "learning_rate": 6.180143564194173e-07, "loss": 0.7416, "step": 28345 }, { "epoch": 0.8827650630546474, "grad_norm": 1.834826946258545, "learning_rate": 6.171949260873842e-07, "loss": 0.7074, "step": 28350 }, { "epoch": 0.8829207535419585, "grad_norm": 1.9175039529800415, "learning_rate": 6.163754957553509e-07, "loss": 0.8437, "step": 28355 }, { "epoch": 0.8830764440292698, "grad_norm": 2.0101680755615234, "learning_rate": 6.155560654233177e-07, "loss": 0.755, "step": 28360 }, { "epoch": 0.883232134516581, "grad_norm": 1.9872218370437622, "learning_rate": 6.147366350912846e-07, "loss": 0.7756, "step": 28365 }, { "epoch": 0.8833878250038922, "grad_norm": 2.462829828262329, "learning_rate": 6.139172047592514e-07, "loss": 0.7983, "step": 28370 }, { "epoch": 0.8835435154912035, "grad_norm": 2.2255313396453857, "learning_rate": 6.130977744272183e-07, "loss": 0.7399, "step": 28375 }, { "epoch": 0.8836992059785147, "grad_norm": 2.1595590114593506, "learning_rate": 6.122783440951851e-07, "loss": 0.7126, "step": 28380 }, { "epoch": 0.8838548964658259, "grad_norm": 2.448155403137207, "learning_rate": 6.114589137631519e-07, "loss": 0.7404, "step": 28385 }, { "epoch": 0.8840105869531372, "grad_norm": 1.862818717956543, "learning_rate": 6.106394834311188e-07, "loss": 0.7188, "step": 28390 }, { "epoch": 0.8841662774404484, "grad_norm": 2.0574703216552734, "learning_rate": 6.098200530990856e-07, "loss": 0.7161, "step": 28395 }, { "epoch": 0.8843219679277596, "grad_norm": 1.6297320127487183, "learning_rate": 6.090006227670523e-07, "loss": 0.7312, "step": 28400 }, { "epoch": 0.8844776584150709, "grad_norm": 1.946980595588684, "learning_rate": 6.081811924350192e-07, "loss": 0.7504, "step": 28405 }, { "epoch": 0.8846333489023821, "grad_norm": 1.8788655996322632, "learning_rate": 6.073617621029861e-07, "loss": 0.7529, "step": 28410 }, { "epoch": 0.8847890393896933, "grad_norm": 2.0035958290100098, "learning_rate": 6.065423317709529e-07, "loss": 0.729, "step": 28415 }, { "epoch": 0.8849447298770046, "grad_norm": 2.1578125953674316, "learning_rate": 6.057229014389197e-07, "loss": 0.8233, "step": 28420 }, { "epoch": 0.8851004203643157, "grad_norm": 1.9686216115951538, "learning_rate": 6.049034711068866e-07, "loss": 0.7209, "step": 28425 }, { "epoch": 0.8852561108516269, "grad_norm": 2.094071626663208, "learning_rate": 6.040840407748533e-07, "loss": 0.8193, "step": 28430 }, { "epoch": 0.8854118013389382, "grad_norm": 2.2689454555511475, "learning_rate": 6.032646104428202e-07, "loss": 0.741, "step": 28435 }, { "epoch": 0.8855674918262494, "grad_norm": 2.2444875240325928, "learning_rate": 6.024451801107871e-07, "loss": 0.676, "step": 28440 }, { "epoch": 0.8857231823135606, "grad_norm": 1.9556301832199097, "learning_rate": 6.016257497787538e-07, "loss": 0.7015, "step": 28445 }, { "epoch": 0.8858788728008719, "grad_norm": 1.7973119020462036, "learning_rate": 6.008063194467207e-07, "loss": 0.7199, "step": 28450 }, { "epoch": 0.8860345632881831, "grad_norm": 1.671125054359436, "learning_rate": 5.999868891146876e-07, "loss": 0.7204, "step": 28455 }, { "epoch": 0.8861902537754943, "grad_norm": 1.8404011726379395, "learning_rate": 5.991674587826543e-07, "loss": 0.7439, "step": 28460 }, { "epoch": 0.8863459442628056, "grad_norm": 2.1974196434020996, "learning_rate": 5.983480284506211e-07, "loss": 0.6899, "step": 28465 }, { "epoch": 0.8865016347501168, "grad_norm": 1.8160609006881714, "learning_rate": 5.97528598118588e-07, "loss": 0.707, "step": 28470 }, { "epoch": 0.886657325237428, "grad_norm": 2.3060550689697266, "learning_rate": 5.967091677865548e-07, "loss": 0.8036, "step": 28475 }, { "epoch": 0.8868130157247393, "grad_norm": 2.537090301513672, "learning_rate": 5.958897374545217e-07, "loss": 0.7481, "step": 28480 }, { "epoch": 0.8869687062120505, "grad_norm": 1.7635586261749268, "learning_rate": 5.950703071224885e-07, "loss": 0.7483, "step": 28485 }, { "epoch": 0.8871243966993617, "grad_norm": 1.9225192070007324, "learning_rate": 5.942508767904553e-07, "loss": 0.7346, "step": 28490 }, { "epoch": 0.8872800871866728, "grad_norm": 2.5855510234832764, "learning_rate": 5.934314464584222e-07, "loss": 0.8333, "step": 28495 }, { "epoch": 0.8874357776739841, "grad_norm": 2.233651876449585, "learning_rate": 5.92612016126389e-07, "loss": 0.8322, "step": 28500 }, { "epoch": 0.8875914681612953, "grad_norm": 2.154963731765747, "learning_rate": 5.917925857943557e-07, "loss": 0.7357, "step": 28505 }, { "epoch": 0.8877471586486065, "grad_norm": 2.310155153274536, "learning_rate": 5.909731554623226e-07, "loss": 0.7436, "step": 28510 }, { "epoch": 0.8879028491359178, "grad_norm": 2.009965181350708, "learning_rate": 5.901537251302895e-07, "loss": 0.7642, "step": 28515 }, { "epoch": 0.888058539623229, "grad_norm": 1.9217655658721924, "learning_rate": 5.893342947982564e-07, "loss": 0.8046, "step": 28520 }, { "epoch": 0.8882142301105402, "grad_norm": 2.0558886528015137, "learning_rate": 5.885148644662231e-07, "loss": 0.8681, "step": 28525 }, { "epoch": 0.8883699205978515, "grad_norm": 1.8820545673370361, "learning_rate": 5.8769543413419e-07, "loss": 0.7179, "step": 28530 }, { "epoch": 0.8885256110851627, "grad_norm": 1.8996905088424683, "learning_rate": 5.868760038021567e-07, "loss": 0.7114, "step": 28535 }, { "epoch": 0.8886813015724739, "grad_norm": 2.3126845359802246, "learning_rate": 5.860565734701236e-07, "loss": 0.8705, "step": 28540 }, { "epoch": 0.8888369920597852, "grad_norm": 1.8169090747833252, "learning_rate": 5.852371431380905e-07, "loss": 0.7627, "step": 28545 }, { "epoch": 0.8889926825470964, "grad_norm": 2.189425468444824, "learning_rate": 5.844177128060573e-07, "loss": 0.7339, "step": 28550 }, { "epoch": 0.8891483730344076, "grad_norm": 2.1064016819000244, "learning_rate": 5.835982824740241e-07, "loss": 0.784, "step": 28555 }, { "epoch": 0.8893040635217189, "grad_norm": 2.026069164276123, "learning_rate": 5.82778852141991e-07, "loss": 0.7893, "step": 28560 }, { "epoch": 0.8894597540090301, "grad_norm": 1.9206664562225342, "learning_rate": 5.819594218099578e-07, "loss": 0.7515, "step": 28565 }, { "epoch": 0.8896154444963412, "grad_norm": 1.885856032371521, "learning_rate": 5.811399914779245e-07, "loss": 0.7825, "step": 28570 }, { "epoch": 0.8897711349836525, "grad_norm": 2.454080820083618, "learning_rate": 5.803205611458914e-07, "loss": 0.7247, "step": 28575 }, { "epoch": 0.8899268254709637, "grad_norm": 2.343353748321533, "learning_rate": 5.795011308138583e-07, "loss": 0.7996, "step": 28580 }, { "epoch": 0.8900825159582749, "grad_norm": 2.1883773803710938, "learning_rate": 5.786817004818251e-07, "loss": 0.746, "step": 28585 }, { "epoch": 0.8902382064455862, "grad_norm": 1.9957975149154663, "learning_rate": 5.778622701497919e-07, "loss": 0.757, "step": 28590 }, { "epoch": 0.8903938969328974, "grad_norm": 1.9832316637039185, "learning_rate": 5.770428398177588e-07, "loss": 0.715, "step": 28595 }, { "epoch": 0.8905495874202086, "grad_norm": 2.174118995666504, "learning_rate": 5.762234094857256e-07, "loss": 0.73, "step": 28600 }, { "epoch": 0.8907052779075199, "grad_norm": 3.245147466659546, "learning_rate": 5.754039791536924e-07, "loss": 0.7754, "step": 28605 }, { "epoch": 0.8908609683948311, "grad_norm": 2.0913853645324707, "learning_rate": 5.745845488216592e-07, "loss": 0.8471, "step": 28610 }, { "epoch": 0.8910166588821423, "grad_norm": 3.2233121395111084, "learning_rate": 5.73765118489626e-07, "loss": 0.8018, "step": 28615 }, { "epoch": 0.8911723493694536, "grad_norm": 2.4515037536621094, "learning_rate": 5.729456881575929e-07, "loss": 0.7459, "step": 28620 }, { "epoch": 0.8913280398567648, "grad_norm": 2.4603796005249023, "learning_rate": 5.721262578255598e-07, "loss": 0.7666, "step": 28625 }, { "epoch": 0.891483730344076, "grad_norm": 2.204044818878174, "learning_rate": 5.713068274935265e-07, "loss": 0.6977, "step": 28630 }, { "epoch": 0.8916394208313873, "grad_norm": 1.946272611618042, "learning_rate": 5.704873971614934e-07, "loss": 0.6716, "step": 28635 }, { "epoch": 0.8917951113186984, "grad_norm": 2.7691543102264404, "learning_rate": 5.696679668294603e-07, "loss": 0.8647, "step": 28640 }, { "epoch": 0.8919508018060096, "grad_norm": 2.0633866786956787, "learning_rate": 5.68848536497427e-07, "loss": 0.7969, "step": 28645 }, { "epoch": 0.8921064922933208, "grad_norm": 2.3425257205963135, "learning_rate": 5.680291061653939e-07, "loss": 0.7724, "step": 28650 }, { "epoch": 0.8922621827806321, "grad_norm": 2.2182695865631104, "learning_rate": 5.672096758333607e-07, "loss": 0.7329, "step": 28655 }, { "epoch": 0.8924178732679433, "grad_norm": 2.3264410495758057, "learning_rate": 5.663902455013275e-07, "loss": 0.6621, "step": 28660 }, { "epoch": 0.8925735637552545, "grad_norm": 1.908457636833191, "learning_rate": 5.655708151692944e-07, "loss": 0.6753, "step": 28665 }, { "epoch": 0.8927292542425658, "grad_norm": 2.100208044052124, "learning_rate": 5.647513848372612e-07, "loss": 0.7699, "step": 28670 }, { "epoch": 0.892884944729877, "grad_norm": 2.067737340927124, "learning_rate": 5.63931954505228e-07, "loss": 0.7678, "step": 28675 }, { "epoch": 0.8930406352171882, "grad_norm": 2.0323526859283447, "learning_rate": 5.631125241731948e-07, "loss": 0.755, "step": 28680 }, { "epoch": 0.8931963257044995, "grad_norm": 1.7813904285430908, "learning_rate": 5.622930938411617e-07, "loss": 0.682, "step": 28685 }, { "epoch": 0.8933520161918107, "grad_norm": 1.9395827054977417, "learning_rate": 5.614736635091285e-07, "loss": 0.701, "step": 28690 }, { "epoch": 0.8935077066791219, "grad_norm": 2.1480510234832764, "learning_rate": 5.606542331770953e-07, "loss": 0.7626, "step": 28695 }, { "epoch": 0.8936633971664332, "grad_norm": 1.6999976634979248, "learning_rate": 5.598348028450622e-07, "loss": 0.7357, "step": 28700 }, { "epoch": 0.8938190876537444, "grad_norm": 2.2599611282348633, "learning_rate": 5.59015372513029e-07, "loss": 0.7057, "step": 28705 }, { "epoch": 0.8939747781410555, "grad_norm": 2.105114221572876, "learning_rate": 5.581959421809958e-07, "loss": 0.8137, "step": 28710 }, { "epoch": 0.8941304686283668, "grad_norm": 2.1147234439849854, "learning_rate": 5.573765118489626e-07, "loss": 0.7291, "step": 28715 }, { "epoch": 0.894286159115678, "grad_norm": 2.198746681213379, "learning_rate": 5.565570815169294e-07, "loss": 0.771, "step": 28720 }, { "epoch": 0.8944418496029892, "grad_norm": 1.9604310989379883, "learning_rate": 5.557376511848963e-07, "loss": 0.7395, "step": 28725 }, { "epoch": 0.8945975400903005, "grad_norm": 2.607337474822998, "learning_rate": 5.549182208528632e-07, "loss": 0.7847, "step": 28730 }, { "epoch": 0.8947532305776117, "grad_norm": 1.7991255521774292, "learning_rate": 5.540987905208299e-07, "loss": 0.738, "step": 28735 }, { "epoch": 0.8949089210649229, "grad_norm": 2.449068069458008, "learning_rate": 5.532793601887968e-07, "loss": 0.6902, "step": 28740 }, { "epoch": 0.8950646115522342, "grad_norm": 1.9079183340072632, "learning_rate": 5.524599298567637e-07, "loss": 0.8393, "step": 28745 }, { "epoch": 0.8952203020395454, "grad_norm": 2.0194830894470215, "learning_rate": 5.516404995247304e-07, "loss": 0.7401, "step": 28750 }, { "epoch": 0.8953759925268566, "grad_norm": 1.9242877960205078, "learning_rate": 5.508210691926973e-07, "loss": 0.6851, "step": 28755 }, { "epoch": 0.8955316830141679, "grad_norm": 1.8452227115631104, "learning_rate": 5.500016388606641e-07, "loss": 0.7752, "step": 28760 }, { "epoch": 0.8956873735014791, "grad_norm": 2.088864326477051, "learning_rate": 5.491822085286309e-07, "loss": 0.7831, "step": 28765 }, { "epoch": 0.8958430639887903, "grad_norm": 2.344743490219116, "learning_rate": 5.483627781965978e-07, "loss": 0.8149, "step": 28770 }, { "epoch": 0.8959987544761016, "grad_norm": 2.087451219558716, "learning_rate": 5.475433478645646e-07, "loss": 0.742, "step": 28775 }, { "epoch": 0.8961544449634128, "grad_norm": 2.3718209266662598, "learning_rate": 5.467239175325314e-07, "loss": 0.7447, "step": 28780 }, { "epoch": 0.8963101354507239, "grad_norm": 1.8872469663619995, "learning_rate": 5.459044872004982e-07, "loss": 0.7059, "step": 28785 }, { "epoch": 0.8964658259380351, "grad_norm": 2.484438419342041, "learning_rate": 5.450850568684651e-07, "loss": 0.7478, "step": 28790 }, { "epoch": 0.8966215164253464, "grad_norm": 1.8052548170089722, "learning_rate": 5.44265626536432e-07, "loss": 0.7249, "step": 28795 }, { "epoch": 0.8967772069126576, "grad_norm": 2.389808416366577, "learning_rate": 5.434461962043987e-07, "loss": 0.8396, "step": 28800 }, { "epoch": 0.8969328973999688, "grad_norm": 2.3025715351104736, "learning_rate": 5.426267658723656e-07, "loss": 0.6676, "step": 28805 }, { "epoch": 0.8970885878872801, "grad_norm": 2.3155124187469482, "learning_rate": 5.418073355403325e-07, "loss": 0.8182, "step": 28810 }, { "epoch": 0.8972442783745913, "grad_norm": 1.9428786039352417, "learning_rate": 5.409879052082992e-07, "loss": 0.7957, "step": 28815 }, { "epoch": 0.8973999688619025, "grad_norm": 2.1353397369384766, "learning_rate": 5.40168474876266e-07, "loss": 0.7578, "step": 28820 }, { "epoch": 0.8975556593492138, "grad_norm": 2.4914894104003906, "learning_rate": 5.393490445442329e-07, "loss": 0.7446, "step": 28825 }, { "epoch": 0.897711349836525, "grad_norm": 2.124445676803589, "learning_rate": 5.385296142121997e-07, "loss": 0.7636, "step": 28830 }, { "epoch": 0.8978670403238362, "grad_norm": 2.0059421062469482, "learning_rate": 5.377101838801666e-07, "loss": 0.7419, "step": 28835 }, { "epoch": 0.8980227308111475, "grad_norm": 2.186363697052002, "learning_rate": 5.368907535481334e-07, "loss": 0.7837, "step": 28840 }, { "epoch": 0.8981784212984587, "grad_norm": 2.288405179977417, "learning_rate": 5.360713232161002e-07, "loss": 0.7198, "step": 28845 }, { "epoch": 0.8983341117857699, "grad_norm": 2.094606876373291, "learning_rate": 5.352518928840671e-07, "loss": 0.7493, "step": 28850 }, { "epoch": 0.8984898022730811, "grad_norm": 2.0307228565216064, "learning_rate": 5.344324625520339e-07, "loss": 0.7583, "step": 28855 }, { "epoch": 0.8986454927603923, "grad_norm": 2.462087392807007, "learning_rate": 5.336130322200006e-07, "loss": 0.7963, "step": 28860 }, { "epoch": 0.8988011832477035, "grad_norm": 2.487271308898926, "learning_rate": 5.327936018879675e-07, "loss": 0.7853, "step": 28865 }, { "epoch": 0.8989568737350148, "grad_norm": 1.9409162998199463, "learning_rate": 5.319741715559344e-07, "loss": 0.7729, "step": 28870 }, { "epoch": 0.899112564222326, "grad_norm": 2.870582103729248, "learning_rate": 5.311547412239012e-07, "loss": 0.7509, "step": 28875 }, { "epoch": 0.8992682547096372, "grad_norm": 2.2896811962127686, "learning_rate": 5.30335310891868e-07, "loss": 0.7487, "step": 28880 }, { "epoch": 0.8994239451969485, "grad_norm": 2.114232301712036, "learning_rate": 5.295158805598349e-07, "loss": 0.801, "step": 28885 }, { "epoch": 0.8995796356842597, "grad_norm": 2.082977771759033, "learning_rate": 5.286964502278017e-07, "loss": 0.6796, "step": 28890 }, { "epoch": 0.8997353261715709, "grad_norm": 1.6126604080200195, "learning_rate": 5.278770198957685e-07, "loss": 0.6718, "step": 28895 }, { "epoch": 0.8998910166588822, "grad_norm": 2.729158878326416, "learning_rate": 5.270575895637354e-07, "loss": 0.7966, "step": 28900 }, { "epoch": 0.9000467071461934, "grad_norm": 2.261653184890747, "learning_rate": 5.262381592317021e-07, "loss": 0.729, "step": 28905 }, { "epoch": 0.9002023976335046, "grad_norm": 2.3977208137512207, "learning_rate": 5.25418728899669e-07, "loss": 0.8257, "step": 28910 }, { "epoch": 0.9003580881208159, "grad_norm": 2.2196121215820312, "learning_rate": 5.245992985676359e-07, "loss": 0.8095, "step": 28915 }, { "epoch": 0.9005137786081271, "grad_norm": 1.9557658433914185, "learning_rate": 5.237798682356026e-07, "loss": 0.7616, "step": 28920 }, { "epoch": 0.9006694690954383, "grad_norm": 2.045761823654175, "learning_rate": 5.229604379035695e-07, "loss": 0.7239, "step": 28925 }, { "epoch": 0.9008251595827494, "grad_norm": 1.7226192951202393, "learning_rate": 5.221410075715363e-07, "loss": 0.7312, "step": 28930 }, { "epoch": 0.9009808500700607, "grad_norm": 2.161587953567505, "learning_rate": 5.213215772395031e-07, "loss": 0.7323, "step": 28935 }, { "epoch": 0.9011365405573719, "grad_norm": 1.9984333515167236, "learning_rate": 5.2050214690747e-07, "loss": 0.687, "step": 28940 }, { "epoch": 0.9012922310446831, "grad_norm": 2.3306145668029785, "learning_rate": 5.196827165754368e-07, "loss": 0.7186, "step": 28945 }, { "epoch": 0.9014479215319944, "grad_norm": 1.8711707592010498, "learning_rate": 5.188632862434036e-07, "loss": 0.7403, "step": 28950 }, { "epoch": 0.9016036120193056, "grad_norm": 2.33172869682312, "learning_rate": 5.180438559113705e-07, "loss": 0.7746, "step": 28955 }, { "epoch": 0.9017593025066168, "grad_norm": 1.8435176610946655, "learning_rate": 5.172244255793373e-07, "loss": 0.7387, "step": 28960 }, { "epoch": 0.9019149929939281, "grad_norm": 2.0395240783691406, "learning_rate": 5.16404995247304e-07, "loss": 0.8148, "step": 28965 }, { "epoch": 0.9020706834812393, "grad_norm": 2.1512272357940674, "learning_rate": 5.155855649152709e-07, "loss": 0.7436, "step": 28970 }, { "epoch": 0.9022263739685505, "grad_norm": 2.7284538745880127, "learning_rate": 5.147661345832378e-07, "loss": 0.7616, "step": 28975 }, { "epoch": 0.9023820644558618, "grad_norm": 1.9460422992706299, "learning_rate": 5.139467042512046e-07, "loss": 0.7079, "step": 28980 }, { "epoch": 0.902537754943173, "grad_norm": 2.4244000911712646, "learning_rate": 5.131272739191714e-07, "loss": 0.7376, "step": 28985 }, { "epoch": 0.9026934454304842, "grad_norm": 2.218492031097412, "learning_rate": 5.123078435871383e-07, "loss": 0.7512, "step": 28990 }, { "epoch": 0.9028491359177955, "grad_norm": 1.8935246467590332, "learning_rate": 5.114884132551051e-07, "loss": 0.8213, "step": 28995 }, { "epoch": 0.9030048264051066, "grad_norm": 2.748955249786377, "learning_rate": 5.106689829230719e-07, "loss": 0.8311, "step": 29000 }, { "epoch": 0.9031605168924178, "grad_norm": 2.123979330062866, "learning_rate": 5.098495525910388e-07, "loss": 0.7241, "step": 29005 }, { "epoch": 0.9033162073797291, "grad_norm": 2.300992965698242, "learning_rate": 5.090301222590055e-07, "loss": 0.7287, "step": 29010 }, { "epoch": 0.9034718978670403, "grad_norm": 1.984421968460083, "learning_rate": 5.082106919269724e-07, "loss": 0.8131, "step": 29015 }, { "epoch": 0.9036275883543515, "grad_norm": 2.19374680519104, "learning_rate": 5.073912615949393e-07, "loss": 0.7848, "step": 29020 }, { "epoch": 0.9037832788416628, "grad_norm": 2.4286746978759766, "learning_rate": 5.06571831262906e-07, "loss": 0.7672, "step": 29025 }, { "epoch": 0.903938969328974, "grad_norm": 2.081092596054077, "learning_rate": 5.057524009308729e-07, "loss": 0.76, "step": 29030 }, { "epoch": 0.9040946598162852, "grad_norm": 2.4078664779663086, "learning_rate": 5.049329705988397e-07, "loss": 0.6388, "step": 29035 }, { "epoch": 0.9042503503035965, "grad_norm": 3.2024948596954346, "learning_rate": 5.041135402668065e-07, "loss": 0.8061, "step": 29040 }, { "epoch": 0.9044060407909077, "grad_norm": 2.309239625930786, "learning_rate": 5.032941099347734e-07, "loss": 0.7803, "step": 29045 }, { "epoch": 0.9045617312782189, "grad_norm": 2.428985834121704, "learning_rate": 5.024746796027402e-07, "loss": 0.8043, "step": 29050 }, { "epoch": 0.9047174217655302, "grad_norm": 2.4078173637390137, "learning_rate": 5.01655249270707e-07, "loss": 0.7471, "step": 29055 }, { "epoch": 0.9048731122528414, "grad_norm": 1.8520267009735107, "learning_rate": 5.008358189386739e-07, "loss": 0.7019, "step": 29060 }, { "epoch": 0.9050288027401526, "grad_norm": 2.325157642364502, "learning_rate": 5.000163886066407e-07, "loss": 0.6984, "step": 29065 }, { "epoch": 0.9051844932274637, "grad_norm": 2.211613416671753, "learning_rate": 4.991969582746076e-07, "loss": 0.7484, "step": 29070 }, { "epoch": 0.905340183714775, "grad_norm": 2.0958824157714844, "learning_rate": 4.983775279425743e-07, "loss": 0.7283, "step": 29075 }, { "epoch": 0.9054958742020862, "grad_norm": 2.3465487957000732, "learning_rate": 4.975580976105412e-07, "loss": 0.803, "step": 29080 }, { "epoch": 0.9056515646893974, "grad_norm": 2.070364475250244, "learning_rate": 4.967386672785081e-07, "loss": 0.7385, "step": 29085 }, { "epoch": 0.9058072551767087, "grad_norm": 2.083195924758911, "learning_rate": 4.959192369464748e-07, "loss": 0.745, "step": 29090 }, { "epoch": 0.9059629456640199, "grad_norm": 1.9272284507751465, "learning_rate": 4.950998066144417e-07, "loss": 0.7722, "step": 29095 }, { "epoch": 0.9061186361513311, "grad_norm": 2.2174463272094727, "learning_rate": 4.942803762824086e-07, "loss": 0.816, "step": 29100 }, { "epoch": 0.9062743266386424, "grad_norm": 1.850743055343628, "learning_rate": 4.934609459503753e-07, "loss": 0.653, "step": 29105 }, { "epoch": 0.9064300171259536, "grad_norm": 2.0859198570251465, "learning_rate": 4.926415156183422e-07, "loss": 0.7499, "step": 29110 }, { "epoch": 0.9065857076132648, "grad_norm": 2.370715379714966, "learning_rate": 4.91822085286309e-07, "loss": 0.7562, "step": 29115 }, { "epoch": 0.9067413981005761, "grad_norm": 1.733228087425232, "learning_rate": 4.910026549542758e-07, "loss": 0.7461, "step": 29120 }, { "epoch": 0.9068970885878873, "grad_norm": 2.237665891647339, "learning_rate": 4.901832246222427e-07, "loss": 0.706, "step": 29125 }, { "epoch": 0.9070527790751985, "grad_norm": 2.152618885040283, "learning_rate": 4.893637942902095e-07, "loss": 0.711, "step": 29130 }, { "epoch": 0.9072084695625098, "grad_norm": 3.1127121448516846, "learning_rate": 4.885443639581763e-07, "loss": 0.7528, "step": 29135 }, { "epoch": 0.907364160049821, "grad_norm": 1.8549177646636963, "learning_rate": 4.877249336261432e-07, "loss": 0.7684, "step": 29140 }, { "epoch": 0.9075198505371321, "grad_norm": 1.881191611289978, "learning_rate": 4.8690550329411e-07, "loss": 0.7363, "step": 29145 }, { "epoch": 0.9076755410244434, "grad_norm": 1.9374293088912964, "learning_rate": 4.860860729620768e-07, "loss": 0.713, "step": 29150 }, { "epoch": 0.9078312315117546, "grad_norm": 2.440633773803711, "learning_rate": 4.852666426300436e-07, "loss": 0.6725, "step": 29155 }, { "epoch": 0.9079869219990658, "grad_norm": 1.8766728639602661, "learning_rate": 4.844472122980105e-07, "loss": 0.7292, "step": 29160 }, { "epoch": 0.9081426124863771, "grad_norm": 1.8631383180618286, "learning_rate": 4.836277819659773e-07, "loss": 0.7627, "step": 29165 }, { "epoch": 0.9082983029736883, "grad_norm": 1.9877829551696777, "learning_rate": 4.828083516339441e-07, "loss": 0.6947, "step": 29170 }, { "epoch": 0.9084539934609995, "grad_norm": 2.242736339569092, "learning_rate": 4.81988921301911e-07, "loss": 0.7139, "step": 29175 }, { "epoch": 0.9086096839483108, "grad_norm": 2.3807296752929688, "learning_rate": 4.811694909698777e-07, "loss": 0.7991, "step": 29180 }, { "epoch": 0.908765374435622, "grad_norm": 1.8982293605804443, "learning_rate": 4.803500606378446e-07, "loss": 0.6865, "step": 29185 }, { "epoch": 0.9089210649229332, "grad_norm": 2.042227029800415, "learning_rate": 4.795306303058115e-07, "loss": 0.8019, "step": 29190 }, { "epoch": 0.9090767554102445, "grad_norm": 2.699584722518921, "learning_rate": 4.787111999737782e-07, "loss": 0.6989, "step": 29195 }, { "epoch": 0.9092324458975557, "grad_norm": 2.3027639389038086, "learning_rate": 4.778917696417451e-07, "loss": 0.7834, "step": 29200 }, { "epoch": 0.9093881363848669, "grad_norm": 2.100794792175293, "learning_rate": 4.77072339309712e-07, "loss": 0.7327, "step": 29205 }, { "epoch": 0.9095438268721782, "grad_norm": 2.181070566177368, "learning_rate": 4.762529089776788e-07, "loss": 0.771, "step": 29210 }, { "epoch": 0.9096995173594893, "grad_norm": 1.9040441513061523, "learning_rate": 4.7543347864564555e-07, "loss": 0.8355, "step": 29215 }, { "epoch": 0.9098552078468005, "grad_norm": 1.8005765676498413, "learning_rate": 4.746140483136124e-07, "loss": 0.7463, "step": 29220 }, { "epoch": 0.9100108983341118, "grad_norm": 2.275477409362793, "learning_rate": 4.7379461798157924e-07, "loss": 0.7327, "step": 29225 }, { "epoch": 0.910166588821423, "grad_norm": 2.157801628112793, "learning_rate": 4.7297518764954605e-07, "loss": 0.8269, "step": 29230 }, { "epoch": 0.9103222793087342, "grad_norm": 1.979699730873108, "learning_rate": 4.721557573175129e-07, "loss": 0.7362, "step": 29235 }, { "epoch": 0.9104779697960454, "grad_norm": 2.480243444442749, "learning_rate": 4.7133632698547974e-07, "loss": 0.7343, "step": 29240 }, { "epoch": 0.9106336602833567, "grad_norm": 1.9730757474899292, "learning_rate": 4.705168966534466e-07, "loss": 0.8101, "step": 29245 }, { "epoch": 0.9107893507706679, "grad_norm": 1.9814794063568115, "learning_rate": 4.6969746632141337e-07, "loss": 0.7603, "step": 29250 }, { "epoch": 0.9109450412579791, "grad_norm": 2.5945322513580322, "learning_rate": 4.688780359893802e-07, "loss": 0.7503, "step": 29255 }, { "epoch": 0.9111007317452904, "grad_norm": 1.9615274667739868, "learning_rate": 4.6805860565734706e-07, "loss": 0.7737, "step": 29260 }, { "epoch": 0.9112564222326016, "grad_norm": 2.163360595703125, "learning_rate": 4.672391753253139e-07, "loss": 0.7605, "step": 29265 }, { "epoch": 0.9114121127199128, "grad_norm": 2.154611110687256, "learning_rate": 4.664197449932807e-07, "loss": 0.6852, "step": 29270 }, { "epoch": 0.9115678032072241, "grad_norm": 2.290954113006592, "learning_rate": 4.6560031466124756e-07, "loss": 0.8221, "step": 29275 }, { "epoch": 0.9117234936945353, "grad_norm": 2.0612096786499023, "learning_rate": 4.647808843292144e-07, "loss": 0.7659, "step": 29280 }, { "epoch": 0.9118791841818464, "grad_norm": 2.5025737285614014, "learning_rate": 4.6396145399718114e-07, "loss": 0.7852, "step": 29285 }, { "epoch": 0.9120348746691577, "grad_norm": 2.1828012466430664, "learning_rate": 4.63142023665148e-07, "loss": 0.7728, "step": 29290 }, { "epoch": 0.9121905651564689, "grad_norm": 1.9054621458053589, "learning_rate": 4.6232259333311483e-07, "loss": 0.7798, "step": 29295 }, { "epoch": 0.9123462556437801, "grad_norm": 2.065523147583008, "learning_rate": 4.615031630010817e-07, "loss": 0.8496, "step": 29300 }, { "epoch": 0.9125019461310914, "grad_norm": 4.636486530303955, "learning_rate": 4.606837326690485e-07, "loss": 0.7335, "step": 29305 }, { "epoch": 0.9126576366184026, "grad_norm": 2.3291537761688232, "learning_rate": 4.598643023370154e-07, "loss": 0.7428, "step": 29310 }, { "epoch": 0.9128133271057138, "grad_norm": 2.331528425216675, "learning_rate": 4.590448720049822e-07, "loss": 0.7972, "step": 29315 }, { "epoch": 0.9129690175930251, "grad_norm": 2.183519124984741, "learning_rate": 4.58225441672949e-07, "loss": 0.8122, "step": 29320 }, { "epoch": 0.9131247080803363, "grad_norm": 2.078446388244629, "learning_rate": 4.5740601134091583e-07, "loss": 0.8115, "step": 29325 }, { "epoch": 0.9132803985676475, "grad_norm": 2.1087486743927, "learning_rate": 4.5658658100888265e-07, "loss": 0.7678, "step": 29330 }, { "epoch": 0.9134360890549588, "grad_norm": 2.2297956943511963, "learning_rate": 4.5576715067684947e-07, "loss": 0.7886, "step": 29335 }, { "epoch": 0.91359177954227, "grad_norm": 2.204209566116333, "learning_rate": 4.5494772034481633e-07, "loss": 0.725, "step": 29340 }, { "epoch": 0.9137474700295812, "grad_norm": 1.8935832977294922, "learning_rate": 4.5412829001278315e-07, "loss": 0.7416, "step": 29345 }, { "epoch": 0.9139031605168925, "grad_norm": 1.6996549367904663, "learning_rate": 4.5330885968075e-07, "loss": 0.7429, "step": 29350 }, { "epoch": 0.9140588510042037, "grad_norm": 1.9757511615753174, "learning_rate": 4.5248942934871684e-07, "loss": 0.6986, "step": 29355 }, { "epoch": 0.9142145414915148, "grad_norm": 1.966531753540039, "learning_rate": 4.516699990166836e-07, "loss": 0.7819, "step": 29360 }, { "epoch": 0.914370231978826, "grad_norm": 2.259613037109375, "learning_rate": 4.5085056868465047e-07, "loss": 0.7868, "step": 29365 }, { "epoch": 0.9145259224661373, "grad_norm": 2.396827459335327, "learning_rate": 4.500311383526173e-07, "loss": 0.835, "step": 29370 }, { "epoch": 0.9146816129534485, "grad_norm": 1.77602219581604, "learning_rate": 4.492117080205841e-07, "loss": 0.7473, "step": 29375 }, { "epoch": 0.9148373034407598, "grad_norm": 2.5439696311950684, "learning_rate": 4.4839227768855097e-07, "loss": 0.8081, "step": 29380 }, { "epoch": 0.914992993928071, "grad_norm": 2.3124701976776123, "learning_rate": 4.475728473565178e-07, "loss": 0.8054, "step": 29385 }, { "epoch": 0.9151486844153822, "grad_norm": 2.309231758117676, "learning_rate": 4.4675341702448466e-07, "loss": 0.7323, "step": 29390 }, { "epoch": 0.9153043749026935, "grad_norm": 1.9091190099716187, "learning_rate": 4.459339866924514e-07, "loss": 0.7062, "step": 29395 }, { "epoch": 0.9154600653900047, "grad_norm": 1.9896283149719238, "learning_rate": 4.4511455636041824e-07, "loss": 0.7616, "step": 29400 }, { "epoch": 0.9156157558773159, "grad_norm": 1.9387608766555786, "learning_rate": 4.442951260283851e-07, "loss": 0.7625, "step": 29405 }, { "epoch": 0.9157714463646272, "grad_norm": 2.512777328491211, "learning_rate": 4.434756956963519e-07, "loss": 0.7677, "step": 29410 }, { "epoch": 0.9159271368519384, "grad_norm": 2.224165678024292, "learning_rate": 4.426562653643188e-07, "loss": 0.8114, "step": 29415 }, { "epoch": 0.9160828273392496, "grad_norm": 2.5296077728271484, "learning_rate": 4.418368350322856e-07, "loss": 0.7816, "step": 29420 }, { "epoch": 0.9162385178265608, "grad_norm": 2.7369234561920166, "learning_rate": 4.4101740470025243e-07, "loss": 0.7259, "step": 29425 }, { "epoch": 0.916394208313872, "grad_norm": 2.21476411819458, "learning_rate": 4.4019797436821924e-07, "loss": 0.7776, "step": 29430 }, { "epoch": 0.9165498988011832, "grad_norm": 1.8404626846313477, "learning_rate": 4.3937854403618606e-07, "loss": 0.7671, "step": 29435 }, { "epoch": 0.9167055892884944, "grad_norm": 2.397430896759033, "learning_rate": 4.385591137041529e-07, "loss": 0.7868, "step": 29440 }, { "epoch": 0.9168612797758057, "grad_norm": 2.747905731201172, "learning_rate": 4.3773968337211975e-07, "loss": 0.731, "step": 29445 }, { "epoch": 0.9170169702631169, "grad_norm": 1.826646327972412, "learning_rate": 4.3692025304008656e-07, "loss": 0.709, "step": 29450 }, { "epoch": 0.9171726607504281, "grad_norm": 2.337949752807617, "learning_rate": 4.3610082270805343e-07, "loss": 0.7908, "step": 29455 }, { "epoch": 0.9173283512377394, "grad_norm": 2.3911898136138916, "learning_rate": 4.3528139237602025e-07, "loss": 0.7934, "step": 29460 }, { "epoch": 0.9174840417250506, "grad_norm": 2.126744270324707, "learning_rate": 4.34461962043987e-07, "loss": 0.751, "step": 29465 }, { "epoch": 0.9176397322123618, "grad_norm": 2.288337230682373, "learning_rate": 4.336425317119539e-07, "loss": 0.7445, "step": 29470 }, { "epoch": 0.9177954226996731, "grad_norm": 2.3647756576538086, "learning_rate": 4.328231013799207e-07, "loss": 0.7412, "step": 29475 }, { "epoch": 0.9179511131869843, "grad_norm": 2.4325177669525146, "learning_rate": 4.320036710478875e-07, "loss": 0.86, "step": 29480 }, { "epoch": 0.9181068036742955, "grad_norm": 2.139934539794922, "learning_rate": 4.311842407158544e-07, "loss": 0.7093, "step": 29485 }, { "epoch": 0.9182624941616068, "grad_norm": 2.282701015472412, "learning_rate": 4.303648103838212e-07, "loss": 0.7227, "step": 29490 }, { "epoch": 0.918418184648918, "grad_norm": 2.0303657054901123, "learning_rate": 4.2954538005178807e-07, "loss": 0.7797, "step": 29495 }, { "epoch": 0.9185738751362291, "grad_norm": 2.002554416656494, "learning_rate": 4.2872594971975484e-07, "loss": 0.7302, "step": 29500 }, { "epoch": 0.9187295656235404, "grad_norm": 2.905773639678955, "learning_rate": 4.2790651938772165e-07, "loss": 0.7133, "step": 29505 }, { "epoch": 0.9188852561108516, "grad_norm": 2.136500835418701, "learning_rate": 4.270870890556885e-07, "loss": 0.8033, "step": 29510 }, { "epoch": 0.9190409465981628, "grad_norm": 2.344376802444458, "learning_rate": 4.2626765872365534e-07, "loss": 0.7426, "step": 29515 }, { "epoch": 0.919196637085474, "grad_norm": 1.9521008729934692, "learning_rate": 4.254482283916222e-07, "loss": 0.7284, "step": 29520 }, { "epoch": 0.9193523275727853, "grad_norm": 2.102600336074829, "learning_rate": 4.24628798059589e-07, "loss": 0.7617, "step": 29525 }, { "epoch": 0.9195080180600965, "grad_norm": 2.2610394954681396, "learning_rate": 4.2380936772755584e-07, "loss": 0.7493, "step": 29530 }, { "epoch": 0.9196637085474078, "grad_norm": 2.6144309043884277, "learning_rate": 4.229899373955227e-07, "loss": 0.8035, "step": 29535 }, { "epoch": 0.919819399034719, "grad_norm": 2.07253098487854, "learning_rate": 4.221705070634895e-07, "loss": 0.6912, "step": 29540 }, { "epoch": 0.9199750895220302, "grad_norm": 1.90175199508667, "learning_rate": 4.213510767314563e-07, "loss": 0.7129, "step": 29545 }, { "epoch": 0.9201307800093415, "grad_norm": 2.3626041412353516, "learning_rate": 4.2053164639942316e-07, "loss": 0.7866, "step": 29550 }, { "epoch": 0.9202864704966527, "grad_norm": 2.452117443084717, "learning_rate": 4.1971221606739e-07, "loss": 0.7792, "step": 29555 }, { "epoch": 0.9204421609839639, "grad_norm": 2.965169668197632, "learning_rate": 4.1889278573535685e-07, "loss": 0.7342, "step": 29560 }, { "epoch": 0.9205978514712752, "grad_norm": 2.5392911434173584, "learning_rate": 4.1807335540332366e-07, "loss": 0.7777, "step": 29565 }, { "epoch": 0.9207535419585864, "grad_norm": 2.3210790157318115, "learning_rate": 4.172539250712905e-07, "loss": 0.6895, "step": 29570 }, { "epoch": 0.9209092324458975, "grad_norm": 1.8975262641906738, "learning_rate": 4.164344947392573e-07, "loss": 0.7422, "step": 29575 }, { "epoch": 0.9210649229332087, "grad_norm": 1.9844297170639038, "learning_rate": 4.156150644072241e-07, "loss": 0.7065, "step": 29580 }, { "epoch": 0.92122061342052, "grad_norm": 2.439842462539673, "learning_rate": 4.1479563407519093e-07, "loss": 0.7644, "step": 29585 }, { "epoch": 0.9213763039078312, "grad_norm": 2.107410430908203, "learning_rate": 4.139762037431578e-07, "loss": 0.7575, "step": 29590 }, { "epoch": 0.9215319943951424, "grad_norm": 2.129244089126587, "learning_rate": 4.131567734111246e-07, "loss": 0.7653, "step": 29595 }, { "epoch": 0.9216876848824537, "grad_norm": 2.610891580581665, "learning_rate": 4.123373430790915e-07, "loss": 0.6975, "step": 29600 }, { "epoch": 0.9218433753697649, "grad_norm": 2.2869250774383545, "learning_rate": 4.115179127470583e-07, "loss": 0.7618, "step": 29605 }, { "epoch": 0.9219990658570761, "grad_norm": 1.660576581954956, "learning_rate": 4.1069848241502507e-07, "loss": 0.7595, "step": 29610 }, { "epoch": 0.9221547563443874, "grad_norm": 1.9285809993743896, "learning_rate": 4.0987905208299193e-07, "loss": 0.7727, "step": 29615 }, { "epoch": 0.9223104468316986, "grad_norm": 2.0313427448272705, "learning_rate": 4.0905962175095875e-07, "loss": 0.7799, "step": 29620 }, { "epoch": 0.9224661373190098, "grad_norm": 2.463017702102661, "learning_rate": 4.082401914189256e-07, "loss": 0.7163, "step": 29625 }, { "epoch": 0.9226218278063211, "grad_norm": 2.0053725242614746, "learning_rate": 4.0742076108689244e-07, "loss": 0.7101, "step": 29630 }, { "epoch": 0.9227775182936323, "grad_norm": 2.2354915142059326, "learning_rate": 4.0660133075485925e-07, "loss": 0.8097, "step": 29635 }, { "epoch": 0.9229332087809435, "grad_norm": 1.9331402778625488, "learning_rate": 4.057819004228261e-07, "loss": 0.7106, "step": 29640 }, { "epoch": 0.9230888992682547, "grad_norm": 2.1847078800201416, "learning_rate": 4.049624700907929e-07, "loss": 0.7095, "step": 29645 }, { "epoch": 0.9232445897555659, "grad_norm": 1.9355813264846802, "learning_rate": 4.041430397587597e-07, "loss": 0.707, "step": 29650 }, { "epoch": 0.9234002802428771, "grad_norm": 2.048734426498413, "learning_rate": 4.0332360942672657e-07, "loss": 0.8238, "step": 29655 }, { "epoch": 0.9235559707301884, "grad_norm": 2.0579354763031006, "learning_rate": 4.025041790946934e-07, "loss": 0.722, "step": 29660 }, { "epoch": 0.9237116612174996, "grad_norm": 2.273639440536499, "learning_rate": 4.0168474876266026e-07, "loss": 0.7675, "step": 29665 }, { "epoch": 0.9238673517048108, "grad_norm": 2.0917809009552, "learning_rate": 4.008653184306271e-07, "loss": 0.7537, "step": 29670 }, { "epoch": 0.9240230421921221, "grad_norm": 2.059009552001953, "learning_rate": 4.000458880985939e-07, "loss": 0.6792, "step": 29675 }, { "epoch": 0.9241787326794333, "grad_norm": 2.2456841468811035, "learning_rate": 3.992264577665607e-07, "loss": 0.7754, "step": 29680 }, { "epoch": 0.9243344231667445, "grad_norm": 2.3192625045776367, "learning_rate": 3.984070274345275e-07, "loss": 0.7926, "step": 29685 }, { "epoch": 0.9244901136540558, "grad_norm": 2.572411298751831, "learning_rate": 3.9758759710249434e-07, "loss": 0.8092, "step": 29690 }, { "epoch": 0.924645804141367, "grad_norm": 2.5146048069000244, "learning_rate": 3.967681667704612e-07, "loss": 0.6948, "step": 29695 }, { "epoch": 0.9248014946286782, "grad_norm": 2.457982063293457, "learning_rate": 3.9594873643842803e-07, "loss": 0.8324, "step": 29700 }, { "epoch": 0.9249571851159895, "grad_norm": 2.0490260124206543, "learning_rate": 3.951293061063949e-07, "loss": 0.6655, "step": 29705 }, { "epoch": 0.9251128756033007, "grad_norm": 2.225320339202881, "learning_rate": 3.943098757743617e-07, "loss": 0.6973, "step": 29710 }, { "epoch": 0.9252685660906118, "grad_norm": 2.241023063659668, "learning_rate": 3.934904454423285e-07, "loss": 0.7546, "step": 29715 }, { "epoch": 0.925424256577923, "grad_norm": 2.4086406230926514, "learning_rate": 3.9267101511029535e-07, "loss": 0.6541, "step": 29720 }, { "epoch": 0.9255799470652343, "grad_norm": 2.2870259284973145, "learning_rate": 3.9185158477826216e-07, "loss": 0.8225, "step": 29725 }, { "epoch": 0.9257356375525455, "grad_norm": 1.9616856575012207, "learning_rate": 3.9103215444622903e-07, "loss": 0.7837, "step": 29730 }, { "epoch": 0.9258913280398567, "grad_norm": 2.5060882568359375, "learning_rate": 3.9021272411419585e-07, "loss": 0.8489, "step": 29735 }, { "epoch": 0.926047018527168, "grad_norm": 2.461059331893921, "learning_rate": 3.8939329378216267e-07, "loss": 0.8087, "step": 29740 }, { "epoch": 0.9262027090144792, "grad_norm": 2.423205614089966, "learning_rate": 3.8857386345012954e-07, "loss": 0.7144, "step": 29745 }, { "epoch": 0.9263583995017904, "grad_norm": 2.558582067489624, "learning_rate": 3.8775443311809635e-07, "loss": 0.6882, "step": 29750 }, { "epoch": 0.9265140899891017, "grad_norm": 2.181511163711548, "learning_rate": 3.869350027860631e-07, "loss": 0.7379, "step": 29755 }, { "epoch": 0.9266697804764129, "grad_norm": 2.445331573486328, "learning_rate": 3.8611557245403e-07, "loss": 0.8176, "step": 29760 }, { "epoch": 0.9268254709637241, "grad_norm": 2.2107441425323486, "learning_rate": 3.852961421219968e-07, "loss": 0.7775, "step": 29765 }, { "epoch": 0.9269811614510354, "grad_norm": 1.9246896505355835, "learning_rate": 3.8447671178996367e-07, "loss": 0.783, "step": 29770 }, { "epoch": 0.9271368519383466, "grad_norm": 2.2930400371551514, "learning_rate": 3.836572814579305e-07, "loss": 0.8403, "step": 29775 }, { "epoch": 0.9272925424256578, "grad_norm": 2.4848766326904297, "learning_rate": 3.828378511258973e-07, "loss": 0.6965, "step": 29780 }, { "epoch": 0.9274482329129691, "grad_norm": 1.868896245956421, "learning_rate": 3.820184207938642e-07, "loss": 0.7499, "step": 29785 }, { "epoch": 0.9276039234002802, "grad_norm": 2.0796101093292236, "learning_rate": 3.8119899046183094e-07, "loss": 0.7103, "step": 29790 }, { "epoch": 0.9277596138875914, "grad_norm": 2.1441867351531982, "learning_rate": 3.8037956012979775e-07, "loss": 0.8046, "step": 29795 }, { "epoch": 0.9279153043749027, "grad_norm": 2.463869333267212, "learning_rate": 3.795601297977646e-07, "loss": 0.8424, "step": 29800 }, { "epoch": 0.9280709948622139, "grad_norm": 1.8848477602005005, "learning_rate": 3.7874069946573144e-07, "loss": 0.7095, "step": 29805 }, { "epoch": 0.9282266853495251, "grad_norm": 2.165970802307129, "learning_rate": 3.779212691336983e-07, "loss": 0.7526, "step": 29810 }, { "epoch": 0.9283823758368364, "grad_norm": 2.0376698970794678, "learning_rate": 3.7710183880166513e-07, "loss": 0.7799, "step": 29815 }, { "epoch": 0.9285380663241476, "grad_norm": 2.5141677856445312, "learning_rate": 3.76282408469632e-07, "loss": 0.7198, "step": 29820 }, { "epoch": 0.9286937568114588, "grad_norm": 2.2472615242004395, "learning_rate": 3.7546297813759876e-07, "loss": 0.781, "step": 29825 }, { "epoch": 0.9288494472987701, "grad_norm": 2.0846917629241943, "learning_rate": 3.746435478055656e-07, "loss": 0.7374, "step": 29830 }, { "epoch": 0.9290051377860813, "grad_norm": 2.68540096282959, "learning_rate": 3.7382411747353245e-07, "loss": 0.7372, "step": 29835 }, { "epoch": 0.9291608282733925, "grad_norm": 1.8658733367919922, "learning_rate": 3.7300468714149926e-07, "loss": 0.7411, "step": 29840 }, { "epoch": 0.9293165187607038, "grad_norm": 2.0065906047821045, "learning_rate": 3.721852568094661e-07, "loss": 0.8489, "step": 29845 }, { "epoch": 0.929472209248015, "grad_norm": 1.8168842792510986, "learning_rate": 3.7136582647743295e-07, "loss": 0.7704, "step": 29850 }, { "epoch": 0.9296278997353262, "grad_norm": 1.9611111879348755, "learning_rate": 3.7054639614539977e-07, "loss": 0.8217, "step": 29855 }, { "epoch": 0.9297835902226373, "grad_norm": 2.2416696548461914, "learning_rate": 3.6972696581336653e-07, "loss": 0.736, "step": 29860 }, { "epoch": 0.9299392807099486, "grad_norm": 2.456334352493286, "learning_rate": 3.689075354813334e-07, "loss": 0.7509, "step": 29865 }, { "epoch": 0.9300949711972598, "grad_norm": 1.793115496635437, "learning_rate": 3.680881051493002e-07, "loss": 0.6816, "step": 29870 }, { "epoch": 0.930250661684571, "grad_norm": 2.1923935413360596, "learning_rate": 3.672686748172671e-07, "loss": 0.7677, "step": 29875 }, { "epoch": 0.9304063521718823, "grad_norm": 2.1024351119995117, "learning_rate": 3.664492444852339e-07, "loss": 0.819, "step": 29880 }, { "epoch": 0.9305620426591935, "grad_norm": 1.5614608526229858, "learning_rate": 3.656298141532007e-07, "loss": 0.6883, "step": 29885 }, { "epoch": 0.9307177331465047, "grad_norm": 2.0001957416534424, "learning_rate": 3.648103838211676e-07, "loss": 0.7101, "step": 29890 }, { "epoch": 0.930873423633816, "grad_norm": 2.403203010559082, "learning_rate": 3.6399095348913435e-07, "loss": 0.7512, "step": 29895 }, { "epoch": 0.9310291141211272, "grad_norm": 2.127105474472046, "learning_rate": 3.6317152315710117e-07, "loss": 0.7848, "step": 29900 }, { "epoch": 0.9311848046084384, "grad_norm": 1.931077003479004, "learning_rate": 3.6235209282506804e-07, "loss": 0.7262, "step": 29905 }, { "epoch": 0.9313404950957497, "grad_norm": 2.4238173961639404, "learning_rate": 3.6153266249303485e-07, "loss": 0.7608, "step": 29910 }, { "epoch": 0.9314961855830609, "grad_norm": 2.2177021503448486, "learning_rate": 3.607132321610017e-07, "loss": 0.718, "step": 29915 }, { "epoch": 0.9316518760703721, "grad_norm": 1.8401564359664917, "learning_rate": 3.5989380182896854e-07, "loss": 0.7146, "step": 29920 }, { "epoch": 0.9318075665576834, "grad_norm": 3.037858486175537, "learning_rate": 3.590743714969354e-07, "loss": 0.7445, "step": 29925 }, { "epoch": 0.9319632570449945, "grad_norm": 2.3155062198638916, "learning_rate": 3.5825494116490217e-07, "loss": 0.7013, "step": 29930 }, { "epoch": 0.9321189475323057, "grad_norm": 2.2394766807556152, "learning_rate": 3.57435510832869e-07, "loss": 0.7857, "step": 29935 }, { "epoch": 0.932274638019617, "grad_norm": 2.4015591144561768, "learning_rate": 3.5661608050083586e-07, "loss": 0.7871, "step": 29940 }, { "epoch": 0.9324303285069282, "grad_norm": 2.029407262802124, "learning_rate": 3.557966501688027e-07, "loss": 0.7344, "step": 29945 }, { "epoch": 0.9325860189942394, "grad_norm": 2.7671871185302734, "learning_rate": 3.549772198367695e-07, "loss": 0.7782, "step": 29950 }, { "epoch": 0.9327417094815507, "grad_norm": 2.102834701538086, "learning_rate": 3.5415778950473636e-07, "loss": 0.7006, "step": 29955 }, { "epoch": 0.9328973999688619, "grad_norm": 2.0941579341888428, "learning_rate": 3.533383591727032e-07, "loss": 0.7498, "step": 29960 }, { "epoch": 0.9330530904561731, "grad_norm": 1.7306767702102661, "learning_rate": 3.5251892884066994e-07, "loss": 0.7577, "step": 29965 }, { "epoch": 0.9332087809434844, "grad_norm": 2.718999147415161, "learning_rate": 3.516994985086368e-07, "loss": 0.7597, "step": 29970 }, { "epoch": 0.9333644714307956, "grad_norm": 2.1887502670288086, "learning_rate": 3.5088006817660363e-07, "loss": 0.7709, "step": 29975 }, { "epoch": 0.9335201619181068, "grad_norm": 2.120502233505249, "learning_rate": 3.500606378445705e-07, "loss": 0.7278, "step": 29980 }, { "epoch": 0.9336758524054181, "grad_norm": 2.4488003253936768, "learning_rate": 3.492412075125373e-07, "loss": 0.7604, "step": 29985 }, { "epoch": 0.9338315428927293, "grad_norm": 2.009516954421997, "learning_rate": 3.4842177718050413e-07, "loss": 0.7508, "step": 29990 }, { "epoch": 0.9339872333800405, "grad_norm": 2.314335346221924, "learning_rate": 3.47602346848471e-07, "loss": 0.7359, "step": 29995 }, { "epoch": 0.9341429238673518, "grad_norm": 2.2838196754455566, "learning_rate": 3.467829165164378e-07, "loss": 0.8089, "step": 30000 }, { "epoch": 0.9342986143546629, "grad_norm": 2.0128462314605713, "learning_rate": 3.459634861844046e-07, "loss": 0.7746, "step": 30005 }, { "epoch": 0.9344543048419741, "grad_norm": 2.0936851501464844, "learning_rate": 3.4514405585237145e-07, "loss": 0.761, "step": 30010 }, { "epoch": 0.9346099953292853, "grad_norm": 2.17179012298584, "learning_rate": 3.4432462552033827e-07, "loss": 0.7512, "step": 30015 }, { "epoch": 0.9347656858165966, "grad_norm": 2.264925718307495, "learning_rate": 3.4350519518830514e-07, "loss": 0.7631, "step": 30020 }, { "epoch": 0.9349213763039078, "grad_norm": 1.9632617235183716, "learning_rate": 3.4268576485627195e-07, "loss": 0.7315, "step": 30025 }, { "epoch": 0.935077066791219, "grad_norm": 3.059523105621338, "learning_rate": 3.418663345242388e-07, "loss": 0.7781, "step": 30030 }, { "epoch": 0.9352327572785303, "grad_norm": 2.311692237854004, "learning_rate": 3.4104690419220564e-07, "loss": 0.7334, "step": 30035 }, { "epoch": 0.9353884477658415, "grad_norm": 2.264726400375366, "learning_rate": 3.402274738601724e-07, "loss": 0.7453, "step": 30040 }, { "epoch": 0.9355441382531527, "grad_norm": 2.550532341003418, "learning_rate": 3.3940804352813927e-07, "loss": 0.7795, "step": 30045 }, { "epoch": 0.935699828740464, "grad_norm": 2.1568448543548584, "learning_rate": 3.385886131961061e-07, "loss": 0.7047, "step": 30050 }, { "epoch": 0.9358555192277752, "grad_norm": 1.8906856775283813, "learning_rate": 3.377691828640729e-07, "loss": 0.7863, "step": 30055 }, { "epoch": 0.9360112097150864, "grad_norm": 2.0893781185150146, "learning_rate": 3.369497525320398e-07, "loss": 0.7794, "step": 30060 }, { "epoch": 0.9361669002023977, "grad_norm": 1.9789457321166992, "learning_rate": 3.361303222000066e-07, "loss": 0.7755, "step": 30065 }, { "epoch": 0.9363225906897089, "grad_norm": 2.308995246887207, "learning_rate": 3.3531089186797346e-07, "loss": 0.696, "step": 30070 }, { "epoch": 0.93647828117702, "grad_norm": 2.2650561332702637, "learning_rate": 3.344914615359402e-07, "loss": 0.7941, "step": 30075 }, { "epoch": 0.9366339716643313, "grad_norm": 2.383146286010742, "learning_rate": 3.3367203120390704e-07, "loss": 0.7845, "step": 30080 }, { "epoch": 0.9367896621516425, "grad_norm": 2.385333299636841, "learning_rate": 3.328526008718739e-07, "loss": 0.7596, "step": 30085 }, { "epoch": 0.9369453526389537, "grad_norm": 1.8483898639678955, "learning_rate": 3.3203317053984073e-07, "loss": 0.6937, "step": 30090 }, { "epoch": 0.937101043126265, "grad_norm": 3.0114493370056152, "learning_rate": 3.3121374020780754e-07, "loss": 0.7767, "step": 30095 }, { "epoch": 0.9372567336135762, "grad_norm": 2.3086071014404297, "learning_rate": 3.303943098757744e-07, "loss": 0.7406, "step": 30100 }, { "epoch": 0.9374124241008874, "grad_norm": 3.200911045074463, "learning_rate": 3.2957487954374123e-07, "loss": 0.8135, "step": 30105 }, { "epoch": 0.9375681145881987, "grad_norm": 2.5291340351104736, "learning_rate": 3.28755449211708e-07, "loss": 0.8025, "step": 30110 }, { "epoch": 0.9377238050755099, "grad_norm": 2.1495883464813232, "learning_rate": 3.2793601887967486e-07, "loss": 0.8247, "step": 30115 }, { "epoch": 0.9378794955628211, "grad_norm": 2.1300230026245117, "learning_rate": 3.271165885476417e-07, "loss": 0.7584, "step": 30120 }, { "epoch": 0.9380351860501324, "grad_norm": 2.088432788848877, "learning_rate": 3.2629715821560855e-07, "loss": 0.7974, "step": 30125 }, { "epoch": 0.9381908765374436, "grad_norm": 2.792637348175049, "learning_rate": 3.2547772788357536e-07, "loss": 0.7666, "step": 30130 }, { "epoch": 0.9383465670247548, "grad_norm": 3.0057108402252197, "learning_rate": 3.2465829755154223e-07, "loss": 0.7433, "step": 30135 }, { "epoch": 0.9385022575120661, "grad_norm": 1.9183851480484009, "learning_rate": 3.2383886721950905e-07, "loss": 0.7896, "step": 30140 }, { "epoch": 0.9386579479993772, "grad_norm": 2.2054312229156494, "learning_rate": 3.230194368874758e-07, "loss": 0.7728, "step": 30145 }, { "epoch": 0.9388136384866884, "grad_norm": 2.216414451599121, "learning_rate": 3.222000065554427e-07, "loss": 0.74, "step": 30150 }, { "epoch": 0.9389693289739997, "grad_norm": 4.146274089813232, "learning_rate": 3.213805762234095e-07, "loss": 0.8098, "step": 30155 }, { "epoch": 0.9391250194613109, "grad_norm": 2.439769744873047, "learning_rate": 3.205611458913763e-07, "loss": 0.7306, "step": 30160 }, { "epoch": 0.9392807099486221, "grad_norm": 2.292009115219116, "learning_rate": 3.197417155593432e-07, "loss": 0.7901, "step": 30165 }, { "epoch": 0.9394364004359333, "grad_norm": 1.7512528896331787, "learning_rate": 3.1892228522731e-07, "loss": 0.7966, "step": 30170 }, { "epoch": 0.9395920909232446, "grad_norm": 2.098519802093506, "learning_rate": 3.1810285489527687e-07, "loss": 0.7409, "step": 30175 }, { "epoch": 0.9397477814105558, "grad_norm": 1.9167499542236328, "learning_rate": 3.1728342456324364e-07, "loss": 0.6871, "step": 30180 }, { "epoch": 0.939903471897867, "grad_norm": 2.182908296585083, "learning_rate": 3.1646399423121045e-07, "loss": 0.7087, "step": 30185 }, { "epoch": 0.9400591623851783, "grad_norm": 2.1244146823883057, "learning_rate": 3.156445638991773e-07, "loss": 0.7356, "step": 30190 }, { "epoch": 0.9402148528724895, "grad_norm": 2.1601548194885254, "learning_rate": 3.1482513356714414e-07, "loss": 0.7885, "step": 30195 }, { "epoch": 0.9403705433598007, "grad_norm": 2.3724045753479004, "learning_rate": 3.1400570323511096e-07, "loss": 0.7736, "step": 30200 }, { "epoch": 0.940526233847112, "grad_norm": 2.1750919818878174, "learning_rate": 3.131862729030778e-07, "loss": 0.7537, "step": 30205 }, { "epoch": 0.9406819243344232, "grad_norm": 2.010314702987671, "learning_rate": 3.1236684257104464e-07, "loss": 0.7182, "step": 30210 }, { "epoch": 0.9408376148217344, "grad_norm": 2.0563247203826904, "learning_rate": 3.1154741223901146e-07, "loss": 0.7257, "step": 30215 }, { "epoch": 0.9409933053090456, "grad_norm": 2.2938151359558105, "learning_rate": 3.1072798190697833e-07, "loss": 0.7408, "step": 30220 }, { "epoch": 0.9411489957963568, "grad_norm": 2.090364933013916, "learning_rate": 3.099085515749451e-07, "loss": 0.6856, "step": 30225 }, { "epoch": 0.941304686283668, "grad_norm": 2.4503095149993896, "learning_rate": 3.0908912124291196e-07, "loss": 0.8157, "step": 30230 }, { "epoch": 0.9414603767709793, "grad_norm": 2.0931427478790283, "learning_rate": 3.082696909108788e-07, "loss": 0.7393, "step": 30235 }, { "epoch": 0.9416160672582905, "grad_norm": 2.161741256713867, "learning_rate": 3.0745026057884565e-07, "loss": 0.7441, "step": 30240 }, { "epoch": 0.9417717577456017, "grad_norm": 2.0222225189208984, "learning_rate": 3.066308302468124e-07, "loss": 0.7267, "step": 30245 }, { "epoch": 0.941927448232913, "grad_norm": 2.075204610824585, "learning_rate": 3.058113999147793e-07, "loss": 0.7426, "step": 30250 }, { "epoch": 0.9420831387202242, "grad_norm": 2.47029447555542, "learning_rate": 3.049919695827461e-07, "loss": 0.7864, "step": 30255 }, { "epoch": 0.9422388292075354, "grad_norm": 2.4944138526916504, "learning_rate": 3.0417253925071297e-07, "loss": 0.6999, "step": 30260 }, { "epoch": 0.9423945196948467, "grad_norm": 2.427605390548706, "learning_rate": 3.0335310891867973e-07, "loss": 0.8013, "step": 30265 }, { "epoch": 0.9425502101821579, "grad_norm": 2.147118091583252, "learning_rate": 3.025336785866466e-07, "loss": 0.8348, "step": 30270 }, { "epoch": 0.9427059006694691, "grad_norm": 2.146225929260254, "learning_rate": 3.017142482546134e-07, "loss": 0.9313, "step": 30275 }, { "epoch": 0.9428615911567804, "grad_norm": 2.178466796875, "learning_rate": 3.0089481792258023e-07, "loss": 0.8342, "step": 30280 }, { "epoch": 0.9430172816440916, "grad_norm": 1.9981364011764526, "learning_rate": 3.0007538759054705e-07, "loss": 0.7592, "step": 30285 }, { "epoch": 0.9431729721314027, "grad_norm": 2.289630174636841, "learning_rate": 2.992559572585139e-07, "loss": 0.783, "step": 30290 }, { "epoch": 0.943328662618714, "grad_norm": 2.2125043869018555, "learning_rate": 2.9843652692648074e-07, "loss": 0.6877, "step": 30295 }, { "epoch": 0.9434843531060252, "grad_norm": 2.207242727279663, "learning_rate": 2.9761709659444755e-07, "loss": 0.7403, "step": 30300 }, { "epoch": 0.9436400435933364, "grad_norm": 1.8385858535766602, "learning_rate": 2.9679766626241437e-07, "loss": 0.7387, "step": 30305 }, { "epoch": 0.9437957340806477, "grad_norm": 2.1303117275238037, "learning_rate": 2.9597823593038124e-07, "loss": 0.7436, "step": 30310 }, { "epoch": 0.9439514245679589, "grad_norm": 2.155494451522827, "learning_rate": 2.9515880559834805e-07, "loss": 0.7111, "step": 30315 }, { "epoch": 0.9441071150552701, "grad_norm": 1.9696451425552368, "learning_rate": 2.9433937526631487e-07, "loss": 0.7246, "step": 30320 }, { "epoch": 0.9442628055425814, "grad_norm": 2.123647689819336, "learning_rate": 2.9351994493428174e-07, "loss": 0.7442, "step": 30325 }, { "epoch": 0.9444184960298926, "grad_norm": 2.2422842979431152, "learning_rate": 2.9270051460224856e-07, "loss": 0.7969, "step": 30330 }, { "epoch": 0.9445741865172038, "grad_norm": 2.098741054534912, "learning_rate": 2.918810842702154e-07, "loss": 0.7438, "step": 30335 }, { "epoch": 0.944729877004515, "grad_norm": 2.0155575275421143, "learning_rate": 2.910616539381822e-07, "loss": 0.7422, "step": 30340 }, { "epoch": 0.9448855674918263, "grad_norm": 2.0605602264404297, "learning_rate": 2.9024222360614906e-07, "loss": 0.7859, "step": 30345 }, { "epoch": 0.9450412579791375, "grad_norm": 2.030691623687744, "learning_rate": 2.894227932741158e-07, "loss": 0.7093, "step": 30350 }, { "epoch": 0.9451969484664487, "grad_norm": 1.9553097486495972, "learning_rate": 2.886033629420827e-07, "loss": 0.7312, "step": 30355 }, { "epoch": 0.9453526389537599, "grad_norm": 1.8824554681777954, "learning_rate": 2.877839326100495e-07, "loss": 0.7123, "step": 30360 }, { "epoch": 0.9455083294410711, "grad_norm": 2.2703583240509033, "learning_rate": 2.869645022780164e-07, "loss": 0.753, "step": 30365 }, { "epoch": 0.9456640199283823, "grad_norm": 1.9574971199035645, "learning_rate": 2.8614507194598314e-07, "loss": 0.7481, "step": 30370 }, { "epoch": 0.9458197104156936, "grad_norm": 1.9361356496810913, "learning_rate": 2.8532564161395e-07, "loss": 0.7865, "step": 30375 }, { "epoch": 0.9459754009030048, "grad_norm": 1.8011611700057983, "learning_rate": 2.8450621128191683e-07, "loss": 0.6849, "step": 30380 }, { "epoch": 0.946131091390316, "grad_norm": 1.7217870950698853, "learning_rate": 2.836867809498837e-07, "loss": 0.7292, "step": 30385 }, { "epoch": 0.9462867818776273, "grad_norm": 2.351332664489746, "learning_rate": 2.8286735061785046e-07, "loss": 0.7826, "step": 30390 }, { "epoch": 0.9464424723649385, "grad_norm": 2.496936798095703, "learning_rate": 2.8204792028581733e-07, "loss": 0.8012, "step": 30395 }, { "epoch": 0.9465981628522497, "grad_norm": 1.9286209344863892, "learning_rate": 2.8122848995378415e-07, "loss": 0.777, "step": 30400 }, { "epoch": 0.946753853339561, "grad_norm": 2.6430611610412598, "learning_rate": 2.8040905962175096e-07, "loss": 0.842, "step": 30405 }, { "epoch": 0.9469095438268722, "grad_norm": 1.9306527376174927, "learning_rate": 2.795896292897178e-07, "loss": 0.7605, "step": 30410 }, { "epoch": 0.9470652343141834, "grad_norm": 2.387294054031372, "learning_rate": 2.7877019895768465e-07, "loss": 0.7637, "step": 30415 }, { "epoch": 0.9472209248014947, "grad_norm": 2.1909408569335938, "learning_rate": 2.7795076862565147e-07, "loss": 0.7759, "step": 30420 }, { "epoch": 0.9473766152888059, "grad_norm": 2.267469882965088, "learning_rate": 2.771313382936183e-07, "loss": 0.7031, "step": 30425 }, { "epoch": 0.9475323057761171, "grad_norm": 2.250131368637085, "learning_rate": 2.7631190796158515e-07, "loss": 0.7251, "step": 30430 }, { "epoch": 0.9476879962634283, "grad_norm": 2.3078672885894775, "learning_rate": 2.7549247762955197e-07, "loss": 0.7931, "step": 30435 }, { "epoch": 0.9478436867507395, "grad_norm": 1.7651594877243042, "learning_rate": 2.746730472975188e-07, "loss": 0.7521, "step": 30440 }, { "epoch": 0.9479993772380507, "grad_norm": 2.3632254600524902, "learning_rate": 2.738536169654856e-07, "loss": 0.7561, "step": 30445 }, { "epoch": 0.948155067725362, "grad_norm": 1.9984710216522217, "learning_rate": 2.7303418663345247e-07, "loss": 0.7708, "step": 30450 }, { "epoch": 0.9483107582126732, "grad_norm": 2.185934543609619, "learning_rate": 2.722147563014193e-07, "loss": 0.6618, "step": 30455 }, { "epoch": 0.9484664486999844, "grad_norm": 2.0638768672943115, "learning_rate": 2.713953259693861e-07, "loss": 0.852, "step": 30460 }, { "epoch": 0.9486221391872957, "grad_norm": 1.9134114980697632, "learning_rate": 2.705758956373529e-07, "loss": 0.7695, "step": 30465 }, { "epoch": 0.9487778296746069, "grad_norm": 1.950019359588623, "learning_rate": 2.697564653053198e-07, "loss": 0.723, "step": 30470 }, { "epoch": 0.9489335201619181, "grad_norm": 2.0923290252685547, "learning_rate": 2.6893703497328656e-07, "loss": 0.7491, "step": 30475 }, { "epoch": 0.9490892106492294, "grad_norm": 2.1396126747131348, "learning_rate": 2.681176046412534e-07, "loss": 0.7633, "step": 30480 }, { "epoch": 0.9492449011365406, "grad_norm": 2.0932631492614746, "learning_rate": 2.6729817430922024e-07, "loss": 0.7176, "step": 30485 }, { "epoch": 0.9494005916238518, "grad_norm": 2.2075304985046387, "learning_rate": 2.664787439771871e-07, "loss": 0.8109, "step": 30490 }, { "epoch": 0.949556282111163, "grad_norm": 2.3347725868225098, "learning_rate": 2.656593136451539e-07, "loss": 0.7751, "step": 30495 }, { "epoch": 0.9497119725984743, "grad_norm": 2.312779664993286, "learning_rate": 2.6483988331312074e-07, "loss": 0.7374, "step": 30500 }, { "epoch": 0.9498676630857854, "grad_norm": 1.9441863298416138, "learning_rate": 2.6402045298108756e-07, "loss": 0.7058, "step": 30505 }, { "epoch": 0.9500233535730966, "grad_norm": 1.9124006032943726, "learning_rate": 2.6320102264905443e-07, "loss": 0.8047, "step": 30510 }, { "epoch": 0.9501790440604079, "grad_norm": 2.0060460567474365, "learning_rate": 2.623815923170212e-07, "loss": 0.7322, "step": 30515 }, { "epoch": 0.9503347345477191, "grad_norm": 1.8523633480072021, "learning_rate": 2.6156216198498806e-07, "loss": 0.7681, "step": 30520 }, { "epoch": 0.9504904250350303, "grad_norm": 2.182633638381958, "learning_rate": 2.607427316529549e-07, "loss": 0.8237, "step": 30525 }, { "epoch": 0.9506461155223416, "grad_norm": 1.8386802673339844, "learning_rate": 2.599233013209217e-07, "loss": 0.7394, "step": 30530 }, { "epoch": 0.9508018060096528, "grad_norm": 2.1996710300445557, "learning_rate": 2.5910387098888857e-07, "loss": 0.6702, "step": 30535 }, { "epoch": 0.950957496496964, "grad_norm": 1.902433156967163, "learning_rate": 2.582844406568554e-07, "loss": 0.7469, "step": 30540 }, { "epoch": 0.9511131869842753, "grad_norm": 2.2619588375091553, "learning_rate": 2.574650103248222e-07, "loss": 0.7147, "step": 30545 }, { "epoch": 0.9512688774715865, "grad_norm": 2.0073490142822266, "learning_rate": 2.56645579992789e-07, "loss": 0.8718, "step": 30550 }, { "epoch": 0.9514245679588977, "grad_norm": 2.1192657947540283, "learning_rate": 2.558261496607559e-07, "loss": 0.7023, "step": 30555 }, { "epoch": 0.951580258446209, "grad_norm": 2.8513588905334473, "learning_rate": 2.550067193287227e-07, "loss": 0.6807, "step": 30560 }, { "epoch": 0.9517359489335202, "grad_norm": 2.71622371673584, "learning_rate": 2.541872889966895e-07, "loss": 0.7573, "step": 30565 }, { "epoch": 0.9518916394208314, "grad_norm": 2.004748821258545, "learning_rate": 2.5336785866465633e-07, "loss": 0.714, "step": 30570 }, { "epoch": 0.9520473299081426, "grad_norm": 2.3957226276397705, "learning_rate": 2.525484283326232e-07, "loss": 0.7096, "step": 30575 }, { "epoch": 0.9522030203954538, "grad_norm": 2.1029069423675537, "learning_rate": 2.5172899800059e-07, "loss": 0.7744, "step": 30580 }, { "epoch": 0.952358710882765, "grad_norm": 1.9423551559448242, "learning_rate": 2.5090956766855684e-07, "loss": 0.7013, "step": 30585 }, { "epoch": 0.9525144013700763, "grad_norm": 2.8704192638397217, "learning_rate": 2.5009013733652365e-07, "loss": 0.7313, "step": 30590 }, { "epoch": 0.9526700918573875, "grad_norm": 2.0288572311401367, "learning_rate": 2.492707070044905e-07, "loss": 0.7337, "step": 30595 }, { "epoch": 0.9528257823446987, "grad_norm": 2.563504219055176, "learning_rate": 2.4845127667245734e-07, "loss": 0.8061, "step": 30600 }, { "epoch": 0.95298147283201, "grad_norm": 1.9464852809906006, "learning_rate": 2.4763184634042416e-07, "loss": 0.7303, "step": 30605 }, { "epoch": 0.9531371633193212, "grad_norm": 2.0211846828460693, "learning_rate": 2.4681241600839097e-07, "loss": 0.7348, "step": 30610 }, { "epoch": 0.9532928538066324, "grad_norm": 2.188166618347168, "learning_rate": 2.4599298567635784e-07, "loss": 0.8183, "step": 30615 }, { "epoch": 0.9534485442939437, "grad_norm": 2.5514771938323975, "learning_rate": 2.451735553443246e-07, "loss": 0.8605, "step": 30620 }, { "epoch": 0.9536042347812549, "grad_norm": 2.0741965770721436, "learning_rate": 2.443541250122915e-07, "loss": 0.7171, "step": 30625 }, { "epoch": 0.9537599252685661, "grad_norm": 1.818198561668396, "learning_rate": 2.435346946802583e-07, "loss": 0.6717, "step": 30630 }, { "epoch": 0.9539156157558774, "grad_norm": 3.642246723175049, "learning_rate": 2.4271526434822516e-07, "loss": 0.7556, "step": 30635 }, { "epoch": 0.9540713062431886, "grad_norm": 2.1585206985473633, "learning_rate": 2.41895834016192e-07, "loss": 0.7368, "step": 30640 }, { "epoch": 0.9542269967304998, "grad_norm": 2.054222345352173, "learning_rate": 2.410764036841588e-07, "loss": 0.8237, "step": 30645 }, { "epoch": 0.9543826872178109, "grad_norm": 2.8817925453186035, "learning_rate": 2.402569733521256e-07, "loss": 0.6992, "step": 30650 }, { "epoch": 0.9545383777051222, "grad_norm": 2.1153724193573, "learning_rate": 2.3943754302009243e-07, "loss": 0.6719, "step": 30655 }, { "epoch": 0.9546940681924334, "grad_norm": 1.971567988395691, "learning_rate": 2.386181126880593e-07, "loss": 0.6869, "step": 30660 }, { "epoch": 0.9548497586797446, "grad_norm": 2.53501033782959, "learning_rate": 2.3779868235602611e-07, "loss": 0.784, "step": 30665 }, { "epoch": 0.9550054491670559, "grad_norm": 2.323213577270508, "learning_rate": 2.3697925202399296e-07, "loss": 0.7426, "step": 30670 }, { "epoch": 0.9551611396543671, "grad_norm": 2.0009796619415283, "learning_rate": 2.3615982169195975e-07, "loss": 0.7217, "step": 30675 }, { "epoch": 0.9553168301416783, "grad_norm": 2.8803887367248535, "learning_rate": 2.353403913599266e-07, "loss": 0.7514, "step": 30680 }, { "epoch": 0.9554725206289896, "grad_norm": 1.8674488067626953, "learning_rate": 2.3452096102789343e-07, "loss": 0.6704, "step": 30685 }, { "epoch": 0.9556282111163008, "grad_norm": 1.9737180471420288, "learning_rate": 2.3370153069586025e-07, "loss": 0.7527, "step": 30690 }, { "epoch": 0.955783901603612, "grad_norm": 2.335575580596924, "learning_rate": 2.3288210036382707e-07, "loss": 0.8037, "step": 30695 }, { "epoch": 0.9559395920909233, "grad_norm": 2.1259312629699707, "learning_rate": 2.320626700317939e-07, "loss": 0.7512, "step": 30700 }, { "epoch": 0.9560952825782345, "grad_norm": 2.1809113025665283, "learning_rate": 2.3124323969976075e-07, "loss": 0.7725, "step": 30705 }, { "epoch": 0.9562509730655457, "grad_norm": 2.012138605117798, "learning_rate": 2.3042380936772757e-07, "loss": 0.7275, "step": 30710 }, { "epoch": 0.956406663552857, "grad_norm": 2.1020212173461914, "learning_rate": 2.296043790356944e-07, "loss": 0.7966, "step": 30715 }, { "epoch": 0.9565623540401681, "grad_norm": 2.0080602169036865, "learning_rate": 2.2878494870366123e-07, "loss": 0.7645, "step": 30720 }, { "epoch": 0.9567180445274793, "grad_norm": 2.483541488647461, "learning_rate": 2.2796551837162807e-07, "loss": 0.8175, "step": 30725 }, { "epoch": 0.9568737350147906, "grad_norm": 2.1659374237060547, "learning_rate": 2.271460880395949e-07, "loss": 0.863, "step": 30730 }, { "epoch": 0.9570294255021018, "grad_norm": 2.366252899169922, "learning_rate": 2.2632665770756173e-07, "loss": 0.7949, "step": 30735 }, { "epoch": 0.957185115989413, "grad_norm": 2.224555015563965, "learning_rate": 2.2550722737552857e-07, "loss": 0.8202, "step": 30740 }, { "epoch": 0.9573408064767243, "grad_norm": 2.1006131172180176, "learning_rate": 2.2468779704349536e-07, "loss": 0.6584, "step": 30745 }, { "epoch": 0.9574964969640355, "grad_norm": 2.2404892444610596, "learning_rate": 2.238683667114622e-07, "loss": 0.66, "step": 30750 }, { "epoch": 0.9576521874513467, "grad_norm": 1.9577897787094116, "learning_rate": 2.2304893637942905e-07, "loss": 0.7735, "step": 30755 }, { "epoch": 0.957807877938658, "grad_norm": 2.3382580280303955, "learning_rate": 2.222295060473959e-07, "loss": 0.7739, "step": 30760 }, { "epoch": 0.9579635684259692, "grad_norm": 1.7253459692001343, "learning_rate": 2.2141007571536268e-07, "loss": 0.758, "step": 30765 }, { "epoch": 0.9581192589132804, "grad_norm": 2.3423054218292236, "learning_rate": 2.2059064538332953e-07, "loss": 0.7622, "step": 30770 }, { "epoch": 0.9582749494005917, "grad_norm": 1.9688832759857178, "learning_rate": 2.1977121505129637e-07, "loss": 0.6819, "step": 30775 }, { "epoch": 0.9584306398879029, "grad_norm": 2.134037733078003, "learning_rate": 2.1895178471926316e-07, "loss": 0.7103, "step": 30780 }, { "epoch": 0.9585863303752141, "grad_norm": 2.2505552768707275, "learning_rate": 2.1813235438723e-07, "loss": 0.8266, "step": 30785 }, { "epoch": 0.9587420208625252, "grad_norm": 1.897767424583435, "learning_rate": 2.1731292405519685e-07, "loss": 0.7403, "step": 30790 }, { "epoch": 0.9588977113498365, "grad_norm": 2.0166566371917725, "learning_rate": 2.164934937231637e-07, "loss": 0.6935, "step": 30795 }, { "epoch": 0.9590534018371477, "grad_norm": 2.3018054962158203, "learning_rate": 2.1567406339113048e-07, "loss": 0.6831, "step": 30800 }, { "epoch": 0.959209092324459, "grad_norm": 2.127934217453003, "learning_rate": 2.1485463305909732e-07, "loss": 0.7689, "step": 30805 }, { "epoch": 0.9593647828117702, "grad_norm": 2.3140664100646973, "learning_rate": 2.1403520272706417e-07, "loss": 0.7469, "step": 30810 }, { "epoch": 0.9595204732990814, "grad_norm": 1.7453346252441406, "learning_rate": 2.1321577239503098e-07, "loss": 0.6923, "step": 30815 }, { "epoch": 0.9596761637863926, "grad_norm": 2.5703563690185547, "learning_rate": 2.1239634206299783e-07, "loss": 0.8101, "step": 30820 }, { "epoch": 0.9598318542737039, "grad_norm": 1.698293685913086, "learning_rate": 2.1157691173096464e-07, "loss": 0.783, "step": 30825 }, { "epoch": 0.9599875447610151, "grad_norm": 2.2990336418151855, "learning_rate": 2.1075748139893148e-07, "loss": 0.7926, "step": 30830 }, { "epoch": 0.9601432352483263, "grad_norm": 2.4544897079467773, "learning_rate": 2.099380510668983e-07, "loss": 0.7086, "step": 30835 }, { "epoch": 0.9602989257356376, "grad_norm": 2.0937628746032715, "learning_rate": 2.0911862073486514e-07, "loss": 0.7286, "step": 30840 }, { "epoch": 0.9604546162229488, "grad_norm": 2.7490265369415283, "learning_rate": 2.08299190402832e-07, "loss": 0.7876, "step": 30845 }, { "epoch": 0.96061030671026, "grad_norm": 2.0158169269561768, "learning_rate": 2.074797600707988e-07, "loss": 0.7431, "step": 30850 }, { "epoch": 0.9607659971975713, "grad_norm": 2.1096973419189453, "learning_rate": 2.0666032973876562e-07, "loss": 0.7938, "step": 30855 }, { "epoch": 0.9609216876848825, "grad_norm": 2.5093326568603516, "learning_rate": 2.0584089940673246e-07, "loss": 0.8304, "step": 30860 }, { "epoch": 0.9610773781721936, "grad_norm": 1.8853216171264648, "learning_rate": 2.050214690746993e-07, "loss": 0.6533, "step": 30865 }, { "epoch": 0.9612330686595049, "grad_norm": 2.2830076217651367, "learning_rate": 2.042020387426661e-07, "loss": 0.7486, "step": 30870 }, { "epoch": 0.9613887591468161, "grad_norm": 2.0454845428466797, "learning_rate": 2.0338260841063294e-07, "loss": 0.7224, "step": 30875 }, { "epoch": 0.9615444496341273, "grad_norm": 2.3936705589294434, "learning_rate": 2.0256317807859978e-07, "loss": 0.763, "step": 30880 }, { "epoch": 0.9617001401214386, "grad_norm": 2.261434316635132, "learning_rate": 2.0174374774656663e-07, "loss": 0.7537, "step": 30885 }, { "epoch": 0.9618558306087498, "grad_norm": 3.012423515319824, "learning_rate": 2.0092431741453342e-07, "loss": 0.7928, "step": 30890 }, { "epoch": 0.962011521096061, "grad_norm": 2.136967182159424, "learning_rate": 2.0010488708250026e-07, "loss": 0.7547, "step": 30895 }, { "epoch": 0.9621672115833723, "grad_norm": 1.9636141061782837, "learning_rate": 1.992854567504671e-07, "loss": 0.7654, "step": 30900 }, { "epoch": 0.9623229020706835, "grad_norm": 2.594021797180176, "learning_rate": 1.984660264184339e-07, "loss": 0.856, "step": 30905 }, { "epoch": 0.9624785925579947, "grad_norm": 2.4245216846466064, "learning_rate": 1.9764659608640074e-07, "loss": 0.771, "step": 30910 }, { "epoch": 0.962634283045306, "grad_norm": 2.1730916500091553, "learning_rate": 1.9682716575436758e-07, "loss": 0.7398, "step": 30915 }, { "epoch": 0.9627899735326172, "grad_norm": 2.2006161212921143, "learning_rate": 1.9600773542233442e-07, "loss": 0.7488, "step": 30920 }, { "epoch": 0.9629456640199284, "grad_norm": 2.145176649093628, "learning_rate": 1.9518830509030124e-07, "loss": 0.7177, "step": 30925 }, { "epoch": 0.9631013545072397, "grad_norm": 2.29128098487854, "learning_rate": 1.9436887475826805e-07, "loss": 0.752, "step": 30930 }, { "epoch": 0.9632570449945508, "grad_norm": 2.3531782627105713, "learning_rate": 1.935494444262349e-07, "loss": 0.8289, "step": 30935 }, { "epoch": 0.963412735481862, "grad_norm": 2.519967794418335, "learning_rate": 1.9273001409420174e-07, "loss": 0.7961, "step": 30940 }, { "epoch": 0.9635684259691732, "grad_norm": 2.480071544647217, "learning_rate": 1.9191058376216856e-07, "loss": 0.8119, "step": 30945 }, { "epoch": 0.9637241164564845, "grad_norm": 2.067728281021118, "learning_rate": 1.910911534301354e-07, "loss": 0.7735, "step": 30950 }, { "epoch": 0.9638798069437957, "grad_norm": 2.3609437942504883, "learning_rate": 1.9027172309810222e-07, "loss": 0.7065, "step": 30955 }, { "epoch": 0.964035497431107, "grad_norm": 2.156012535095215, "learning_rate": 1.8945229276606903e-07, "loss": 0.7541, "step": 30960 }, { "epoch": 0.9641911879184182, "grad_norm": 2.3086163997650146, "learning_rate": 1.8863286243403588e-07, "loss": 0.7815, "step": 30965 }, { "epoch": 0.9643468784057294, "grad_norm": 2.1610913276672363, "learning_rate": 1.8781343210200272e-07, "loss": 0.6879, "step": 30970 }, { "epoch": 0.9645025688930406, "grad_norm": 2.007519245147705, "learning_rate": 1.8699400176996954e-07, "loss": 0.6751, "step": 30975 }, { "epoch": 0.9646582593803519, "grad_norm": 1.7741984128952026, "learning_rate": 1.8617457143793635e-07, "loss": 0.7408, "step": 30980 }, { "epoch": 0.9648139498676631, "grad_norm": 2.441098690032959, "learning_rate": 1.853551411059032e-07, "loss": 0.7536, "step": 30985 }, { "epoch": 0.9649696403549743, "grad_norm": 2.471592664718628, "learning_rate": 1.8453571077387004e-07, "loss": 0.7469, "step": 30990 }, { "epoch": 0.9651253308422856, "grad_norm": 1.831862211227417, "learning_rate": 1.8371628044183683e-07, "loss": 0.7587, "step": 30995 }, { "epoch": 0.9652810213295968, "grad_norm": 1.9679018259048462, "learning_rate": 1.8289685010980367e-07, "loss": 0.7452, "step": 31000 }, { "epoch": 0.9654367118169079, "grad_norm": 2.0565502643585205, "learning_rate": 1.8207741977777051e-07, "loss": 0.7088, "step": 31005 }, { "epoch": 0.9655924023042192, "grad_norm": 1.976425290107727, "learning_rate": 1.8125798944573736e-07, "loss": 0.681, "step": 31010 }, { "epoch": 0.9657480927915304, "grad_norm": 2.082717180252075, "learning_rate": 1.8043855911370415e-07, "loss": 0.7533, "step": 31015 }, { "epoch": 0.9659037832788416, "grad_norm": 1.8685775995254517, "learning_rate": 1.79619128781671e-07, "loss": 0.8071, "step": 31020 }, { "epoch": 0.9660594737661529, "grad_norm": 1.6899300813674927, "learning_rate": 1.7879969844963783e-07, "loss": 0.6888, "step": 31025 }, { "epoch": 0.9662151642534641, "grad_norm": 2.169762372970581, "learning_rate": 1.7798026811760465e-07, "loss": 0.7025, "step": 31030 }, { "epoch": 0.9663708547407753, "grad_norm": 2.0834693908691406, "learning_rate": 1.7716083778557147e-07, "loss": 0.7611, "step": 31035 }, { "epoch": 0.9665265452280866, "grad_norm": 2.0404129028320312, "learning_rate": 1.763414074535383e-07, "loss": 0.7689, "step": 31040 }, { "epoch": 0.9666822357153978, "grad_norm": 2.4875237941741943, "learning_rate": 1.7552197712150515e-07, "loss": 0.7207, "step": 31045 }, { "epoch": 0.966837926202709, "grad_norm": 2.2185332775115967, "learning_rate": 1.7470254678947197e-07, "loss": 0.7666, "step": 31050 }, { "epoch": 0.9669936166900203, "grad_norm": 2.0712385177612305, "learning_rate": 1.7388311645743879e-07, "loss": 0.7572, "step": 31055 }, { "epoch": 0.9671493071773315, "grad_norm": 2.215334892272949, "learning_rate": 1.7306368612540563e-07, "loss": 0.7564, "step": 31060 }, { "epoch": 0.9673049976646427, "grad_norm": 2.0696651935577393, "learning_rate": 1.7224425579337247e-07, "loss": 0.7676, "step": 31065 }, { "epoch": 0.967460688151954, "grad_norm": 2.116138219833374, "learning_rate": 1.714248254613393e-07, "loss": 0.7343, "step": 31070 }, { "epoch": 0.9676163786392652, "grad_norm": 1.8834973573684692, "learning_rate": 1.7060539512930613e-07, "loss": 0.7663, "step": 31075 }, { "epoch": 0.9677720691265763, "grad_norm": 1.906144380569458, "learning_rate": 1.6978596479727295e-07, "loss": 0.7159, "step": 31080 }, { "epoch": 0.9679277596138876, "grad_norm": 2.336840867996216, "learning_rate": 1.6896653446523977e-07, "loss": 0.774, "step": 31085 }, { "epoch": 0.9680834501011988, "grad_norm": 1.9760946035385132, "learning_rate": 1.681471041332066e-07, "loss": 0.7083, "step": 31090 }, { "epoch": 0.96823914058851, "grad_norm": 2.4500820636749268, "learning_rate": 1.6732767380117345e-07, "loss": 0.7541, "step": 31095 }, { "epoch": 0.9683948310758212, "grad_norm": 1.910477638244629, "learning_rate": 1.665082434691403e-07, "loss": 0.7724, "step": 31100 }, { "epoch": 0.9685505215631325, "grad_norm": 2.077181816101074, "learning_rate": 1.6568881313710708e-07, "loss": 0.7365, "step": 31105 }, { "epoch": 0.9687062120504437, "grad_norm": 2.4598796367645264, "learning_rate": 1.6486938280507393e-07, "loss": 0.7622, "step": 31110 }, { "epoch": 0.968861902537755, "grad_norm": 2.6775479316711426, "learning_rate": 1.6404995247304077e-07, "loss": 0.7717, "step": 31115 }, { "epoch": 0.9690175930250662, "grad_norm": 1.9839838743209839, "learning_rate": 1.6323052214100756e-07, "loss": 0.7499, "step": 31120 }, { "epoch": 0.9691732835123774, "grad_norm": 1.921890377998352, "learning_rate": 1.624110918089744e-07, "loss": 0.7127, "step": 31125 }, { "epoch": 0.9693289739996886, "grad_norm": 1.9568169116973877, "learning_rate": 1.6159166147694125e-07, "loss": 0.729, "step": 31130 }, { "epoch": 0.9694846644869999, "grad_norm": 2.0152108669281006, "learning_rate": 1.607722311449081e-07, "loss": 0.8138, "step": 31135 }, { "epoch": 0.9696403549743111, "grad_norm": 2.2862775325775146, "learning_rate": 1.5995280081287488e-07, "loss": 0.8502, "step": 31140 }, { "epoch": 0.9697960454616223, "grad_norm": 1.902235984802246, "learning_rate": 1.5913337048084172e-07, "loss": 0.8088, "step": 31145 }, { "epoch": 0.9699517359489335, "grad_norm": 2.0105884075164795, "learning_rate": 1.5831394014880857e-07, "loss": 0.7812, "step": 31150 }, { "epoch": 0.9701074264362447, "grad_norm": 2.42374324798584, "learning_rate": 1.5749450981677538e-07, "loss": 0.8107, "step": 31155 }, { "epoch": 0.9702631169235559, "grad_norm": 1.973482370376587, "learning_rate": 1.566750794847422e-07, "loss": 0.7449, "step": 31160 }, { "epoch": 0.9704188074108672, "grad_norm": 2.2041032314300537, "learning_rate": 1.5585564915270904e-07, "loss": 0.7812, "step": 31165 }, { "epoch": 0.9705744978981784, "grad_norm": 1.6737456321716309, "learning_rate": 1.5503621882067589e-07, "loss": 0.6736, "step": 31170 }, { "epoch": 0.9707301883854896, "grad_norm": 1.9596872329711914, "learning_rate": 1.542167884886427e-07, "loss": 0.732, "step": 31175 }, { "epoch": 0.9708858788728009, "grad_norm": 1.887451410293579, "learning_rate": 1.5339735815660954e-07, "loss": 0.807, "step": 31180 }, { "epoch": 0.9710415693601121, "grad_norm": 2.27502703666687, "learning_rate": 1.5257792782457636e-07, "loss": 0.7925, "step": 31185 }, { "epoch": 0.9711972598474233, "grad_norm": 2.342824935913086, "learning_rate": 1.517584974925432e-07, "loss": 0.675, "step": 31190 }, { "epoch": 0.9713529503347346, "grad_norm": 2.2722737789154053, "learning_rate": 1.5093906716051002e-07, "loss": 0.7532, "step": 31195 }, { "epoch": 0.9715086408220458, "grad_norm": 1.8561428785324097, "learning_rate": 1.5011963682847686e-07, "loss": 0.712, "step": 31200 }, { "epoch": 0.971664331309357, "grad_norm": 2.170197010040283, "learning_rate": 1.4930020649644368e-07, "loss": 0.81, "step": 31205 }, { "epoch": 0.9718200217966683, "grad_norm": 1.7470427751541138, "learning_rate": 1.4848077616441052e-07, "loss": 0.7602, "step": 31210 }, { "epoch": 0.9719757122839795, "grad_norm": 4.045734882354736, "learning_rate": 1.4766134583237734e-07, "loss": 0.8829, "step": 31215 }, { "epoch": 0.9721314027712906, "grad_norm": 2.4757871627807617, "learning_rate": 1.4684191550034418e-07, "loss": 0.752, "step": 31220 }, { "epoch": 0.9722870932586019, "grad_norm": 3.0794124603271484, "learning_rate": 1.46022485168311e-07, "loss": 0.7379, "step": 31225 }, { "epoch": 0.9724427837459131, "grad_norm": 2.3461201190948486, "learning_rate": 1.4520305483627784e-07, "loss": 0.7826, "step": 31230 }, { "epoch": 0.9725984742332243, "grad_norm": 2.0140459537506104, "learning_rate": 1.4438362450424466e-07, "loss": 0.7142, "step": 31235 }, { "epoch": 0.9727541647205356, "grad_norm": 2.3757245540618896, "learning_rate": 1.4356419417221148e-07, "loss": 0.7296, "step": 31240 }, { "epoch": 0.9729098552078468, "grad_norm": 1.968518614768982, "learning_rate": 1.4274476384017832e-07, "loss": 0.7762, "step": 31245 }, { "epoch": 0.973065545695158, "grad_norm": 1.9426965713500977, "learning_rate": 1.4192533350814514e-07, "loss": 0.7697, "step": 31250 }, { "epoch": 0.9732212361824693, "grad_norm": 2.002338171005249, "learning_rate": 1.4110590317611198e-07, "loss": 0.7779, "step": 31255 }, { "epoch": 0.9733769266697805, "grad_norm": 2.0658764839172363, "learning_rate": 1.402864728440788e-07, "loss": 0.8147, "step": 31260 }, { "epoch": 0.9735326171570917, "grad_norm": 2.284595251083374, "learning_rate": 1.3946704251204564e-07, "loss": 0.8064, "step": 31265 }, { "epoch": 0.973688307644403, "grad_norm": 2.285919427871704, "learning_rate": 1.3864761218001245e-07, "loss": 0.8111, "step": 31270 }, { "epoch": 0.9738439981317142, "grad_norm": 2.475987434387207, "learning_rate": 1.378281818479793e-07, "loss": 0.8187, "step": 31275 }, { "epoch": 0.9739996886190254, "grad_norm": 2.010385751724243, "learning_rate": 1.3700875151594611e-07, "loss": 0.7412, "step": 31280 }, { "epoch": 0.9741553791063366, "grad_norm": 2.6988890171051025, "learning_rate": 1.3618932118391296e-07, "loss": 0.7616, "step": 31285 }, { "epoch": 0.9743110695936479, "grad_norm": 2.708521842956543, "learning_rate": 1.3536989085187977e-07, "loss": 0.7786, "step": 31290 }, { "epoch": 0.974466760080959, "grad_norm": 2.101881265640259, "learning_rate": 1.3455046051984662e-07, "loss": 0.8193, "step": 31295 }, { "epoch": 0.9746224505682702, "grad_norm": 2.112987995147705, "learning_rate": 1.3373103018781343e-07, "loss": 0.6671, "step": 31300 }, { "epoch": 0.9747781410555815, "grad_norm": 2.284076690673828, "learning_rate": 1.3291159985578028e-07, "loss": 0.7505, "step": 31305 }, { "epoch": 0.9749338315428927, "grad_norm": 2.3307576179504395, "learning_rate": 1.3209216952374712e-07, "loss": 0.7828, "step": 31310 }, { "epoch": 0.9750895220302039, "grad_norm": 1.5737205743789673, "learning_rate": 1.3127273919171394e-07, "loss": 0.7745, "step": 31315 }, { "epoch": 0.9752452125175152, "grad_norm": 2.063467264175415, "learning_rate": 1.3045330885968078e-07, "loss": 0.6557, "step": 31320 }, { "epoch": 0.9754009030048264, "grad_norm": 2.6589577198028564, "learning_rate": 1.296338785276476e-07, "loss": 0.6976, "step": 31325 }, { "epoch": 0.9755565934921376, "grad_norm": 2.1643104553222656, "learning_rate": 1.288144481956144e-07, "loss": 0.7303, "step": 31330 }, { "epoch": 0.9757122839794489, "grad_norm": 2.463820457458496, "learning_rate": 1.2799501786358126e-07, "loss": 0.7046, "step": 31335 }, { "epoch": 0.9758679744667601, "grad_norm": 2.0627620220184326, "learning_rate": 1.2717558753154807e-07, "loss": 0.7423, "step": 31340 }, { "epoch": 0.9760236649540713, "grad_norm": 2.5614137649536133, "learning_rate": 1.2635615719951492e-07, "loss": 0.8582, "step": 31345 }, { "epoch": 0.9761793554413826, "grad_norm": 2.7075490951538086, "learning_rate": 1.2553672686748173e-07, "loss": 0.7537, "step": 31350 }, { "epoch": 0.9763350459286938, "grad_norm": 2.539644241333008, "learning_rate": 1.2471729653544857e-07, "loss": 0.7115, "step": 31355 }, { "epoch": 0.976490736416005, "grad_norm": 2.219312906265259, "learning_rate": 1.238978662034154e-07, "loss": 0.7743, "step": 31360 }, { "epoch": 0.9766464269033162, "grad_norm": 1.7907696962356567, "learning_rate": 1.2307843587138223e-07, "loss": 0.8278, "step": 31365 }, { "epoch": 0.9768021173906274, "grad_norm": 1.907880187034607, "learning_rate": 1.2225900553934905e-07, "loss": 0.7604, "step": 31370 }, { "epoch": 0.9769578078779386, "grad_norm": 2.0169365406036377, "learning_rate": 1.2143957520731587e-07, "loss": 0.7537, "step": 31375 }, { "epoch": 0.9771134983652499, "grad_norm": 2.4870946407318115, "learning_rate": 1.206201448752827e-07, "loss": 0.766, "step": 31380 }, { "epoch": 0.9772691888525611, "grad_norm": 1.9799602031707764, "learning_rate": 1.1980071454324953e-07, "loss": 0.7088, "step": 31385 }, { "epoch": 0.9774248793398723, "grad_norm": 2.3589868545532227, "learning_rate": 1.1898128421121637e-07, "loss": 0.7023, "step": 31390 }, { "epoch": 0.9775805698271836, "grad_norm": 2.3007326126098633, "learning_rate": 1.181618538791832e-07, "loss": 0.8015, "step": 31395 }, { "epoch": 0.9777362603144948, "grad_norm": 2.1481425762176514, "learning_rate": 1.1734242354715004e-07, "loss": 0.7557, "step": 31400 }, { "epoch": 0.977891950801806, "grad_norm": 2.403923749923706, "learning_rate": 1.1652299321511686e-07, "loss": 0.7412, "step": 31405 }, { "epoch": 0.9780476412891173, "grad_norm": 2.284989833831787, "learning_rate": 1.157035628830837e-07, "loss": 0.7779, "step": 31410 }, { "epoch": 0.9782033317764285, "grad_norm": 2.3546206951141357, "learning_rate": 1.1488413255105052e-07, "loss": 0.8066, "step": 31415 }, { "epoch": 0.9783590222637397, "grad_norm": 2.294158935546875, "learning_rate": 1.1406470221901734e-07, "loss": 0.8349, "step": 31420 }, { "epoch": 0.978514712751051, "grad_norm": 2.1347227096557617, "learning_rate": 1.1324527188698418e-07, "loss": 0.7794, "step": 31425 }, { "epoch": 0.9786704032383622, "grad_norm": 2.729257583618164, "learning_rate": 1.1242584155495101e-07, "loss": 0.7182, "step": 31430 }, { "epoch": 0.9788260937256733, "grad_norm": 2.206984281539917, "learning_rate": 1.1160641122291784e-07, "loss": 0.7707, "step": 31435 }, { "epoch": 0.9789817842129845, "grad_norm": 2.3849124908447266, "learning_rate": 1.1078698089088467e-07, "loss": 0.7438, "step": 31440 }, { "epoch": 0.9791374747002958, "grad_norm": 2.0448474884033203, "learning_rate": 1.099675505588515e-07, "loss": 0.7289, "step": 31445 }, { "epoch": 0.979293165187607, "grad_norm": 2.1909725666046143, "learning_rate": 1.0914812022681833e-07, "loss": 0.8019, "step": 31450 }, { "epoch": 0.9794488556749182, "grad_norm": 1.8030346632003784, "learning_rate": 1.0832868989478514e-07, "loss": 0.8573, "step": 31455 }, { "epoch": 0.9796045461622295, "grad_norm": 2.8488829135894775, "learning_rate": 1.0750925956275199e-07, "loss": 0.7916, "step": 31460 }, { "epoch": 0.9797602366495407, "grad_norm": 3.1027116775512695, "learning_rate": 1.066898292307188e-07, "loss": 0.7921, "step": 31465 }, { "epoch": 0.9799159271368519, "grad_norm": 1.9478963613510132, "learning_rate": 1.0587039889868565e-07, "loss": 0.7832, "step": 31470 }, { "epoch": 0.9800716176241632, "grad_norm": 2.5075771808624268, "learning_rate": 1.0505096856665246e-07, "loss": 0.7276, "step": 31475 }, { "epoch": 0.9802273081114744, "grad_norm": 2.317216157913208, "learning_rate": 1.0423153823461931e-07, "loss": 0.7896, "step": 31480 }, { "epoch": 0.9803829985987856, "grad_norm": 1.8697634935379028, "learning_rate": 1.0341210790258612e-07, "loss": 0.7593, "step": 31485 }, { "epoch": 0.9805386890860969, "grad_norm": 2.1710548400878906, "learning_rate": 1.0259267757055297e-07, "loss": 0.726, "step": 31490 }, { "epoch": 0.9806943795734081, "grad_norm": 2.0081193447113037, "learning_rate": 1.0177324723851978e-07, "loss": 0.7909, "step": 31495 }, { "epoch": 0.9808500700607193, "grad_norm": 2.0685813426971436, "learning_rate": 1.0095381690648661e-07, "loss": 0.8045, "step": 31500 }, { "epoch": 0.9810057605480306, "grad_norm": 2.1221251487731934, "learning_rate": 1.0013438657445346e-07, "loss": 0.7678, "step": 31505 }, { "epoch": 0.9811614510353417, "grad_norm": 2.2200815677642822, "learning_rate": 9.931495624242027e-08, "loss": 0.7745, "step": 31510 }, { "epoch": 0.9813171415226529, "grad_norm": 3.076714277267456, "learning_rate": 9.849552591038712e-08, "loss": 0.7877, "step": 31515 }, { "epoch": 0.9814728320099642, "grad_norm": 2.6984453201293945, "learning_rate": 9.767609557835393e-08, "loss": 0.7697, "step": 31520 }, { "epoch": 0.9816285224972754, "grad_norm": 1.9153844118118286, "learning_rate": 9.685666524632078e-08, "loss": 0.7427, "step": 31525 }, { "epoch": 0.9817842129845866, "grad_norm": 2.3006014823913574, "learning_rate": 9.603723491428759e-08, "loss": 0.8688, "step": 31530 }, { "epoch": 0.9819399034718979, "grad_norm": 2.01519513130188, "learning_rate": 9.521780458225443e-08, "loss": 0.7362, "step": 31535 }, { "epoch": 0.9820955939592091, "grad_norm": 2.0691847801208496, "learning_rate": 9.439837425022125e-08, "loss": 0.738, "step": 31540 }, { "epoch": 0.9822512844465203, "grad_norm": 2.5677974224090576, "learning_rate": 9.357894391818808e-08, "loss": 0.7809, "step": 31545 }, { "epoch": 0.9824069749338316, "grad_norm": 3.3179898262023926, "learning_rate": 9.275951358615491e-08, "loss": 0.7743, "step": 31550 }, { "epoch": 0.9825626654211428, "grad_norm": 2.30098557472229, "learning_rate": 9.194008325412174e-08, "loss": 0.7911, "step": 31555 }, { "epoch": 0.982718355908454, "grad_norm": 3.543693780899048, "learning_rate": 9.112065292208857e-08, "loss": 0.8153, "step": 31560 }, { "epoch": 0.9828740463957653, "grad_norm": 2.170236110687256, "learning_rate": 9.03012225900554e-08, "loss": 0.7199, "step": 31565 }, { "epoch": 0.9830297368830765, "grad_norm": 2.344322919845581, "learning_rate": 8.948179225802224e-08, "loss": 0.8247, "step": 31570 }, { "epoch": 0.9831854273703877, "grad_norm": 1.905870795249939, "learning_rate": 8.866236192598906e-08, "loss": 0.6858, "step": 31575 }, { "epoch": 0.9833411178576988, "grad_norm": 1.7734845876693726, "learning_rate": 8.78429315939559e-08, "loss": 0.65, "step": 31580 }, { "epoch": 0.9834968083450101, "grad_norm": 2.577096700668335, "learning_rate": 8.702350126192272e-08, "loss": 0.7308, "step": 31585 }, { "epoch": 0.9836524988323213, "grad_norm": 1.9362568855285645, "learning_rate": 8.620407092988954e-08, "loss": 0.7761, "step": 31590 }, { "epoch": 0.9838081893196325, "grad_norm": 2.280831813812256, "learning_rate": 8.538464059785638e-08, "loss": 0.8112, "step": 31595 }, { "epoch": 0.9839638798069438, "grad_norm": 2.2281734943389893, "learning_rate": 8.45652102658232e-08, "loss": 0.7206, "step": 31600 }, { "epoch": 0.984119570294255, "grad_norm": 2.275369644165039, "learning_rate": 8.374577993379004e-08, "loss": 0.7747, "step": 31605 }, { "epoch": 0.9842752607815662, "grad_norm": 2.077688217163086, "learning_rate": 8.292634960175687e-08, "loss": 0.7419, "step": 31610 }, { "epoch": 0.9844309512688775, "grad_norm": 2.4452097415924072, "learning_rate": 8.21069192697237e-08, "loss": 0.7617, "step": 31615 }, { "epoch": 0.9845866417561887, "grad_norm": 2.397614002227783, "learning_rate": 8.128748893769053e-08, "loss": 0.8478, "step": 31620 }, { "epoch": 0.9847423322434999, "grad_norm": 2.170541524887085, "learning_rate": 8.046805860565734e-08, "loss": 0.8036, "step": 31625 }, { "epoch": 0.9848980227308112, "grad_norm": 2.06120228767395, "learning_rate": 7.964862827362419e-08, "loss": 0.8304, "step": 31630 }, { "epoch": 0.9850537132181224, "grad_norm": 2.1401031017303467, "learning_rate": 7.8829197941591e-08, "loss": 0.797, "step": 31635 }, { "epoch": 0.9852094037054336, "grad_norm": 2.226339340209961, "learning_rate": 7.800976760955785e-08, "loss": 0.7303, "step": 31640 }, { "epoch": 0.9853650941927449, "grad_norm": 1.953464388847351, "learning_rate": 7.719033727752466e-08, "loss": 0.7696, "step": 31645 }, { "epoch": 0.985520784680056, "grad_norm": 2.3373584747314453, "learning_rate": 7.63709069454915e-08, "loss": 0.6952, "step": 31650 }, { "epoch": 0.9856764751673672, "grad_norm": 2.2813541889190674, "learning_rate": 7.555147661345832e-08, "loss": 0.7301, "step": 31655 }, { "epoch": 0.9858321656546785, "grad_norm": 2.199411630630493, "learning_rate": 7.473204628142515e-08, "loss": 0.7271, "step": 31660 }, { "epoch": 0.9859878561419897, "grad_norm": 1.8767658472061157, "learning_rate": 7.391261594939198e-08, "loss": 0.7819, "step": 31665 }, { "epoch": 0.9861435466293009, "grad_norm": 2.201291084289551, "learning_rate": 7.309318561735881e-08, "loss": 0.7115, "step": 31670 }, { "epoch": 0.9862992371166122, "grad_norm": 2.344073534011841, "learning_rate": 7.227375528532566e-08, "loss": 0.8369, "step": 31675 }, { "epoch": 0.9864549276039234, "grad_norm": 2.3011085987091064, "learning_rate": 7.145432495329249e-08, "loss": 0.7814, "step": 31680 }, { "epoch": 0.9866106180912346, "grad_norm": 2.272217035293579, "learning_rate": 7.063489462125932e-08, "loss": 0.7212, "step": 31685 }, { "epoch": 0.9867663085785459, "grad_norm": 1.934389591217041, "learning_rate": 6.981546428922613e-08, "loss": 0.7253, "step": 31690 }, { "epoch": 0.9869219990658571, "grad_norm": 1.9102201461791992, "learning_rate": 6.899603395719296e-08, "loss": 0.7869, "step": 31695 }, { "epoch": 0.9870776895531683, "grad_norm": 2.0771536827087402, "learning_rate": 6.817660362515979e-08, "loss": 0.8164, "step": 31700 }, { "epoch": 0.9872333800404796, "grad_norm": 2.0481457710266113, "learning_rate": 6.735717329312662e-08, "loss": 0.7627, "step": 31705 }, { "epoch": 0.9873890705277908, "grad_norm": 2.0630149841308594, "learning_rate": 6.653774296109345e-08, "loss": 0.7361, "step": 31710 }, { "epoch": 0.987544761015102, "grad_norm": 2.3282203674316406, "learning_rate": 6.571831262906028e-08, "loss": 0.6908, "step": 31715 }, { "epoch": 0.9877004515024133, "grad_norm": 1.9526748657226562, "learning_rate": 6.489888229702711e-08, "loss": 0.7401, "step": 31720 }, { "epoch": 0.9878561419897244, "grad_norm": 2.3342788219451904, "learning_rate": 6.407945196499394e-08, "loss": 0.7013, "step": 31725 }, { "epoch": 0.9880118324770356, "grad_norm": 1.770271897315979, "learning_rate": 6.326002163296077e-08, "loss": 0.7268, "step": 31730 }, { "epoch": 0.9881675229643468, "grad_norm": 2.1516873836517334, "learning_rate": 6.24405913009276e-08, "loss": 0.7321, "step": 31735 }, { "epoch": 0.9883232134516581, "grad_norm": 2.035231113433838, "learning_rate": 6.162116096889443e-08, "loss": 0.7596, "step": 31740 }, { "epoch": 0.9884789039389693, "grad_norm": 2.4203522205352783, "learning_rate": 6.080173063686126e-08, "loss": 0.7351, "step": 31745 }, { "epoch": 0.9886345944262805, "grad_norm": 2.1430928707122803, "learning_rate": 5.998230030482809e-08, "loss": 0.7461, "step": 31750 }, { "epoch": 0.9887902849135918, "grad_norm": 1.939644455909729, "learning_rate": 5.916286997279492e-08, "loss": 0.7513, "step": 31755 }, { "epoch": 0.988945975400903, "grad_norm": 1.9629496335983276, "learning_rate": 5.834343964076175e-08, "loss": 0.6834, "step": 31760 }, { "epoch": 0.9891016658882142, "grad_norm": 2.18039608001709, "learning_rate": 5.752400930872858e-08, "loss": 0.7755, "step": 31765 }, { "epoch": 0.9892573563755255, "grad_norm": 2.1267261505126953, "learning_rate": 5.670457897669541e-08, "loss": 0.6823, "step": 31770 }, { "epoch": 0.9894130468628367, "grad_norm": 2.4831275939941406, "learning_rate": 5.588514864466223e-08, "loss": 0.7551, "step": 31775 }, { "epoch": 0.9895687373501479, "grad_norm": 2.017590045928955, "learning_rate": 5.506571831262906e-08, "loss": 0.7448, "step": 31780 }, { "epoch": 0.9897244278374592, "grad_norm": 2.6563830375671387, "learning_rate": 5.424628798059589e-08, "loss": 0.7492, "step": 31785 }, { "epoch": 0.9898801183247704, "grad_norm": 2.4724321365356445, "learning_rate": 5.342685764856272e-08, "loss": 0.7807, "step": 31790 }, { "epoch": 0.9900358088120815, "grad_norm": 2.1422576904296875, "learning_rate": 5.260742731652955e-08, "loss": 0.6976, "step": 31795 }, { "epoch": 0.9901914992993928, "grad_norm": 2.1386806964874268, "learning_rate": 5.178799698449638e-08, "loss": 0.8406, "step": 31800 }, { "epoch": 0.990347189786704, "grad_norm": 2.0340003967285156, "learning_rate": 5.096856665246321e-08, "loss": 0.7695, "step": 31805 }, { "epoch": 0.9905028802740152, "grad_norm": 1.9129626750946045, "learning_rate": 5.014913632043005e-08, "loss": 0.7419, "step": 31810 }, { "epoch": 0.9906585707613265, "grad_norm": 2.0827078819274902, "learning_rate": 4.9329705988396864e-08, "loss": 0.821, "step": 31815 }, { "epoch": 0.9908142612486377, "grad_norm": 2.1099438667297363, "learning_rate": 4.8510275656363694e-08, "loss": 0.7727, "step": 31820 }, { "epoch": 0.9909699517359489, "grad_norm": 2.413300037384033, "learning_rate": 4.769084532433053e-08, "loss": 0.7247, "step": 31825 }, { "epoch": 0.9911256422232602, "grad_norm": 2.8027098178863525, "learning_rate": 4.687141499229736e-08, "loss": 0.8307, "step": 31830 }, { "epoch": 0.9912813327105714, "grad_norm": 2.060029983520508, "learning_rate": 4.605198466026419e-08, "loss": 0.8182, "step": 31835 }, { "epoch": 0.9914370231978826, "grad_norm": 2.2738938331604004, "learning_rate": 4.523255432823102e-08, "loss": 0.7026, "step": 31840 }, { "epoch": 0.9915927136851939, "grad_norm": 2.241150140762329, "learning_rate": 4.441312399619785e-08, "loss": 0.7124, "step": 31845 }, { "epoch": 0.9917484041725051, "grad_norm": 2.0513508319854736, "learning_rate": 4.359369366416468e-08, "loss": 0.6904, "step": 31850 }, { "epoch": 0.9919040946598163, "grad_norm": 2.238945722579956, "learning_rate": 4.277426333213151e-08, "loss": 0.7411, "step": 31855 }, { "epoch": 0.9920597851471276, "grad_norm": 2.11405086517334, "learning_rate": 4.195483300009833e-08, "loss": 0.7803, "step": 31860 }, { "epoch": 0.9922154756344387, "grad_norm": 2.3068833351135254, "learning_rate": 4.113540266806516e-08, "loss": 0.7472, "step": 31865 }, { "epoch": 0.9923711661217499, "grad_norm": 2.9653351306915283, "learning_rate": 4.031597233603199e-08, "loss": 0.7663, "step": 31870 }, { "epoch": 0.9925268566090611, "grad_norm": 2.1828644275665283, "learning_rate": 3.949654200399882e-08, "loss": 0.7957, "step": 31875 }, { "epoch": 0.9926825470963724, "grad_norm": 2.090471029281616, "learning_rate": 3.867711167196565e-08, "loss": 0.7538, "step": 31880 }, { "epoch": 0.9928382375836836, "grad_norm": 2.5187971591949463, "learning_rate": 3.785768133993248e-08, "loss": 0.6602, "step": 31885 }, { "epoch": 0.9929939280709948, "grad_norm": 2.468801259994507, "learning_rate": 3.703825100789931e-08, "loss": 0.7947, "step": 31890 }, { "epoch": 0.9931496185583061, "grad_norm": 1.987453579902649, "learning_rate": 3.621882067586614e-08, "loss": 0.7484, "step": 31895 }, { "epoch": 0.9933053090456173, "grad_norm": 1.9171732664108276, "learning_rate": 3.539939034383297e-08, "loss": 0.7091, "step": 31900 }, { "epoch": 0.9934609995329285, "grad_norm": 2.5134944915771484, "learning_rate": 3.45799600117998e-08, "loss": 0.8159, "step": 31905 }, { "epoch": 0.9936166900202398, "grad_norm": 2.266521692276001, "learning_rate": 3.3760529679766624e-08, "loss": 0.761, "step": 31910 }, { "epoch": 0.993772380507551, "grad_norm": 2.2173163890838623, "learning_rate": 3.294109934773346e-08, "loss": 0.8042, "step": 31915 }, { "epoch": 0.9939280709948622, "grad_norm": 1.9625109434127808, "learning_rate": 3.212166901570029e-08, "loss": 0.7342, "step": 31920 }, { "epoch": 0.9940837614821735, "grad_norm": 2.068047285079956, "learning_rate": 3.130223868366712e-08, "loss": 0.7293, "step": 31925 }, { "epoch": 0.9942394519694847, "grad_norm": 2.317533016204834, "learning_rate": 3.048280835163395e-08, "loss": 0.766, "step": 31930 }, { "epoch": 0.9943951424567959, "grad_norm": 1.8411144018173218, "learning_rate": 2.9663378019600776e-08, "loss": 0.6606, "step": 31935 }, { "epoch": 0.9945508329441071, "grad_norm": 2.0469956398010254, "learning_rate": 2.8843947687567606e-08, "loss": 0.668, "step": 31940 }, { "epoch": 0.9947065234314183, "grad_norm": 2.226870536804199, "learning_rate": 2.8024517355534436e-08, "loss": 0.7177, "step": 31945 }, { "epoch": 0.9948622139187295, "grad_norm": 1.8251588344573975, "learning_rate": 2.7205087023501266e-08, "loss": 0.7351, "step": 31950 }, { "epoch": 0.9950179044060408, "grad_norm": 2.3208489418029785, "learning_rate": 2.6385656691468092e-08, "loss": 0.8584, "step": 31955 }, { "epoch": 0.995173594893352, "grad_norm": 2.0530893802642822, "learning_rate": 2.5566226359434922e-08, "loss": 0.7561, "step": 31960 }, { "epoch": 0.9953292853806632, "grad_norm": 2.1458826065063477, "learning_rate": 2.4746796027401752e-08, "loss": 0.7608, "step": 31965 }, { "epoch": 0.9954849758679745, "grad_norm": 2.211630344390869, "learning_rate": 2.392736569536858e-08, "loss": 0.7612, "step": 31970 }, { "epoch": 0.9956406663552857, "grad_norm": 2.0256495475769043, "learning_rate": 2.3107935363335408e-08, "loss": 0.7599, "step": 31975 }, { "epoch": 0.9957963568425969, "grad_norm": 2.114264488220215, "learning_rate": 2.228850503130224e-08, "loss": 0.7676, "step": 31980 }, { "epoch": 0.9959520473299082, "grad_norm": 2.2845146656036377, "learning_rate": 2.146907469926907e-08, "loss": 0.7435, "step": 31985 }, { "epoch": 0.9961077378172194, "grad_norm": 2.1074352264404297, "learning_rate": 2.06496443672359e-08, "loss": 0.7646, "step": 31990 }, { "epoch": 0.9962634283045306, "grad_norm": 2.477749824523926, "learning_rate": 1.9830214035202727e-08, "loss": 0.7303, "step": 31995 }, { "epoch": 0.9964191187918419, "grad_norm": 2.393585205078125, "learning_rate": 1.901078370316956e-08, "loss": 0.7842, "step": 32000 }, { "epoch": 0.9965748092791531, "grad_norm": 2.0790352821350098, "learning_rate": 1.8191353371136387e-08, "loss": 0.6972, "step": 32005 }, { "epoch": 0.9967304997664642, "grad_norm": 2.3707761764526367, "learning_rate": 1.7371923039103217e-08, "loss": 0.7503, "step": 32010 }, { "epoch": 0.9968861902537755, "grad_norm": 1.9844924211502075, "learning_rate": 1.6552492707070047e-08, "loss": 0.7787, "step": 32015 }, { "epoch": 0.9970418807410867, "grad_norm": 2.4502837657928467, "learning_rate": 1.5733062375036876e-08, "loss": 0.796, "step": 32020 }, { "epoch": 0.9971975712283979, "grad_norm": 2.4111227989196777, "learning_rate": 1.4913632043003706e-08, "loss": 0.7402, "step": 32025 }, { "epoch": 0.9973532617157091, "grad_norm": 1.9591799974441528, "learning_rate": 1.4094201710970536e-08, "loss": 0.7722, "step": 32030 }, { "epoch": 0.9975089522030204, "grad_norm": 2.642704725265503, "learning_rate": 1.3274771378937364e-08, "loss": 0.7414, "step": 32035 }, { "epoch": 0.9976646426903316, "grad_norm": 2.3279333114624023, "learning_rate": 1.2455341046904194e-08, "loss": 0.7091, "step": 32040 }, { "epoch": 0.9978203331776428, "grad_norm": 1.9021185636520386, "learning_rate": 1.1635910714871022e-08, "loss": 0.7582, "step": 32045 }, { "epoch": 0.9979760236649541, "grad_norm": 3.0975308418273926, "learning_rate": 1.0816480382837852e-08, "loss": 0.7848, "step": 32050 }, { "epoch": 0.9981317141522653, "grad_norm": 2.1675734519958496, "learning_rate": 9.99705005080468e-09, "loss": 0.8104, "step": 32055 }, { "epoch": 0.9982874046395765, "grad_norm": 1.9339698553085327, "learning_rate": 9.177619718771511e-09, "loss": 0.8481, "step": 32060 }, { "epoch": 0.9984430951268878, "grad_norm": 2.1369402408599854, "learning_rate": 8.35818938673834e-09, "loss": 0.7829, "step": 32065 }, { "epoch": 0.998598785614199, "grad_norm": 2.4185547828674316, "learning_rate": 7.53875905470517e-09, "loss": 0.7177, "step": 32070 }, { "epoch": 0.9987544761015102, "grad_norm": 2.242750883102417, "learning_rate": 6.719328722671999e-09, "loss": 0.8306, "step": 32075 }, { "epoch": 0.9989101665888214, "grad_norm": 2.350712299346924, "learning_rate": 5.899898390638828e-09, "loss": 0.7792, "step": 32080 }, { "epoch": 0.9990658570761326, "grad_norm": 1.9944778680801392, "learning_rate": 5.080468058605658e-09, "loss": 0.7656, "step": 32085 }, { "epoch": 0.9992215475634438, "grad_norm": 2.888697862625122, "learning_rate": 4.261037726572487e-09, "loss": 0.809, "step": 32090 }, { "epoch": 0.9993772380507551, "grad_norm": 2.2190699577331543, "learning_rate": 3.4416073945393164e-09, "loss": 0.7181, "step": 32095 }, { "epoch": 0.9995329285380663, "grad_norm": 1.9756126403808594, "learning_rate": 2.6221770625061458e-09, "loss": 0.722, "step": 32100 }, { "epoch": 0.9996886190253775, "grad_norm": 2.430950164794922, "learning_rate": 1.8027467304729754e-09, "loss": 0.7188, "step": 32105 }, { "epoch": 0.9998443095126888, "grad_norm": 2.1471128463745117, "learning_rate": 9.833163984398048e-10, "loss": 0.719, "step": 32110 }, { "epoch": 1.0, "grad_norm": 2.0647552013397217, "learning_rate": 1.638860664066341e-10, "loss": 0.6824, "step": 32115 }, { "epoch": 1.0, "step": 32115, "total_flos": 2.516217150612439e+19, "train_loss": 0.8029150493787528, "train_runtime": 26158.9095, "train_samples_per_second": 39.285, "train_steps_per_second": 1.228 } ], "logging_steps": 5, "max_steps": 32115, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.516217150612439e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }