| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.854368932038835, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009708737864077669, |
| "grad_norm": 3.141545295715332, |
| "learning_rate": 1.5e-06, |
| "loss": 0.3406, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 3.8397164344787598, |
| "learning_rate": 3.166666666666667e-06, |
| "loss": 0.3078, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02912621359223301, |
| "grad_norm": 1.7836706638336182, |
| "learning_rate": 4.833333333333333e-06, |
| "loss": 0.1926, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 1.257520079612732, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.1476, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04854368932038835, |
| "grad_norm": 1.0694407224655151, |
| "learning_rate": 8.166666666666668e-06, |
| "loss": 0.135, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 0.9249787926673889, |
| "learning_rate": 9.833333333333333e-06, |
| "loss": 0.1516, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06796116504854369, |
| "grad_norm": 1.0610548257827759, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 0.1073, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.9249606132507324, |
| "learning_rate": 1.3166666666666665e-05, |
| "loss": 0.0894, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08737864077669903, |
| "grad_norm": 0.6027079224586487, |
| "learning_rate": 1.4833333333333336e-05, |
| "loss": 0.0814, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 0.8849421739578247, |
| "learning_rate": 1.65e-05, |
| "loss": 0.0963, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10679611650485436, |
| "grad_norm": 1.2120862007141113, |
| "learning_rate": 1.8166666666666667e-05, |
| "loss": 0.0817, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.781768262386322, |
| "learning_rate": 1.9833333333333335e-05, |
| "loss": 0.087, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1262135922330097, |
| "grad_norm": 0.5335855484008789, |
| "learning_rate": 2.15e-05, |
| "loss": 0.0502, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 0.6909231543540955, |
| "learning_rate": 2.3166666666666666e-05, |
| "loss": 0.0813, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14563106796116504, |
| "grad_norm": 0.8511887192726135, |
| "learning_rate": 2.4833333333333335e-05, |
| "loss": 0.0823, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 1.1109554767608643, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 0.0638, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1650485436893204, |
| "grad_norm": 0.7318443655967712, |
| "learning_rate": 2.816666666666667e-05, |
| "loss": 0.0649, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 0.7571411728858948, |
| "learning_rate": 2.9833333333333335e-05, |
| "loss": 0.064, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18446601941747573, |
| "grad_norm": 0.8281847834587097, |
| "learning_rate": 3.15e-05, |
| "loss": 0.0731, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 0.7228130102157593, |
| "learning_rate": 3.316666666666667e-05, |
| "loss": 0.0674, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.20388349514563106, |
| "grad_norm": 0.9129646420478821, |
| "learning_rate": 3.483333333333334e-05, |
| "loss": 0.0674, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 0.7561999559402466, |
| "learning_rate": 3.65e-05, |
| "loss": 0.0632, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22330097087378642, |
| "grad_norm": 0.6391080617904663, |
| "learning_rate": 3.816666666666667e-05, |
| "loss": 0.0748, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 0.8094485402107239, |
| "learning_rate": 3.983333333333333e-05, |
| "loss": 0.0822, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.24271844660194175, |
| "grad_norm": 0.5360986590385437, |
| "learning_rate": 4.15e-05, |
| "loss": 0.0617, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 0.7553185224533081, |
| "learning_rate": 4.316666666666667e-05, |
| "loss": 0.0666, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2621359223300971, |
| "grad_norm": 0.6815693378448486, |
| "learning_rate": 4.483333333333333e-05, |
| "loss": 0.061, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.5073156356811523, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.0473, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2815533980582524, |
| "grad_norm": 0.8138278722763062, |
| "learning_rate": 4.8166666666666674e-05, |
| "loss": 0.0652, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 0.8348583579063416, |
| "learning_rate": 4.9833333333333336e-05, |
| "loss": 0.0746, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.30097087378640774, |
| "grad_norm": 1.0741016864776611, |
| "learning_rate": 4.999969242985639e-05, |
| "loss": 0.0718, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 1.182652473449707, |
| "learning_rate": 4.999862923413781e-05, |
| "loss": 0.06, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.32038834951456313, |
| "grad_norm": 0.835841953754425, |
| "learning_rate": 4.999680664797127e-05, |
| "loss": 0.0607, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 0.6821876764297485, |
| "learning_rate": 4.999422472672202e-05, |
| "loss": 0.0554, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33980582524271846, |
| "grad_norm": 0.9348113536834717, |
| "learning_rate": 4.99908835488218e-05, |
| "loss": 0.0629, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 0.5926679372787476, |
| "learning_rate": 4.998678321576651e-05, |
| "loss": 0.0506, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3592233009708738, |
| "grad_norm": 1.0646535158157349, |
| "learning_rate": 4.9981923852113145e-05, |
| "loss": 0.0782, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 1.0149277448654175, |
| "learning_rate": 4.997630560547597e-05, |
| "loss": 0.0665, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3786407766990291, |
| "grad_norm": 0.9700633883476257, |
| "learning_rate": 4.996992864652204e-05, |
| "loss": 0.067, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 1.1201013326644897, |
| "learning_rate": 4.996279316896606e-05, |
| "loss": 0.0656, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.39805825242718446, |
| "grad_norm": 0.7025376558303833, |
| "learning_rate": 4.9954899389564455e-05, |
| "loss": 0.0711, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 0.6064212918281555, |
| "learning_rate": 4.9946247548108794e-05, |
| "loss": 0.0572, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4174757281553398, |
| "grad_norm": 0.5238557457923889, |
| "learning_rate": 4.993683790741852e-05, |
| "loss": 0.0633, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 0.599403977394104, |
| "learning_rate": 4.992667075333296e-05, |
| "loss": 0.0508, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4368932038834951, |
| "grad_norm": 0.55897057056427, |
| "learning_rate": 4.991574639470263e-05, |
| "loss": 0.0539, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 0.628349244594574, |
| "learning_rate": 4.990406516337987e-05, |
| "loss": 0.0496, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4563106796116505, |
| "grad_norm": 0.7520539164543152, |
| "learning_rate": 4.989162741420876e-05, |
| "loss": 0.0504, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 1.0347453355789185, |
| "learning_rate": 4.9878433525014335e-05, |
| "loss": 0.0646, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.47572815533980584, |
| "grad_norm": 1.04426109790802, |
| "learning_rate": 4.9864483896591094e-05, |
| "loss": 0.0679, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 0.9814009070396423, |
| "learning_rate": 4.984977895269087e-05, |
| "loss": 0.0607, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.49514563106796117, |
| "grad_norm": 0.9229623079299927, |
| "learning_rate": 4.983431914000991e-05, |
| "loss": 0.0667, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 1.0306048393249512, |
| "learning_rate": 4.981810492817532e-05, |
| "loss": 0.0658, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5145631067961165, |
| "grad_norm": 0.9046841859817505, |
| "learning_rate": 4.980113680973082e-05, |
| "loss": 0.0746, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 0.6024751663208008, |
| "learning_rate": 4.978341530012175e-05, |
| "loss": 0.0525, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5339805825242718, |
| "grad_norm": 1.0224015712738037, |
| "learning_rate": 4.976494093767943e-05, |
| "loss": 0.0725, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 1.0598809719085693, |
| "learning_rate": 4.9745714283604803e-05, |
| "loss": 0.0853, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5533980582524272, |
| "grad_norm": 0.6167446970939636, |
| "learning_rate": 4.972573592195139e-05, |
| "loss": 0.0612, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 0.6139333844184875, |
| "learning_rate": 4.970500645960756e-05, |
| "loss": 0.0525, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5728155339805825, |
| "grad_norm": 0.7935957908630371, |
| "learning_rate": 4.968352652627806e-05, |
| "loss": 0.0526, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 0.5704646706581116, |
| "learning_rate": 4.966129677446492e-05, |
| "loss": 0.0614, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5922330097087378, |
| "grad_norm": 0.5081838369369507, |
| "learning_rate": 4.9638317879447606e-05, |
| "loss": 0.0549, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 0.5273293256759644, |
| "learning_rate": 4.961459053926252e-05, |
| "loss": 0.0523, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6116504854368932, |
| "grad_norm": 0.423949271440506, |
| "learning_rate": 4.9590115474681816e-05, |
| "loss": 0.0571, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.8299949169158936, |
| "learning_rate": 4.956489342919147e-05, |
| "loss": 0.0548, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6310679611650486, |
| "grad_norm": 0.8283652067184448, |
| "learning_rate": 4.95389251689687e-05, |
| "loss": 0.0495, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 0.828658938407898, |
| "learning_rate": 4.9512211482858714e-05, |
| "loss": 0.0642, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6504854368932039, |
| "grad_norm": 0.7801154851913452, |
| "learning_rate": 4.948475318235073e-05, |
| "loss": 0.0467, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 0.7108743786811829, |
| "learning_rate": 4.945655110155333e-05, |
| "loss": 0.0624, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6699029126213593, |
| "grad_norm": 0.5172573328018188, |
| "learning_rate": 4.9427606097169117e-05, |
| "loss": 0.0601, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 0.7694447636604309, |
| "learning_rate": 4.939791904846869e-05, |
| "loss": 0.0585, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6893203883495146, |
| "grad_norm": 0.651713490486145, |
| "learning_rate": 4.9367490857263944e-05, |
| "loss": 0.0521, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 1.2775520086288452, |
| "learning_rate": 4.9336322447880676e-05, |
| "loss": 0.0687, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7087378640776699, |
| "grad_norm": 0.6819589138031006, |
| "learning_rate": 4.930441476713049e-05, |
| "loss": 0.047, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7184466019417476, |
| "grad_norm": 0.8026332855224609, |
| "learning_rate": 4.927176878428206e-05, |
| "loss": 0.0419, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7281553398058253, |
| "grad_norm": 0.7379328608512878, |
| "learning_rate": 4.923838549103166e-05, |
| "loss": 0.0547, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 0.5019276738166809, |
| "learning_rate": 4.920426590147304e-05, |
| "loss": 0.0492, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7475728155339806, |
| "grad_norm": 0.8244302272796631, |
| "learning_rate": 4.916941105206666e-05, |
| "loss": 0.0542, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7572815533980582, |
| "grad_norm": 0.4618038535118103, |
| "learning_rate": 4.9133822001608164e-05, |
| "loss": 0.0676, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7669902912621359, |
| "grad_norm": 0.5262795686721802, |
| "learning_rate": 4.9097499831196216e-05, |
| "loss": 0.0415, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 0.537287175655365, |
| "learning_rate": 4.906044564419969e-05, |
| "loss": 0.0511, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7864077669902912, |
| "grad_norm": 0.9852665066719055, |
| "learning_rate": 4.902266056622414e-05, |
| "loss": 0.055, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7961165048543689, |
| "grad_norm": 0.6320510506629944, |
| "learning_rate": 4.8984145745077584e-05, |
| "loss": 0.0554, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8058252427184466, |
| "grad_norm": 0.6188237071037292, |
| "learning_rate": 4.894490235073566e-05, |
| "loss": 0.0783, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 0.6136628985404968, |
| "learning_rate": 4.890493157530609e-05, |
| "loss": 0.0532, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8252427184466019, |
| "grad_norm": 0.6563002467155457, |
| "learning_rate": 4.8864234632992457e-05, |
| "loss": 0.0583, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8349514563106796, |
| "grad_norm": 0.779821515083313, |
| "learning_rate": 4.88228127600573e-05, |
| "loss": 0.0608, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8446601941747572, |
| "grad_norm": 0.3783950209617615, |
| "learning_rate": 4.878066721478461e-05, |
| "loss": 0.0599, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8543689320388349, |
| "grad_norm": 0.7626412510871887, |
| "learning_rate": 4.8737799277441566e-05, |
| "loss": 0.0526, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8640776699029126, |
| "grad_norm": 0.5785266160964966, |
| "learning_rate": 4.869421025023965e-05, |
| "loss": 0.0623, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 0.959281861782074, |
| "learning_rate": 4.8649901457295096e-05, |
| "loss": 0.0509, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.883495145631068, |
| "grad_norm": 0.6331967115402222, |
| "learning_rate": 4.860487424458867e-05, |
| "loss": 0.0498, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8932038834951457, |
| "grad_norm": 0.7830513119697571, |
| "learning_rate": 4.8559129979924787e-05, |
| "loss": 0.0457, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9029126213592233, |
| "grad_norm": 0.8294515013694763, |
| "learning_rate": 4.8512670052889955e-05, |
| "loss": 0.0528, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.912621359223301, |
| "grad_norm": 0.7075780034065247, |
| "learning_rate": 4.846549587481052e-05, |
| "loss": 0.0474, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9223300970873787, |
| "grad_norm": 0.4232042729854584, |
| "learning_rate": 4.841760887870988e-05, |
| "loss": 0.042, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 0.6930996179580688, |
| "learning_rate": 4.836901051926489e-05, |
| "loss": 0.0616, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.941747572815534, |
| "grad_norm": 1.1047744750976562, |
| "learning_rate": 4.831970227276171e-05, |
| "loss": 0.0606, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9514563106796117, |
| "grad_norm": 0.6250813603401184, |
| "learning_rate": 4.82696856370509e-05, |
| "loss": 0.0419, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9611650485436893, |
| "grad_norm": 0.5649757385253906, |
| "learning_rate": 4.8218962131502e-05, |
| "loss": 0.0434, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 0.5142524242401123, |
| "learning_rate": 4.81675332969573e-05, |
| "loss": 0.0455, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9805825242718447, |
| "grad_norm": 0.437963604927063, |
| "learning_rate": 4.811540069568512e-05, |
| "loss": 0.0477, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9902912621359223, |
| "grad_norm": 0.9865050315856934, |
| "learning_rate": 4.8062565911332235e-05, |
| "loss": 0.0636, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4108821749687195, |
| "learning_rate": 4.8009030548875896e-05, |
| "loss": 0.0386, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.0097087378640777, |
| "grad_norm": 0.40395405888557434, |
| "learning_rate": 4.795479623457497e-05, |
| "loss": 0.0406, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0194174757281553, |
| "grad_norm": 0.6007186770439148, |
| "learning_rate": 4.789986461592061e-05, |
| "loss": 0.05, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.029126213592233, |
| "grad_norm": 0.7217828631401062, |
| "learning_rate": 4.784423736158616e-05, |
| "loss": 0.051, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0388349514563107, |
| "grad_norm": 0.6370289325714111, |
| "learning_rate": 4.7787916161376515e-05, |
| "loss": 0.0641, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.0485436893203883, |
| "grad_norm": 0.6220316886901855, |
| "learning_rate": 4.773090272617672e-05, |
| "loss": 0.0624, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.058252427184466, |
| "grad_norm": 0.7248903512954712, |
| "learning_rate": 4.7673198787900063e-05, |
| "loss": 0.0567, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.0679611650485437, |
| "grad_norm": 0.7164710164070129, |
| "learning_rate": 4.761480609943546e-05, |
| "loss": 0.0424, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0776699029126213, |
| "grad_norm": 0.6365995407104492, |
| "learning_rate": 4.755572643459414e-05, |
| "loss": 0.062, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.087378640776699, |
| "grad_norm": 0.5840019583702087, |
| "learning_rate": 4.7495961588055836e-05, |
| "loss": 0.0344, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0970873786407767, |
| "grad_norm": 0.6124367713928223, |
| "learning_rate": 4.7435513375314253e-05, |
| "loss": 0.0524, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1067961165048543, |
| "grad_norm": 0.6590617299079895, |
| "learning_rate": 4.737438363262187e-05, |
| "loss": 0.0615, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.116504854368932, |
| "grad_norm": 0.6731277108192444, |
| "learning_rate": 4.7312574216934225e-05, |
| "loss": 0.0536, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1262135922330097, |
| "grad_norm": 0.4859127700328827, |
| "learning_rate": 4.7250087005853446e-05, |
| "loss": 0.0456, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.1359223300970873, |
| "grad_norm": 0.48708751797676086, |
| "learning_rate": 4.718692389757128e-05, |
| "loss": 0.0499, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.145631067961165, |
| "grad_norm": 0.7826804518699646, |
| "learning_rate": 4.7123086810811356e-05, |
| "loss": 0.0437, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.1553398058252426, |
| "grad_norm": 0.550841748714447, |
| "learning_rate": 4.705857768477098e-05, |
| "loss": 0.0507, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.1650485436893203, |
| "grad_norm": 0.7365569472312927, |
| "learning_rate": 4.699339847906215e-05, |
| "loss": 0.0624, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.174757281553398, |
| "grad_norm": 1.0098744630813599, |
| "learning_rate": 4.6927551173652075e-05, |
| "loss": 0.0619, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.1844660194174756, |
| "grad_norm": 0.738837718963623, |
| "learning_rate": 4.6861037768803016e-05, |
| "loss": 0.0503, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.1941747572815533, |
| "grad_norm": 0.5268725752830505, |
| "learning_rate": 4.679386028501156e-05, |
| "loss": 0.0489, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.203883495145631, |
| "grad_norm": 0.8031307458877563, |
| "learning_rate": 4.672602076294714e-05, |
| "loss": 0.0595, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.2135922330097086, |
| "grad_norm": 0.6441630721092224, |
| "learning_rate": 4.665752126339018e-05, |
| "loss": 0.0425, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2233009708737863, |
| "grad_norm": 0.5169068574905396, |
| "learning_rate": 4.658836386716938e-05, |
| "loss": 0.0536, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.233009708737864, |
| "grad_norm": 0.6084913015365601, |
| "learning_rate": 4.65185506750986e-05, |
| "loss": 0.0349, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.2427184466019416, |
| "grad_norm": 0.7514814734458923, |
| "learning_rate": 4.6448083807912934e-05, |
| "loss": 0.053, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.2524271844660193, |
| "grad_norm": 0.49506863951683044, |
| "learning_rate": 4.637696540620441e-05, |
| "loss": 0.0497, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.262135922330097, |
| "grad_norm": 0.6199485659599304, |
| "learning_rate": 4.630519763035687e-05, |
| "loss": 0.0517, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.2718446601941746, |
| "grad_norm": 0.6022672653198242, |
| "learning_rate": 4.623278266048039e-05, |
| "loss": 0.0439, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.2815533980582523, |
| "grad_norm": 0.7462307810783386, |
| "learning_rate": 4.6159722696345045e-05, |
| "loss": 0.0454, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.29126213592233, |
| "grad_norm": 0.40218812227249146, |
| "learning_rate": 4.608601995731407e-05, |
| "loss": 0.0326, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.3009708737864076, |
| "grad_norm": 0.7571138739585876, |
| "learning_rate": 4.601167668227648e-05, |
| "loss": 0.0364, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.3106796116504853, |
| "grad_norm": 0.77698814868927, |
| "learning_rate": 4.593669512957901e-05, |
| "loss": 0.058, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3203883495145632, |
| "grad_norm": 0.7244229316711426, |
| "learning_rate": 4.586107757695755e-05, |
| "loss": 0.056, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.3300970873786409, |
| "grad_norm": 0.5994431972503662, |
| "learning_rate": 4.578482632146793e-05, |
| "loss": 0.038, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.3398058252427185, |
| "grad_norm": 0.7894623279571533, |
| "learning_rate": 4.570794367941616e-05, |
| "loss": 0.0425, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.3495145631067962, |
| "grad_norm": 0.7884004712104797, |
| "learning_rate": 4.563043198628806e-05, |
| "loss": 0.0495, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.3592233009708738, |
| "grad_norm": 0.6530587673187256, |
| "learning_rate": 4.5552293596678294e-05, |
| "loss": 0.0414, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.3689320388349515, |
| "grad_norm": 0.8399489521980286, |
| "learning_rate": 4.5473530884218886e-05, |
| "loss": 0.0493, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.3786407766990292, |
| "grad_norm": 0.6782814860343933, |
| "learning_rate": 4.539414624150708e-05, |
| "loss": 0.0472, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.3883495145631068, |
| "grad_norm": 0.6846131682395935, |
| "learning_rate": 4.5314142080032696e-05, |
| "loss": 0.0335, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.3980582524271845, |
| "grad_norm": 0.4258232116699219, |
| "learning_rate": 4.5233520830104805e-05, |
| "loss": 0.0576, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.4077669902912622, |
| "grad_norm": 0.6838470101356506, |
| "learning_rate": 4.515228494077798e-05, |
| "loss": 0.0606, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.4174757281553398, |
| "grad_norm": 0.5958576798439026, |
| "learning_rate": 4.5070436879777865e-05, |
| "loss": 0.0444, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.4271844660194175, |
| "grad_norm": 0.48623862862586975, |
| "learning_rate": 4.4987979133426215e-05, |
| "loss": 0.0449, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.4368932038834952, |
| "grad_norm": 0.5873615741729736, |
| "learning_rate": 4.490491420656537e-05, |
| "loss": 0.0442, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.4466019417475728, |
| "grad_norm": 0.4631931781768799, |
| "learning_rate": 4.482124462248217e-05, |
| "loss": 0.0423, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.4563106796116505, |
| "grad_norm": 0.5900189280509949, |
| "learning_rate": 4.473697292283129e-05, |
| "loss": 0.0485, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4660194174757282, |
| "grad_norm": 0.36097192764282227, |
| "learning_rate": 4.465210166755803e-05, |
| "loss": 0.0364, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.4757281553398058, |
| "grad_norm": 0.6405830383300781, |
| "learning_rate": 4.456663343482059e-05, |
| "loss": 0.0416, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.4854368932038835, |
| "grad_norm": 0.5862817168235779, |
| "learning_rate": 4.44805708209117e-05, |
| "loss": 0.0475, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.4951456310679612, |
| "grad_norm": 0.7803816199302673, |
| "learning_rate": 4.4393916440179786e-05, |
| "loss": 0.0472, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.5048543689320388, |
| "grad_norm": 0.5544291138648987, |
| "learning_rate": 4.430667292494955e-05, |
| "loss": 0.0361, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.5145631067961165, |
| "grad_norm": 0.5777389407157898, |
| "learning_rate": 4.4218842925441966e-05, |
| "loss": 0.0439, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.5242718446601942, |
| "grad_norm": 0.6668662428855896, |
| "learning_rate": 4.413042910969385e-05, |
| "loss": 0.0312, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.5339805825242718, |
| "grad_norm": 0.8618906736373901, |
| "learning_rate": 4.404143416347675e-05, |
| "loss": 0.0481, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.5436893203883495, |
| "grad_norm": 0.5840690732002258, |
| "learning_rate": 4.395186079021537e-05, |
| "loss": 0.0485, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.5533980582524272, |
| "grad_norm": 0.3441118001937866, |
| "learning_rate": 4.386171171090547e-05, |
| "loss": 0.0401, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5631067961165048, |
| "grad_norm": 0.27396291494369507, |
| "learning_rate": 4.37709896640312e-05, |
| "loss": 0.0379, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.5728155339805825, |
| "grad_norm": 0.4788952171802521, |
| "learning_rate": 4.367969740548189e-05, |
| "loss": 0.0398, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.5825242718446602, |
| "grad_norm": 0.6418083906173706, |
| "learning_rate": 4.358783770846836e-05, |
| "loss": 0.0657, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.5922330097087378, |
| "grad_norm": 0.6678286790847778, |
| "learning_rate": 4.349541336343867e-05, |
| "loss": 0.0466, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.6019417475728155, |
| "grad_norm": 1.0702563524246216, |
| "learning_rate": 4.3402427177993366e-05, |
| "loss": 0.0452, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.6116504854368932, |
| "grad_norm": 0.6941045522689819, |
| "learning_rate": 4.3308881976800146e-05, |
| "loss": 0.0521, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.6213592233009708, |
| "grad_norm": 0.5572572350502014, |
| "learning_rate": 4.321478060150813e-05, |
| "loss": 0.0336, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.6310679611650487, |
| "grad_norm": 0.5476228594779968, |
| "learning_rate": 4.312012591066146e-05, |
| "loss": 0.0375, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.6407766990291264, |
| "grad_norm": 0.6187303066253662, |
| "learning_rate": 4.302492077961253e-05, |
| "loss": 0.041, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.650485436893204, |
| "grad_norm": 0.8198221921920776, |
| "learning_rate": 4.292916810043459e-05, |
| "loss": 0.0463, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6601941747572817, |
| "grad_norm": 0.6056732535362244, |
| "learning_rate": 4.283287078183392e-05, |
| "loss": 0.0494, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.6699029126213594, |
| "grad_norm": 0.6936320662498474, |
| "learning_rate": 4.273603174906149e-05, |
| "loss": 0.0413, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.679611650485437, |
| "grad_norm": 0.6517147421836853, |
| "learning_rate": 4.2638653943824026e-05, |
| "loss": 0.0462, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.6893203883495147, |
| "grad_norm": 0.5960354804992676, |
| "learning_rate": 4.254074032419474e-05, |
| "loss": 0.0403, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.6990291262135924, |
| "grad_norm": 0.5907045602798462, |
| "learning_rate": 4.244229386452342e-05, |
| "loss": 0.0378, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.70873786407767, |
| "grad_norm": 0.5899525284767151, |
| "learning_rate": 4.2343317555346084e-05, |
| "loss": 0.0457, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.7184466019417477, |
| "grad_norm": 0.4403168261051178, |
| "learning_rate": 4.2243814403294126e-05, |
| "loss": 0.0317, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.7281553398058254, |
| "grad_norm": 0.4933370351791382, |
| "learning_rate": 4.214378743100302e-05, |
| "loss": 0.0497, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.737864077669903, |
| "grad_norm": 0.6774694919586182, |
| "learning_rate": 4.204323967702045e-05, |
| "loss": 0.0383, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.7475728155339807, |
| "grad_norm": 0.5610571503639221, |
| "learning_rate": 4.1942174195714066e-05, |
| "loss": 0.0469, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.7572815533980584, |
| "grad_norm": 0.8438130617141724, |
| "learning_rate": 4.184059405717863e-05, |
| "loss": 0.0478, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.766990291262136, |
| "grad_norm": 0.8638678789138794, |
| "learning_rate": 4.173850234714282e-05, |
| "loss": 0.0489, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.7766990291262137, |
| "grad_norm": 0.8042917251586914, |
| "learning_rate": 4.1635902166875456e-05, |
| "loss": 0.0527, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.7864077669902914, |
| "grad_norm": 1.2712494134902954, |
| "learning_rate": 4.1532796633091296e-05, |
| "loss": 0.0512, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.796116504854369, |
| "grad_norm": 0.5284692645072937, |
| "learning_rate": 4.142918887785638e-05, |
| "loss": 0.061, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.8058252427184467, |
| "grad_norm": 0.6059673428535461, |
| "learning_rate": 4.1325082048492866e-05, |
| "loss": 0.0513, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.8155339805825244, |
| "grad_norm": 0.6848878860473633, |
| "learning_rate": 4.122047930748343e-05, |
| "loss": 0.0395, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.825242718446602, |
| "grad_norm": 0.8983065485954285, |
| "learning_rate": 4.1115383832375174e-05, |
| "loss": 0.0334, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.8349514563106797, |
| "grad_norm": 0.5672741532325745, |
| "learning_rate": 4.100979881568316e-05, |
| "loss": 0.0384, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.8446601941747574, |
| "grad_norm": 0.3233070969581604, |
| "learning_rate": 4.090372746479337e-05, |
| "loss": 0.0416, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.854368932038835, |
| "grad_norm": 0.6278699040412903, |
| "learning_rate": 4.0797173001865305e-05, |
| "loss": 0.0528, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.8640776699029127, |
| "grad_norm": 0.7250891923904419, |
| "learning_rate": 4.069013866373409e-05, |
| "loss": 0.0361, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.8737864077669903, |
| "grad_norm": 1.1977667808532715, |
| "learning_rate": 4.058262770181217e-05, |
| "loss": 0.0455, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.883495145631068, |
| "grad_norm": 0.7614680528640747, |
| "learning_rate": 4.0474643381990505e-05, |
| "loss": 0.0453, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.8932038834951457, |
| "grad_norm": 0.7502397298812866, |
| "learning_rate": 4.036618898453941e-05, |
| "loss": 0.0366, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.9029126213592233, |
| "grad_norm": 1.1674546003341675, |
| "learning_rate": 4.025726780400886e-05, |
| "loss": 0.0536, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.912621359223301, |
| "grad_norm": 0.6499914526939392, |
| "learning_rate": 4.0147883149128433e-05, |
| "loss": 0.0355, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.9223300970873787, |
| "grad_norm": 0.5907668471336365, |
| "learning_rate": 4.003803834270681e-05, |
| "loss": 0.0436, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.9320388349514563, |
| "grad_norm": 0.6319796442985535, |
| "learning_rate": 3.9927736721530805e-05, |
| "loss": 0.0356, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.941747572815534, |
| "grad_norm": 0.49555259943008423, |
| "learning_rate": 3.981698163626406e-05, |
| "loss": 0.0394, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9514563106796117, |
| "grad_norm": 0.8911224603652954, |
| "learning_rate": 3.970577645134519e-05, |
| "loss": 0.0593, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.9611650485436893, |
| "grad_norm": 0.5634008049964905, |
| "learning_rate": 3.9594124544885615e-05, |
| "loss": 0.0428, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.970873786407767, |
| "grad_norm": 0.4893592596054077, |
| "learning_rate": 3.948202930856697e-05, |
| "loss": 0.0314, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.9805825242718447, |
| "grad_norm": 0.3447858691215515, |
| "learning_rate": 3.936949414753803e-05, |
| "loss": 0.0402, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.9902912621359223, |
| "grad_norm": 0.6254267692565918, |
| "learning_rate": 3.925652248031127e-05, |
| "loss": 0.0453, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6977013945579529, |
| "learning_rate": 3.914311773865909e-05, |
| "loss": 0.0312, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.0097087378640777, |
| "grad_norm": 1.1433523893356323, |
| "learning_rate": 3.902928336750945e-05, |
| "loss": 0.0346, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.0194174757281553, |
| "grad_norm": 0.9643350839614868, |
| "learning_rate": 3.891502282484132e-05, |
| "loss": 0.0387, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.029126213592233, |
| "grad_norm": 0.4589916169643402, |
| "learning_rate": 3.8800339581579607e-05, |
| "loss": 0.0361, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.0388349514563107, |
| "grad_norm": 0.383705198764801, |
| "learning_rate": 3.868523712148971e-05, |
| "loss": 0.0324, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.0485436893203883, |
| "grad_norm": 0.4596363604068756, |
| "learning_rate": 3.8569718941071684e-05, |
| "loss": 0.0507, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.058252427184466, |
| "grad_norm": 0.7858505845069885, |
| "learning_rate": 3.845378854945406e-05, |
| "loss": 0.0401, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.0679611650485437, |
| "grad_norm": 0.5478680729866028, |
| "learning_rate": 3.83374494682872e-05, |
| "loss": 0.0428, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.0776699029126213, |
| "grad_norm": 0.4434937536716461, |
| "learning_rate": 3.822070523163636e-05, |
| "loss": 0.0239, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.087378640776699, |
| "grad_norm": 0.7560930848121643, |
| "learning_rate": 3.810355938587433e-05, |
| "loss": 0.0518, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.0970873786407767, |
| "grad_norm": 0.7263623476028442, |
| "learning_rate": 3.798601548957366e-05, |
| "loss": 0.0368, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.1067961165048543, |
| "grad_norm": 0.43963274359703064, |
| "learning_rate": 3.786807711339863e-05, |
| "loss": 0.0346, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.116504854368932, |
| "grad_norm": 0.5044754147529602, |
| "learning_rate": 3.774974783999672e-05, |
| "loss": 0.0426, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.1262135922330097, |
| "grad_norm": 0.5606735348701477, |
| "learning_rate": 3.763103126388984e-05, |
| "loss": 0.0386, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.1359223300970873, |
| "grad_norm": 0.40492603182792664, |
| "learning_rate": 3.751193099136505e-05, |
| "loss": 0.032, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.145631067961165, |
| "grad_norm": 0.5373347401618958, |
| "learning_rate": 3.739245064036511e-05, |
| "loss": 0.0402, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.1553398058252426, |
| "grad_norm": 0.5568259358406067, |
| "learning_rate": 3.727259384037852e-05, |
| "loss": 0.0355, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.1650485436893203, |
| "grad_norm": 0.6880197525024414, |
| "learning_rate": 3.715236423232928e-05, |
| "loss": 0.027, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.174757281553398, |
| "grad_norm": 0.6524577736854553, |
| "learning_rate": 3.703176546846627e-05, |
| "loss": 0.0332, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.1844660194174756, |
| "grad_norm": 0.6772493720054626, |
| "learning_rate": 3.6910801212252343e-05, |
| "loss": 0.0449, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.1941747572815533, |
| "grad_norm": 0.7550950050354004, |
| "learning_rate": 3.678947513825299e-05, |
| "loss": 0.0279, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.203883495145631, |
| "grad_norm": 0.5255644917488098, |
| "learning_rate": 3.666779093202479e-05, |
| "loss": 0.0286, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.2135922330097086, |
| "grad_norm": 0.48359280824661255, |
| "learning_rate": 3.654575229000334e-05, |
| "loss": 0.0295, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.2233009708737863, |
| "grad_norm": 0.6473977565765381, |
| "learning_rate": 3.642336291939109e-05, |
| "loss": 0.0498, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.233009708737864, |
| "grad_norm": 0.4669754207134247, |
| "learning_rate": 3.6300626538044646e-05, |
| "loss": 0.0231, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.2427184466019416, |
| "grad_norm": 0.49186766147613525, |
| "learning_rate": 3.6177546874361865e-05, |
| "loss": 0.0358, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.2524271844660193, |
| "grad_norm": 0.6987037658691406, |
| "learning_rate": 3.6054127667168596e-05, |
| "loss": 0.0308, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.262135922330097, |
| "grad_norm": 0.6978657841682434, |
| "learning_rate": 3.5930372665605064e-05, |
| "loss": 0.0422, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.2718446601941746, |
| "grad_norm": 0.7777918577194214, |
| "learning_rate": 3.580628562901206e-05, |
| "loss": 0.0405, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.2815533980582523, |
| "grad_norm": 0.47653138637542725, |
| "learning_rate": 3.568187032681667e-05, |
| "loss": 0.0386, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.29126213592233, |
| "grad_norm": 0.3695906102657318, |
| "learning_rate": 3.55571305384178e-05, |
| "loss": 0.041, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.3009708737864076, |
| "grad_norm": 0.6194424629211426, |
| "learning_rate": 3.543207005307138e-05, |
| "loss": 0.0357, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.3106796116504853, |
| "grad_norm": 0.826737642288208, |
| "learning_rate": 3.530669266977521e-05, |
| "loss": 0.043, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.320388349514563, |
| "grad_norm": 0.5638214349746704, |
| "learning_rate": 3.5181002197153627e-05, |
| "loss": 0.0419, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.3300970873786406, |
| "grad_norm": 0.7033189535140991, |
| "learning_rate": 3.505500245334175e-05, |
| "loss": 0.0403, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.3398058252427183, |
| "grad_norm": 0.596515417098999, |
| "learning_rate": 3.4928697265869515e-05, |
| "loss": 0.0349, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.349514563106796, |
| "grad_norm": 0.2086048573255539, |
| "learning_rate": 3.4802090471545426e-05, |
| "loss": 0.0465, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.3592233009708736, |
| "grad_norm": 0.8158115148544312, |
| "learning_rate": 3.467518591633995e-05, |
| "loss": 0.0409, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.3689320388349513, |
| "grad_norm": 0.771389901638031, |
| "learning_rate": 3.454798745526876e-05, |
| "loss": 0.0422, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.378640776699029, |
| "grad_norm": 0.5441147685050964, |
| "learning_rate": 3.4420498952275566e-05, |
| "loss": 0.0317, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.3883495145631066, |
| "grad_norm": 0.4860936999320984, |
| "learning_rate": 3.429272428011476e-05, |
| "loss": 0.0289, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.3980582524271843, |
| "grad_norm": 0.39900827407836914, |
| "learning_rate": 3.41646673202338e-05, |
| "loss": 0.0364, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.407766990291262, |
| "grad_norm": 0.8306066393852234, |
| "learning_rate": 3.403633196265525e-05, |
| "loss": 0.0422, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.4174757281553396, |
| "grad_norm": 0.46274441480636597, |
| "learning_rate": 3.390772210585866e-05, |
| "loss": 0.0278, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.4271844660194173, |
| "grad_norm": 0.31475892663002014, |
| "learning_rate": 3.377884165666212e-05, |
| "loss": 0.0321, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.436893203883495, |
| "grad_norm": 0.46006783843040466, |
| "learning_rate": 3.3649694530103563e-05, |
| "loss": 0.0268, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.4466019417475726, |
| "grad_norm": 0.696368396282196, |
| "learning_rate": 3.352028464932188e-05, |
| "loss": 0.0332, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.4563106796116507, |
| "grad_norm": 0.5227698087692261, |
| "learning_rate": 3.33906159454377e-05, |
| "loss": 0.0267, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.466019417475728, |
| "grad_norm": 0.44031020998954773, |
| "learning_rate": 3.3260692357434e-05, |
| "loss": 0.0306, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.475728155339806, |
| "grad_norm": 0.2782779335975647, |
| "learning_rate": 3.313051783203648e-05, |
| "loss": 0.0399, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.4854368932038833, |
| "grad_norm": 0.5851346850395203, |
| "learning_rate": 3.300009632359357e-05, |
| "loss": 0.0334, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.4951456310679614, |
| "grad_norm": 0.6514892578125, |
| "learning_rate": 3.2869431793956425e-05, |
| "loss": 0.0308, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.5048543689320386, |
| "grad_norm": 0.7347185015678406, |
| "learning_rate": 3.2738528212358514e-05, |
| "loss": 0.0344, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.5145631067961167, |
| "grad_norm": 0.7562367916107178, |
| "learning_rate": 3.260738955529504e-05, |
| "loss": 0.0248, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.524271844660194, |
| "grad_norm": 0.6510912775993347, |
| "learning_rate": 3.247601980640217e-05, |
| "loss": 0.0425, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.533980582524272, |
| "grad_norm": 0.47440388798713684, |
| "learning_rate": 3.234442295633602e-05, |
| "loss": 0.0344, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.5436893203883493, |
| "grad_norm": 0.3861897885799408, |
| "learning_rate": 3.22126030026514e-05, |
| "loss": 0.02, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.5533980582524274, |
| "grad_norm": 1.0448626279830933, |
| "learning_rate": 3.208056394968043e-05, |
| "loss": 0.0339, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.5631067961165046, |
| "grad_norm": 0.5638805031776428, |
| "learning_rate": 3.1948309808410866e-05, |
| "loss": 0.0403, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.5728155339805827, |
| "grad_norm": 0.3574475347995758, |
| "learning_rate": 3.181584459636423e-05, |
| "loss": 0.042, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.58252427184466, |
| "grad_norm": 0.509300172328949, |
| "learning_rate": 3.168317233747384e-05, |
| "loss": 0.0448, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.592233009708738, |
| "grad_norm": 0.3442770838737488, |
| "learning_rate": 3.155029706196253e-05, |
| "loss": 0.0432, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.6019417475728153, |
| "grad_norm": 0.7308867573738098, |
| "learning_rate": 3.141722280622021e-05, |
| "loss": 0.0427, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.6116504854368934, |
| "grad_norm": 0.4898527264595032, |
| "learning_rate": 3.128395361268126e-05, |
| "loss": 0.0392, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.6213592233009706, |
| "grad_norm": 0.7416666746139526, |
| "learning_rate": 3.115049352970177e-05, |
| "loss": 0.0299, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.6310679611650487, |
| "grad_norm": 0.7394320964813232, |
| "learning_rate": 3.101684661143653e-05, |
| "loss": 0.0309, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.6407766990291264, |
| "grad_norm": 0.34724247455596924, |
| "learning_rate": 3.088301691771585e-05, |
| "loss": 0.045, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.650485436893204, |
| "grad_norm": 0.6371554136276245, |
| "learning_rate": 3.074900851392228e-05, |
| "loss": 0.0331, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.6601941747572817, |
| "grad_norm": 0.5572023391723633, |
| "learning_rate": 3.061482547086712e-05, |
| "loss": 0.0236, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.6699029126213594, |
| "grad_norm": 0.42055222392082214, |
| "learning_rate": 3.0480471864666687e-05, |
| "loss": 0.0365, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.679611650485437, |
| "grad_norm": 0.5603195428848267, |
| "learning_rate": 3.0345951776618587e-05, |
| "loss": 0.0292, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.6893203883495147, |
| "grad_norm": 0.4651443064212799, |
| "learning_rate": 3.021126929307766e-05, |
| "loss": 0.0324, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.6990291262135924, |
| "grad_norm": 0.6232860088348389, |
| "learning_rate": 3.007642850533191e-05, |
| "loss": 0.0408, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.70873786407767, |
| "grad_norm": 0.5879865288734436, |
| "learning_rate": 2.9941433509478156e-05, |
| "loss": 0.0369, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.7184466019417477, |
| "grad_norm": 0.4446578919887543, |
| "learning_rate": 2.9806288406297676e-05, |
| "loss": 0.0265, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.7281553398058254, |
| "grad_norm": 0.6267426609992981, |
| "learning_rate": 2.9670997301131586e-05, |
| "loss": 0.0566, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.737864077669903, |
| "grad_norm": 0.7768962979316711, |
| "learning_rate": 2.9535564303756142e-05, |
| "loss": 0.0355, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.7475728155339807, |
| "grad_norm": 0.40637749433517456, |
| "learning_rate": 2.9399993528257902e-05, |
| "loss": 0.0372, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.7572815533980584, |
| "grad_norm": 0.6524062752723694, |
| "learning_rate": 2.9264289092908757e-05, |
| "loss": 0.0421, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.766990291262136, |
| "grad_norm": 0.3888689875602722, |
| "learning_rate": 2.912845512004081e-05, |
| "loss": 0.0372, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.7766990291262137, |
| "grad_norm": 0.49758753180503845, |
| "learning_rate": 2.8992495735921165e-05, |
| "loss": 0.0374, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.7864077669902914, |
| "grad_norm": 0.4377051591873169, |
| "learning_rate": 2.8856415070626584e-05, |
| "loss": 0.0333, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.796116504854369, |
| "grad_norm": 0.4847152531147003, |
| "learning_rate": 2.8720217257918032e-05, |
| "loss": 0.0313, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.8058252427184467, |
| "grad_norm": 0.5500916838645935, |
| "learning_rate": 2.8583906435115047e-05, |
| "loss": 0.0368, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.8155339805825244, |
| "grad_norm": 0.4447888731956482, |
| "learning_rate": 2.8447486742970176e-05, |
| "loss": 0.0474, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.825242718446602, |
| "grad_norm": 0.6298380494117737, |
| "learning_rate": 2.8310962325543066e-05, |
| "loss": 0.0238, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.8349514563106797, |
| "grad_norm": 0.3452640175819397, |
| "learning_rate": 2.817433733007466e-05, |
| "loss": 0.0322, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.8446601941747574, |
| "grad_norm": 0.8238608837127686, |
| "learning_rate": 2.803761590686117e-05, |
| "loss": 0.0265, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.854368932038835, |
| "grad_norm": 0.6997093558311462, |
| "learning_rate": 2.7900802209128058e-05, |
| "loss": 0.0292, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.8640776699029127, |
| "grad_norm": 0.549586832523346, |
| "learning_rate": 2.776390039290378e-05, |
| "loss": 0.0281, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.8737864077669903, |
| "grad_norm": 0.35238906741142273, |
| "learning_rate": 2.762691461689364e-05, |
| "loss": 0.0352, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.883495145631068, |
| "grad_norm": 0.6192727088928223, |
| "learning_rate": 2.74898490423534e-05, |
| "loss": 0.0287, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.8932038834951457, |
| "grad_norm": 0.41445961594581604, |
| "learning_rate": 2.7352707832962865e-05, |
| "loss": 0.0318, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.9029126213592233, |
| "grad_norm": 0.41043612360954285, |
| "learning_rate": 2.7215495154699435e-05, |
| "loss": 0.0303, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.912621359223301, |
| "grad_norm": 0.4324229061603546, |
| "learning_rate": 2.7078215175711546e-05, |
| "loss": 0.0363, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.9223300970873787, |
| "grad_norm": 0.42089489102363586, |
| "learning_rate": 2.6940872066192052e-05, |
| "loss": 0.0224, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.9320388349514563, |
| "grad_norm": 0.5276926755905151, |
| "learning_rate": 2.6803469998251514e-05, |
| "loss": 0.0272, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.941747572815534, |
| "grad_norm": 0.29638004302978516, |
| "learning_rate": 2.6666013145791508e-05, |
| "loss": 0.0297, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.9514563106796117, |
| "grad_norm": 0.36561524868011475, |
| "learning_rate": 2.652850568437783e-05, |
| "loss": 0.0347, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.9611650485436893, |
| "grad_norm": 0.5376370549201965, |
| "learning_rate": 2.6390951791113605e-05, |
| "loss": 0.032, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.970873786407767, |
| "grad_norm": 0.31827905774116516, |
| "learning_rate": 2.625335564451245e-05, |
| "loss": 0.0259, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.9805825242718447, |
| "grad_norm": 0.46682074666023254, |
| "learning_rate": 2.6115721424371532e-05, |
| "loss": 0.0184, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.9902912621359223, |
| "grad_norm": 0.603458046913147, |
| "learning_rate": 2.5978053311644573e-05, |
| "loss": 0.04, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.9130224585533142, |
| "learning_rate": 2.584035548831486e-05, |
| "loss": 0.0422, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.0097087378640777, |
| "grad_norm": 0.5185207724571228, |
| "learning_rate": 2.5702632137268223e-05, |
| "loss": 0.0276, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.0194174757281553, |
| "grad_norm": 0.3327677845954895, |
| "learning_rate": 2.556488744216594e-05, |
| "loss": 0.0227, |
| "step": 3110 |
| }, |
| { |
| "epoch": 3.029126213592233, |
| "grad_norm": 0.3861842155456543, |
| "learning_rate": 2.5427125587317664e-05, |
| "loss": 0.0225, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.0388349514563107, |
| "grad_norm": 0.515380322933197, |
| "learning_rate": 2.528935075755432e-05, |
| "loss": 0.0263, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.0485436893203883, |
| "grad_norm": 0.5063336491584778, |
| "learning_rate": 2.5151567138100985e-05, |
| "loss": 0.0299, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.058252427184466, |
| "grad_norm": 0.334063321352005, |
| "learning_rate": 2.5013778914449713e-05, |
| "loss": 0.0285, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.0679611650485437, |
| "grad_norm": 0.5150654315948486, |
| "learning_rate": 2.487599027223246e-05, |
| "loss": 0.0284, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.0776699029126213, |
| "grad_norm": 0.5191290974617004, |
| "learning_rate": 2.4738205397093864e-05, |
| "loss": 0.0211, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.087378640776699, |
| "grad_norm": 0.3468325138092041, |
| "learning_rate": 2.460042847456414e-05, |
| "loss": 0.0376, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.0970873786407767, |
| "grad_norm": 0.8282861113548279, |
| "learning_rate": 2.4462663689931935e-05, |
| "loss": 0.0343, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.1067961165048543, |
| "grad_norm": 0.2505621612071991, |
| "learning_rate": 2.432491522811717e-05, |
| "loss": 0.0231, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.116504854368932, |
| "grad_norm": 0.5581758618354797, |
| "learning_rate": 2.418718727354392e-05, |
| "loss": 0.0237, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.1262135922330097, |
| "grad_norm": 0.6428472995758057, |
| "learning_rate": 2.404948401001331e-05, |
| "loss": 0.0416, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.1359223300970873, |
| "grad_norm": 0.465550035238266, |
| "learning_rate": 2.3911809620576427e-05, |
| "loss": 0.0364, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.145631067961165, |
| "grad_norm": 0.27850693464279175, |
| "learning_rate": 2.3774168287407226e-05, |
| "loss": 0.024, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.1553398058252426, |
| "grad_norm": 0.22807244956493378, |
| "learning_rate": 2.3636564191675507e-05, |
| "loss": 0.0331, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.1650485436893203, |
| "grad_norm": 0.7809758186340332, |
| "learning_rate": 2.3499001513419906e-05, |
| "loss": 0.0317, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.174757281553398, |
| "grad_norm": 0.578892707824707, |
| "learning_rate": 2.3361484431420903e-05, |
| "loss": 0.0282, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.1844660194174756, |
| "grad_norm": 0.5307206511497498, |
| "learning_rate": 2.3224017123073877e-05, |
| "loss": 0.0192, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.1941747572815533, |
| "grad_norm": 0.47398531436920166, |
| "learning_rate": 2.3086603764262238e-05, |
| "loss": 0.0241, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.203883495145631, |
| "grad_norm": 0.3220309913158417, |
| "learning_rate": 2.294924852923055e-05, |
| "loss": 0.0233, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.2135922330097086, |
| "grad_norm": 0.5301534533500671, |
| "learning_rate": 2.281195559045772e-05, |
| "loss": 0.0275, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.2233009708737863, |
| "grad_norm": 0.5139557719230652, |
| "learning_rate": 2.267472911853028e-05, |
| "loss": 0.0367, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.233009708737864, |
| "grad_norm": 0.4234354794025421, |
| "learning_rate": 2.2537573282015685e-05, |
| "loss": 0.0273, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.2427184466019416, |
| "grad_norm": 0.7240735292434692, |
| "learning_rate": 2.240049224733566e-05, |
| "loss": 0.0262, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.2524271844660193, |
| "grad_norm": 0.5383247137069702, |
| "learning_rate": 2.2263490178639646e-05, |
| "loss": 0.0281, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.262135922330097, |
| "grad_norm": 0.7138472199440002, |
| "learning_rate": 2.212657123767834e-05, |
| "loss": 0.0302, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.2718446601941746, |
| "grad_norm": 0.22765517234802246, |
| "learning_rate": 2.1989739583677238e-05, |
| "loss": 0.0226, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.2815533980582523, |
| "grad_norm": 0.3961436450481415, |
| "learning_rate": 2.1852999373210275e-05, |
| "loss": 0.0445, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.29126213592233, |
| "grad_norm": 0.507087230682373, |
| "learning_rate": 2.1716354760073602e-05, |
| "loss": 0.0214, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.3009708737864076, |
| "grad_norm": 0.27097827196121216, |
| "learning_rate": 2.1579809895159375e-05, |
| "loss": 0.0306, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.3106796116504853, |
| "grad_norm": 0.7758641839027405, |
| "learning_rate": 2.1443368926329662e-05, |
| "loss": 0.0298, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.320388349514563, |
| "grad_norm": 0.578881025314331, |
| "learning_rate": 2.1307035998290463e-05, |
| "loss": 0.0224, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.3300970873786406, |
| "grad_norm": 0.4881564676761627, |
| "learning_rate": 2.117081525246579e-05, |
| "loss": 0.0258, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.3398058252427183, |
| "grad_norm": 0.3650118410587311, |
| "learning_rate": 2.1034710826871853e-05, |
| "loss": 0.0338, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.349514563106796, |
| "grad_norm": 0.6280027627944946, |
| "learning_rate": 2.089872685599138e-05, |
| "loss": 0.0273, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.3592233009708736, |
| "grad_norm": 0.4090375602245331, |
| "learning_rate": 2.0762867470648013e-05, |
| "loss": 0.0184, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.3689320388349513, |
| "grad_norm": 0.2901502847671509, |
| "learning_rate": 2.062713679788081e-05, |
| "loss": 0.0345, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.378640776699029, |
| "grad_norm": 0.4359225630760193, |
| "learning_rate": 2.0491538960818907e-05, |
| "loss": 0.0288, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.3883495145631066, |
| "grad_norm": 0.36795324087142944, |
| "learning_rate": 2.035607807855625e-05, |
| "loss": 0.0269, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.3980582524271843, |
| "grad_norm": 0.24833780527114868, |
| "learning_rate": 2.022075826602646e-05, |
| "loss": 0.0186, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.407766990291262, |
| "grad_norm": 0.7320423126220703, |
| "learning_rate": 2.0085583633877848e-05, |
| "loss": 0.0294, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.4174757281553396, |
| "grad_norm": 0.6890563368797302, |
| "learning_rate": 1.9950558288348542e-05, |
| "loss": 0.0245, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.4271844660194173, |
| "grad_norm": 0.487366259098053, |
| "learning_rate": 1.9815686331141746e-05, |
| "loss": 0.0279, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.436893203883495, |
| "grad_norm": 0.2819335162639618, |
| "learning_rate": 1.968097185930115e-05, |
| "loss": 0.0198, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.4466019417475726, |
| "grad_norm": 0.582991361618042, |
| "learning_rate": 1.9546418965086442e-05, |
| "loss": 0.0296, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.4563106796116507, |
| "grad_norm": 0.45356523990631104, |
| "learning_rate": 1.9412031735849053e-05, |
| "loss": 0.019, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.466019417475728, |
| "grad_norm": 0.3442479372024536, |
| "learning_rate": 1.9277814253907943e-05, |
| "loss": 0.0307, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.475728155339806, |
| "grad_norm": 0.6031113266944885, |
| "learning_rate": 1.9143770596425615e-05, |
| "loss": 0.0359, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.4854368932038833, |
| "grad_norm": 0.44703733921051025, |
| "learning_rate": 1.9009904835284255e-05, |
| "loss": 0.0335, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.4951456310679614, |
| "grad_norm": 0.37397444248199463, |
| "learning_rate": 1.887622103696205e-05, |
| "loss": 0.0253, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.5048543689320386, |
| "grad_norm": 0.4148513674736023, |
| "learning_rate": 1.8742723262409634e-05, |
| "loss": 0.0212, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.5145631067961167, |
| "grad_norm": 0.360975980758667, |
| "learning_rate": 1.8609415566926764e-05, |
| "loss": 0.0257, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.524271844660194, |
| "grad_norm": 0.7373941540718079, |
| "learning_rate": 1.8476302000039096e-05, |
| "loss": 0.0153, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.533980582524272, |
| "grad_norm": 0.3859127163887024, |
| "learning_rate": 1.8343386605375192e-05, |
| "loss": 0.0248, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.5436893203883493, |
| "grad_norm": 0.6052334904670715, |
| "learning_rate": 1.8210673420543684e-05, |
| "loss": 0.0232, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.5533980582524274, |
| "grad_norm": 0.5397413372993469, |
| "learning_rate": 1.8078166477010615e-05, |
| "loss": 0.0288, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.5631067961165046, |
| "grad_norm": 0.7300609946250916, |
| "learning_rate": 1.7945869799976973e-05, |
| "loss": 0.0299, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.5728155339805827, |
| "grad_norm": 0.45010340213775635, |
| "learning_rate": 1.7813787408256423e-05, |
| "loss": 0.0216, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.58252427184466, |
| "grad_norm": 0.6106627583503723, |
| "learning_rate": 1.768192331415324e-05, |
| "loss": 0.0203, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.592233009708738, |
| "grad_norm": 0.2628588080406189, |
| "learning_rate": 1.7550281523340382e-05, |
| "loss": 0.0246, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.6019417475728153, |
| "grad_norm": 0.5940352082252502, |
| "learning_rate": 1.741886603473786e-05, |
| "loss": 0.0235, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.6116504854368934, |
| "grad_norm": 0.3991672396659851, |
| "learning_rate": 1.7287680840391236e-05, |
| "loss": 0.031, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.6213592233009706, |
| "grad_norm": 0.2776699662208557, |
| "learning_rate": 1.7156729925350336e-05, |
| "loss": 0.0209, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.6310679611650487, |
| "grad_norm": 0.39451634883880615, |
| "learning_rate": 1.702601726754825e-05, |
| "loss": 0.0167, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.6407766990291264, |
| "grad_norm": 0.683345377445221, |
| "learning_rate": 1.6895546837680443e-05, |
| "loss": 0.0267, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.650485436893204, |
| "grad_norm": 0.29565560817718506, |
| "learning_rate": 1.6765322599084147e-05, |
| "loss": 0.0239, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.6601941747572817, |
| "grad_norm": 0.7811886668205261, |
| "learning_rate": 1.6635348507617976e-05, |
| "loss": 0.0294, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.6699029126213594, |
| "grad_norm": 0.4642491936683655, |
| "learning_rate": 1.6505628511541764e-05, |
| "loss": 0.0253, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.679611650485437, |
| "grad_norm": 0.3479717969894409, |
| "learning_rate": 1.6376166551396607e-05, |
| "loss": 0.023, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.6893203883495147, |
| "grad_norm": 0.4311876893043518, |
| "learning_rate": 1.6246966559885186e-05, |
| "loss": 0.0379, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.6990291262135924, |
| "grad_norm": 0.39482253789901733, |
| "learning_rate": 1.6118032461752254e-05, |
| "loss": 0.0177, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.70873786407767, |
| "grad_norm": 1.0531320571899414, |
| "learning_rate": 1.598936817366548e-05, |
| "loss": 0.0249, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.7184466019417477, |
| "grad_norm": 0.3932639956474304, |
| "learning_rate": 1.5860977604096424e-05, |
| "loss": 0.0175, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.7281553398058254, |
| "grad_norm": 0.37272295355796814, |
| "learning_rate": 1.573286465320181e-05, |
| "loss": 0.0186, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.737864077669903, |
| "grad_norm": 0.7553488612174988, |
| "learning_rate": 1.560503321270507e-05, |
| "loss": 0.0374, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.7475728155339807, |
| "grad_norm": 0.8713415265083313, |
| "learning_rate": 1.547748716577813e-05, |
| "loss": 0.0215, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.7572815533980584, |
| "grad_norm": 0.6009941697120667, |
| "learning_rate": 1.535023038692341e-05, |
| "loss": 0.0233, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.766990291262136, |
| "grad_norm": 0.24493688344955444, |
| "learning_rate": 1.5223266741856152e-05, |
| "loss": 0.0174, |
| "step": 3880 |
| }, |
| { |
| "epoch": 3.7766990291262137, |
| "grad_norm": 0.44789615273475647, |
| "learning_rate": 1.5096600087387019e-05, |
| "loss": 0.023, |
| "step": 3890 |
| }, |
| { |
| "epoch": 3.7864077669902914, |
| "grad_norm": 0.237369105219841, |
| "learning_rate": 1.497023427130487e-05, |
| "loss": 0.0186, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.796116504854369, |
| "grad_norm": 0.360939621925354, |
| "learning_rate": 1.4844173132259933e-05, |
| "loss": 0.0212, |
| "step": 3910 |
| }, |
| { |
| "epoch": 3.8058252427184467, |
| "grad_norm": 0.5565642714500427, |
| "learning_rate": 1.471842049964718e-05, |
| "loss": 0.0282, |
| "step": 3920 |
| }, |
| { |
| "epoch": 3.8155339805825244, |
| "grad_norm": 0.23310214281082153, |
| "learning_rate": 1.4592980193489975e-05, |
| "loss": 0.019, |
| "step": 3930 |
| }, |
| { |
| "epoch": 3.825242718446602, |
| "grad_norm": 0.5143676400184631, |
| "learning_rate": 1.4467856024324056e-05, |
| "loss": 0.0248, |
| "step": 3940 |
| }, |
| { |
| "epoch": 3.8349514563106797, |
| "grad_norm": 0.5294964909553528, |
| "learning_rate": 1.4343051793081813e-05, |
| "loss": 0.0196, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.8446601941747574, |
| "grad_norm": 0.40681859850883484, |
| "learning_rate": 1.421857129097673e-05, |
| "loss": 0.0327, |
| "step": 3960 |
| }, |
| { |
| "epoch": 3.854368932038835, |
| "grad_norm": 0.7441811561584473, |
| "learning_rate": 1.4094418299388331e-05, |
| "loss": 0.0209, |
| "step": 3970 |
| }, |
| { |
| "epoch": 3.8640776699029127, |
| "grad_norm": 0.25539830327033997, |
| "learning_rate": 1.3970596589747242e-05, |
| "loss": 0.0239, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.8737864077669903, |
| "grad_norm": 0.3928983807563782, |
| "learning_rate": 1.3847109923420631e-05, |
| "loss": 0.0248, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.883495145631068, |
| "grad_norm": 0.6516932845115662, |
| "learning_rate": 1.3723962051597988e-05, |
| "loss": 0.0205, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.8932038834951457, |
| "grad_norm": 0.33396992087364197, |
| "learning_rate": 1.3601156715177127e-05, |
| "loss": 0.0154, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.9029126213592233, |
| "grad_norm": 0.22356855869293213, |
| "learning_rate": 1.3478697644650556e-05, |
| "loss": 0.0192, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.912621359223301, |
| "grad_norm": 0.44459477066993713, |
| "learning_rate": 1.33565885599922e-05, |
| "loss": 0.0254, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.9223300970873787, |
| "grad_norm": 0.3573704957962036, |
| "learning_rate": 1.3234833170544339e-05, |
| "loss": 0.0161, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.9320388349514563, |
| "grad_norm": 0.5939960479736328, |
| "learning_rate": 1.3113435174904942e-05, |
| "loss": 0.0264, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.941747572815534, |
| "grad_norm": 0.5546061396598816, |
| "learning_rate": 1.2992398260815369e-05, |
| "loss": 0.0173, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.9514563106796117, |
| "grad_norm": 0.3262450098991394, |
| "learning_rate": 1.2871726105048266e-05, |
| "loss": 0.0166, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.9611650485436893, |
| "grad_norm": 0.33466869592666626, |
| "learning_rate": 1.2751422373295902e-05, |
| "loss": 0.0217, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.970873786407767, |
| "grad_norm": 0.3955913782119751, |
| "learning_rate": 1.2631490720058875e-05, |
| "loss": 0.0225, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.9805825242718447, |
| "grad_norm": 0.41335901618003845, |
| "learning_rate": 1.2511934788535006e-05, |
| "loss": 0.0176, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.9902912621359223, |
| "grad_norm": 0.29506537318229675, |
| "learning_rate": 1.2392758210508715e-05, |
| "loss": 0.02, |
| "step": 4110 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.4939889907836914, |
| "learning_rate": 1.2273964606240718e-05, |
| "loss": 0.0311, |
| "step": 4120 |
| }, |
| { |
| "epoch": 4.009708737864078, |
| "grad_norm": 0.3422580361366272, |
| "learning_rate": 1.2155557584358007e-05, |
| "loss": 0.0179, |
| "step": 4130 |
| }, |
| { |
| "epoch": 4.019417475728155, |
| "grad_norm": 0.3916333019733429, |
| "learning_rate": 1.2037540741744247e-05, |
| "loss": 0.0193, |
| "step": 4140 |
| }, |
| { |
| "epoch": 4.029126213592233, |
| "grad_norm": 0.4686119556427002, |
| "learning_rate": 1.1919917663430552e-05, |
| "loss": 0.0262, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.038834951456311, |
| "grad_norm": 0.37790584564208984, |
| "learning_rate": 1.1802691922486483e-05, |
| "loss": 0.0175, |
| "step": 4160 |
| }, |
| { |
| "epoch": 4.048543689320389, |
| "grad_norm": 0.35493457317352295, |
| "learning_rate": 1.1685867079911642e-05, |
| "loss": 0.0188, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.058252427184466, |
| "grad_norm": 0.48948442935943604, |
| "learning_rate": 1.1569446684527383e-05, |
| "loss": 0.0156, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.067961165048544, |
| "grad_norm": 0.5336034893989563, |
| "learning_rate": 1.1453434272869051e-05, |
| "loss": 0.0166, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.077669902912621, |
| "grad_norm": 0.23183093965053558, |
| "learning_rate": 1.1337833369078593e-05, |
| "loss": 0.018, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.087378640776699, |
| "grad_norm": 0.1705956757068634, |
| "learning_rate": 1.1222647484797422e-05, |
| "loss": 0.0183, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.097087378640777, |
| "grad_norm": 0.38924074172973633, |
| "learning_rate": 1.11078801190598e-05, |
| "loss": 0.0247, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.106796116504855, |
| "grad_norm": 0.7169899344444275, |
| "learning_rate": 1.0993534758186549e-05, |
| "loss": 0.0421, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.116504854368932, |
| "grad_norm": 0.5987399220466614, |
| "learning_rate": 1.0879614875679109e-05, |
| "loss": 0.0272, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.12621359223301, |
| "grad_norm": 0.19637225568294525, |
| "learning_rate": 1.0766123932114042e-05, |
| "loss": 0.0255, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.135922330097087, |
| "grad_norm": 0.3982377052307129, |
| "learning_rate": 1.0653065375037933e-05, |
| "loss": 0.0243, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.145631067961165, |
| "grad_norm": 0.2953176200389862, |
| "learning_rate": 1.0540442638862618e-05, |
| "loss": 0.0232, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.155339805825243, |
| "grad_norm": 0.41485655307769775, |
| "learning_rate": 1.0428259144760874e-05, |
| "loss": 0.0187, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.165048543689321, |
| "grad_norm": 0.3506160080432892, |
| "learning_rate": 1.031651830056253e-05, |
| "loss": 0.0181, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.174757281553398, |
| "grad_norm": 0.13416297733783722, |
| "learning_rate": 1.0205223500650876e-05, |
| "loss": 0.0121, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.184466019417476, |
| "grad_norm": 0.32197481393814087, |
| "learning_rate": 1.0094378125859602e-05, |
| "loss": 0.0186, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.194174757281553, |
| "grad_norm": 0.4894006848335266, |
| "learning_rate": 9.983985543370098e-06, |
| "loss": 0.0323, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.203883495145631, |
| "grad_norm": 0.48915204405784607, |
| "learning_rate": 9.874049106609135e-06, |
| "loss": 0.0187, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.213592233009709, |
| "grad_norm": 0.2629840075969696, |
| "learning_rate": 9.764572155147011e-06, |
| "loss": 0.0105, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.223300970873787, |
| "grad_norm": 0.27265119552612305, |
| "learning_rate": 9.655558014596139e-06, |
| "loss": 0.0102, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.233009708737864, |
| "grad_norm": 0.3926210105419159, |
| "learning_rate": 9.547009996509964e-06, |
| "loss": 0.0136, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.242718446601942, |
| "grad_norm": 0.33687448501586914, |
| "learning_rate": 9.43893139828241e-06, |
| "loss": 0.0224, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.252427184466019, |
| "grad_norm": 0.24977613985538483, |
| "learning_rate": 9.331325503047694e-06, |
| "loss": 0.0222, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.262135922330097, |
| "grad_norm": 0.35725629329681396, |
| "learning_rate": 9.224195579580602e-06, |
| "loss": 0.0133, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.271844660194175, |
| "grad_norm": 0.3017430901527405, |
| "learning_rate": 9.117544882197204e-06, |
| "loss": 0.026, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.281553398058253, |
| "grad_norm": 0.5395331978797913, |
| "learning_rate": 9.011376650655967e-06, |
| "loss": 0.02, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.29126213592233, |
| "grad_norm": 0.3497810661792755, |
| "learning_rate": 8.905694110059353e-06, |
| "loss": 0.0258, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.300970873786408, |
| "grad_norm": 0.3075038492679596, |
| "learning_rate": 8.800500470755879e-06, |
| "loss": 0.0219, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.310679611650485, |
| "grad_norm": 0.3044409453868866, |
| "learning_rate": 8.695798928242541e-06, |
| "loss": 0.0218, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.320388349514563, |
| "grad_norm": 0.4984518885612488, |
| "learning_rate": 8.591592663067771e-06, |
| "loss": 0.022, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.330097087378641, |
| "grad_norm": 0.24142467975616455, |
| "learning_rate": 8.487884840734852e-06, |
| "loss": 0.0171, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.339805825242719, |
| "grad_norm": 0.2782730460166931, |
| "learning_rate": 8.384678611605695e-06, |
| "loss": 0.0212, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.349514563106796, |
| "grad_norm": 0.3198665976524353, |
| "learning_rate": 8.281977110805177e-06, |
| "loss": 0.019, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.359223300970874, |
| "grad_norm": 0.38634228706359863, |
| "learning_rate": 8.179783458125922e-06, |
| "loss": 0.0218, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.368932038834951, |
| "grad_norm": 0.4669983983039856, |
| "learning_rate": 8.078100757933485e-06, |
| "loss": 0.0154, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.378640776699029, |
| "grad_norm": 0.24510270357131958, |
| "learning_rate": 7.976932099072068e-06, |
| "loss": 0.0352, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.388349514563107, |
| "grad_norm": 0.543775200843811, |
| "learning_rate": 7.876280554770724e-06, |
| "loss": 0.0209, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.398058252427185, |
| "grad_norm": 0.2873745560646057, |
| "learning_rate": 7.776149182549941e-06, |
| "loss": 0.0181, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.407766990291262, |
| "grad_norm": 0.5087159872055054, |
| "learning_rate": 7.676541024128798e-06, |
| "loss": 0.0298, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.41747572815534, |
| "grad_norm": 0.207069993019104, |
| "learning_rate": 7.577459105332574e-06, |
| "loss": 0.028, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.427184466019417, |
| "grad_norm": 0.5228657722473145, |
| "learning_rate": 7.478906436000807e-06, |
| "loss": 0.0172, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.436893203883495, |
| "grad_norm": 0.3885524570941925, |
| "learning_rate": 7.380886009895874e-06, |
| "loss": 0.0243, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.446601941747573, |
| "grad_norm": 0.7750719785690308, |
| "learning_rate": 7.283400804612048e-06, |
| "loss": 0.0193, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.456310679611651, |
| "grad_norm": 0.37235888838768005, |
| "learning_rate": 7.1864537814850395e-06, |
| "loss": 0.0184, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.466019417475728, |
| "grad_norm": 0.32343965768814087, |
| "learning_rate": 7.090047885502077e-06, |
| "loss": 0.0207, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.475728155339806, |
| "grad_norm": 0.42449483275413513, |
| "learning_rate": 6.994186045212387e-06, |
| "loss": 0.0194, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.485436893203883, |
| "grad_norm": 0.5391475558280945, |
| "learning_rate": 6.898871172638261e-06, |
| "loss": 0.0159, |
| "step": 4620 |
| }, |
| { |
| "epoch": 4.495145631067961, |
| "grad_norm": 0.4376124143600464, |
| "learning_rate": 6.8041061631866245e-06, |
| "loss": 0.0201, |
| "step": 4630 |
| }, |
| { |
| "epoch": 4.504854368932039, |
| "grad_norm": 0.724936306476593, |
| "learning_rate": 6.709893895561031e-06, |
| "loss": 0.0288, |
| "step": 4640 |
| }, |
| { |
| "epoch": 4.514563106796117, |
| "grad_norm": 0.22697369754314423, |
| "learning_rate": 6.616237231674235e-06, |
| "loss": 0.0125, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.524271844660194, |
| "grad_norm": 0.5198168754577637, |
| "learning_rate": 6.5231390165612884e-06, |
| "loss": 0.0224, |
| "step": 4660 |
| }, |
| { |
| "epoch": 4.533980582524272, |
| "grad_norm": 0.83481764793396, |
| "learning_rate": 6.430602078293055e-06, |
| "loss": 0.018, |
| "step": 4670 |
| }, |
| { |
| "epoch": 4.543689320388349, |
| "grad_norm": 0.3727293312549591, |
| "learning_rate": 6.338629227890342e-06, |
| "loss": 0.02, |
| "step": 4680 |
| }, |
| { |
| "epoch": 4.553398058252427, |
| "grad_norm": 0.5401569604873657, |
| "learning_rate": 6.247223259238511e-06, |
| "loss": 0.0277, |
| "step": 4690 |
| }, |
| { |
| "epoch": 4.563106796116505, |
| "grad_norm": 0.5769630670547485, |
| "learning_rate": 6.156386949002588e-06, |
| "loss": 0.0185, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.572815533980583, |
| "grad_norm": 0.4057222008705139, |
| "learning_rate": 6.066123056542911e-06, |
| "loss": 0.0194, |
| "step": 4710 |
| }, |
| { |
| "epoch": 4.58252427184466, |
| "grad_norm": 0.45939818024635315, |
| "learning_rate": 5.976434323831348e-06, |
| "loss": 0.0143, |
| "step": 4720 |
| }, |
| { |
| "epoch": 4.592233009708738, |
| "grad_norm": 0.11893407255411148, |
| "learning_rate": 5.887323475367956e-06, |
| "loss": 0.0224, |
| "step": 4730 |
| }, |
| { |
| "epoch": 4.601941747572815, |
| "grad_norm": 0.1190888062119484, |
| "learning_rate": 5.798793218098236e-06, |
| "loss": 0.0182, |
| "step": 4740 |
| }, |
| { |
| "epoch": 4.611650485436893, |
| "grad_norm": 0.3699619174003601, |
| "learning_rate": 5.710846241330928e-06, |
| "loss": 0.0144, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.621359223300971, |
| "grad_norm": 0.41403263807296753, |
| "learning_rate": 5.62348521665628e-06, |
| "loss": 0.0206, |
| "step": 4760 |
| }, |
| { |
| "epoch": 4.631067961165049, |
| "grad_norm": 0.6446648836135864, |
| "learning_rate": 5.536712797864885e-06, |
| "loss": 0.0221, |
| "step": 4770 |
| }, |
| { |
| "epoch": 4.640776699029126, |
| "grad_norm": 0.33612629771232605, |
| "learning_rate": 5.45053162086713e-06, |
| "loss": 0.0215, |
| "step": 4780 |
| }, |
| { |
| "epoch": 4.650485436893204, |
| "grad_norm": 0.49632659554481506, |
| "learning_rate": 5.3649443036130566e-06, |
| "loss": 0.0171, |
| "step": 4790 |
| }, |
| { |
| "epoch": 4.660194174757281, |
| "grad_norm": 0.370231568813324, |
| "learning_rate": 5.2799534460128725e-06, |
| "loss": 0.0215, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.669902912621359, |
| "grad_norm": 0.3890692889690399, |
| "learning_rate": 5.195561629857953e-06, |
| "loss": 0.0202, |
| "step": 4810 |
| }, |
| { |
| "epoch": 4.679611650485437, |
| "grad_norm": 0.6446177363395691, |
| "learning_rate": 5.1117714187424195e-06, |
| "loss": 0.0229, |
| "step": 4820 |
| }, |
| { |
| "epoch": 4.689320388349515, |
| "grad_norm": 0.38938695192337036, |
| "learning_rate": 5.0285853579852795e-06, |
| "loss": 0.0226, |
| "step": 4830 |
| }, |
| { |
| "epoch": 4.699029126213592, |
| "grad_norm": 0.2890338897705078, |
| "learning_rate": 4.946005974553086e-06, |
| "loss": 0.0218, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.70873786407767, |
| "grad_norm": 0.7432148456573486, |
| "learning_rate": 4.8640357769831695e-06, |
| "loss": 0.0192, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.718446601941747, |
| "grad_norm": 0.4233954846858978, |
| "learning_rate": 4.78267725530748e-06, |
| "loss": 0.0139, |
| "step": 4860 |
| }, |
| { |
| "epoch": 4.728155339805825, |
| "grad_norm": 0.3382183611392975, |
| "learning_rate": 4.7019328809768895e-06, |
| "loss": 0.0151, |
| "step": 4870 |
| }, |
| { |
| "epoch": 4.737864077669903, |
| "grad_norm": 0.3196679949760437, |
| "learning_rate": 4.621805106786142e-06, |
| "loss": 0.021, |
| "step": 4880 |
| }, |
| { |
| "epoch": 4.747572815533981, |
| "grad_norm": 0.6986203193664551, |
| "learning_rate": 4.542296366799367e-06, |
| "loss": 0.0281, |
| "step": 4890 |
| }, |
| { |
| "epoch": 4.757281553398058, |
| "grad_norm": 0.2677580416202545, |
| "learning_rate": 4.463409076276095e-06, |
| "loss": 0.0111, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.766990291262136, |
| "grad_norm": 0.2945110499858856, |
| "learning_rate": 4.385145631597909e-06, |
| "loss": 0.0186, |
| "step": 4910 |
| }, |
| { |
| "epoch": 4.776699029126213, |
| "grad_norm": 0.2980441153049469, |
| "learning_rate": 4.307508410195671e-06, |
| "loss": 0.0234, |
| "step": 4920 |
| }, |
| { |
| "epoch": 4.786407766990291, |
| "grad_norm": 0.31078580021858215, |
| "learning_rate": 4.230499770477258e-06, |
| "loss": 0.0174, |
| "step": 4930 |
| }, |
| { |
| "epoch": 4.796116504854369, |
| "grad_norm": 0.16771598160266876, |
| "learning_rate": 4.154122051755954e-06, |
| "loss": 0.0119, |
| "step": 4940 |
| }, |
| { |
| "epoch": 4.805825242718447, |
| "grad_norm": 0.41210389137268066, |
| "learning_rate": 4.078377574179382e-06, |
| "loss": 0.0211, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.815533980582524, |
| "grad_norm": 0.19906070828437805, |
| "learning_rate": 4.003268638659005e-06, |
| "loss": 0.0172, |
| "step": 4960 |
| }, |
| { |
| "epoch": 4.825242718446602, |
| "grad_norm": 0.35959959030151367, |
| "learning_rate": 3.9287975268002565e-06, |
| "loss": 0.0252, |
| "step": 4970 |
| }, |
| { |
| "epoch": 4.834951456310679, |
| "grad_norm": 0.27199459075927734, |
| "learning_rate": 3.854966500833216e-06, |
| "loss": 0.0178, |
| "step": 4980 |
| }, |
| { |
| "epoch": 4.844660194174757, |
| "grad_norm": 0.5328648686408997, |
| "learning_rate": 3.78177780354389e-06, |
| "loss": 0.0152, |
| "step": 4990 |
| }, |
| { |
| "epoch": 4.854368932038835, |
| "grad_norm": 0.38457655906677246, |
| "learning_rate": 3.709233658206079e-06, |
| "loss": 0.028, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |