| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 4674, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021399529210357373, | |
| "grad_norm": 2.7134010791778564, | |
| "learning_rate": 3.5460992907801423e-06, | |
| "loss": 12.9166, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.004279905842071475, | |
| "grad_norm": 2.857581853866577, | |
| "learning_rate": 7.092198581560285e-06, | |
| "loss": 12.8988, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006419858763107212, | |
| "grad_norm": 3.6460659503936768, | |
| "learning_rate": 1.0638297872340426e-05, | |
| "loss": 12.596, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.00855981168414295, | |
| "grad_norm": 4.4102678298950195, | |
| "learning_rate": 1.418439716312057e-05, | |
| "loss": 12.0487, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.010699764605178685, | |
| "grad_norm": 4.709506034851074, | |
| "learning_rate": 1.773049645390071e-05, | |
| "loss": 11.2542, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.012839717526214423, | |
| "grad_norm": 3.634777069091797, | |
| "learning_rate": 2.1276595744680852e-05, | |
| "loss": 10.3125, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.014979670447250161, | |
| "grad_norm": 3.215787649154663, | |
| "learning_rate": 2.4822695035460995e-05, | |
| "loss": 9.4876, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0171196233682859, | |
| "grad_norm": 2.2986671924591064, | |
| "learning_rate": 2.836879432624114e-05, | |
| "loss": 8.8462, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.019259576289321637, | |
| "grad_norm": 3.012507677078247, | |
| "learning_rate": 3.191489361702128e-05, | |
| "loss": 8.3983, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.02139952921035737, | |
| "grad_norm": 1.9252734184265137, | |
| "learning_rate": 3.546099290780142e-05, | |
| "loss": 7.9777, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02353948213139311, | |
| "grad_norm": 4.199497699737549, | |
| "learning_rate": 3.900709219858156e-05, | |
| "loss": 7.6574, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.025679435052428846, | |
| "grad_norm": 6.262833118438721, | |
| "learning_rate": 4.2553191489361704e-05, | |
| "loss": 7.4121, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.027819387973464584, | |
| "grad_norm": 6.515402317047119, | |
| "learning_rate": 4.609929078014185e-05, | |
| "loss": 7.1827, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.029959340894500322, | |
| "grad_norm": 2.35896372795105, | |
| "learning_rate": 4.964539007092199e-05, | |
| "loss": 6.9596, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.032099293815536056, | |
| "grad_norm": 3.057620048522949, | |
| "learning_rate": 5.319148936170213e-05, | |
| "loss": 6.7407, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0342392467365718, | |
| "grad_norm": 2.1466054916381836, | |
| "learning_rate": 5.673758865248228e-05, | |
| "loss": 6.4509, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03637919965760753, | |
| "grad_norm": 4.194606781005859, | |
| "learning_rate": 6.0283687943262414e-05, | |
| "loss": 6.3802, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03851915257864327, | |
| "grad_norm": 2.6796622276306152, | |
| "learning_rate": 6.382978723404256e-05, | |
| "loss": 6.1995, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04065910549967901, | |
| "grad_norm": 4.852718353271484, | |
| "learning_rate": 6.737588652482269e-05, | |
| "loss": 5.9464, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.04279905842071474, | |
| "grad_norm": 4.550829887390137, | |
| "learning_rate": 7.092198581560284e-05, | |
| "loss": 5.7867, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04493901134175048, | |
| "grad_norm": 5.130778789520264, | |
| "learning_rate": 7.446808510638298e-05, | |
| "loss": 5.6702, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.04707896426278622, | |
| "grad_norm": 4.426118850708008, | |
| "learning_rate": 7.801418439716312e-05, | |
| "loss": 5.5532, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04921891718382196, | |
| "grad_norm": 3.7589707374572754, | |
| "learning_rate": 8.156028368794327e-05, | |
| "loss": 5.2532, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.05135887010485769, | |
| "grad_norm": 4.644890785217285, | |
| "learning_rate": 8.510638297872341e-05, | |
| "loss": 5.1286, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05349882302589343, | |
| "grad_norm": 3.4871771335601807, | |
| "learning_rate": 8.865248226950354e-05, | |
| "loss": 4.9816, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05563877594692917, | |
| "grad_norm": 6.377041339874268, | |
| "learning_rate": 9.21985815602837e-05, | |
| "loss": 4.855, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0577787288679649, | |
| "grad_norm": 4.984015941619873, | |
| "learning_rate": 9.574468085106384e-05, | |
| "loss": 4.6588, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.059918681789000644, | |
| "grad_norm": 5.524806976318359, | |
| "learning_rate": 9.929078014184398e-05, | |
| "loss": 4.5006, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06205863471003638, | |
| "grad_norm": 6.326210021972656, | |
| "learning_rate": 9.999980787316854e-05, | |
| "loss": 4.5593, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.06419858763107211, | |
| "grad_norm": 6.40717887878418, | |
| "learning_rate": 9.999902736044627e-05, | |
| "loss": 4.2919, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06633854055210785, | |
| "grad_norm": 5.881182670593262, | |
| "learning_rate": 9.999764646327135e-05, | |
| "loss": 4.2351, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0684784934731436, | |
| "grad_norm": 7.092776298522949, | |
| "learning_rate": 9.99956651982255e-05, | |
| "loss": 4.1024, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07061844639417933, | |
| "grad_norm": 6.636219501495361, | |
| "learning_rate": 9.999308358909955e-05, | |
| "loss": 4.0457, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.07275839931521506, | |
| "grad_norm": 6.4725141525268555, | |
| "learning_rate": 9.998990166689332e-05, | |
| "loss": 3.9062, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0748983522362508, | |
| "grad_norm": 9.732144355773926, | |
| "learning_rate": 9.998611946981506e-05, | |
| "loss": 3.9227, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.07703830515728655, | |
| "grad_norm": 8.540387153625488, | |
| "learning_rate": 9.998173704328112e-05, | |
| "loss": 3.7503, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07917825807832228, | |
| "grad_norm": 7.108048915863037, | |
| "learning_rate": 9.99767544399153e-05, | |
| "loss": 3.6129, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.08131821099935801, | |
| "grad_norm": 7.507834434509277, | |
| "learning_rate": 9.997117171954835e-05, | |
| "loss": 3.6377, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08345816392039375, | |
| "grad_norm": 7.859340667724609, | |
| "learning_rate": 9.996498894921713e-05, | |
| "loss": 3.6035, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.08559811684142948, | |
| "grad_norm": 7.093273162841797, | |
| "learning_rate": 9.995820620316386e-05, | |
| "loss": 3.5484, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08773806976246523, | |
| "grad_norm": 8.989423751831055, | |
| "learning_rate": 9.995082356283525e-05, | |
| "loss": 3.5132, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.08987802268350097, | |
| "grad_norm": 8.387621879577637, | |
| "learning_rate": 9.994284111688145e-05, | |
| "loss": 3.3861, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0920179756045367, | |
| "grad_norm": 6.809689044952393, | |
| "learning_rate": 9.993425896115509e-05, | |
| "loss": 3.37, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.09415792852557243, | |
| "grad_norm": 7.266931056976318, | |
| "learning_rate": 9.992507719870998e-05, | |
| "loss": 3.4136, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09629788144660817, | |
| "grad_norm": 6.893076419830322, | |
| "learning_rate": 9.991529593980006e-05, | |
| "loss": 3.3817, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.09843783436764392, | |
| "grad_norm": 6.13765287399292, | |
| "learning_rate": 9.990491530187791e-05, | |
| "loss": 3.3894, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.10057778728867965, | |
| "grad_norm": 8.638238906860352, | |
| "learning_rate": 9.989393540959343e-05, | |
| "loss": 3.2748, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.10271774020971539, | |
| "grad_norm": 7.530318737030029, | |
| "learning_rate": 9.98823563947923e-05, | |
| "loss": 3.1735, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10485769313075112, | |
| "grad_norm": 7.243378639221191, | |
| "learning_rate": 9.987017839651447e-05, | |
| "loss": 3.186, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.10699764605178685, | |
| "grad_norm": 8.053439140319824, | |
| "learning_rate": 9.985740156099239e-05, | |
| "loss": 3.2066, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1091375989728226, | |
| "grad_norm": 7.392351150512695, | |
| "learning_rate": 9.984402604164928e-05, | |
| "loss": 3.132, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.11127755189385834, | |
| "grad_norm": 6.198112487792969, | |
| "learning_rate": 9.983005199909738e-05, | |
| "loss": 3.1682, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.11341750481489407, | |
| "grad_norm": 7.847325801849365, | |
| "learning_rate": 9.981547960113591e-05, | |
| "loss": 2.9939, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.1155574577359298, | |
| "grad_norm": 6.414429664611816, | |
| "learning_rate": 9.980030902274907e-05, | |
| "loss": 2.9657, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11769741065696554, | |
| "grad_norm": 7.576863765716553, | |
| "learning_rate": 9.9784540446104e-05, | |
| "loss": 3.0261, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.11983736357800129, | |
| "grad_norm": 7.394157886505127, | |
| "learning_rate": 9.976817406054856e-05, | |
| "loss": 3.0425, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.12197731649903702, | |
| "grad_norm": 6.541194915771484, | |
| "learning_rate": 9.975121006260905e-05, | |
| "loss": 2.9808, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.12411726942007276, | |
| "grad_norm": 7.00990104675293, | |
| "learning_rate": 9.973364865598783e-05, | |
| "loss": 3.0134, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1262572223411085, | |
| "grad_norm": 8.408513069152832, | |
| "learning_rate": 9.97154900515609e-05, | |
| "loss": 2.9945, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.12839717526214423, | |
| "grad_norm": 6.816072463989258, | |
| "learning_rate": 9.96967344673754e-05, | |
| "loss": 2.968, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.13053712818317997, | |
| "grad_norm": 7.750121593475342, | |
| "learning_rate": 9.967738212864692e-05, | |
| "loss": 2.9725, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1326770811042157, | |
| "grad_norm": 7.111683368682861, | |
| "learning_rate": 9.965743326775686e-05, | |
| "loss": 2.9475, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.13481703402525144, | |
| "grad_norm": 9.491382598876953, | |
| "learning_rate": 9.963688812424958e-05, | |
| "loss": 2.9236, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1369569869462872, | |
| "grad_norm": 9.146512031555176, | |
| "learning_rate": 9.96157469448296e-05, | |
| "loss": 2.8553, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1390969398673229, | |
| "grad_norm": 10.622199058532715, | |
| "learning_rate": 9.959400998335855e-05, | |
| "loss": 2.7986, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.14123689278835866, | |
| "grad_norm": 9.17403793334961, | |
| "learning_rate": 9.957167750085217e-05, | |
| "loss": 2.8332, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1433768457093944, | |
| "grad_norm": 8.492594718933105, | |
| "learning_rate": 9.95487497654772e-05, | |
| "loss": 2.8036, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.14551679863043013, | |
| "grad_norm": 7.058006763458252, | |
| "learning_rate": 9.95252270525481e-05, | |
| "loss": 2.8065, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14765675155146588, | |
| "grad_norm": 8.662881851196289, | |
| "learning_rate": 9.950110964452382e-05, | |
| "loss": 2.829, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1497967044725016, | |
| "grad_norm": 8.287664413452148, | |
| "learning_rate": 9.947639783100429e-05, | |
| "loss": 2.8611, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.15193665739353734, | |
| "grad_norm": 6.383536338806152, | |
| "learning_rate": 9.945109190872706e-05, | |
| "loss": 2.719, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.1540766103145731, | |
| "grad_norm": 9.04951000213623, | |
| "learning_rate": 9.94251921815637e-05, | |
| "loss": 2.7782, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1562165632356088, | |
| "grad_norm": 9.873688697814941, | |
| "learning_rate": 9.939869896051613e-05, | |
| "loss": 2.7622, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.15835651615664456, | |
| "grad_norm": 8.37444019317627, | |
| "learning_rate": 9.93716125637129e-05, | |
| "loss": 2.7327, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.16049646907768028, | |
| "grad_norm": 8.676753997802734, | |
| "learning_rate": 9.934393331640536e-05, | |
| "loss": 2.6887, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.16263642199871603, | |
| "grad_norm": 8.401046752929688, | |
| "learning_rate": 9.931566155096378e-05, | |
| "loss": 2.6337, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.16477637491975178, | |
| "grad_norm": 9.715431213378906, | |
| "learning_rate": 9.928679760687333e-05, | |
| "loss": 2.7182, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.1669163278407875, | |
| "grad_norm": 7.727181434631348, | |
| "learning_rate": 9.925734183073001e-05, | |
| "loss": 2.7088, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16905628076182325, | |
| "grad_norm": 8.17239761352539, | |
| "learning_rate": 9.92272945762365e-05, | |
| "loss": 2.6574, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.17119623368285897, | |
| "grad_norm": 9.848095893859863, | |
| "learning_rate": 9.919665620419792e-05, | |
| "loss": 2.6521, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.17333618660389472, | |
| "grad_norm": 6.269837856292725, | |
| "learning_rate": 9.916542708251745e-05, | |
| "loss": 2.6658, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.17547613952493046, | |
| "grad_norm": 9.290328979492188, | |
| "learning_rate": 9.913360758619199e-05, | |
| "loss": 2.6467, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.17761609244596618, | |
| "grad_norm": 8.005675315856934, | |
| "learning_rate": 9.910119809730759e-05, | |
| "loss": 2.7059, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.17975604536700193, | |
| "grad_norm": 7.254702091217041, | |
| "learning_rate": 9.906819900503486e-05, | |
| "loss": 2.6869, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.18189599828803765, | |
| "grad_norm": 8.958318710327148, | |
| "learning_rate": 9.903461070562436e-05, | |
| "loss": 2.6647, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1840359512090734, | |
| "grad_norm": 10.019506454467773, | |
| "learning_rate": 9.900043360240181e-05, | |
| "loss": 2.6647, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.18617590413010915, | |
| "grad_norm": 9.011824607849121, | |
| "learning_rate": 9.89656681057632e-05, | |
| "loss": 2.6462, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.18831585705114487, | |
| "grad_norm": 8.521088600158691, | |
| "learning_rate": 9.893031463316996e-05, | |
| "loss": 2.6276, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.19045580997218062, | |
| "grad_norm": 9.127740859985352, | |
| "learning_rate": 9.889437360914379e-05, | |
| "loss": 2.5787, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.19259576289321634, | |
| "grad_norm": 7.98406457901001, | |
| "learning_rate": 9.885784546526177e-05, | |
| "loss": 2.6082, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.19473571581425209, | |
| "grad_norm": 8.42713451385498, | |
| "learning_rate": 9.882073064015102e-05, | |
| "loss": 2.5825, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.19687566873528783, | |
| "grad_norm": 10.31495189666748, | |
| "learning_rate": 9.87830295794835e-05, | |
| "loss": 2.5767, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.19901562165632355, | |
| "grad_norm": 8.118849754333496, | |
| "learning_rate": 9.874474273597059e-05, | |
| "loss": 2.5494, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.2011555745773593, | |
| "grad_norm": 7.514265537261963, | |
| "learning_rate": 9.870587056935777e-05, | |
| "loss": 2.5824, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.20329552749839502, | |
| "grad_norm": 9.503680229187012, | |
| "learning_rate": 9.866641354641901e-05, | |
| "loss": 2.5055, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.20543548041943077, | |
| "grad_norm": 6.921200275421143, | |
| "learning_rate": 9.862637214095121e-05, | |
| "loss": 2.628, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.20757543334046652, | |
| "grad_norm": 6.561913013458252, | |
| "learning_rate": 9.858574683376844e-05, | |
| "loss": 2.5504, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.20971538626150224, | |
| "grad_norm": 8.711860656738281, | |
| "learning_rate": 9.854453811269625e-05, | |
| "loss": 2.6222, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.211855339182538, | |
| "grad_norm": 7.749279975891113, | |
| "learning_rate": 9.85027464725658e-05, | |
| "loss": 2.4887, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2139952921035737, | |
| "grad_norm": 9.228084564208984, | |
| "learning_rate": 9.846037241520782e-05, | |
| "loss": 2.5844, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.21613524502460946, | |
| "grad_norm": 10.207733154296875, | |
| "learning_rate": 9.841741644944675e-05, | |
| "loss": 2.5684, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2182751979456452, | |
| "grad_norm": 6.548727512359619, | |
| "learning_rate": 9.837387909109452e-05, | |
| "loss": 2.5237, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.22041515086668093, | |
| "grad_norm": 9.262266159057617, | |
| "learning_rate": 9.832976086294432e-05, | |
| "loss": 2.5004, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.22255510378771667, | |
| "grad_norm": 8.879256248474121, | |
| "learning_rate": 9.828506229476444e-05, | |
| "loss": 2.5217, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2246950567087524, | |
| "grad_norm": 9.322160720825195, | |
| "learning_rate": 9.823978392329183e-05, | |
| "loss": 2.5006, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.22683500962978814, | |
| "grad_norm": 11.270200729370117, | |
| "learning_rate": 9.819392629222568e-05, | |
| "loss": 2.5517, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2289749625508239, | |
| "grad_norm": 7.910391330718994, | |
| "learning_rate": 9.814748995222085e-05, | |
| "loss": 2.4775, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.2311149154718596, | |
| "grad_norm": 11.13558578491211, | |
| "learning_rate": 9.810047546088133e-05, | |
| "loss": 2.5364, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.23325486839289536, | |
| "grad_norm": 8.814207077026367, | |
| "learning_rate": 9.805288338275352e-05, | |
| "loss": 2.4367, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.23539482131393108, | |
| "grad_norm": 8.249216079711914, | |
| "learning_rate": 9.800471428931939e-05, | |
| "loss": 2.4926, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.23753477423496683, | |
| "grad_norm": 8.129894256591797, | |
| "learning_rate": 9.795596875898967e-05, | |
| "loss": 2.5597, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.23967472715600258, | |
| "grad_norm": 7.8779683113098145, | |
| "learning_rate": 9.790664737709696e-05, | |
| "loss": 2.4822, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2418146800770383, | |
| "grad_norm": 9.75910472869873, | |
| "learning_rate": 9.785675073588855e-05, | |
| "loss": 2.459, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.24395463299807404, | |
| "grad_norm": 10.04244613647461, | |
| "learning_rate": 9.78062794345195e-05, | |
| "loss": 2.4718, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2460945859191098, | |
| "grad_norm": 11.239801406860352, | |
| "learning_rate": 9.775523407904525e-05, | |
| "loss": 2.4605, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.2482345388401455, | |
| "grad_norm": 11.514714241027832, | |
| "learning_rate": 9.770361528241452e-05, | |
| "loss": 2.4807, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.25037449176118126, | |
| "grad_norm": 11.684077262878418, | |
| "learning_rate": 9.765142366446178e-05, | |
| "loss": 2.4928, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.252514444682217, | |
| "grad_norm": 9.043937683105469, | |
| "learning_rate": 9.759865985189995e-05, | |
| "loss": 2.3884, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2546543976032527, | |
| "grad_norm": 8.806846618652344, | |
| "learning_rate": 9.754532447831285e-05, | |
| "loss": 2.4352, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.25679435052428845, | |
| "grad_norm": 11.088272094726562, | |
| "learning_rate": 9.749141818414749e-05, | |
| "loss": 2.4701, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2589343034453242, | |
| "grad_norm": 10.800878524780273, | |
| "learning_rate": 9.743694161670646e-05, | |
| "loss": 2.4473, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.26107425636635995, | |
| "grad_norm": 8.226394653320312, | |
| "learning_rate": 9.73818954301402e-05, | |
| "loss": 2.3953, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2632142092873957, | |
| "grad_norm": 7.874807834625244, | |
| "learning_rate": 9.732628028543906e-05, | |
| "loss": 2.4438, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.2653541622084314, | |
| "grad_norm": 9.975316047668457, | |
| "learning_rate": 9.727009685042538e-05, | |
| "loss": 2.407, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.26749411512946714, | |
| "grad_norm": 7.762071132659912, | |
| "learning_rate": 9.72133457997455e-05, | |
| "loss": 2.4803, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.2696340680505029, | |
| "grad_norm": 8.840380668640137, | |
| "learning_rate": 9.715602781486166e-05, | |
| "loss": 2.393, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.27177402097153863, | |
| "grad_norm": 9.467190742492676, | |
| "learning_rate": 9.709814358404378e-05, | |
| "loss": 2.3655, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2739139738925744, | |
| "grad_norm": 7.103944778442383, | |
| "learning_rate": 9.703969380236123e-05, | |
| "loss": 2.3414, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2760539268136101, | |
| "grad_norm": 7.247581958770752, | |
| "learning_rate": 9.698067917167446e-05, | |
| "loss": 2.4331, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.2781938797346458, | |
| "grad_norm": 8.669685363769531, | |
| "learning_rate": 9.692110040062659e-05, | |
| "loss": 2.4395, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.28033383265568157, | |
| "grad_norm": 8.114962577819824, | |
| "learning_rate": 9.68609582046349e-05, | |
| "loss": 2.3229, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.2824737855767173, | |
| "grad_norm": 7.252189636230469, | |
| "learning_rate": 9.680025330588223e-05, | |
| "loss": 2.3969, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.28461373849775307, | |
| "grad_norm": 9.382356643676758, | |
| "learning_rate": 9.67389864333083e-05, | |
| "loss": 2.3446, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.2867536914187888, | |
| "grad_norm": 7.623274803161621, | |
| "learning_rate": 9.667715832260098e-05, | |
| "loss": 2.3587, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2888936443398245, | |
| "grad_norm": 7.448448657989502, | |
| "learning_rate": 9.661476971618744e-05, | |
| "loss": 2.3671, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.29103359726086026, | |
| "grad_norm": 8.833525657653809, | |
| "learning_rate": 9.655182136322524e-05, | |
| "loss": 2.3739, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.293173550181896, | |
| "grad_norm": 10.259654998779297, | |
| "learning_rate": 9.648831401959333e-05, | |
| "loss": 2.3668, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.29531350310293175, | |
| "grad_norm": 10.228246688842773, | |
| "learning_rate": 9.642424844788298e-05, | |
| "loss": 2.3793, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2974534560239675, | |
| "grad_norm": 8.286998748779297, | |
| "learning_rate": 9.635962541738862e-05, | |
| "loss": 2.3696, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.2995934089450032, | |
| "grad_norm": 8.957596778869629, | |
| "learning_rate": 9.62944457040986e-05, | |
| "loss": 2.3829, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.30173336186603894, | |
| "grad_norm": 8.238673210144043, | |
| "learning_rate": 9.622871009068588e-05, | |
| "loss": 2.3366, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3038733147870747, | |
| "grad_norm": 8.315441131591797, | |
| "learning_rate": 9.616241936649862e-05, | |
| "loss": 2.3475, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.30601326770811044, | |
| "grad_norm": 8.352023124694824, | |
| "learning_rate": 9.609557432755068e-05, | |
| "loss": 2.2942, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.3081532206291462, | |
| "grad_norm": 10.10500717163086, | |
| "learning_rate": 9.602817577651217e-05, | |
| "loss": 2.3405, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.3102931735501819, | |
| "grad_norm": 7.825136184692383, | |
| "learning_rate": 9.596022452269962e-05, | |
| "loss": 2.2845, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.3124331264712176, | |
| "grad_norm": 10.414852142333984, | |
| "learning_rate": 9.589172138206648e-05, | |
| "loss": 2.3508, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3145730793922534, | |
| "grad_norm": 8.343073844909668, | |
| "learning_rate": 9.582266717719314e-05, | |
| "loss": 2.3429, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.3167130323132891, | |
| "grad_norm": 8.42280101776123, | |
| "learning_rate": 9.575306273727713e-05, | |
| "loss": 2.2881, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.31885298523432487, | |
| "grad_norm": 7.317922592163086, | |
| "learning_rate": 9.568290889812322e-05, | |
| "loss": 2.3279, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.32099293815536056, | |
| "grad_norm": 8.232301712036133, | |
| "learning_rate": 9.561220650213326e-05, | |
| "loss": 2.2888, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3231328910763963, | |
| "grad_norm": 10.112414360046387, | |
| "learning_rate": 9.554095639829615e-05, | |
| "loss": 2.3637, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.32527284399743206, | |
| "grad_norm": 10.274619102478027, | |
| "learning_rate": 9.546915944217764e-05, | |
| "loss": 2.373, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3274127969184678, | |
| "grad_norm": 10.280336380004883, | |
| "learning_rate": 9.539681649591002e-05, | |
| "loss": 2.3361, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.32955274983950356, | |
| "grad_norm": 8.281644821166992, | |
| "learning_rate": 9.532392842818177e-05, | |
| "loss": 2.3094, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.33169270276053925, | |
| "grad_norm": 9.085750579833984, | |
| "learning_rate": 9.52504961142272e-05, | |
| "loss": 2.3396, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.333832655681575, | |
| "grad_norm": 8.183152198791504, | |
| "learning_rate": 9.517652043581583e-05, | |
| "loss": 2.2929, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.33597260860261074, | |
| "grad_norm": 10.052477836608887, | |
| "learning_rate": 9.510200228124191e-05, | |
| "loss": 2.3608, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.3381125615236465, | |
| "grad_norm": 8.700220108032227, | |
| "learning_rate": 9.502694254531364e-05, | |
| "loss": 2.2485, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.34025251444468224, | |
| "grad_norm": 7.829571723937988, | |
| "learning_rate": 9.495134212934256e-05, | |
| "loss": 2.3197, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.34239246736571793, | |
| "grad_norm": 7.233550071716309, | |
| "learning_rate": 9.48752019411326e-05, | |
| "loss": 2.2776, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3445324202867537, | |
| "grad_norm": 8.803876876831055, | |
| "learning_rate": 9.479852289496925e-05, | |
| "loss": 2.3447, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.34667237320778943, | |
| "grad_norm": 7.353137016296387, | |
| "learning_rate": 9.472130591160855e-05, | |
| "loss": 2.3355, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3488123261288252, | |
| "grad_norm": 8.22417163848877, | |
| "learning_rate": 9.464355191826608e-05, | |
| "loss": 2.2921, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.3509522790498609, | |
| "grad_norm": 9.752290725708008, | |
| "learning_rate": 9.456526184860579e-05, | |
| "loss": 2.31, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3530922319708966, | |
| "grad_norm": 9.317963600158691, | |
| "learning_rate": 9.448643664272876e-05, | |
| "loss": 2.2816, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.35523218489193237, | |
| "grad_norm": 7.882107734680176, | |
| "learning_rate": 9.440707724716196e-05, | |
| "loss": 2.2285, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3573721378129681, | |
| "grad_norm": 7.263378620147705, | |
| "learning_rate": 9.432718461484688e-05, | |
| "loss": 2.2865, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.35951209073400386, | |
| "grad_norm": 8.264389038085938, | |
| "learning_rate": 9.424675970512808e-05, | |
| "loss": 2.2278, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3616520436550396, | |
| "grad_norm": 7.658196926116943, | |
| "learning_rate": 9.416580348374163e-05, | |
| "loss": 2.2733, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3637919965760753, | |
| "grad_norm": 7.531430721282959, | |
| "learning_rate": 9.40843169228036e-05, | |
| "loss": 2.2451, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.36593194949711105, | |
| "grad_norm": 6.57159423828125, | |
| "learning_rate": 9.400230100079829e-05, | |
| "loss": 2.2292, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.3680719024181468, | |
| "grad_norm": 7.315071105957031, | |
| "learning_rate": 9.391975670256657e-05, | |
| "loss": 2.269, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.37021185533918255, | |
| "grad_norm": 6.916749000549316, | |
| "learning_rate": 9.383668501929395e-05, | |
| "loss": 2.2789, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.3723518082602183, | |
| "grad_norm": 6.972721576690674, | |
| "learning_rate": 9.37530869484988e-05, | |
| "loss": 2.2388, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.374491761181254, | |
| "grad_norm": 7.823705673217773, | |
| "learning_rate": 9.36689634940203e-05, | |
| "loss": 2.2251, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.37663171410228974, | |
| "grad_norm": 10.453600883483887, | |
| "learning_rate": 9.358431566600636e-05, | |
| "loss": 2.2375, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3787716670233255, | |
| "grad_norm": 10.327241897583008, | |
| "learning_rate": 9.349914448090156e-05, | |
| "loss": 2.2713, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.38091161994436123, | |
| "grad_norm": 9.396978378295898, | |
| "learning_rate": 9.34134509614349e-05, | |
| "loss": 2.2676, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.383051572865397, | |
| "grad_norm": 7.874500751495361, | |
| "learning_rate": 9.332723613660754e-05, | |
| "loss": 2.2645, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.3851915257864327, | |
| "grad_norm": 9.300860404968262, | |
| "learning_rate": 9.32405010416804e-05, | |
| "loss": 2.2894, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3873314787074684, | |
| "grad_norm": 7.320137977600098, | |
| "learning_rate": 9.315324671816183e-05, | |
| "loss": 2.2072, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.38947143162850417, | |
| "grad_norm": 7.579836368560791, | |
| "learning_rate": 9.306547421379497e-05, | |
| "loss": 2.2244, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3916113845495399, | |
| "grad_norm": 8.637717247009277, | |
| "learning_rate": 9.297718458254528e-05, | |
| "loss": 2.1833, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.39375133747057567, | |
| "grad_norm": 6.750846862792969, | |
| "learning_rate": 9.288837888458782e-05, | |
| "loss": 2.2157, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.39589129039161136, | |
| "grad_norm": 8.550662994384766, | |
| "learning_rate": 9.27990581862945e-05, | |
| "loss": 2.224, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.3980312433126471, | |
| "grad_norm": 7.079535961151123, | |
| "learning_rate": 9.270922356022142e-05, | |
| "loss": 2.1862, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.40017119623368286, | |
| "grad_norm": 6.939311981201172, | |
| "learning_rate": 9.261887608509579e-05, | |
| "loss": 2.2017, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.4023111491547186, | |
| "grad_norm": 8.532953262329102, | |
| "learning_rate": 9.252801684580308e-05, | |
| "loss": 2.1758, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.40445110207575435, | |
| "grad_norm": 7.4575018882751465, | |
| "learning_rate": 9.243664693337404e-05, | |
| "loss": 2.1896, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.40659105499679005, | |
| "grad_norm": 7.295337677001953, | |
| "learning_rate": 9.234476744497149e-05, | |
| "loss": 2.252, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4087310079178258, | |
| "grad_norm": 6.507411479949951, | |
| "learning_rate": 9.225237948387722e-05, | |
| "loss": 2.2629, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.41087096083886154, | |
| "grad_norm": 8.774213790893555, | |
| "learning_rate": 9.215948415947875e-05, | |
| "loss": 2.1715, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4130109137598973, | |
| "grad_norm": 9.428667068481445, | |
| "learning_rate": 9.20660825872559e-05, | |
| "loss": 2.1916, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.41515086668093304, | |
| "grad_norm": 7.89529275894165, | |
| "learning_rate": 9.197217588876756e-05, | |
| "loss": 2.2247, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.41729081960196873, | |
| "grad_norm": 7.525676727294922, | |
| "learning_rate": 9.187776519163811e-05, | |
| "loss": 2.1985, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.4194307725230045, | |
| "grad_norm": 7.482941627502441, | |
| "learning_rate": 9.178285162954386e-05, | |
| "loss": 2.1853, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.42157072544404023, | |
| "grad_norm": 6.92722225189209, | |
| "learning_rate": 9.168743634219955e-05, | |
| "loss": 2.1629, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.423710678365076, | |
| "grad_norm": 7.211160659790039, | |
| "learning_rate": 9.159152047534454e-05, | |
| "loss": 2.1817, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4258506312861117, | |
| "grad_norm": 7.496613025665283, | |
| "learning_rate": 9.149510518072916e-05, | |
| "loss": 2.1736, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.4279905842071474, | |
| "grad_norm": 7.383609294891357, | |
| "learning_rate": 9.139819161610082e-05, | |
| "loss": 2.2043, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.43013053712818317, | |
| "grad_norm": 7.53545618057251, | |
| "learning_rate": 9.130078094519008e-05, | |
| "loss": 2.232, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.4322704900492189, | |
| "grad_norm": 6.151658535003662, | |
| "learning_rate": 9.120287433769674e-05, | |
| "loss": 2.1939, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.43441044297025466, | |
| "grad_norm": 7.977633953094482, | |
| "learning_rate": 9.11044729692758e-05, | |
| "loss": 2.2096, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.4365503958912904, | |
| "grad_norm": 8.515596389770508, | |
| "learning_rate": 9.100557802152328e-05, | |
| "loss": 2.2042, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4386903488123261, | |
| "grad_norm": 6.877818584442139, | |
| "learning_rate": 9.090619068196203e-05, | |
| "loss": 2.1006, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.44083030173336185, | |
| "grad_norm": 7.46975040435791, | |
| "learning_rate": 9.080631214402754e-05, | |
| "loss": 2.0974, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4429702546543976, | |
| "grad_norm": 7.358278751373291, | |
| "learning_rate": 9.070594360705358e-05, | |
| "loss": 2.1188, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.44511020757543335, | |
| "grad_norm": 7.501420974731445, | |
| "learning_rate": 9.060508627625779e-05, | |
| "loss": 2.134, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.4472501604964691, | |
| "grad_norm": 7.990818500518799, | |
| "learning_rate": 9.050374136272717e-05, | |
| "loss": 2.1787, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.4493901134175048, | |
| "grad_norm": 6.6185760498046875, | |
| "learning_rate": 9.04019100834036e-05, | |
| "loss": 2.14, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.45153006633854054, | |
| "grad_norm": 9.137276649475098, | |
| "learning_rate": 9.029959366106923e-05, | |
| "loss": 2.1632, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.4536700192595763, | |
| "grad_norm": 8.287654876708984, | |
| "learning_rate": 9.019679332433173e-05, | |
| "loss": 2.1734, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.45580997218061203, | |
| "grad_norm": 8.167359352111816, | |
| "learning_rate": 9.009351030760958e-05, | |
| "loss": 2.116, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.4579499251016478, | |
| "grad_norm": 7.944402694702148, | |
| "learning_rate": 8.998974585111729e-05, | |
| "loss": 2.1126, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4600898780226835, | |
| "grad_norm": 8.373970985412598, | |
| "learning_rate": 8.988550120085038e-05, | |
| "loss": 2.1179, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.4622298309437192, | |
| "grad_norm": 7.862525463104248, | |
| "learning_rate": 8.978077760857058e-05, | |
| "loss": 2.1599, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.46436978386475497, | |
| "grad_norm": 6.35279655456543, | |
| "learning_rate": 8.967557633179067e-05, | |
| "loss": 2.1058, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.4665097367857907, | |
| "grad_norm": 6.809432506561279, | |
| "learning_rate": 8.956989863375944e-05, | |
| "loss": 2.1049, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.46864968970682647, | |
| "grad_norm": 10.011709213256836, | |
| "learning_rate": 8.946374578344653e-05, | |
| "loss": 2.1654, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.47078964262786216, | |
| "grad_norm": 8.779292106628418, | |
| "learning_rate": 8.935711905552713e-05, | |
| "loss": 2.089, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4729295955488979, | |
| "grad_norm": 8.868700981140137, | |
| "learning_rate": 8.925001973036677e-05, | |
| "loss": 2.16, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.47506954846993366, | |
| "grad_norm": 8.026627540588379, | |
| "learning_rate": 8.914244909400585e-05, | |
| "loss": 2.1683, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.4772095013909694, | |
| "grad_norm": 6.766802787780762, | |
| "learning_rate": 8.903440843814423e-05, | |
| "loss": 2.1475, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.47934945431200515, | |
| "grad_norm": 6.383385181427002, | |
| "learning_rate": 8.892589906012577e-05, | |
| "loss": 2.151, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4814894072330409, | |
| "grad_norm": 7.344244956970215, | |
| "learning_rate": 8.881692226292269e-05, | |
| "loss": 2.1037, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.4836293601540766, | |
| "grad_norm": 8.650083541870117, | |
| "learning_rate": 8.870747935511992e-05, | |
| "loss": 2.0868, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.48576931307511234, | |
| "grad_norm": 7.451263427734375, | |
| "learning_rate": 8.859757165089943e-05, | |
| "loss": 2.1287, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.4879092659961481, | |
| "grad_norm": 7.021806716918945, | |
| "learning_rate": 8.848720047002446e-05, | |
| "loss": 2.1256, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.49004921891718384, | |
| "grad_norm": 7.580230712890625, | |
| "learning_rate": 8.837636713782358e-05, | |
| "loss": 2.058, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.4921891718382196, | |
| "grad_norm": 6.413537979125977, | |
| "learning_rate": 8.826507298517489e-05, | |
| "loss": 2.0742, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4943291247592553, | |
| "grad_norm": 7.502536296844482, | |
| "learning_rate": 8.815331934848996e-05, | |
| "loss": 2.1497, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.496469077680291, | |
| "grad_norm": 6.689949035644531, | |
| "learning_rate": 8.804110756969781e-05, | |
| "loss": 2.112, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4986090306013268, | |
| "grad_norm": 8.030281066894531, | |
| "learning_rate": 8.792843899622879e-05, | |
| "loss": 2.1844, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.5007489835223625, | |
| "grad_norm": 6.825489044189453, | |
| "learning_rate": 8.781531498099844e-05, | |
| "loss": 2.1259, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.5028889364433983, | |
| "grad_norm": 7.533500671386719, | |
| "learning_rate": 8.770173688239116e-05, | |
| "loss": 2.0923, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.505028889364434, | |
| "grad_norm": 6.412694454193115, | |
| "learning_rate": 8.758770606424398e-05, | |
| "loss": 2.1432, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.5071688422854698, | |
| "grad_norm": 6.011632442474365, | |
| "learning_rate": 8.747322389583013e-05, | |
| "loss": 2.0918, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.5093087952065054, | |
| "grad_norm": 7.687342166900635, | |
| "learning_rate": 8.735829175184267e-05, | |
| "loss": 2.1275, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.5114487481275412, | |
| "grad_norm": 7.902970314025879, | |
| "learning_rate": 8.724291101237784e-05, | |
| "loss": 2.0638, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.5135887010485769, | |
| "grad_norm": 6.375851631164551, | |
| "learning_rate": 8.71270830629187e-05, | |
| "loss": 2.1268, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5157286539696126, | |
| "grad_norm": 11.488832473754883, | |
| "learning_rate": 8.701080929431824e-05, | |
| "loss": 2.1133, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.5178686068906484, | |
| "grad_norm": 7.46975564956665, | |
| "learning_rate": 8.689409110278292e-05, | |
| "loss": 2.0586, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.5200085598116841, | |
| "grad_norm": 10.648994445800781, | |
| "learning_rate": 8.677692988985575e-05, | |
| "loss": 2.0896, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.5221485127327199, | |
| "grad_norm": 8.54841136932373, | |
| "learning_rate": 8.665932706239949e-05, | |
| "loss": 2.0725, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.5242884656537556, | |
| "grad_norm": 6.593012809753418, | |
| "learning_rate": 8.654128403257982e-05, | |
| "loss": 2.0339, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.5264284185747914, | |
| "grad_norm": 8.725300788879395, | |
| "learning_rate": 8.642280221784828e-05, | |
| "loss": 2.0586, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5285683714958271, | |
| "grad_norm": 8.611812591552734, | |
| "learning_rate": 8.630388304092536e-05, | |
| "loss": 2.049, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.5307083244168628, | |
| "grad_norm": 8.650274276733398, | |
| "learning_rate": 8.618452792978336e-05, | |
| "loss": 2.0758, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5328482773378985, | |
| "grad_norm": 5.806057453155518, | |
| "learning_rate": 8.606473831762916e-05, | |
| "loss": 2.075, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.5349882302589343, | |
| "grad_norm": 6.725940227508545, | |
| "learning_rate": 8.59445156428872e-05, | |
| "loss": 2.1084, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.53712818317997, | |
| "grad_norm": 7.8081889152526855, | |
| "learning_rate": 8.582386134918204e-05, | |
| "loss": 2.076, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.5392681361010058, | |
| "grad_norm": 7.350069046020508, | |
| "learning_rate": 8.570277688532112e-05, | |
| "loss": 2.0705, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5414080890220415, | |
| "grad_norm": 6.163577079772949, | |
| "learning_rate": 8.55812637052773e-05, | |
| "loss": 2.0214, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.5435480419430773, | |
| "grad_norm": 8.294384956359863, | |
| "learning_rate": 8.545932326817145e-05, | |
| "loss": 2.0657, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.545687994864113, | |
| "grad_norm": 7.981808185577393, | |
| "learning_rate": 8.533695703825493e-05, | |
| "loss": 2.0762, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.5478279477851488, | |
| "grad_norm": 7.62240743637085, | |
| "learning_rate": 8.521416648489193e-05, | |
| "loss": 2.0571, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5499679007061845, | |
| "grad_norm": 7.337045192718506, | |
| "learning_rate": 8.509095308254191e-05, | |
| "loss": 2.0734, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.5521078536272201, | |
| "grad_norm": 6.613148212432861, | |
| "learning_rate": 8.496731831074189e-05, | |
| "loss": 2.0913, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5542478065482559, | |
| "grad_norm": 6.551748275756836, | |
| "learning_rate": 8.484326365408866e-05, | |
| "loss": 2.0183, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.5563877594692916, | |
| "grad_norm": 6.191518783569336, | |
| "learning_rate": 8.471879060222094e-05, | |
| "loss": 2.0514, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5585277123903274, | |
| "grad_norm": 6.579594135284424, | |
| "learning_rate": 8.459390064980146e-05, | |
| "loss": 2.083, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.5606676653113631, | |
| "grad_norm": 8.821534156799316, | |
| "learning_rate": 8.446859529649917e-05, | |
| "loss": 2.0494, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5628076182323989, | |
| "grad_norm": 8.188867568969727, | |
| "learning_rate": 8.434287604697101e-05, | |
| "loss": 2.0842, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.5649475711534346, | |
| "grad_norm": 6.688650608062744, | |
| "learning_rate": 8.421674441084404e-05, | |
| "loss": 2.059, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5670875240744704, | |
| "grad_norm": 9.34494400024414, | |
| "learning_rate": 8.409020190269716e-05, | |
| "loss": 2.0587, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.5692274769955061, | |
| "grad_norm": 9.420138359069824, | |
| "learning_rate": 8.396325004204303e-05, | |
| "loss": 2.0278, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5713674299165419, | |
| "grad_norm": 8.35175609588623, | |
| "learning_rate": 8.383589035330977e-05, | |
| "loss": 1.9988, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.5735073828375776, | |
| "grad_norm": 7.808613300323486, | |
| "learning_rate": 8.370812436582267e-05, | |
| "loss": 2.0572, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5756473357586133, | |
| "grad_norm": 6.553249359130859, | |
| "learning_rate": 8.357995361378583e-05, | |
| "loss": 2.0234, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.577787288679649, | |
| "grad_norm": 6.828055381774902, | |
| "learning_rate": 8.345137963626372e-05, | |
| "loss": 2.0428, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5799272416006848, | |
| "grad_norm": 8.460224151611328, | |
| "learning_rate": 8.332240397716272e-05, | |
| "loss": 2.05, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.5820671945217205, | |
| "grad_norm": 6.507852554321289, | |
| "learning_rate": 8.319302818521255e-05, | |
| "loss": 2.0947, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5842071474427563, | |
| "grad_norm": 6.2819085121154785, | |
| "learning_rate": 8.306325381394774e-05, | |
| "loss": 2.0366, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.586347100363792, | |
| "grad_norm": 7.773951530456543, | |
| "learning_rate": 8.293308242168889e-05, | |
| "loss": 2.0078, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5884870532848278, | |
| "grad_norm": 6.938888072967529, | |
| "learning_rate": 8.280251557152399e-05, | |
| "loss": 2.0739, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.5906270062058635, | |
| "grad_norm": 7.499892234802246, | |
| "learning_rate": 8.26715548312897e-05, | |
| "loss": 2.011, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5927669591268993, | |
| "grad_norm": 6.491375923156738, | |
| "learning_rate": 8.254020177355243e-05, | |
| "loss": 1.9787, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.594906912047935, | |
| "grad_norm": 7.310795307159424, | |
| "learning_rate": 8.240845797558958e-05, | |
| "loss": 2.0306, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5970468649689706, | |
| "grad_norm": 6.266601085662842, | |
| "learning_rate": 8.227632501937045e-05, | |
| "loss": 1.9754, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.5991868178900064, | |
| "grad_norm": 7.139545440673828, | |
| "learning_rate": 8.214380449153735e-05, | |
| "loss": 2.0119, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6013267708110421, | |
| "grad_norm": 7.089097499847412, | |
| "learning_rate": 8.201089798338655e-05, | |
| "loss": 2.0456, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.6034667237320779, | |
| "grad_norm": 6.723423004150391, | |
| "learning_rate": 8.187760709084911e-05, | |
| "loss": 2.0233, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.6056066766531136, | |
| "grad_norm": 6.559820175170898, | |
| "learning_rate": 8.174393341447177e-05, | |
| "loss": 1.995, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.6077466295741494, | |
| "grad_norm": 7.716728687286377, | |
| "learning_rate": 8.160987855939766e-05, | |
| "loss": 2.0308, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.6098865824951851, | |
| "grad_norm": 5.852180480957031, | |
| "learning_rate": 8.147544413534714e-05, | |
| "loss": 2.0049, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.6120265354162209, | |
| "grad_norm": 9.875506401062012, | |
| "learning_rate": 8.134063175659836e-05, | |
| "loss": 2.0243, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.6141664883372566, | |
| "grad_norm": 7.915645122528076, | |
| "learning_rate": 8.120544304196793e-05, | |
| "loss": 2.0137, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.6163064412582924, | |
| "grad_norm": 6.7806077003479, | |
| "learning_rate": 8.10698796147915e-05, | |
| "loss": 2.0122, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.618446394179328, | |
| "grad_norm": 6.318131446838379, | |
| "learning_rate": 8.093394310290421e-05, | |
| "loss": 2.0041, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.6205863471003638, | |
| "grad_norm": 6.302967071533203, | |
| "learning_rate": 8.079763513862116e-05, | |
| "loss": 2.0402, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6227263000213995, | |
| "grad_norm": 8.546146392822266, | |
| "learning_rate": 8.066095735871786e-05, | |
| "loss": 2.036, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.6248662529424353, | |
| "grad_norm": 7.018831729888916, | |
| "learning_rate": 8.052391140441051e-05, | |
| "loss": 1.9625, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.627006205863471, | |
| "grad_norm": 6.960865497589111, | |
| "learning_rate": 8.038649892133632e-05, | |
| "loss": 2.013, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.6291461587845067, | |
| "grad_norm": 7.643087863922119, | |
| "learning_rate": 8.024872155953376e-05, | |
| "loss": 2.0234, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6312861117055425, | |
| "grad_norm": 6.25241231918335, | |
| "learning_rate": 8.011058097342275e-05, | |
| "loss": 1.9427, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.6334260646265782, | |
| "grad_norm": 7.6290283203125, | |
| "learning_rate": 7.997207882178474e-05, | |
| "loss": 1.9874, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.635566017547614, | |
| "grad_norm": 10.151625633239746, | |
| "learning_rate": 7.983321676774285e-05, | |
| "loss": 1.9609, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.6377059704686497, | |
| "grad_norm": 6.939437389373779, | |
| "learning_rate": 7.969399647874191e-05, | |
| "loss": 2.0095, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6398459233896854, | |
| "grad_norm": 7.24613618850708, | |
| "learning_rate": 7.955441962652835e-05, | |
| "loss": 1.9874, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.6419858763107211, | |
| "grad_norm": 8.051011085510254, | |
| "learning_rate": 7.941448788713024e-05, | |
| "loss": 2.0215, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6441258292317569, | |
| "grad_norm": 7.338986873626709, | |
| "learning_rate": 7.927420294083705e-05, | |
| "loss": 1.941, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.6462657821527926, | |
| "grad_norm": 7.442719459533691, | |
| "learning_rate": 7.91335664721796e-05, | |
| "loss": 2.0215, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6484057350738284, | |
| "grad_norm": 7.5543341636657715, | |
| "learning_rate": 7.899258016990969e-05, | |
| "loss": 1.9694, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.6505456879948641, | |
| "grad_norm": 7.134289264678955, | |
| "learning_rate": 7.885124572697998e-05, | |
| "loss": 2.0004, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.6526856409158999, | |
| "grad_norm": 6.567378520965576, | |
| "learning_rate": 7.870956484052346e-05, | |
| "loss": 1.9725, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.6548255938369356, | |
| "grad_norm": 7.110774040222168, | |
| "learning_rate": 7.856753921183331e-05, | |
| "loss": 2.0402, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.6569655467579714, | |
| "grad_norm": 6.285329818725586, | |
| "learning_rate": 7.842517054634226e-05, | |
| "loss": 1.974, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.6591054996790071, | |
| "grad_norm": 5.5663676261901855, | |
| "learning_rate": 7.828246055360226e-05, | |
| "loss": 2.0189, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.6612454526000427, | |
| "grad_norm": 6.485752582550049, | |
| "learning_rate": 7.813941094726384e-05, | |
| "loss": 1.9699, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.6633854055210785, | |
| "grad_norm": 5.899412155151367, | |
| "learning_rate": 7.79960234450556e-05, | |
| "loss": 1.9966, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.6655253584421142, | |
| "grad_norm": 7.0582051277160645, | |
| "learning_rate": 7.78522997687636e-05, | |
| "loss": 1.9811, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.66766531136315, | |
| "grad_norm": 6.355223178863525, | |
| "learning_rate": 7.770824164421062e-05, | |
| "loss": 1.9238, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6698052642841857, | |
| "grad_norm": 7.558047771453857, | |
| "learning_rate": 7.756385080123546e-05, | |
| "loss": 1.9915, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.6719452172052215, | |
| "grad_norm": 6.747286319732666, | |
| "learning_rate": 7.741912897367221e-05, | |
| "loss": 1.9746, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6740851701262572, | |
| "grad_norm": 6.1656293869018555, | |
| "learning_rate": 7.727407789932935e-05, | |
| "loss": 1.9615, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.676225123047293, | |
| "grad_norm": 7.076664447784424, | |
| "learning_rate": 7.7128699319969e-05, | |
| "loss": 2.0052, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6783650759683287, | |
| "grad_norm": 7.908239841461182, | |
| "learning_rate": 7.698299498128587e-05, | |
| "loss": 2.0223, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.6805050288893645, | |
| "grad_norm": 6.080158233642578, | |
| "learning_rate": 7.68369666328864e-05, | |
| "loss": 1.9561, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6826449818104001, | |
| "grad_norm": 6.817996025085449, | |
| "learning_rate": 7.669061602826768e-05, | |
| "loss": 1.9811, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.6847849347314359, | |
| "grad_norm": 5.7741475105285645, | |
| "learning_rate": 7.654394492479648e-05, | |
| "loss": 1.9011, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6869248876524716, | |
| "grad_norm": 5.87952995300293, | |
| "learning_rate": 7.639695508368803e-05, | |
| "loss": 1.9695, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.6890648405735074, | |
| "grad_norm": 6.916929721832275, | |
| "learning_rate": 7.6249648269985e-05, | |
| "loss": 1.9536, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6912047934945431, | |
| "grad_norm": 8.327000617980957, | |
| "learning_rate": 7.61020262525362e-05, | |
| "loss": 1.961, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.6933447464155789, | |
| "grad_norm": 7.954501628875732, | |
| "learning_rate": 7.59540908039754e-05, | |
| "loss": 2.0029, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6954846993366146, | |
| "grad_norm": 6.015987873077393, | |
| "learning_rate": 7.580584370070001e-05, | |
| "loss": 1.9753, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.6976246522576504, | |
| "grad_norm": 7.0654520988464355, | |
| "learning_rate": 7.565728672284979e-05, | |
| "loss": 1.9378, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6997646051786861, | |
| "grad_norm": 7.740458965301514, | |
| "learning_rate": 7.550842165428543e-05, | |
| "loss": 1.9464, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.7019045580997219, | |
| "grad_norm": 7.246245861053467, | |
| "learning_rate": 7.535925028256717e-05, | |
| "loss": 2.0064, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.7040445110207575, | |
| "grad_norm": 6.526742458343506, | |
| "learning_rate": 7.520977439893329e-05, | |
| "loss": 1.9545, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.7061844639417932, | |
| "grad_norm": 7.05415153503418, | |
| "learning_rate": 7.505999579827863e-05, | |
| "loss": 1.979, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.708324416862829, | |
| "grad_norm": 6.696120738983154, | |
| "learning_rate": 7.490991627913306e-05, | |
| "loss": 1.9026, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.7104643697838647, | |
| "grad_norm": 7.282532215118408, | |
| "learning_rate": 7.475953764363983e-05, | |
| "loss": 1.9877, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.7126043227049005, | |
| "grad_norm": 5.767569541931152, | |
| "learning_rate": 7.460886169753397e-05, | |
| "loss": 2.005, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.7147442756259362, | |
| "grad_norm": 6.214768409729004, | |
| "learning_rate": 7.445789025012055e-05, | |
| "loss": 1.949, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.716884228546972, | |
| "grad_norm": 6.871894836425781, | |
| "learning_rate": 7.430662511425308e-05, | |
| "loss": 1.9725, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.7190241814680077, | |
| "grad_norm": 7.443453311920166, | |
| "learning_rate": 7.415506810631155e-05, | |
| "loss": 1.9316, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.7211641343890435, | |
| "grad_norm": 6.8776140213012695, | |
| "learning_rate": 7.400322104618085e-05, | |
| "loss": 1.9556, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.7233040873100792, | |
| "grad_norm": 6.675306797027588, | |
| "learning_rate": 7.385108575722868e-05, | |
| "loss": 1.9833, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7254440402311149, | |
| "grad_norm": 6.732053756713867, | |
| "learning_rate": 7.369866406628385e-05, | |
| "loss": 1.9036, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.7275839931521506, | |
| "grad_norm": 7.516349792480469, | |
| "learning_rate": 7.354595780361423e-05, | |
| "loss": 1.9876, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7297239460731864, | |
| "grad_norm": 6.245758056640625, | |
| "learning_rate": 7.339296880290481e-05, | |
| "loss": 1.9403, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.7318638989942221, | |
| "grad_norm": 6.886528015136719, | |
| "learning_rate": 7.323969890123565e-05, | |
| "loss": 1.9668, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.7340038519152579, | |
| "grad_norm": 7.249610900878906, | |
| "learning_rate": 7.308614993905992e-05, | |
| "loss": 2.0097, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.7361438048362936, | |
| "grad_norm": 6.901829719543457, | |
| "learning_rate": 7.293232376018164e-05, | |
| "loss": 1.9935, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.7382837577573294, | |
| "grad_norm": 7.697319984436035, | |
| "learning_rate": 7.277822221173367e-05, | |
| "loss": 1.9295, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.7404237106783651, | |
| "grad_norm": 6.5634565353393555, | |
| "learning_rate": 7.262384714415551e-05, | |
| "loss": 1.9149, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7425636635994008, | |
| "grad_norm": 6.889857769012451, | |
| "learning_rate": 7.2469200411171e-05, | |
| "loss": 1.947, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.7447036165204366, | |
| "grad_norm": 7.622896194458008, | |
| "learning_rate": 7.231428386976618e-05, | |
| "loss": 1.8894, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7468435694414722, | |
| "grad_norm": 7.235098838806152, | |
| "learning_rate": 7.215909938016684e-05, | |
| "loss": 1.9483, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.748983522362508, | |
| "grad_norm": 5.822272300720215, | |
| "learning_rate": 7.200364880581637e-05, | |
| "loss": 1.9301, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7511234752835437, | |
| "grad_norm": 6.663504123687744, | |
| "learning_rate": 7.184793401335322e-05, | |
| "loss": 1.9482, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.7532634282045795, | |
| "grad_norm": 7.109114170074463, | |
| "learning_rate": 7.169195687258859e-05, | |
| "loss": 1.9659, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7554033811256152, | |
| "grad_norm": 7.012261867523193, | |
| "learning_rate": 7.15357192564839e-05, | |
| "loss": 1.9284, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.757543334046651, | |
| "grad_norm": 7.140257835388184, | |
| "learning_rate": 7.137922304112838e-05, | |
| "loss": 1.9185, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7596832869676867, | |
| "grad_norm": 5.866714954376221, | |
| "learning_rate": 7.122247010571647e-05, | |
| "loss": 1.9535, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.7618232398887225, | |
| "grad_norm": 6.297852516174316, | |
| "learning_rate": 7.106546233252528e-05, | |
| "loss": 1.9455, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7639631928097582, | |
| "grad_norm": 5.564944267272949, | |
| "learning_rate": 7.090820160689201e-05, | |
| "loss": 1.9153, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.766103145730794, | |
| "grad_norm": 6.030523777008057, | |
| "learning_rate": 7.07506898171913e-05, | |
| "loss": 1.9386, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7682430986518297, | |
| "grad_norm": 7.070890426635742, | |
| "learning_rate": 7.059292885481253e-05, | |
| "loss": 1.9576, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.7703830515728654, | |
| "grad_norm": 7.497870922088623, | |
| "learning_rate": 7.04349206141371e-05, | |
| "loss": 1.8669, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7725230044939011, | |
| "grad_norm": 5.917843818664551, | |
| "learning_rate": 7.02766669925158e-05, | |
| "loss": 1.9455, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.7746629574149368, | |
| "grad_norm": 6.862101078033447, | |
| "learning_rate": 7.011816989024583e-05, | |
| "loss": 1.9464, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7768029103359726, | |
| "grad_norm": 6.385555267333984, | |
| "learning_rate": 6.995943121054816e-05, | |
| "loss": 1.9477, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.7789428632570083, | |
| "grad_norm": 6.692836761474609, | |
| "learning_rate": 6.980045285954456e-05, | |
| "loss": 1.916, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7810828161780441, | |
| "grad_norm": 7.266310214996338, | |
| "learning_rate": 6.964123674623475e-05, | |
| "loss": 1.929, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.7832227690990798, | |
| "grad_norm": 6.483401775360107, | |
| "learning_rate": 6.948178478247355e-05, | |
| "loss": 1.9205, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7853627220201156, | |
| "grad_norm": 7.336221694946289, | |
| "learning_rate": 6.932209888294777e-05, | |
| "loss": 1.9402, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.7875026749411513, | |
| "grad_norm": 7.22684383392334, | |
| "learning_rate": 6.916218096515336e-05, | |
| "loss": 1.8924, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7896426278621871, | |
| "grad_norm": 6.436882972717285, | |
| "learning_rate": 6.900203294937229e-05, | |
| "loss": 1.9378, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.7917825807832227, | |
| "grad_norm": 6.200204849243164, | |
| "learning_rate": 6.884165675864954e-05, | |
| "loss": 1.864, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7939225337042585, | |
| "grad_norm": 6.617162227630615, | |
| "learning_rate": 6.868105431877002e-05, | |
| "loss": 1.9279, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.7960624866252942, | |
| "grad_norm": 5.673040866851807, | |
| "learning_rate": 6.85202275582354e-05, | |
| "loss": 1.8682, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.79820243954633, | |
| "grad_norm": 6.891549110412598, | |
| "learning_rate": 6.835917840824097e-05, | |
| "loss": 1.8942, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.8003423924673657, | |
| "grad_norm": 6.0316386222839355, | |
| "learning_rate": 6.819790880265246e-05, | |
| "loss": 1.906, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8024823453884015, | |
| "grad_norm": 7.065370559692383, | |
| "learning_rate": 6.803642067798284e-05, | |
| "loss": 1.9118, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.8046222983094372, | |
| "grad_norm": 6.079885959625244, | |
| "learning_rate": 6.7874715973369e-05, | |
| "loss": 1.934, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.806762251230473, | |
| "grad_norm": 6.481017112731934, | |
| "learning_rate": 6.771279663054853e-05, | |
| "loss": 1.9257, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.8089022041515087, | |
| "grad_norm": 6.038636207580566, | |
| "learning_rate": 6.755066459383637e-05, | |
| "loss": 1.8815, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.8110421570725445, | |
| "grad_norm": 6.196339130401611, | |
| "learning_rate": 6.738832181010149e-05, | |
| "loss": 1.9115, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.8131821099935801, | |
| "grad_norm": 6.638113498687744, | |
| "learning_rate": 6.722577022874345e-05, | |
| "loss": 1.9637, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8153220629146158, | |
| "grad_norm": 6.1240973472595215, | |
| "learning_rate": 6.706301180166909e-05, | |
| "loss": 1.9453, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.8174620158356516, | |
| "grad_norm": 6.026691436767578, | |
| "learning_rate": 6.690004848326898e-05, | |
| "loss": 1.8705, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.8196019687566873, | |
| "grad_norm": 6.312042713165283, | |
| "learning_rate": 6.673688223039406e-05, | |
| "loss": 1.8869, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.8217419216777231, | |
| "grad_norm": 6.738498687744141, | |
| "learning_rate": 6.65735150023321e-05, | |
| "loss": 1.9406, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8238818745987588, | |
| "grad_norm": 5.581632614135742, | |
| "learning_rate": 6.64099487607841e-05, | |
| "loss": 1.8928, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.8260218275197946, | |
| "grad_norm": 6.734776973724365, | |
| "learning_rate": 6.624618546984082e-05, | |
| "loss": 1.8833, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.8281617804408303, | |
| "grad_norm": 6.365813732147217, | |
| "learning_rate": 6.608222709595925e-05, | |
| "loss": 1.8952, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.8303017333618661, | |
| "grad_norm": 6.569833278656006, | |
| "learning_rate": 6.59180756079388e-05, | |
| "loss": 1.9004, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.8324416862829018, | |
| "grad_norm": 5.505297660827637, | |
| "learning_rate": 6.575373297689786e-05, | |
| "loss": 1.8726, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.8345816392039375, | |
| "grad_norm": 6.422504901885986, | |
| "learning_rate": 6.558920117625005e-05, | |
| "loss": 1.9392, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.8367215921249732, | |
| "grad_norm": 5.929808139801025, | |
| "learning_rate": 6.542448218168049e-05, | |
| "loss": 1.8829, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.838861545046009, | |
| "grad_norm": 5.6545796394348145, | |
| "learning_rate": 6.525957797112211e-05, | |
| "loss": 1.8625, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.8410014979670447, | |
| "grad_norm": 5.610157489776611, | |
| "learning_rate": 6.509449052473193e-05, | |
| "loss": 1.9184, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.8431414508880805, | |
| "grad_norm": 6.630683898925781, | |
| "learning_rate": 6.492922182486722e-05, | |
| "loss": 1.9024, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.8452814038091162, | |
| "grad_norm": 5.480852127075195, | |
| "learning_rate": 6.476377385606175e-05, | |
| "loss": 1.8924, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.847421356730152, | |
| "grad_norm": 6.156518459320068, | |
| "learning_rate": 6.459814860500194e-05, | |
| "loss": 1.8499, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8495613096511877, | |
| "grad_norm": 6.882572174072266, | |
| "learning_rate": 6.443234806050298e-05, | |
| "loss": 1.8714, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.8517012625722234, | |
| "grad_norm": 6.394567489624023, | |
| "learning_rate": 6.4266374213485e-05, | |
| "loss": 1.889, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8538412154932592, | |
| "grad_norm": 5.844759941101074, | |
| "learning_rate": 6.41002290569491e-05, | |
| "loss": 1.919, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.8559811684142948, | |
| "grad_norm": 5.819794178009033, | |
| "learning_rate": 6.393391458595345e-05, | |
| "loss": 1.8827, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8581211213353306, | |
| "grad_norm": 7.27986478805542, | |
| "learning_rate": 6.37674327975894e-05, | |
| "loss": 1.8559, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.8602610742563663, | |
| "grad_norm": 6.1043195724487305, | |
| "learning_rate": 6.360078569095734e-05, | |
| "loss": 1.8751, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8624010271774021, | |
| "grad_norm": 6.8420090675354, | |
| "learning_rate": 6.343397526714284e-05, | |
| "loss": 1.8557, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.8645409800984378, | |
| "grad_norm": 6.02893590927124, | |
| "learning_rate": 6.326700352919259e-05, | |
| "loss": 1.9407, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8666809330194736, | |
| "grad_norm": 6.999791145324707, | |
| "learning_rate": 6.309987248209029e-05, | |
| "loss": 1.8652, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.8688208859405093, | |
| "grad_norm": 6.686702728271484, | |
| "learning_rate": 6.293258413273262e-05, | |
| "loss": 1.9068, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8709608388615451, | |
| "grad_norm": 6.408459186553955, | |
| "learning_rate": 6.276514048990515e-05, | |
| "loss": 1.8797, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.8731007917825808, | |
| "grad_norm": 6.277412414550781, | |
| "learning_rate": 6.259754356425818e-05, | |
| "loss": 1.8417, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8752407447036166, | |
| "grad_norm": 5.6100287437438965, | |
| "learning_rate": 6.242979536828262e-05, | |
| "loss": 1.87, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.8773806976246522, | |
| "grad_norm": 6.132706165313721, | |
| "learning_rate": 6.226189791628583e-05, | |
| "loss": 1.9009, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.879520650545688, | |
| "grad_norm": 5.885615825653076, | |
| "learning_rate": 6.209385322436746e-05, | |
| "loss": 1.8624, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.8816606034667237, | |
| "grad_norm": 6.174984455108643, | |
| "learning_rate": 6.192566331039514e-05, | |
| "loss": 1.8723, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8838005563877595, | |
| "grad_norm": 5.804656982421875, | |
| "learning_rate": 6.175733019398034e-05, | |
| "loss": 1.8639, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.8859405093087952, | |
| "grad_norm": 7.446183204650879, | |
| "learning_rate": 6.15888558964541e-05, | |
| "loss": 1.8703, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.888080462229831, | |
| "grad_norm": 6.513533115386963, | |
| "learning_rate": 6.142024244084278e-05, | |
| "loss": 1.8334, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.8902204151508667, | |
| "grad_norm": 6.254002571105957, | |
| "learning_rate": 6.125149185184369e-05, | |
| "loss": 1.8476, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8923603680719024, | |
| "grad_norm": 5.820918560028076, | |
| "learning_rate": 6.108260615580086e-05, | |
| "loss": 1.9445, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.8945003209929382, | |
| "grad_norm": 7.240386962890625, | |
| "learning_rate": 6.091358738068064e-05, | |
| "loss": 1.8563, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8966402739139739, | |
| "grad_norm": 6.337396621704102, | |
| "learning_rate": 6.074443755604744e-05, | |
| "loss": 1.8851, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.8987802268350096, | |
| "grad_norm": 7.422278881072998, | |
| "learning_rate": 6.0575158713039234e-05, | |
| "loss": 1.8112, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9009201797560453, | |
| "grad_norm": 6.84944486618042, | |
| "learning_rate": 6.040575288434329e-05, | |
| "loss": 1.865, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.9030601326770811, | |
| "grad_norm": 6.545510768890381, | |
| "learning_rate": 6.02362221041717e-05, | |
| "loss": 1.8829, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.9052000855981168, | |
| "grad_norm": 6.097507476806641, | |
| "learning_rate": 6.006656840823696e-05, | |
| "loss": 1.8363, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.9073400385191526, | |
| "grad_norm": 7.34000301361084, | |
| "learning_rate": 5.9896793833727496e-05, | |
| "loss": 1.8738, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.9094799914401883, | |
| "grad_norm": 5.860642433166504, | |
| "learning_rate": 5.972690041928331e-05, | |
| "loss": 1.8792, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.9116199443612241, | |
| "grad_norm": 6.345127582550049, | |
| "learning_rate": 5.9556890204971326e-05, | |
| "loss": 1.838, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.9137598972822598, | |
| "grad_norm": 6.226019382476807, | |
| "learning_rate": 5.93867652322611e-05, | |
| "loss": 1.881, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.9158998502032956, | |
| "grad_norm": 6.527356147766113, | |
| "learning_rate": 5.921652754400011e-05, | |
| "loss": 1.8406, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.9180398031243313, | |
| "grad_norm": 5.739786148071289, | |
| "learning_rate": 5.904617918438936e-05, | |
| "loss": 1.8496, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.920179756045367, | |
| "grad_norm": 7.399641990661621, | |
| "learning_rate": 5.887572219895877e-05, | |
| "loss": 1.8212, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.9223197089664027, | |
| "grad_norm": 5.731332302093506, | |
| "learning_rate": 5.8705158634542644e-05, | |
| "loss": 1.9046, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.9244596618874384, | |
| "grad_norm": 7.024575710296631, | |
| "learning_rate": 5.853449053925505e-05, | |
| "loss": 1.8799, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.9265996148084742, | |
| "grad_norm": 5.317823886871338, | |
| "learning_rate": 5.836371996246528e-05, | |
| "loss": 1.8679, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.9287395677295099, | |
| "grad_norm": 5.5442795753479, | |
| "learning_rate": 5.819284895477323e-05, | |
| "loss": 1.8641, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.9308795206505457, | |
| "grad_norm": 5.924790382385254, | |
| "learning_rate": 5.80218795679847e-05, | |
| "loss": 1.8456, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.9330194735715814, | |
| "grad_norm": 5.661588668823242, | |
| "learning_rate": 5.78508138550869e-05, | |
| "loss": 1.848, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.9351594264926172, | |
| "grad_norm": 5.443496227264404, | |
| "learning_rate": 5.7679653870223673e-05, | |
| "loss": 1.8579, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.9372993794136529, | |
| "grad_norm": 5.579583644866943, | |
| "learning_rate": 5.750840166867085e-05, | |
| "loss": 1.8477, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.9394393323346887, | |
| "grad_norm": 6.413700580596924, | |
| "learning_rate": 5.733705930681165e-05, | |
| "loss": 1.8889, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.9415792852557243, | |
| "grad_norm": 6.018016338348389, | |
| "learning_rate": 5.7165628842111866e-05, | |
| "loss": 1.8509, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9437192381767601, | |
| "grad_norm": 6.163970470428467, | |
| "learning_rate": 5.699411233309528e-05, | |
| "loss": 1.8492, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.9458591910977958, | |
| "grad_norm": 7.421061992645264, | |
| "learning_rate": 5.682251183931886e-05, | |
| "loss": 1.8493, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.9479991440188316, | |
| "grad_norm": 6.061490058898926, | |
| "learning_rate": 5.6650829421348065e-05, | |
| "loss": 1.8128, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.9501390969398673, | |
| "grad_norm": 6.558000087738037, | |
| "learning_rate": 5.647906714073208e-05, | |
| "loss": 1.8476, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.9522790498609031, | |
| "grad_norm": 7.766129970550537, | |
| "learning_rate": 5.630722705997908e-05, | |
| "loss": 1.8357, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.9544190027819388, | |
| "grad_norm": 6.785080909729004, | |
| "learning_rate": 5.6135311242531473e-05, | |
| "loss": 1.845, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.9565589557029746, | |
| "grad_norm": 5.932000637054443, | |
| "learning_rate": 5.59633217527411e-05, | |
| "loss": 1.8277, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.9586989086240103, | |
| "grad_norm": 6.414392948150635, | |
| "learning_rate": 5.5791260655844424e-05, | |
| "loss": 1.9083, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.960838861545046, | |
| "grad_norm": 5.878129482269287, | |
| "learning_rate": 5.5619130017937806e-05, | |
| "loss": 1.8427, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.9629788144660818, | |
| "grad_norm": 5.418539524078369, | |
| "learning_rate": 5.5446931905952624e-05, | |
| "loss": 1.8509, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9651187673871174, | |
| "grad_norm": 5.333776473999023, | |
| "learning_rate": 5.527466838763049e-05, | |
| "loss": 1.8308, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.9672587203081532, | |
| "grad_norm": 5.526288032531738, | |
| "learning_rate": 5.510234153149839e-05, | |
| "loss": 1.8443, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9693986732291889, | |
| "grad_norm": 5.585949897766113, | |
| "learning_rate": 5.4929953406843906e-05, | |
| "loss": 1.8008, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.9715386261502247, | |
| "grad_norm": 6.400721549987793, | |
| "learning_rate": 5.475750608369029e-05, | |
| "loss": 1.8238, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9736785790712604, | |
| "grad_norm": 6.028564453125, | |
| "learning_rate": 5.4585001632771656e-05, | |
| "loss": 1.8195, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.9758185319922962, | |
| "grad_norm": 6.440245628356934, | |
| "learning_rate": 5.4412442125508113e-05, | |
| "loss": 1.8225, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9779584849133319, | |
| "grad_norm": 6.663422584533691, | |
| "learning_rate": 5.4239829633980866e-05, | |
| "loss": 1.8487, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.9800984378343677, | |
| "grad_norm": 6.021454811096191, | |
| "learning_rate": 5.4067166230907365e-05, | |
| "loss": 1.8593, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9822383907554034, | |
| "grad_norm": 6.139098644256592, | |
| "learning_rate": 5.389445398961639e-05, | |
| "loss": 1.8337, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.9843783436764392, | |
| "grad_norm": 5.165621280670166, | |
| "learning_rate": 5.3721694984023194e-05, | |
| "loss": 1.899, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9865182965974748, | |
| "grad_norm": 6.382976055145264, | |
| "learning_rate": 5.354889128860454e-05, | |
| "loss": 1.8559, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.9886582495185106, | |
| "grad_norm": 6.370285511016846, | |
| "learning_rate": 5.337604497837383e-05, | |
| "loss": 1.7831, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9907982024395463, | |
| "grad_norm": 5.373252868652344, | |
| "learning_rate": 5.320315812885618e-05, | |
| "loss": 1.8073, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.992938155360582, | |
| "grad_norm": 6.694858551025391, | |
| "learning_rate": 5.3030232816063505e-05, | |
| "loss": 1.7992, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9950781082816178, | |
| "grad_norm": 6.6283440589904785, | |
| "learning_rate": 5.28572711164696e-05, | |
| "loss": 1.8422, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.9972180612026535, | |
| "grad_norm": 6.046857833862305, | |
| "learning_rate": 5.268427510698517e-05, | |
| "loss": 1.8237, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9993580141236893, | |
| "grad_norm": 6.323689937591553, | |
| "learning_rate": 5.251124686493291e-05, | |
| "loss": 1.8662, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 1.0012839717526214, | |
| "grad_norm": 6.170103549957275, | |
| "learning_rate": 5.233818846802255e-05, | |
| "loss": 1.7427, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.0034239246736572, | |
| "grad_norm": 5.78736686706543, | |
| "learning_rate": 5.216510199432596e-05, | |
| "loss": 1.83, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 1.005563877594693, | |
| "grad_norm": 5.895803451538086, | |
| "learning_rate": 5.199198952225212e-05, | |
| "loss": 1.8002, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.0077038305157286, | |
| "grad_norm": 6.435295581817627, | |
| "learning_rate": 5.1818853130522184e-05, | |
| "loss": 1.7659, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 1.0098437834367644, | |
| "grad_norm": 6.010552406311035, | |
| "learning_rate": 5.164569489814456e-05, | |
| "loss": 1.7948, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.0119837363578001, | |
| "grad_norm": 6.7105255126953125, | |
| "learning_rate": 5.147251690438992e-05, | |
| "loss": 1.797, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 1.014123689278836, | |
| "grad_norm": 6.607806205749512, | |
| "learning_rate": 5.1299321228766194e-05, | |
| "loss": 1.7717, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.0162636421998716, | |
| "grad_norm": 8.161765098571777, | |
| "learning_rate": 5.112610995099368e-05, | |
| "loss": 1.7855, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.0184035951209074, | |
| "grad_norm": 6.3959150314331055, | |
| "learning_rate": 5.095288515097999e-05, | |
| "loss": 1.8212, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.0205435480419431, | |
| "grad_norm": 5.896310806274414, | |
| "learning_rate": 5.0779648908795116e-05, | |
| "loss": 1.7695, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 1.0226835009629789, | |
| "grad_norm": 5.285399913787842, | |
| "learning_rate": 5.060640330464646e-05, | |
| "loss": 1.7961, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.0248234538840146, | |
| "grad_norm": 6.583270072937012, | |
| "learning_rate": 5.043315041885383e-05, | |
| "loss": 1.8068, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 1.0269634068050504, | |
| "grad_norm": 6.104979991912842, | |
| "learning_rate": 5.0259892331824474e-05, | |
| "loss": 1.7806, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.0291033597260861, | |
| "grad_norm": 5.723150730133057, | |
| "learning_rate": 5.008663112402811e-05, | |
| "loss": 1.7961, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 1.0312433126471217, | |
| "grad_norm": 5.977392673492432, | |
| "learning_rate": 4.991336887597192e-05, | |
| "loss": 1.7362, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.0333832655681574, | |
| "grad_norm": 6.383233070373535, | |
| "learning_rate": 4.974010766817555e-05, | |
| "loss": 1.7895, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 1.0355232184891932, | |
| "grad_norm": 6.362678527832031, | |
| "learning_rate": 4.95668495811462e-05, | |
| "loss": 1.7526, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.037663171410229, | |
| "grad_norm": 5.289202690124512, | |
| "learning_rate": 4.939359669535357e-05, | |
| "loss": 1.8182, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.0398031243312646, | |
| "grad_norm": 6.294672012329102, | |
| "learning_rate": 4.922035109120491e-05, | |
| "loss": 1.7909, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.0419430772523004, | |
| "grad_norm": 6.827216625213623, | |
| "learning_rate": 4.904711484902003e-05, | |
| "loss": 1.8041, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 1.0440830301733361, | |
| "grad_norm": 6.308746337890625, | |
| "learning_rate": 4.887389004900633e-05, | |
| "loss": 1.8017, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.046222983094372, | |
| "grad_norm": 5.453704357147217, | |
| "learning_rate": 4.870067877123382e-05, | |
| "loss": 1.791, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 1.0483629360154076, | |
| "grad_norm": 5.51895809173584, | |
| "learning_rate": 4.852748309561009e-05, | |
| "loss": 1.7959, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.0505028889364434, | |
| "grad_norm": 5.434797286987305, | |
| "learning_rate": 4.835430510185545e-05, | |
| "loss": 1.8243, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 1.0526428418574791, | |
| "grad_norm": 5.3560590744018555, | |
| "learning_rate": 4.818114686947783e-05, | |
| "loss": 1.8104, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.0547827947785149, | |
| "grad_norm": 6.535407543182373, | |
| "learning_rate": 4.800801047774789e-05, | |
| "loss": 1.7754, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 1.0569227476995506, | |
| "grad_norm": 5.90179967880249, | |
| "learning_rate": 4.783489800567405e-05, | |
| "loss": 1.8041, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.0590627006205864, | |
| "grad_norm": 6.035419464111328, | |
| "learning_rate": 4.766181153197746e-05, | |
| "loss": 1.8154, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.0612026535416221, | |
| "grad_norm": 5.992060661315918, | |
| "learning_rate": 4.748875313506711e-05, | |
| "loss": 1.8101, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.0633426064626579, | |
| "grad_norm": 6.405609130859375, | |
| "learning_rate": 4.7315724893014846e-05, | |
| "loss": 1.7881, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.0654825593836936, | |
| "grad_norm": 5.741700649261475, | |
| "learning_rate": 4.714272888353041e-05, | |
| "loss": 1.7986, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.0676225123047294, | |
| "grad_norm": 5.35413122177124, | |
| "learning_rate": 4.69697671839365e-05, | |
| "loss": 1.7832, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 1.0697624652257651, | |
| "grad_norm": 5.857509613037109, | |
| "learning_rate": 4.679684187114384e-05, | |
| "loss": 1.7965, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0719024181468009, | |
| "grad_norm": 6.7070698738098145, | |
| "learning_rate": 4.6623955021626184e-05, | |
| "loss": 1.7924, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 1.0740423710678364, | |
| "grad_norm": 6.540157318115234, | |
| "learning_rate": 4.645110871139547e-05, | |
| "loss": 1.7951, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.0761823239888721, | |
| "grad_norm": 6.704575538635254, | |
| "learning_rate": 4.627830501597681e-05, | |
| "loss": 1.7831, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 1.078322276909908, | |
| "grad_norm": 6.622875213623047, | |
| "learning_rate": 4.610554601038361e-05, | |
| "loss": 1.7869, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.0804622298309436, | |
| "grad_norm": 5.487006187438965, | |
| "learning_rate": 4.593283376909264e-05, | |
| "loss": 1.7893, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.0826021827519794, | |
| "grad_norm": 6.443029403686523, | |
| "learning_rate": 4.576017036601914e-05, | |
| "loss": 1.7392, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.0847421356730151, | |
| "grad_norm": 5.588938236236572, | |
| "learning_rate": 4.558755787449189e-05, | |
| "loss": 1.7468, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 1.0868820885940509, | |
| "grad_norm": 5.544746398925781, | |
| "learning_rate": 4.541499836722835e-05, | |
| "loss": 1.7849, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.0890220415150866, | |
| "grad_norm": 5.714095592498779, | |
| "learning_rate": 4.5242493916309705e-05, | |
| "loss": 1.7508, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 1.0911619944361224, | |
| "grad_norm": 5.488661289215088, | |
| "learning_rate": 4.507004659315611e-05, | |
| "loss": 1.8118, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.0933019473571581, | |
| "grad_norm": 6.51017951965332, | |
| "learning_rate": 4.489765846850162e-05, | |
| "loss": 1.7632, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 1.0954419002781939, | |
| "grad_norm": 5.363729476928711, | |
| "learning_rate": 4.472533161236954e-05, | |
| "loss": 1.7984, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.0975818531992296, | |
| "grad_norm": 5.888598918914795, | |
| "learning_rate": 4.4553068094047394e-05, | |
| "loss": 1.7983, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 1.0997218061202654, | |
| "grad_norm": 6.100613594055176, | |
| "learning_rate": 4.438086998206221e-05, | |
| "loss": 1.7707, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.1018617590413011, | |
| "grad_norm": 5.885534286499023, | |
| "learning_rate": 4.4208739344155594e-05, | |
| "loss": 1.8299, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.1040017119623369, | |
| "grad_norm": 6.399609088897705, | |
| "learning_rate": 4.4036678247258924e-05, | |
| "loss": 1.7365, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.1061416648833726, | |
| "grad_norm": 7.783679008483887, | |
| "learning_rate": 4.386468875746854e-05, | |
| "loss": 1.8312, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 1.1082816178044084, | |
| "grad_norm": 5.783539772033691, | |
| "learning_rate": 4.369277294002093e-05, | |
| "loss": 1.7481, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.1104215707254441, | |
| "grad_norm": 5.326051235198975, | |
| "learning_rate": 4.352093285926793e-05, | |
| "loss": 1.7541, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 1.1125615236464799, | |
| "grad_norm": 5.783180236816406, | |
| "learning_rate": 4.334917057865194e-05, | |
| "loss": 1.7801, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.1147014765675156, | |
| "grad_norm": 5.667355537414551, | |
| "learning_rate": 4.3177488160681146e-05, | |
| "loss": 1.7967, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 1.1168414294885514, | |
| "grad_norm": 5.82685661315918, | |
| "learning_rate": 4.300588766690473e-05, | |
| "loss": 1.7543, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.1189813824095869, | |
| "grad_norm": 6.610193252563477, | |
| "learning_rate": 4.283437115788814e-05, | |
| "loss": 1.7785, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 1.1211213353306226, | |
| "grad_norm": 7.57377815246582, | |
| "learning_rate": 4.266294069318837e-05, | |
| "loss": 1.7679, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.1232612882516584, | |
| "grad_norm": 6.202375411987305, | |
| "learning_rate": 4.2491598331329154e-05, | |
| "loss": 1.8045, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.1254012411726941, | |
| "grad_norm": 6.378594875335693, | |
| "learning_rate": 4.232034612977634e-05, | |
| "loss": 1.7454, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.1275411940937299, | |
| "grad_norm": 5.158632755279541, | |
| "learning_rate": 4.21491861449131e-05, | |
| "loss": 1.7993, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 1.1296811470147656, | |
| "grad_norm": 6.776663780212402, | |
| "learning_rate": 4.19781204320153e-05, | |
| "loss": 1.7253, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.1318210999358014, | |
| "grad_norm": 5.978639125823975, | |
| "learning_rate": 4.180715104522679e-05, | |
| "loss": 1.7127, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 1.1339610528568371, | |
| "grad_norm": 5.210892200469971, | |
| "learning_rate": 4.1636280037534725e-05, | |
| "loss": 1.7764, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.1361010057778729, | |
| "grad_norm": 6.2656121253967285, | |
| "learning_rate": 4.1465509460744963e-05, | |
| "loss": 1.7425, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 1.1382409586989086, | |
| "grad_norm": 5.737490177154541, | |
| "learning_rate": 4.129484136545737e-05, | |
| "loss": 1.7626, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.1403809116199444, | |
| "grad_norm": 5.1411261558532715, | |
| "learning_rate": 4.112427780104124e-05, | |
| "loss": 1.7971, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 1.1425208645409801, | |
| "grad_norm": 5.617253303527832, | |
| "learning_rate": 4.0953820815610636e-05, | |
| "loss": 1.7431, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.1446608174620159, | |
| "grad_norm": 6.250400543212891, | |
| "learning_rate": 4.0783472455999886e-05, | |
| "loss": 1.7756, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.1468007703830516, | |
| "grad_norm": 7.598665714263916, | |
| "learning_rate": 4.06132347677389e-05, | |
| "loss": 1.7462, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.1489407233040874, | |
| "grad_norm": 5.751972675323486, | |
| "learning_rate": 4.0443109795028665e-05, | |
| "loss": 1.7599, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 1.1510806762251231, | |
| "grad_norm": 6.453872203826904, | |
| "learning_rate": 4.0273099580716725e-05, | |
| "loss": 1.7637, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.1532206291461589, | |
| "grad_norm": 6.619513034820557, | |
| "learning_rate": 4.010320616627252e-05, | |
| "loss": 1.7396, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 1.1553605820671946, | |
| "grad_norm": 6.701980113983154, | |
| "learning_rate": 3.993343159176307e-05, | |
| "loss": 1.6959, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.1575005349882304, | |
| "grad_norm": 5.974419116973877, | |
| "learning_rate": 3.976377789582832e-05, | |
| "loss": 1.8088, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 1.1596404879092659, | |
| "grad_norm": 5.028608798980713, | |
| "learning_rate": 3.959424711565672e-05, | |
| "loss": 1.7439, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.1617804408303019, | |
| "grad_norm": 5.743880748748779, | |
| "learning_rate": 3.9424841286960784e-05, | |
| "loss": 1.748, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 1.1639203937513374, | |
| "grad_norm": 5.568974018096924, | |
| "learning_rate": 3.925556244395259e-05, | |
| "loss": 1.7883, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.1660603466723731, | |
| "grad_norm": 5.259049892425537, | |
| "learning_rate": 3.908641261931937e-05, | |
| "loss": 1.7304, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.1682002995934089, | |
| "grad_norm": 5.7519850730896, | |
| "learning_rate": 3.8917393844199156e-05, | |
| "loss": 1.7241, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.1703402525144446, | |
| "grad_norm": 5.750739097595215, | |
| "learning_rate": 3.874850814815632e-05, | |
| "loss": 1.6955, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 1.1724802054354804, | |
| "grad_norm": 6.248133182525635, | |
| "learning_rate": 3.857975755915723e-05, | |
| "loss": 1.7689, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.1746201583565161, | |
| "grad_norm": 5.397186756134033, | |
| "learning_rate": 3.8411144103545904e-05, | |
| "loss": 1.7588, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 1.1767601112775519, | |
| "grad_norm": 5.462392330169678, | |
| "learning_rate": 3.824266980601968e-05, | |
| "loss": 1.7879, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.1789000641985876, | |
| "grad_norm": 6.177926063537598, | |
| "learning_rate": 3.807433668960488e-05, | |
| "loss": 1.7516, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 1.1810400171196234, | |
| "grad_norm": 5.820185661315918, | |
| "learning_rate": 3.7906146775632554e-05, | |
| "loss": 1.7227, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.1831799700406591, | |
| "grad_norm": 5.526188850402832, | |
| "learning_rate": 3.773810208371417e-05, | |
| "loss": 1.7781, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 1.1853199229616949, | |
| "grad_norm": 5.152137756347656, | |
| "learning_rate": 3.7570204631717395e-05, | |
| "loss": 1.775, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.1874598758827306, | |
| "grad_norm": 6.030661106109619, | |
| "learning_rate": 3.740245643574184e-05, | |
| "loss": 1.8056, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.1895998288037664, | |
| "grad_norm": 5.847061634063721, | |
| "learning_rate": 3.723485951009486e-05, | |
| "loss": 1.7345, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.191739781724802, | |
| "grad_norm": 5.8102126121521, | |
| "learning_rate": 3.706741586726738e-05, | |
| "loss": 1.7519, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 1.1938797346458379, | |
| "grad_norm": 6.639953136444092, | |
| "learning_rate": 3.690012751790972e-05, | |
| "loss": 1.7485, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.1960196875668736, | |
| "grad_norm": 5.996955871582031, | |
| "learning_rate": 3.673299647080742e-05, | |
| "loss": 1.7072, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 1.1981596404879093, | |
| "grad_norm": 5.289943218231201, | |
| "learning_rate": 3.656602473285717e-05, | |
| "loss": 1.7831, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.200299593408945, | |
| "grad_norm": 5.406320571899414, | |
| "learning_rate": 3.639921430904268e-05, | |
| "loss": 1.7397, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.2024395463299808, | |
| "grad_norm": 6.86167573928833, | |
| "learning_rate": 3.6232567202410624e-05, | |
| "loss": 1.7408, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.2045794992510164, | |
| "grad_norm": 5.778091907501221, | |
| "learning_rate": 3.606608541404656e-05, | |
| "loss": 1.765, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.2067194521720521, | |
| "grad_norm": 6.24983024597168, | |
| "learning_rate": 3.5899770943050924e-05, | |
| "loss": 1.7543, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.2088594050930879, | |
| "grad_norm": 6.323977470397949, | |
| "learning_rate": 3.573362578651501e-05, | |
| "loss": 1.7168, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.2109993580141236, | |
| "grad_norm": 6.535098075866699, | |
| "learning_rate": 3.556765193949704e-05, | |
| "loss": 1.7583, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.2131393109351594, | |
| "grad_norm": 5.957965850830078, | |
| "learning_rate": 3.5401851394998084e-05, | |
| "loss": 1.7272, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.2152792638561951, | |
| "grad_norm": 5.54447078704834, | |
| "learning_rate": 3.523622614393827e-05, | |
| "loss": 1.8009, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.2174192167772309, | |
| "grad_norm": 5.613691329956055, | |
| "learning_rate": 3.5070778175132806e-05, | |
| "loss": 1.7559, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.2195591696982666, | |
| "grad_norm": 5.480210781097412, | |
| "learning_rate": 3.4905509475268104e-05, | |
| "loss": 1.74, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.2216991226193024, | |
| "grad_norm": 6.216781139373779, | |
| "learning_rate": 3.474042202887792e-05, | |
| "loss": 1.7272, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.223839075540338, | |
| "grad_norm": 6.038024425506592, | |
| "learning_rate": 3.4575517818319534e-05, | |
| "loss": 1.7408, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.2259790284613739, | |
| "grad_norm": 5.762340068817139, | |
| "learning_rate": 3.4410798823749964e-05, | |
| "loss": 1.7576, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.2281189813824096, | |
| "grad_norm": 6.565494060516357, | |
| "learning_rate": 3.424626702310214e-05, | |
| "loss": 1.7008, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.2302589343034454, | |
| "grad_norm": 6.1579508781433105, | |
| "learning_rate": 3.408192439206121e-05, | |
| "loss": 1.7218, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.232398887224481, | |
| "grad_norm": 5.965446949005127, | |
| "learning_rate": 3.391777290404077e-05, | |
| "loss": 1.6989, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.2345388401455168, | |
| "grad_norm": 6.2155070304870605, | |
| "learning_rate": 3.3753814530159176e-05, | |
| "loss": 1.7461, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.2366787930665526, | |
| "grad_norm": 6.247200965881348, | |
| "learning_rate": 3.3590051239215916e-05, | |
| "loss": 1.7542, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.2388187459875883, | |
| "grad_norm": 4.975124359130859, | |
| "learning_rate": 3.342648499766791e-05, | |
| "loss": 1.7299, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.240958698908624, | |
| "grad_norm": 5.148104190826416, | |
| "learning_rate": 3.326311776960593e-05, | |
| "loss": 1.7252, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.2430986518296598, | |
| "grad_norm": 6.518229961395264, | |
| "learning_rate": 3.309995151673103e-05, | |
| "loss": 1.754, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.2452386047506954, | |
| "grad_norm": 5.220431804656982, | |
| "learning_rate": 3.293698819833093e-05, | |
| "loss": 1.7349, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.2473785576717313, | |
| "grad_norm": 5.202417373657227, | |
| "learning_rate": 3.277422977125656e-05, | |
| "loss": 1.7521, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.2495185105927669, | |
| "grad_norm": 5.129791736602783, | |
| "learning_rate": 3.2611678189898523e-05, | |
| "loss": 1.7653, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.2516584635138028, | |
| "grad_norm": 6.944059371948242, | |
| "learning_rate": 3.244933540616363e-05, | |
| "loss": 1.7411, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.2537984164348384, | |
| "grad_norm": 5.993988513946533, | |
| "learning_rate": 3.228720336945148e-05, | |
| "loss": 1.7544, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.255938369355874, | |
| "grad_norm": 5.735143184661865, | |
| "learning_rate": 3.2125284026631006e-05, | |
| "loss": 1.7374, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.2580783222769099, | |
| "grad_norm": 5.272975444793701, | |
| "learning_rate": 3.196357932201717e-05, | |
| "loss": 1.7362, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.2602182751979456, | |
| "grad_norm": 6.259987831115723, | |
| "learning_rate": 3.1802091197347544e-05, | |
| "loss": 1.745, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.2623582281189814, | |
| "grad_norm": 5.6015400886535645, | |
| "learning_rate": 3.164082159175904e-05, | |
| "loss": 1.7478, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.264498181040017, | |
| "grad_norm": 6.1617536544799805, | |
| "learning_rate": 3.147977244176461e-05, | |
| "loss": 1.757, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.2666381339610528, | |
| "grad_norm": 4.88234806060791, | |
| "learning_rate": 3.131894568122999e-05, | |
| "loss": 1.73, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.2687780868820886, | |
| "grad_norm": 5.751287460327148, | |
| "learning_rate": 3.115834324135047e-05, | |
| "loss": 1.7807, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.2709180398031243, | |
| "grad_norm": 5.901682376861572, | |
| "learning_rate": 3.099796705062773e-05, | |
| "loss": 1.7535, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.27305799272416, | |
| "grad_norm": 5.5405144691467285, | |
| "learning_rate": 3.083781903484667e-05, | |
| "loss": 1.7742, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.2751979456451958, | |
| "grad_norm": 5.621067523956299, | |
| "learning_rate": 3.067790111705225e-05, | |
| "loss": 1.6996, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.2773378985662316, | |
| "grad_norm": 5.193610191345215, | |
| "learning_rate": 3.051821521752647e-05, | |
| "loss": 1.7568, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.2794778514872673, | |
| "grad_norm": 5.218043804168701, | |
| "learning_rate": 3.0358763253765264e-05, | |
| "loss": 1.7299, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.281617804408303, | |
| "grad_norm": 6.297114849090576, | |
| "learning_rate": 3.0199547140455474e-05, | |
| "loss": 1.7814, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.2837577573293388, | |
| "grad_norm": 6.044188976287842, | |
| "learning_rate": 3.0040568789451862e-05, | |
| "loss": 1.7381, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2858977102503744, | |
| "grad_norm": 5.990639686584473, | |
| "learning_rate": 2.9881830109754184e-05, | |
| "loss": 1.7458, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.2880376631714103, | |
| "grad_norm": 5.563923358917236, | |
| "learning_rate": 2.9723333007484218e-05, | |
| "loss": 1.7403, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.2901776160924459, | |
| "grad_norm": 5.493687152862549, | |
| "learning_rate": 2.9565079385862903e-05, | |
| "loss": 1.7141, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.2923175690134818, | |
| "grad_norm": 5.307217121124268, | |
| "learning_rate": 2.9407071145187494e-05, | |
| "loss": 1.7546, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.2944575219345174, | |
| "grad_norm": 5.565853118896484, | |
| "learning_rate": 2.924931018280871e-05, | |
| "loss": 1.7392, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.296597474855553, | |
| "grad_norm": 5.918313503265381, | |
| "learning_rate": 2.9091798393107994e-05, | |
| "loss": 1.7598, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.2987374277765888, | |
| "grad_norm": 5.450323104858398, | |
| "learning_rate": 2.8934537667474732e-05, | |
| "loss": 1.7204, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.3008773806976246, | |
| "grad_norm": 5.59390115737915, | |
| "learning_rate": 2.8777529894283538e-05, | |
| "loss": 1.7345, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.3030173336186603, | |
| "grad_norm": 5.590770721435547, | |
| "learning_rate": 2.8620776958871627e-05, | |
| "loss": 1.7814, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.305157286539696, | |
| "grad_norm": 5.129935264587402, | |
| "learning_rate": 2.8464280743516102e-05, | |
| "loss": 1.7527, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.3072972394607318, | |
| "grad_norm": 6.99540376663208, | |
| "learning_rate": 2.8308043127411423e-05, | |
| "loss": 1.7197, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.3094371923817676, | |
| "grad_norm": 6.13936710357666, | |
| "learning_rate": 2.8152065986646788e-05, | |
| "loss": 1.7153, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.3115771453028033, | |
| "grad_norm": 5.489097595214844, | |
| "learning_rate": 2.7996351194183646e-05, | |
| "loss": 1.7562, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.313717098223839, | |
| "grad_norm": 6.8664398193359375, | |
| "learning_rate": 2.784090061983317e-05, | |
| "loss": 1.7034, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.3158570511448748, | |
| "grad_norm": 5.367057800292969, | |
| "learning_rate": 2.7685716130233842e-05, | |
| "loss": 1.6691, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.3179970040659106, | |
| "grad_norm": 6.975956916809082, | |
| "learning_rate": 2.7530799588829005e-05, | |
| "loss": 1.7107, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.3201369569869463, | |
| "grad_norm": 6.1998443603515625, | |
| "learning_rate": 2.7376152855844495e-05, | |
| "loss": 1.7955, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.322276909907982, | |
| "grad_norm": 6.424160957336426, | |
| "learning_rate": 2.7221777788266324e-05, | |
| "loss": 1.7312, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.3244168628290178, | |
| "grad_norm": 5.118837356567383, | |
| "learning_rate": 2.7067676239818364e-05, | |
| "loss": 1.7105, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.3265568157500536, | |
| "grad_norm": 5.895681858062744, | |
| "learning_rate": 2.6913850060940083e-05, | |
| "loss": 1.7077, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.3286967686710893, | |
| "grad_norm": 6.501992702484131, | |
| "learning_rate": 2.676030109876434e-05, | |
| "loss": 1.728, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.3308367215921248, | |
| "grad_norm": 5.911489486694336, | |
| "learning_rate": 2.66070311970952e-05, | |
| "loss": 1.7214, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.3329766745131608, | |
| "grad_norm": 5.91454553604126, | |
| "learning_rate": 2.6454042196385798e-05, | |
| "loss": 1.6895, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.3351166274341963, | |
| "grad_norm": 5.649311065673828, | |
| "learning_rate": 2.6301335933716176e-05, | |
| "loss": 1.7488, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.3372565803552323, | |
| "grad_norm": 7.105709075927734, | |
| "learning_rate": 2.614891424277135e-05, | |
| "loss": 1.6687, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.3393965332762678, | |
| "grad_norm": 5.769228458404541, | |
| "learning_rate": 2.599677895381919e-05, | |
| "loss": 1.7175, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.3415364861973036, | |
| "grad_norm": 7.222846984863281, | |
| "learning_rate": 2.5844931893688473e-05, | |
| "loss": 1.721, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.3436764391183393, | |
| "grad_norm": 5.122998237609863, | |
| "learning_rate": 2.5693374885746957e-05, | |
| "loss": 1.7325, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.345816392039375, | |
| "grad_norm": 5.9646148681640625, | |
| "learning_rate": 2.5542109749879473e-05, | |
| "loss": 1.7016, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.3479563449604108, | |
| "grad_norm": 5.405865669250488, | |
| "learning_rate": 2.5391138302466062e-05, | |
| "loss": 1.7666, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.3500962978814466, | |
| "grad_norm": 6.047122478485107, | |
| "learning_rate": 2.5240462356360184e-05, | |
| "loss": 1.7438, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.3522362508024823, | |
| "grad_norm": 6.545495986938477, | |
| "learning_rate": 2.5090083720866952e-05, | |
| "loss": 1.7322, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.354376203723518, | |
| "grad_norm": 6.075828552246094, | |
| "learning_rate": 2.4940004201721384e-05, | |
| "loss": 1.7089, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.3565161566445538, | |
| "grad_norm": 5.580131530761719, | |
| "learning_rate": 2.479022560106673e-05, | |
| "loss": 1.7456, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.3586561095655896, | |
| "grad_norm": 5.123158931732178, | |
| "learning_rate": 2.4640749717432854e-05, | |
| "loss": 1.7118, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.3607960624866253, | |
| "grad_norm": 5.250161170959473, | |
| "learning_rate": 2.4491578345714587e-05, | |
| "loss": 1.7271, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.362936015407661, | |
| "grad_norm": 5.588467121124268, | |
| "learning_rate": 2.434271327715023e-05, | |
| "loss": 1.7026, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.3650759683286968, | |
| "grad_norm": 5.4725518226623535, | |
| "learning_rate": 2.419415629930001e-05, | |
| "loss": 1.7128, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.3672159212497326, | |
| "grad_norm": 5.664406776428223, | |
| "learning_rate": 2.4045909196024624e-05, | |
| "loss": 1.7244, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.3693558741707683, | |
| "grad_norm": 4.935891151428223, | |
| "learning_rate": 2.389797374746382e-05, | |
| "loss": 1.7123, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.3714958270918038, | |
| "grad_norm": 5.0147881507873535, | |
| "learning_rate": 2.3750351730015015e-05, | |
| "loss": 1.6921, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.3736357800128398, | |
| "grad_norm": 5.382151126861572, | |
| "learning_rate": 2.3603044916311963e-05, | |
| "loss": 1.78, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.3757757329338753, | |
| "grad_norm": 5.000919818878174, | |
| "learning_rate": 2.345605507520352e-05, | |
| "loss": 1.699, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.3779156858549113, | |
| "grad_norm": 4.845434665679932, | |
| "learning_rate": 2.3309383971732312e-05, | |
| "loss": 1.7508, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.3800556387759468, | |
| "grad_norm": 4.90060567855835, | |
| "learning_rate": 2.3163033367113602e-05, | |
| "loss": 1.6904, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.3821955916969826, | |
| "grad_norm": 5.377330303192139, | |
| "learning_rate": 2.3017005018714126e-05, | |
| "loss": 1.7236, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.3843355446180183, | |
| "grad_norm": 5.329347133636475, | |
| "learning_rate": 2.2871300680030995e-05, | |
| "loss": 1.6982, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.386475497539054, | |
| "grad_norm": 4.9429931640625, | |
| "learning_rate": 2.2725922100670644e-05, | |
| "loss": 1.6973, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.3886154504600898, | |
| "grad_norm": 5.456453323364258, | |
| "learning_rate": 2.25808710263278e-05, | |
| "loss": 1.7033, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.3907554033811256, | |
| "grad_norm": 5.3230791091918945, | |
| "learning_rate": 2.243614919876454e-05, | |
| "loss": 1.7447, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.3928953563021613, | |
| "grad_norm": 5.888358116149902, | |
| "learning_rate": 2.22917583557894e-05, | |
| "loss": 1.7347, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.395035309223197, | |
| "grad_norm": 5.590538024902344, | |
| "learning_rate": 2.214770023123641e-05, | |
| "loss": 1.7566, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.3971752621442328, | |
| "grad_norm": 5.970736503601074, | |
| "learning_rate": 2.2003976554944405e-05, | |
| "loss": 1.6941, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.3993152150652686, | |
| "grad_norm": 5.476358413696289, | |
| "learning_rate": 2.186058905273618e-05, | |
| "loss": 1.7016, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.4014551679863043, | |
| "grad_norm": 5.488466262817383, | |
| "learning_rate": 2.1717539446397754e-05, | |
| "loss": 1.7063, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.40359512090734, | |
| "grad_norm": 5.273731708526611, | |
| "learning_rate": 2.1574829453657746e-05, | |
| "loss": 1.7218, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.4057350738283758, | |
| "grad_norm": 5.290144920349121, | |
| "learning_rate": 2.1432460788166704e-05, | |
| "loss": 1.6931, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.4078750267494116, | |
| "grad_norm": 6.16803503036499, | |
| "learning_rate": 2.1290435159476545e-05, | |
| "loss": 1.6872, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.4100149796704473, | |
| "grad_norm": 5.351411819458008, | |
| "learning_rate": 2.1148754273020038e-05, | |
| "loss": 1.678, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.412154932591483, | |
| "grad_norm": 6.350516319274902, | |
| "learning_rate": 2.1007419830090306e-05, | |
| "loss": 1.728, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.4142948855125188, | |
| "grad_norm": 5.602326393127441, | |
| "learning_rate": 2.0866433527820406e-05, | |
| "loss": 1.6969, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.4164348384335543, | |
| "grad_norm": 7.531297206878662, | |
| "learning_rate": 2.072579705916295e-05, | |
| "loss": 1.738, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.4185747913545903, | |
| "grad_norm": 5.831987380981445, | |
| "learning_rate": 2.058551211286977e-05, | |
| "loss": 1.7518, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.4207147442756258, | |
| "grad_norm": 6.611841678619385, | |
| "learning_rate": 2.0445580373471658e-05, | |
| "loss": 1.6811, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.4228546971966618, | |
| "grad_norm": 4.91347599029541, | |
| "learning_rate": 2.03060035212581e-05, | |
| "loss": 1.6897, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.4249946501176973, | |
| "grad_norm": 6.171419620513916, | |
| "learning_rate": 2.0166783232257154e-05, | |
| "loss": 1.739, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.427134603038733, | |
| "grad_norm": 5.1369524002075195, | |
| "learning_rate": 2.0027921178215274e-05, | |
| "loss": 1.7164, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.4292745559597688, | |
| "grad_norm": 6.778637409210205, | |
| "learning_rate": 1.9889419026577266e-05, | |
| "loss": 1.7224, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.4314145088808046, | |
| "grad_norm": 4.898647785186768, | |
| "learning_rate": 1.9751278440466248e-05, | |
| "loss": 1.6844, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.4335544618018403, | |
| "grad_norm": 5.825972080230713, | |
| "learning_rate": 1.9613501078663693e-05, | |
| "loss": 1.7451, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.435694414722876, | |
| "grad_norm": 5.397356986999512, | |
| "learning_rate": 1.9476088595589504e-05, | |
| "loss": 1.715, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.4378343676439118, | |
| "grad_norm": 5.8002519607543945, | |
| "learning_rate": 1.9339042641282146e-05, | |
| "loss": 1.7006, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.4399743205649476, | |
| "grad_norm": 5.10746431350708, | |
| "learning_rate": 1.9202364861378842e-05, | |
| "loss": 1.6758, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.4421142734859833, | |
| "grad_norm": 5.267380237579346, | |
| "learning_rate": 1.9066056897095796e-05, | |
| "loss": 1.7395, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.444254226407019, | |
| "grad_norm": 5.395501136779785, | |
| "learning_rate": 1.8930120385208495e-05, | |
| "loss": 1.7082, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.4463941793280548, | |
| "grad_norm": 5.383699893951416, | |
| "learning_rate": 1.8794556958032062e-05, | |
| "loss": 1.7182, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.4485341322490906, | |
| "grad_norm": 5.4255290031433105, | |
| "learning_rate": 1.865936824340164e-05, | |
| "loss": 1.6796, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.4506740851701263, | |
| "grad_norm": 5.540951728820801, | |
| "learning_rate": 1.8524555864652865e-05, | |
| "loss": 1.7312, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.452814038091162, | |
| "grad_norm": 6.032477855682373, | |
| "learning_rate": 1.839012144060236e-05, | |
| "loss": 1.6458, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.4549539910121978, | |
| "grad_norm": 5.043034553527832, | |
| "learning_rate": 1.825606658552826e-05, | |
| "loss": 1.6969, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.4570939439332335, | |
| "grad_norm": 5.323840618133545, | |
| "learning_rate": 1.8122392909150904e-05, | |
| "loss": 1.7225, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.4592338968542693, | |
| "grad_norm": 5.682520389556885, | |
| "learning_rate": 1.7989102016613463e-05, | |
| "loss": 1.6943, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.4613738497753048, | |
| "grad_norm": 5.428680419921875, | |
| "learning_rate": 1.7856195508462663e-05, | |
| "loss": 1.6992, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.4635138026963408, | |
| "grad_norm": 5.1605024337768555, | |
| "learning_rate": 1.7723674980629572e-05, | |
| "loss": 1.6569, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.4656537556173763, | |
| "grad_norm": 5.788964748382568, | |
| "learning_rate": 1.759154202441044e-05, | |
| "loss": 1.7081, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.4677937085384123, | |
| "grad_norm": 5.63525390625, | |
| "learning_rate": 1.7459798226447577e-05, | |
| "loss": 1.7133, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.4699336614594478, | |
| "grad_norm": 5.6762003898620605, | |
| "learning_rate": 1.7328445168710323e-05, | |
| "loss": 1.6922, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.4720736143804836, | |
| "grad_norm": 5.393453121185303, | |
| "learning_rate": 1.7197484428476023e-05, | |
| "loss": 1.6721, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.4742135673015193, | |
| "grad_norm": 4.811567783355713, | |
| "learning_rate": 1.7066917578311132e-05, | |
| "loss": 1.7071, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.476353520222555, | |
| "grad_norm": 5.325284004211426, | |
| "learning_rate": 1.693674618605227e-05, | |
| "loss": 1.7109, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.4784934731435908, | |
| "grad_norm": 5.2245774269104, | |
| "learning_rate": 1.6806971814787458e-05, | |
| "loss": 1.7103, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.4806334260646266, | |
| "grad_norm": 5.08640718460083, | |
| "learning_rate": 1.66775960228373e-05, | |
| "loss": 1.7094, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.4827733789856623, | |
| "grad_norm": 4.874035835266113, | |
| "learning_rate": 1.6548620363736294e-05, | |
| "loss": 1.7177, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.484913331906698, | |
| "grad_norm": 5.959578037261963, | |
| "learning_rate": 1.6420046386214184e-05, | |
| "loss": 1.6586, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.4870532848277338, | |
| "grad_norm": 5.682189464569092, | |
| "learning_rate": 1.6291875634177346e-05, | |
| "loss": 1.6731, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.4891932377487695, | |
| "grad_norm": 5.864963054656982, | |
| "learning_rate": 1.616410964669025e-05, | |
| "loss": 1.6708, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.4913331906698053, | |
| "grad_norm": 5.3527140617370605, | |
| "learning_rate": 1.6036749957956993e-05, | |
| "loss": 1.6596, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.493473143590841, | |
| "grad_norm": 4.715349197387695, | |
| "learning_rate": 1.5909798097302865e-05, | |
| "loss": 1.6889, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.4956130965118768, | |
| "grad_norm": 5.459346771240234, | |
| "learning_rate": 1.578325558915598e-05, | |
| "loss": 1.6716, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.4977530494329125, | |
| "grad_norm": 5.306508541107178, | |
| "learning_rate": 1.5657123953029003e-05, | |
| "loss": 1.673, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4998930023539483, | |
| "grad_norm": 5.33494234085083, | |
| "learning_rate": 1.5531404703500845e-05, | |
| "loss": 1.7047, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.5020329552749838, | |
| "grad_norm": 5.494200229644775, | |
| "learning_rate": 1.5406099350198544e-05, | |
| "loss": 1.7061, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.5041729081960198, | |
| "grad_norm": 4.832003116607666, | |
| "learning_rate": 1.528120939777908e-05, | |
| "loss": 1.6623, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.5063128611170553, | |
| "grad_norm": 5.2969970703125, | |
| "learning_rate": 1.5156736345911342e-05, | |
| "loss": 1.6838, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.5084528140380913, | |
| "grad_norm": 5.263791084289551, | |
| "learning_rate": 1.5032681689258105e-05, | |
| "loss": 1.691, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.5105927669591268, | |
| "grad_norm": 5.589069843292236, | |
| "learning_rate": 1.4909046917458097e-05, | |
| "loss": 1.7456, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.5127327198801628, | |
| "grad_norm": 5.666341781616211, | |
| "learning_rate": 1.4785833515108088e-05, | |
| "loss": 1.7284, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.5148726728011983, | |
| "grad_norm": 5.460354328155518, | |
| "learning_rate": 1.4663042961745083e-05, | |
| "loss": 1.6767, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.5170126257222343, | |
| "grad_norm": 5.311362266540527, | |
| "learning_rate": 1.4540676731828546e-05, | |
| "loss": 1.662, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.5191525786432698, | |
| "grad_norm": 5.237598419189453, | |
| "learning_rate": 1.4418736294722701e-05, | |
| "loss": 1.6841, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.5212925315643056, | |
| "grad_norm": 5.252871513366699, | |
| "learning_rate": 1.4297223114678887e-05, | |
| "loss": 1.7057, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.5234324844853413, | |
| "grad_norm": 4.881004333496094, | |
| "learning_rate": 1.4176138650817967e-05, | |
| "loss": 1.699, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.525572437406377, | |
| "grad_norm": 5.0006103515625, | |
| "learning_rate": 1.4055484357112808e-05, | |
| "loss": 1.7438, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.5277123903274128, | |
| "grad_norm": 4.589049339294434, | |
| "learning_rate": 1.3935261682370849e-05, | |
| "loss": 1.7316, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.5298523432484485, | |
| "grad_norm": 5.060586929321289, | |
| "learning_rate": 1.3815472070216656e-05, | |
| "loss": 1.7279, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.5319922961694843, | |
| "grad_norm": 5.7035322189331055, | |
| "learning_rate": 1.3696116959074635e-05, | |
| "loss": 1.7232, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.53413224909052, | |
| "grad_norm": 5.462157726287842, | |
| "learning_rate": 1.3577197782151724e-05, | |
| "loss": 1.674, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.5362722020115558, | |
| "grad_norm": 5.079245567321777, | |
| "learning_rate": 1.3458715967420193e-05, | |
| "loss": 1.7135, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.5384121549325915, | |
| "grad_norm": 5.461935043334961, | |
| "learning_rate": 1.3340672937600518e-05, | |
| "loss": 1.6261, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.5405521078536273, | |
| "grad_norm": 5.3971076011657715, | |
| "learning_rate": 1.3223070110144265e-05, | |
| "loss": 1.7194, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.5426920607746628, | |
| "grad_norm": 5.163456916809082, | |
| "learning_rate": 1.3105908897217084e-05, | |
| "loss": 1.6877, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.5448320136956988, | |
| "grad_norm": 6.106560707092285, | |
| "learning_rate": 1.2989190705681758e-05, | |
| "loss": 1.6893, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.5469719666167343, | |
| "grad_norm": 5.627121925354004, | |
| "learning_rate": 1.2872916937081308e-05, | |
| "loss": 1.6939, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.5491119195377703, | |
| "grad_norm": 5.5462117195129395, | |
| "learning_rate": 1.2757088987622152e-05, | |
| "loss": 1.6919, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.5512518724588058, | |
| "grad_norm": 5.333662986755371, | |
| "learning_rate": 1.2641708248157341e-05, | |
| "loss": 1.6444, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.5533918253798418, | |
| "grad_norm": 6.698999881744385, | |
| "learning_rate": 1.2526776104169868e-05, | |
| "loss": 1.7085, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.5555317783008773, | |
| "grad_norm": 5.591616630554199, | |
| "learning_rate": 1.241229393575603e-05, | |
| "loss": 1.6643, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.5576717312219133, | |
| "grad_norm": 5.279683589935303, | |
| "learning_rate": 1.2298263117608855e-05, | |
| "loss": 1.624, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.5598116841429488, | |
| "grad_norm": 5.046063423156738, | |
| "learning_rate": 1.2184685019001574e-05, | |
| "loss": 1.6992, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.5619516370639845, | |
| "grad_norm": 5.270909786224365, | |
| "learning_rate": 1.2071561003771214e-05, | |
| "loss": 1.6567, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.5640915899850203, | |
| "grad_norm": 5.26682186126709, | |
| "learning_rate": 1.1958892430302198e-05, | |
| "loss": 1.7055, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.566231542906056, | |
| "grad_norm": 5.099318504333496, | |
| "learning_rate": 1.184668065151005e-05, | |
| "loss": 1.7185, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.5683714958270918, | |
| "grad_norm": 5.019136905670166, | |
| "learning_rate": 1.1734927014825115e-05, | |
| "loss": 1.7033, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.5705114487481275, | |
| "grad_norm": 5.4806437492370605, | |
| "learning_rate": 1.162363286217642e-05, | |
| "loss": 1.7044, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.5726514016691633, | |
| "grad_norm": 4.8851118087768555, | |
| "learning_rate": 1.151279952997556e-05, | |
| "loss": 1.6831, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.574791354590199, | |
| "grad_norm": 5.282357692718506, | |
| "learning_rate": 1.1402428349100585e-05, | |
| "loss": 1.6689, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.5769313075112348, | |
| "grad_norm": 5.409157752990723, | |
| "learning_rate": 1.1292520644880105e-05, | |
| "loss": 1.6455, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.5790712604322705, | |
| "grad_norm": 5.711396217346191, | |
| "learning_rate": 1.1183077737077336e-05, | |
| "loss": 1.6363, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.5812112133533063, | |
| "grad_norm": 5.7200541496276855, | |
| "learning_rate": 1.107410093987425e-05, | |
| "loss": 1.6737, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.5833511662743418, | |
| "grad_norm": 5.475973606109619, | |
| "learning_rate": 1.0965591561855788e-05, | |
| "loss": 1.6393, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.5854911191953778, | |
| "grad_norm": 5.419764995574951, | |
| "learning_rate": 1.0857550905994175e-05, | |
| "loss": 1.6862, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.5876310721164133, | |
| "grad_norm": 4.915116786956787, | |
| "learning_rate": 1.0749980269633243e-05, | |
| "loss": 1.6878, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.5897710250374493, | |
| "grad_norm": 5.841743469238281, | |
| "learning_rate": 1.0642880944472878e-05, | |
| "loss": 1.6474, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.5919109779584848, | |
| "grad_norm": 5.388934135437012, | |
| "learning_rate": 1.0536254216553487e-05, | |
| "loss": 1.6828, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.5940509308795208, | |
| "grad_norm": 4.492440223693848, | |
| "learning_rate": 1.0430101366240575e-05, | |
| "loss": 1.6947, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.5961908838005563, | |
| "grad_norm": 4.8876495361328125, | |
| "learning_rate": 1.0324423668209349e-05, | |
| "loss": 1.7048, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.5983308367215923, | |
| "grad_norm": 5.363291263580322, | |
| "learning_rate": 1.021922239142944e-05, | |
| "loss": 1.6837, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.6004707896426278, | |
| "grad_norm": 5.814448356628418, | |
| "learning_rate": 1.0114498799149635e-05, | |
| "loss": 1.6784, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.6026107425636638, | |
| "grad_norm": 5.522982120513916, | |
| "learning_rate": 1.0010254148882731e-05, | |
| "loss": 1.6786, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.6047506954846993, | |
| "grad_norm": 5.147754192352295, | |
| "learning_rate": 9.906489692390426e-06, | |
| "loss": 1.7548, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.606890648405735, | |
| "grad_norm": 5.689370632171631, | |
| "learning_rate": 9.803206675668286e-06, | |
| "loss": 1.6861, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.6090306013267708, | |
| "grad_norm": 5.778447151184082, | |
| "learning_rate": 9.700406338930778e-06, | |
| "loss": 1.7062, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.6111705542478065, | |
| "grad_norm": 4.873246669769287, | |
| "learning_rate": 9.59808991659641e-06, | |
| "loss": 1.6678, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.6133105071688423, | |
| "grad_norm": 5.669099807739258, | |
| "learning_rate": 9.496258637272849e-06, | |
| "loss": 1.6874, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.615450460089878, | |
| "grad_norm": 5.129676818847656, | |
| "learning_rate": 9.394913723742227e-06, | |
| "loss": 1.6908, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.6175904130109138, | |
| "grad_norm": 5.257515907287598, | |
| "learning_rate": 9.294056392946427e-06, | |
| "loss": 1.673, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.6197303659319495, | |
| "grad_norm": 4.824296474456787, | |
| "learning_rate": 9.193687855972466e-06, | |
| "loss": 1.673, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.6218703188529853, | |
| "grad_norm": 5.123586654663086, | |
| "learning_rate": 9.093809318037989e-06, | |
| "loss": 1.7033, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.624010271774021, | |
| "grad_norm": 4.633101463317871, | |
| "learning_rate": 8.994421978476735e-06, | |
| "loss": 1.693, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.6261502246950568, | |
| "grad_norm": 5.188746452331543, | |
| "learning_rate": 8.8955270307242e-06, | |
| "loss": 1.6694, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.6282901776160923, | |
| "grad_norm": 5.478397369384766, | |
| "learning_rate": 8.797125662303257e-06, | |
| "loss": 1.6574, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.6304301305371283, | |
| "grad_norm": 5.444027900695801, | |
| "learning_rate": 8.699219054809937e-06, | |
| "loss": 1.6467, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.6325700834581638, | |
| "grad_norm": 4.998203754425049, | |
| "learning_rate": 8.6018083838992e-06, | |
| "loss": 1.6715, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.6347100363791998, | |
| "grad_norm": 5.098468780517578, | |
| "learning_rate": 8.504894819270854e-06, | |
| "loss": 1.6857, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.6368499893002353, | |
| "grad_norm": 5.074070453643799, | |
| "learning_rate": 8.408479524655477e-06, | |
| "loss": 1.728, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.6389899422212713, | |
| "grad_norm": 5.682558536529541, | |
| "learning_rate": 8.312563657800475e-06, | |
| "loss": 1.6509, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.6411298951423068, | |
| "grad_norm": 5.497644424438477, | |
| "learning_rate": 8.217148370456152e-06, | |
| "loss": 1.6649, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.6432698480633428, | |
| "grad_norm": 4.901436805725098, | |
| "learning_rate": 8.122234808361907e-06, | |
| "loss": 1.6662, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.6454098009843783, | |
| "grad_norm": 4.8296732902526855, | |
| "learning_rate": 8.027824111232435e-06, | |
| "loss": 1.6546, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.647549753905414, | |
| "grad_norm": 5.374551773071289, | |
| "learning_rate": 7.933917412744097e-06, | |
| "loss": 1.6182, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.6496897068264498, | |
| "grad_norm": 4.431797504425049, | |
| "learning_rate": 7.840515840521263e-06, | |
| "loss": 1.6917, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.6518296597474855, | |
| "grad_norm": 4.973811626434326, | |
| "learning_rate": 7.747620516122777e-06, | |
| "loss": 1.625, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.6539696126685213, | |
| "grad_norm": 5.412280082702637, | |
| "learning_rate": 7.655232555028518e-06, | |
| "loss": 1.6722, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.656109565589557, | |
| "grad_norm": 5.874164581298828, | |
| "learning_rate": 7.563353066625972e-06, | |
| "loss": 1.6448, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.6582495185105928, | |
| "grad_norm": 5.387955188751221, | |
| "learning_rate": 7.471983154196932e-06, | |
| "loss": 1.6627, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.6603894714316285, | |
| "grad_norm": 4.935237407684326, | |
| "learning_rate": 7.381123914904231e-06, | |
| "loss": 1.6915, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.6625294243526643, | |
| "grad_norm": 4.828623294830322, | |
| "learning_rate": 7.2907764397785845e-06, | |
| "loss": 1.6893, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.6646693772737, | |
| "grad_norm": 5.0376434326171875, | |
| "learning_rate": 7.200941813705497e-06, | |
| "loss": 1.6789, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.6668093301947358, | |
| "grad_norm": 5.704905986785889, | |
| "learning_rate": 7.111621115412193e-06, | |
| "loss": 1.6262, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.6689492831157713, | |
| "grad_norm": 5.178145885467529, | |
| "learning_rate": 7.02281541745472e-06, | |
| "loss": 1.7011, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.6710892360368073, | |
| "grad_norm": 5.51984167098999, | |
| "learning_rate": 6.9345257862050264e-06, | |
| "loss": 1.6325, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.6732291889578428, | |
| "grad_norm": 5.094444274902344, | |
| "learning_rate": 6.846753281838169e-06, | |
| "loss": 1.687, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.6753691418788788, | |
| "grad_norm": 4.961489200592041, | |
| "learning_rate": 6.759498958319599e-06, | |
| "loss": 1.7111, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.6775090947999143, | |
| "grad_norm": 5.067066669464111, | |
| "learning_rate": 6.6727638633924725e-06, | |
| "loss": 1.6547, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.6796490477209503, | |
| "grad_norm": 4.8792033195495605, | |
| "learning_rate": 6.58654903856511e-06, | |
| "loss": 1.645, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.6817890006419858, | |
| "grad_norm": 5.298194408416748, | |
| "learning_rate": 6.500855519098448e-06, | |
| "loss": 1.6642, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.6839289535630217, | |
| "grad_norm": 4.876554489135742, | |
| "learning_rate": 6.415684333993649e-06, | |
| "loss": 1.7346, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.6860689064840573, | |
| "grad_norm": 5.350904941558838, | |
| "learning_rate": 6.3310365059797094e-06, | |
| "loss": 1.7127, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.6882088594050932, | |
| "grad_norm": 5.282578945159912, | |
| "learning_rate": 6.246913051501202e-06, | |
| "loss": 1.6912, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.6903488123261288, | |
| "grad_norm": 5.434889793395996, | |
| "learning_rate": 6.163314980706058e-06, | |
| "loss": 1.6402, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.6924887652471645, | |
| "grad_norm": 5.1130571365356445, | |
| "learning_rate": 6.080243297433447e-06, | |
| "loss": 1.6301, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.6946287181682003, | |
| "grad_norm": 5.187545299530029, | |
| "learning_rate": 5.997698999201723e-06, | |
| "loss": 1.6238, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.696768671089236, | |
| "grad_norm": 4.982557773590088, | |
| "learning_rate": 5.915683077196415e-06, | |
| "loss": 1.6592, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.6989086240102718, | |
| "grad_norm": 4.824088096618652, | |
| "learning_rate": 5.834196516258378e-06, | |
| "loss": 1.6593, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.7010485769313075, | |
| "grad_norm": 4.828253746032715, | |
| "learning_rate": 5.753240294871937e-06, | |
| "loss": 1.6426, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.7031885298523433, | |
| "grad_norm": 5.136155605316162, | |
| "learning_rate": 5.6728153851531295e-06, | |
| "loss": 1.6446, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.705328482773379, | |
| "grad_norm": 5.751850128173828, | |
| "learning_rate": 5.592922752838053e-06, | |
| "loss": 1.6836, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.7074684356944148, | |
| "grad_norm": 5.147610664367676, | |
| "learning_rate": 5.513563357271256e-06, | |
| "loss": 1.6492, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.7096083886154505, | |
| "grad_norm": 5.698842525482178, | |
| "learning_rate": 5.43473815139422e-06, | |
| "loss": 1.643, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.7117483415364863, | |
| "grad_norm": 4.993765354156494, | |
| "learning_rate": 5.356448081733922e-06, | |
| "loss": 1.6808, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.7138882944575218, | |
| "grad_norm": 5.209753036499023, | |
| "learning_rate": 5.278694088391462e-06, | |
| "loss": 1.6976, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.7160282473785577, | |
| "grad_norm": 4.791884422302246, | |
| "learning_rate": 5.201477105030766e-06, | |
| "loss": 1.6394, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.7181682002995933, | |
| "grad_norm": 5.030315399169922, | |
| "learning_rate": 5.124798058867414e-06, | |
| "loss": 1.5982, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.7203081532206292, | |
| "grad_norm": 5.747689723968506, | |
| "learning_rate": 5.048657870657447e-06, | |
| "loss": 1.6619, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.7224481061416648, | |
| "grad_norm": 4.98396110534668, | |
| "learning_rate": 4.973057454686364e-06, | |
| "loss": 1.7076, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.7245880590627007, | |
| "grad_norm": 4.828854560852051, | |
| "learning_rate": 4.897997718758107e-06, | |
| "loss": 1.6687, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.7267280119837363, | |
| "grad_norm": 4.917362689971924, | |
| "learning_rate": 4.82347956418418e-06, | |
| "loss": 1.6901, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.7288679649047722, | |
| "grad_norm": 5.416051387786865, | |
| "learning_rate": 4.7495038857728155e-06, | |
| "loss": 1.6381, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.7310079178258078, | |
| "grad_norm": 5.832434177398682, | |
| "learning_rate": 4.676071571818236e-06, | |
| "loss": 1.6695, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.7331478707468437, | |
| "grad_norm": 5.534262657165527, | |
| "learning_rate": 4.603183504089997e-06, | |
| "loss": 1.7054, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.7352878236678793, | |
| "grad_norm": 5.344274997711182, | |
| "learning_rate": 4.5308405578223635e-06, | |
| "loss": 1.6645, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.737427776588915, | |
| "grad_norm": 5.08225679397583, | |
| "learning_rate": 4.45904360170385e-06, | |
| "loss": 1.6623, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.7395677295099508, | |
| "grad_norm": 5.531522274017334, | |
| "learning_rate": 4.387793497866744e-06, | |
| "loss": 1.6633, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.7417076824309865, | |
| "grad_norm": 4.792389869689941, | |
| "learning_rate": 4.31709110187678e-06, | |
| "loss": 1.6761, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.7438476353520223, | |
| "grad_norm": 5.555240631103516, | |
| "learning_rate": 4.246937262722866e-06, | |
| "loss": 1.6628, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.745987588273058, | |
| "grad_norm": 5.5509796142578125, | |
| "learning_rate": 4.177332822806873e-06, | |
| "loss": 1.6529, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.7481275411940937, | |
| "grad_norm": 4.76316499710083, | |
| "learning_rate": 4.108278617933525e-06, | |
| "loss": 1.6898, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.7502674941151295, | |
| "grad_norm": 4.821498870849609, | |
| "learning_rate": 4.039775477300378e-06, | |
| "loss": 1.6823, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.7524074470361652, | |
| "grad_norm": 5.2127299308776855, | |
| "learning_rate": 3.971824223487841e-06, | |
| "loss": 1.7028, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.754547399957201, | |
| "grad_norm": 4.9137701988220215, | |
| "learning_rate": 3.90442567244933e-06, | |
| "loss": 1.6858, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.7566873528782367, | |
| "grad_norm": 5.513342380523682, | |
| "learning_rate": 3.837580633501409e-06, | |
| "loss": 1.6466, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.7588273057992723, | |
| "grad_norm": 4.806028842926025, | |
| "learning_rate": 3.7712899093141407e-06, | |
| "loss": 1.7288, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.7609672587203082, | |
| "grad_norm": 5.0646796226501465, | |
| "learning_rate": 3.7055542959014087e-06, | |
| "loss": 1.6827, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.7631072116413438, | |
| "grad_norm": 5.064296245574951, | |
| "learning_rate": 3.640374582611389e-06, | |
| "loss": 1.6403, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.7652471645623797, | |
| "grad_norm": 6.094568252563477, | |
| "learning_rate": 3.575751552117029e-06, | |
| "loss": 1.625, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.7673871174834153, | |
| "grad_norm": 4.940478324890137, | |
| "learning_rate": 3.511685980406676e-06, | |
| "loss": 1.6847, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.7695270704044512, | |
| "grad_norm": 4.668292999267578, | |
| "learning_rate": 3.4481786367747627e-06, | |
| "loss": 1.6886, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.7716670233254868, | |
| "grad_norm": 5.22495174407959, | |
| "learning_rate": 3.3852302838125626e-06, | |
| "loss": 1.6492, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.7738069762465227, | |
| "grad_norm": 5.06997013092041, | |
| "learning_rate": 3.322841677399019e-06, | |
| "loss": 1.6441, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.7759469291675583, | |
| "grad_norm": 4.730724811553955, | |
| "learning_rate": 3.2610135666917007e-06, | |
| "loss": 1.6843, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.778086882088594, | |
| "grad_norm": 5.150337219238281, | |
| "learning_rate": 3.1997466941177666e-06, | |
| "loss": 1.6854, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.7802268350096297, | |
| "grad_norm": 4.789018630981445, | |
| "learning_rate": 3.139041795365094e-06, | |
| "loss": 1.634, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.7823667879306655, | |
| "grad_norm": 4.8880462646484375, | |
| "learning_rate": 3.0788995993734083e-06, | |
| "loss": 1.6258, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.7845067408517012, | |
| "grad_norm": 4.961301803588867, | |
| "learning_rate": 3.019320828325539e-06, | |
| "loss": 1.6463, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.786646693772737, | |
| "grad_norm": 4.813554286956787, | |
| "learning_rate": 2.9603061976387736e-06, | |
| "loss": 1.6903, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.7887866466937727, | |
| "grad_norm": 4.724278450012207, | |
| "learning_rate": 2.9018564159562224e-06, | |
| "loss": 1.646, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.7909265996148085, | |
| "grad_norm": 5.349635601043701, | |
| "learning_rate": 2.8439721851383383e-06, | |
| "loss": 1.6847, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.7930665525358442, | |
| "grad_norm": 4.801799297332764, | |
| "learning_rate": 2.786654200254496e-06, | |
| "loss": 1.6871, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.79520650545688, | |
| "grad_norm": 4.788339138031006, | |
| "learning_rate": 2.7299031495746252e-06, | |
| "loss": 1.7072, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.7973464583779157, | |
| "grad_norm": 4.700235843658447, | |
| "learning_rate": 2.6737197145609404e-06, | |
| "loss": 1.6723, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.7994864112989513, | |
| "grad_norm": 4.791804313659668, | |
| "learning_rate": 2.6181045698597972e-06, | |
| "loss": 1.674, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.8016263642199872, | |
| "grad_norm": 5.270465850830078, | |
| "learning_rate": 2.563058383293537e-06, | |
| "loss": 1.6434, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.8037663171410228, | |
| "grad_norm": 5.29375696182251, | |
| "learning_rate": 2.508581815852523e-06, | |
| "loss": 1.6719, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.8059062700620587, | |
| "grad_norm": 4.851658821105957, | |
| "learning_rate": 2.4546755216871496e-06, | |
| "loss": 1.6652, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.8080462229830943, | |
| "grad_norm": 5.476074695587158, | |
| "learning_rate": 2.40134014810004e-06, | |
| "loss": 1.7037, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.8101861759041302, | |
| "grad_norm": 5.279761791229248, | |
| "learning_rate": 2.3485763355382273e-06, | |
| "loss": 1.6697, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.8123261288251657, | |
| "grad_norm": 4.817234039306641, | |
| "learning_rate": 2.296384717585487e-06, | |
| "loss": 1.6609, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.8144660817462017, | |
| "grad_norm": 5.009280681610107, | |
| "learning_rate": 2.2447659209547443e-06, | |
| "loss": 1.6959, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.8166060346672372, | |
| "grad_norm": 5.085817813873291, | |
| "learning_rate": 2.1937205654805004e-06, | |
| "loss": 1.6254, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.8187459875882732, | |
| "grad_norm": 4.714953899383545, | |
| "learning_rate": 2.143249264111441e-06, | |
| "loss": 1.678, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.8208859405093087, | |
| "grad_norm": 5.392242908477783, | |
| "learning_rate": 2.09335262290305e-06, | |
| "loss": 1.6443, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.8230258934303445, | |
| "grad_norm": 4.728943347930908, | |
| "learning_rate": 2.04403124101033e-06, | |
| "loss": 1.6737, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.8251658463513802, | |
| "grad_norm": 4.887874603271484, | |
| "learning_rate": 1.995285710680622e-06, | |
| "loss": 1.6345, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.827305799272416, | |
| "grad_norm": 4.753354549407959, | |
| "learning_rate": 1.9471166172464917e-06, | |
| "loss": 1.7165, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.8294457521934517, | |
| "grad_norm": 5.000302314758301, | |
| "learning_rate": 1.8995245391186688e-06, | |
| "loss": 1.6339, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.8315857051144875, | |
| "grad_norm": 4.688614368438721, | |
| "learning_rate": 1.8525100477791602e-06, | |
| "loss": 1.6857, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.8337256580355232, | |
| "grad_norm": 4.900327205657959, | |
| "learning_rate": 1.8060737077743416e-06, | |
| "loss": 1.671, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.835865610956559, | |
| "grad_norm": 4.996348857879639, | |
| "learning_rate": 1.7602160767081822e-06, | |
| "loss": 1.6386, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.8380055638775947, | |
| "grad_norm": 4.658572673797607, | |
| "learning_rate": 1.7149377052355698e-06, | |
| "loss": 1.6028, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.8401455167986305, | |
| "grad_norm": 4.834623336791992, | |
| "learning_rate": 1.6702391370556957e-06, | |
| "loss": 1.6937, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.8422854697196662, | |
| "grad_norm": 4.5502028465271, | |
| "learning_rate": 1.6261209089054986e-06, | |
| "loss": 1.6596, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.8444254226407018, | |
| "grad_norm": 4.947803497314453, | |
| "learning_rate": 1.5825835505532516e-06, | |
| "loss": 1.6538, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.8465653755617377, | |
| "grad_norm": 5.3801774978637695, | |
| "learning_rate": 1.539627584792186e-06, | |
| "loss": 1.661, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.8487053284827732, | |
| "grad_norm": 5.104175090789795, | |
| "learning_rate": 1.4972535274342225e-06, | |
| "loss": 1.6419, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.8508452814038092, | |
| "grad_norm": 5.043144702911377, | |
| "learning_rate": 1.4554618873037551e-06, | |
| "loss": 1.6123, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.8529852343248447, | |
| "grad_norm": 4.843111038208008, | |
| "learning_rate": 1.4142531662315662e-06, | |
| "loss": 1.6482, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.8551251872458807, | |
| "grad_norm": 4.794622898101807, | |
| "learning_rate": 1.3736278590487927e-06, | |
| "loss": 1.7072, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.8572651401669162, | |
| "grad_norm": 4.912827491760254, | |
| "learning_rate": 1.3335864535809872e-06, | |
| "loss": 1.6744, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.8594050930879522, | |
| "grad_norm": 5.024925708770752, | |
| "learning_rate": 1.294129430642238e-06, | |
| "loss": 1.6379, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.8615450460089877, | |
| "grad_norm": 4.962955951690674, | |
| "learning_rate": 1.2552572640294247e-06, | |
| "loss": 1.6736, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.8636849989300235, | |
| "grad_norm": 4.8275275230407715, | |
| "learning_rate": 1.2169704205165277e-06, | |
| "loss": 1.6271, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.8658249518510592, | |
| "grad_norm": 4.821237087249756, | |
| "learning_rate": 1.179269359848989e-06, | |
| "loss": 1.6363, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.867964904772095, | |
| "grad_norm": 4.859674453735352, | |
| "learning_rate": 1.1421545347382378e-06, | |
| "loss": 1.6276, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.8701048576931307, | |
| "grad_norm": 4.546759605407715, | |
| "learning_rate": 1.105626390856218e-06, | |
| "loss": 1.6892, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.8722448106141665, | |
| "grad_norm": 4.87622594833374, | |
| "learning_rate": 1.0696853668300588e-06, | |
| "loss": 1.6633, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.8743847635352022, | |
| "grad_norm": 5.357635021209717, | |
| "learning_rate": 1.0343318942367951e-06, | |
| "loss": 1.6551, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.876524716456238, | |
| "grad_norm": 4.706364631652832, | |
| "learning_rate": 9.995663975981894e-07, | |
| "loss": 1.6708, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.8786646693772737, | |
| "grad_norm": 5.037484645843506, | |
| "learning_rate": 9.653892943756405e-07, | |
| "loss": 1.6359, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.8808046222983095, | |
| "grad_norm": 4.868020534515381, | |
| "learning_rate": 9.31800994965154e-07, | |
| "loss": 1.717, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.8829445752193452, | |
| "grad_norm": 5.029200077056885, | |
| "learning_rate": 8.98801902692431e-07, | |
| "loss": 1.6388, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.885084528140381, | |
| "grad_norm": 5.490426063537598, | |
| "learning_rate": 8.663924138080204e-07, | |
| "loss": 1.6471, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.8872244810614167, | |
| "grad_norm": 4.652840614318848, | |
| "learning_rate": 8.345729174825623e-07, | |
| "loss": 1.684, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.8893644339824522, | |
| "grad_norm": 4.7486252784729, | |
| "learning_rate": 8.033437958020973e-07, | |
| "loss": 1.701, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.8915043869034882, | |
| "grad_norm": 4.830610275268555, | |
| "learning_rate": 7.727054237635146e-07, | |
| "loss": 1.6273, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.8936443398245237, | |
| "grad_norm": 4.977087497711182, | |
| "learning_rate": 7.426581692700052e-07, | |
| "loss": 1.6923, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.8957842927455597, | |
| "grad_norm": 4.950675010681152, | |
| "learning_rate": 7.132023931266829e-07, | |
| "loss": 1.6456, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.8979242456665952, | |
| "grad_norm": 4.89157247543335, | |
| "learning_rate": 6.843384490362259e-07, | |
| "loss": 1.6838, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.9000641985876312, | |
| "grad_norm": 4.860950946807861, | |
| "learning_rate": 6.560666835946416e-07, | |
| "loss": 1.6309, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.9022041515086667, | |
| "grad_norm": 4.52632999420166, | |
| "learning_rate": 6.283874362871033e-07, | |
| "loss": 1.6225, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 1.9043441044297027, | |
| "grad_norm": 4.779524803161621, | |
| "learning_rate": 6.013010394838702e-07, | |
| "loss": 1.7016, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.9064840573507382, | |
| "grad_norm": 4.958658695220947, | |
| "learning_rate": 5.748078184363015e-07, | |
| "loss": 1.6794, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 1.908624010271774, | |
| "grad_norm": 5.345273017883301, | |
| "learning_rate": 5.489080912729428e-07, | |
| "loss": 1.6796, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.9107639631928097, | |
| "grad_norm": 5.009010314941406, | |
| "learning_rate": 5.236021689957237e-07, | |
| "loss": 1.6552, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 1.9129039161138455, | |
| "grad_norm": 4.576427936553955, | |
| "learning_rate": 4.988903554761948e-07, | |
| "loss": 1.64, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.9150438690348812, | |
| "grad_norm": 4.975499153137207, | |
| "learning_rate": 4.7477294745189584e-07, | |
| "loss": 1.6782, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.917183821955917, | |
| "grad_norm": 4.649482250213623, | |
| "learning_rate": 4.512502345228042e-07, | |
| "loss": 1.6728, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.9193237748769527, | |
| "grad_norm": 4.831498146057129, | |
| "learning_rate": 4.283224991478374e-07, | |
| "loss": 1.6622, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 1.9214637277979885, | |
| "grad_norm": 4.552825927734375, | |
| "learning_rate": 4.0599001664146097e-07, | |
| "loss": 1.6806, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.9236036807190242, | |
| "grad_norm": 4.854389190673828, | |
| "learning_rate": 3.842530551704027e-07, | |
| "loss": 1.5925, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 1.92574363364006, | |
| "grad_norm": 5.051529884338379, | |
| "learning_rate": 3.631118757504159e-07, | |
| "loss": 1.7039, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.9278835865610957, | |
| "grad_norm": 4.859341144561768, | |
| "learning_rate": 3.425667322431436e-07, | |
| "loss": 1.6972, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 1.9300235394821312, | |
| "grad_norm": 4.893909454345703, | |
| "learning_rate": 3.226178713530814e-07, | |
| "loss": 1.635, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.9321634924031672, | |
| "grad_norm": 4.657041549682617, | |
| "learning_rate": 3.0326553262460255e-07, | |
| "loss": 1.6971, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 1.9343034453242027, | |
| "grad_norm": 4.919985294342041, | |
| "learning_rate": 2.845099484391045e-07, | |
| "loss": 1.6954, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.9364433982452387, | |
| "grad_norm": 5.278502464294434, | |
| "learning_rate": 2.663513440121834e-07, | |
| "loss": 1.6607, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.9385833511662742, | |
| "grad_norm": 4.634570598602295, | |
| "learning_rate": 2.4878993739095857e-07, | |
| "loss": 1.6459, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.9407233040873102, | |
| "grad_norm": 4.695330619812012, | |
| "learning_rate": 2.3182593945144105e-07, | |
| "loss": 1.6391, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 1.9428632570083457, | |
| "grad_norm": 4.747430324554443, | |
| "learning_rate": 2.1545955389600248e-07, | |
| "loss": 1.6485, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.9450032099293817, | |
| "grad_norm": 5.009528636932373, | |
| "learning_rate": 1.9969097725094366e-07, | |
| "loss": 1.686, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 1.9471431628504172, | |
| "grad_norm": 4.869572639465332, | |
| "learning_rate": 1.8452039886410199e-07, | |
| "loss": 1.6684, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.9492831157714532, | |
| "grad_norm": 4.405606269836426, | |
| "learning_rate": 1.6994800090261997e-07, | |
| "loss": 1.6607, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 1.9514230686924887, | |
| "grad_norm": 4.575326919555664, | |
| "learning_rate": 1.5597395835071915e-07, | |
| "loss": 1.6839, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.9535630216135245, | |
| "grad_norm": 4.905466079711914, | |
| "learning_rate": 1.425984390076185e-07, | |
| "loss": 1.6075, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 1.9557029745345602, | |
| "grad_norm": 4.696757793426514, | |
| "learning_rate": 1.298216034855304e-07, | |
| "loss": 1.5987, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.957842927455596, | |
| "grad_norm": 5.084411144256592, | |
| "learning_rate": 1.1764360520769568e-07, | |
| "loss": 1.6828, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.9599828803766317, | |
| "grad_norm": 4.676747798919678, | |
| "learning_rate": 1.0606459040657935e-07, | |
| "loss": 1.7003, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.9621228332976675, | |
| "grad_norm": 4.811861991882324, | |
| "learning_rate": 9.508469812209986e-08, | |
| "loss": 1.6344, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 1.9642627862187032, | |
| "grad_norm": 4.689334392547607, | |
| "learning_rate": 8.470406019994714e-08, | |
| "loss": 1.673, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.966402739139739, | |
| "grad_norm": 4.811148166656494, | |
| "learning_rate": 7.492280129002271e-08, | |
| "loss": 1.6674, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 1.9685426920607747, | |
| "grad_norm": 4.551917552947998, | |
| "learning_rate": 6.574103884492422e-08, | |
| "loss": 1.6488, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.9706826449818104, | |
| "grad_norm": 4.65984582901001, | |
| "learning_rate": 5.715888311855211e-08, | |
| "loss": 1.6826, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 1.9728225979028462, | |
| "grad_norm": 4.9454240798950195, | |
| "learning_rate": 4.9176437164760726e-08, | |
| "loss": 1.6969, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.9749625508238817, | |
| "grad_norm": 4.670926570892334, | |
| "learning_rate": 4.1793796836142596e-08, | |
| "loss": 1.6635, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 1.9771025037449177, | |
| "grad_norm": 4.9202399253845215, | |
| "learning_rate": 3.5011050782879364e-08, | |
| "loss": 1.6676, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.9792424566659532, | |
| "grad_norm": 4.523751258850098, | |
| "learning_rate": 2.8828280451653755e-08, | |
| "loss": 1.6235, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.9813824095869892, | |
| "grad_norm": 4.875797748565674, | |
| "learning_rate": 2.3245560084700357e-08, | |
| "loss": 1.7377, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.9835223625080247, | |
| "grad_norm": 4.714092254638672, | |
| "learning_rate": 1.8262956718884117e-08, | |
| "loss": 1.6771, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 1.9856623154290607, | |
| "grad_norm": 4.643798351287842, | |
| "learning_rate": 1.3880530184934293e-08, | |
| "loss": 1.6668, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.9878022683500962, | |
| "grad_norm": 4.464611530303955, | |
| "learning_rate": 1.0098333106672852e-08, | |
| "loss": 1.6362, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 1.9899422212711322, | |
| "grad_norm": 4.848086357116699, | |
| "learning_rate": 6.9164109004427046e-09, | |
| "loss": 1.6405, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.9920821741921677, | |
| "grad_norm": 4.318144798278809, | |
| "learning_rate": 4.33480177451373e-09, | |
| "loss": 1.6798, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 1.9942221271132035, | |
| "grad_norm": 5.152481555938721, | |
| "learning_rate": 2.3535367286497966e-09, | |
| "loss": 1.6778, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.9963620800342392, | |
| "grad_norm": 4.668585777282715, | |
| "learning_rate": 9.726395537312806e-10, | |
| "loss": 1.6754, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 1.998502032955275, | |
| "grad_norm": 4.8391313552856445, | |
| "learning_rate": 1.921268314608593e-10, | |
| "loss": 1.6921, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 4674, | |
| "total_flos": 7.23540271133465e+18, | |
| "train_loss": 2.1290212889537883, | |
| "train_runtime": 7155.5853, | |
| "train_samples_per_second": 104.472, | |
| "train_steps_per_second": 0.653 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4674, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 468, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.23540271133465e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |