| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9958960328317374, | |
| "eval_steps": 250, | |
| "global_step": 1095, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0027359781121751026, | |
| "grad_norm": 0.24075667560100555, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 1.9078, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005471956224350205, | |
| "grad_norm": 0.2584983706474304, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 2.1152, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008207934336525308, | |
| "grad_norm": 0.24978317320346832, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 1.9649, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01094391244870041, | |
| "grad_norm": 0.2477930635213852, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 1.9163, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013679890560875513, | |
| "grad_norm": 0.2425757646560669, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 2.1254, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.016415868673050615, | |
| "grad_norm": 0.251596599817276, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 2.0832, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.019151846785225718, | |
| "grad_norm": 0.2359585464000702, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 1.8204, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02188782489740082, | |
| "grad_norm": 0.25108030438423157, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 1.9224, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.024623803009575923, | |
| "grad_norm": 0.25240400433540344, | |
| "learning_rate": 2.0454545454545457e-05, | |
| "loss": 1.8371, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.027359781121751026, | |
| "grad_norm": 0.22698192298412323, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 1.9962, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.030095759233926128, | |
| "grad_norm": 0.24213118851184845, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.9918, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03283173734610123, | |
| "grad_norm": 0.20904198288917542, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 1.9463, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03556771545827633, | |
| "grad_norm": 0.21374110877513885, | |
| "learning_rate": 2.954545454545455e-05, | |
| "loss": 1.8628, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.038303693570451436, | |
| "grad_norm": 0.27983948588371277, | |
| "learning_rate": 3.181818181818182e-05, | |
| "loss": 2.1597, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04103967168262654, | |
| "grad_norm": 0.2309175282716751, | |
| "learning_rate": 3.409090909090909e-05, | |
| "loss": 2.0305, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04377564979480164, | |
| "grad_norm": 0.21376170217990875, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 1.8866, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.046511627906976744, | |
| "grad_norm": 0.23382064700126648, | |
| "learning_rate": 3.8636363636363636e-05, | |
| "loss": 1.9912, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.049247606019151846, | |
| "grad_norm": 0.20445913076400757, | |
| "learning_rate": 4.0909090909090915e-05, | |
| "loss": 1.9773, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05198358413132695, | |
| "grad_norm": 0.21313798427581787, | |
| "learning_rate": 4.318181818181819e-05, | |
| "loss": 1.9965, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05471956224350205, | |
| "grad_norm": 0.22150328755378723, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 1.9538, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.057455540355677154, | |
| "grad_norm": 0.17602485418319702, | |
| "learning_rate": 4.772727272727273e-05, | |
| "loss": 1.9113, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.060191518467852256, | |
| "grad_norm": 0.20136825740337372, | |
| "learning_rate": 5e-05, | |
| "loss": 1.9837, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06292749658002736, | |
| "grad_norm": 0.1977069228887558, | |
| "learning_rate": 4.999989284560115e-05, | |
| "loss": 1.8121, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06566347469220246, | |
| "grad_norm": 0.18849150836467743, | |
| "learning_rate": 4.9999571383323136e-05, | |
| "loss": 1.8013, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06839945280437756, | |
| "grad_norm": 0.17495211958885193, | |
| "learning_rate": 4.9999035615921664e-05, | |
| "loss": 1.8375, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07113543091655267, | |
| "grad_norm": 0.28524237871170044, | |
| "learning_rate": 4.999828554798952e-05, | |
| "loss": 2.026, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07387140902872777, | |
| "grad_norm": 0.2135252058506012, | |
| "learning_rate": 4.999732118595654e-05, | |
| "loss": 1.7874, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07660738714090287, | |
| "grad_norm": 0.213677778840065, | |
| "learning_rate": 4.999614253808959e-05, | |
| "loss": 1.7844, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07934336525307797, | |
| "grad_norm": 0.20930789411067963, | |
| "learning_rate": 4.999474961449243e-05, | |
| "loss": 1.857, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08207934336525308, | |
| "grad_norm": 0.18232876062393188, | |
| "learning_rate": 4.999314242710572e-05, | |
| "loss": 1.7145, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08481532147742818, | |
| "grad_norm": 0.2061523050069809, | |
| "learning_rate": 4.9991320989706816e-05, | |
| "loss": 1.8137, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08755129958960328, | |
| "grad_norm": 0.19810044765472412, | |
| "learning_rate": 4.9989285317909725e-05, | |
| "loss": 1.8154, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09028727770177838, | |
| "grad_norm": 0.19258971512317657, | |
| "learning_rate": 4.9987035429164954e-05, | |
| "loss": 1.8612, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.09302325581395349, | |
| "grad_norm": 0.21273574233055115, | |
| "learning_rate": 4.9984571342759326e-05, | |
| "loss": 1.6282, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09575923392612859, | |
| "grad_norm": 0.18505199253559113, | |
| "learning_rate": 4.998189307981586e-05, | |
| "loss": 1.7134, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09849521203830369, | |
| "grad_norm": 0.20874658226966858, | |
| "learning_rate": 4.997900066329357e-05, | |
| "loss": 1.7648, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1012311901504788, | |
| "grad_norm": 0.18423666059970856, | |
| "learning_rate": 4.9975894117987265e-05, | |
| "loss": 1.7784, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.1039671682626539, | |
| "grad_norm": 0.16870878636837006, | |
| "learning_rate": 4.9972573470527354e-05, | |
| "loss": 1.7352, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.106703146374829, | |
| "grad_norm": 0.20023159682750702, | |
| "learning_rate": 4.996903874937959e-05, | |
| "loss": 1.7784, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1094391244870041, | |
| "grad_norm": 0.22696524858474731, | |
| "learning_rate": 4.9965289984844846e-05, | |
| "loss": 1.7722, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1121751025991792, | |
| "grad_norm": 0.18610844016075134, | |
| "learning_rate": 4.9961327209058844e-05, | |
| "loss": 1.761, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11491108071135431, | |
| "grad_norm": 0.17688068747520447, | |
| "learning_rate": 4.99571504559919e-05, | |
| "loss": 1.6316, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 0.20090024173259735, | |
| "learning_rate": 4.995275976144862e-05, | |
| "loss": 1.8064, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.12038303693570451, | |
| "grad_norm": 0.17762398719787598, | |
| "learning_rate": 4.9948155163067565e-05, | |
| "loss": 1.8417, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12311901504787962, | |
| "grad_norm": 0.1874111294746399, | |
| "learning_rate": 4.9943336700320985e-05, | |
| "loss": 1.7245, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12585499316005472, | |
| "grad_norm": 0.16139264404773712, | |
| "learning_rate": 4.993830441451444e-05, | |
| "loss": 1.8852, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.12859097127222982, | |
| "grad_norm": 0.16358217597007751, | |
| "learning_rate": 4.993305834878643e-05, | |
| "loss": 1.7314, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.13132694938440492, | |
| "grad_norm": 0.16465303301811218, | |
| "learning_rate": 4.992759854810811e-05, | |
| "loss": 1.6362, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.13406292749658003, | |
| "grad_norm": 0.1597519814968109, | |
| "learning_rate": 4.99219250592828e-05, | |
| "loss": 1.5936, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13679890560875513, | |
| "grad_norm": 0.16433416306972504, | |
| "learning_rate": 4.991603793094564e-05, | |
| "loss": 1.5236, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13953488372093023, | |
| "grad_norm": 0.16490432620048523, | |
| "learning_rate": 4.9909937213563165e-05, | |
| "loss": 1.7191, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.14227086183310533, | |
| "grad_norm": 0.1576634645462036, | |
| "learning_rate": 4.990362295943287e-05, | |
| "loss": 1.5944, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14500683994528044, | |
| "grad_norm": 0.16687043011188507, | |
| "learning_rate": 4.989709522268278e-05, | |
| "loss": 1.7204, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.14774281805745554, | |
| "grad_norm": 0.18680374324321747, | |
| "learning_rate": 4.989035405927093e-05, | |
| "loss": 1.8231, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15047879616963064, | |
| "grad_norm": 0.16919435560703278, | |
| "learning_rate": 4.9883399526984944e-05, | |
| "loss": 1.7527, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.15321477428180574, | |
| "grad_norm": 0.16811755299568176, | |
| "learning_rate": 4.9876231685441545e-05, | |
| "loss": 1.6824, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15595075239398085, | |
| "grad_norm": 0.15195395052433014, | |
| "learning_rate": 4.986885059608597e-05, | |
| "loss": 1.6515, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.15868673050615595, | |
| "grad_norm": 0.17854413390159607, | |
| "learning_rate": 4.986125632219152e-05, | |
| "loss": 1.613, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.16142270861833105, | |
| "grad_norm": 0.168907031416893, | |
| "learning_rate": 4.985344892885899e-05, | |
| "loss": 1.798, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.16415868673050615, | |
| "grad_norm": 0.173956036567688, | |
| "learning_rate": 4.98454284830161e-05, | |
| "loss": 1.4941, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16689466484268126, | |
| "grad_norm": 0.19879266619682312, | |
| "learning_rate": 4.983719505341693e-05, | |
| "loss": 1.7941, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.16963064295485636, | |
| "grad_norm": 0.16308054327964783, | |
| "learning_rate": 4.982874871064134e-05, | |
| "loss": 1.6984, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.17236662106703146, | |
| "grad_norm": 0.16789625585079193, | |
| "learning_rate": 4.982008952709435e-05, | |
| "loss": 1.6375, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.17510259917920656, | |
| "grad_norm": 0.16414505243301392, | |
| "learning_rate": 4.9811217577005533e-05, | |
| "loss": 1.5667, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.17783857729138167, | |
| "grad_norm": 0.16586610674858093, | |
| "learning_rate": 4.980213293642837e-05, | |
| "loss": 1.6688, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.18057455540355677, | |
| "grad_norm": 0.21007002890110016, | |
| "learning_rate": 4.979283568323958e-05, | |
| "loss": 1.7249, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18331053351573187, | |
| "grad_norm": 0.15806783735752106, | |
| "learning_rate": 4.97833258971385e-05, | |
| "loss": 1.6918, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.18604651162790697, | |
| "grad_norm": 0.17978119850158691, | |
| "learning_rate": 4.977360365964637e-05, | |
| "loss": 1.6815, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.18878248974008208, | |
| "grad_norm": 0.16976381838321686, | |
| "learning_rate": 4.976366905410562e-05, | |
| "loss": 1.7212, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.19151846785225718, | |
| "grad_norm": 0.18810401856899261, | |
| "learning_rate": 4.9753522165679195e-05, | |
| "loss": 1.707, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19425444596443228, | |
| "grad_norm": 0.17066176235675812, | |
| "learning_rate": 4.974316308134978e-05, | |
| "loss": 1.7765, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.19699042407660738, | |
| "grad_norm": 0.17143549025058746, | |
| "learning_rate": 4.9732591889919114e-05, | |
| "loss": 1.7317, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1997264021887825, | |
| "grad_norm": 0.15364663302898407, | |
| "learning_rate": 4.972180868200714e-05, | |
| "loss": 1.6406, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.2024623803009576, | |
| "grad_norm": 0.1750328093767166, | |
| "learning_rate": 4.971081355005133e-05, | |
| "loss": 1.8737, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2051983584131327, | |
| "grad_norm": 0.17950010299682617, | |
| "learning_rate": 4.969960658830582e-05, | |
| "loss": 1.7941, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2079343365253078, | |
| "grad_norm": 0.1876641809940338, | |
| "learning_rate": 4.968818789284063e-05, | |
| "loss": 1.5792, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.2106703146374829, | |
| "grad_norm": 0.17048972845077515, | |
| "learning_rate": 4.9676557561540845e-05, | |
| "loss": 1.607, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.213406292749658, | |
| "grad_norm": 0.18965713679790497, | |
| "learning_rate": 4.966471569410574e-05, | |
| "loss": 1.6142, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2161422708618331, | |
| "grad_norm": 0.16774089634418488, | |
| "learning_rate": 4.965266239204798e-05, | |
| "loss": 1.6446, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2188782489740082, | |
| "grad_norm": 0.16337555646896362, | |
| "learning_rate": 4.9640397758692715e-05, | |
| "loss": 1.6319, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2216142270861833, | |
| "grad_norm": 0.17532555758953094, | |
| "learning_rate": 4.9627921899176686e-05, | |
| "loss": 1.554, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2243502051983584, | |
| "grad_norm": 0.16369390487670898, | |
| "learning_rate": 4.9615234920447365e-05, | |
| "loss": 1.7499, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.2270861833105335, | |
| "grad_norm": 0.170010045170784, | |
| "learning_rate": 4.9602336931261995e-05, | |
| "loss": 1.7786, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.22982216142270862, | |
| "grad_norm": 0.18399612605571747, | |
| "learning_rate": 4.958922804218667e-05, | |
| "loss": 1.8338, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 0.16553553938865662, | |
| "learning_rate": 4.957590836559541e-05, | |
| "loss": 1.8186, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 0.17777277529239655, | |
| "learning_rate": 4.9562378015669166e-05, | |
| "loss": 1.7326, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.23803009575923392, | |
| "grad_norm": 0.18170584738254547, | |
| "learning_rate": 4.954863710839485e-05, | |
| "loss": 1.7293, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.24076607387140903, | |
| "grad_norm": 0.17840476334095, | |
| "learning_rate": 4.953468576156437e-05, | |
| "loss": 1.6941, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.24350205198358413, | |
| "grad_norm": 0.16968044638633728, | |
| "learning_rate": 4.9520524094773575e-05, | |
| "loss": 1.7643, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.24623803009575923, | |
| "grad_norm": 0.17756614089012146, | |
| "learning_rate": 4.950615222942125e-05, | |
| "loss": 1.7599, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24897400820793433, | |
| "grad_norm": 0.162063866853714, | |
| "learning_rate": 4.94915702887081e-05, | |
| "loss": 1.7365, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.25170998632010944, | |
| "grad_norm": 0.18559984862804413, | |
| "learning_rate": 4.947677839763563e-05, | |
| "loss": 1.4424, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.25444596443228457, | |
| "grad_norm": 0.17151783406734467, | |
| "learning_rate": 4.946177668300515e-05, | |
| "loss": 1.5231, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.25718194254445964, | |
| "grad_norm": 0.20623093843460083, | |
| "learning_rate": 4.944656527341663e-05, | |
| "loss": 1.8024, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.25991792065663477, | |
| "grad_norm": 0.19318287074565887, | |
| "learning_rate": 4.943114429926763e-05, | |
| "loss": 1.6366, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.26265389876880985, | |
| "grad_norm": 0.1877240240573883, | |
| "learning_rate": 4.941551389275217e-05, | |
| "loss": 1.5424, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.265389876880985, | |
| "grad_norm": 0.17889100313186646, | |
| "learning_rate": 4.939967418785959e-05, | |
| "loss": 1.6603, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.26812585499316005, | |
| "grad_norm": 0.18533948063850403, | |
| "learning_rate": 4.938362532037341e-05, | |
| "loss": 1.7546, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2708618331053352, | |
| "grad_norm": 0.19442051649093628, | |
| "learning_rate": 4.936736742787018e-05, | |
| "loss": 1.6938, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.27359781121751026, | |
| "grad_norm": 0.20610813796520233, | |
| "learning_rate": 4.935090064971827e-05, | |
| "loss": 1.7104, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2763337893296854, | |
| "grad_norm": 0.19741714000701904, | |
| "learning_rate": 4.933422512707671e-05, | |
| "loss": 1.6205, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.27906976744186046, | |
| "grad_norm": 0.20613451302051544, | |
| "learning_rate": 4.931734100289393e-05, | |
| "loss": 1.7549, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2818057455540356, | |
| "grad_norm": 0.19265608489513397, | |
| "learning_rate": 4.930024842190658e-05, | |
| "loss": 1.8493, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.28454172366621067, | |
| "grad_norm": 0.1976325511932373, | |
| "learning_rate": 4.9282947530638295e-05, | |
| "loss": 1.8059, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2872777017783858, | |
| "grad_norm": 0.20444822311401367, | |
| "learning_rate": 4.926543847739841e-05, | |
| "loss": 1.5852, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.29001367989056087, | |
| "grad_norm": 0.18967504799365997, | |
| "learning_rate": 4.9247721412280667e-05, | |
| "loss": 1.6282, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.292749658002736, | |
| "grad_norm": 0.206316277384758, | |
| "learning_rate": 4.9229796487162e-05, | |
| "loss": 1.6186, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2954856361149111, | |
| "grad_norm": 0.17524588108062744, | |
| "learning_rate": 4.9211663855701165e-05, | |
| "loss": 1.6383, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2982216142270862, | |
| "grad_norm": 0.2032768577337265, | |
| "learning_rate": 4.9193323673337476e-05, | |
| "loss": 1.785, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.3009575923392613, | |
| "grad_norm": 0.17396964132785797, | |
| "learning_rate": 4.917477609728941e-05, | |
| "loss": 1.4461, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3036935704514364, | |
| "grad_norm": 0.19011397659778595, | |
| "learning_rate": 4.915602128655333e-05, | |
| "loss": 1.7313, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.3064295485636115, | |
| "grad_norm": 0.19239680469036102, | |
| "learning_rate": 4.913705940190207e-05, | |
| "loss": 1.6015, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3091655266757866, | |
| "grad_norm": 0.19917477667331696, | |
| "learning_rate": 4.9117890605883574e-05, | |
| "loss": 1.6411, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3119015047879617, | |
| "grad_norm": 0.19318480789661407, | |
| "learning_rate": 4.909851506281952e-05, | |
| "loss": 1.6737, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.3146374829001368, | |
| "grad_norm": 0.1994866579771042, | |
| "learning_rate": 4.907893293880387e-05, | |
| "loss": 1.518, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3173734610123119, | |
| "grad_norm": 0.1941988468170166, | |
| "learning_rate": 4.905914440170147e-05, | |
| "loss": 1.7589, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.320109439124487, | |
| "grad_norm": 0.20112977921962738, | |
| "learning_rate": 4.903914962114665e-05, | |
| "loss": 1.6671, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3228454172366621, | |
| "grad_norm": 0.21141856908798218, | |
| "learning_rate": 4.9018948768541694e-05, | |
| "loss": 1.6731, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.32558139534883723, | |
| "grad_norm": 0.18585005402565002, | |
| "learning_rate": 4.899854201705542e-05, | |
| "loss": 1.6276, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3283173734610123, | |
| "grad_norm": 0.1975102424621582, | |
| "learning_rate": 4.8977929541621676e-05, | |
| "loss": 1.6103, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33105335157318744, | |
| "grad_norm": 0.19365696609020233, | |
| "learning_rate": 4.895711151893786e-05, | |
| "loss": 1.5401, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3337893296853625, | |
| "grad_norm": 0.20823922753334045, | |
| "learning_rate": 4.8936088127463396e-05, | |
| "loss": 1.5811, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.33652530779753764, | |
| "grad_norm": 0.2523277997970581, | |
| "learning_rate": 4.8914859547418186e-05, | |
| "loss": 1.5561, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3392612859097127, | |
| "grad_norm": 0.19290074706077576, | |
| "learning_rate": 4.889342596078109e-05, | |
| "loss": 1.6078, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.34199726402188785, | |
| "grad_norm": 0.17994068562984467, | |
| "learning_rate": 4.887178755128835e-05, | |
| "loss": 1.7232, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3447332421340629, | |
| "grad_norm": 0.18888920545578003, | |
| "learning_rate": 4.8849944504432035e-05, | |
| "loss": 1.5538, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.34746922024623805, | |
| "grad_norm": 0.18798014521598816, | |
| "learning_rate": 4.882789700745843e-05, | |
| "loss": 1.6081, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.35020519835841313, | |
| "grad_norm": 0.19080984592437744, | |
| "learning_rate": 4.880564524936643e-05, | |
| "loss": 1.4809, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 0.19421504437923431, | |
| "learning_rate": 4.878318942090594e-05, | |
| "loss": 1.6523, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.35567715458276333, | |
| "grad_norm": 0.1982857584953308, | |
| "learning_rate": 4.876052971457623e-05, | |
| "loss": 1.6948, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.35841313269493846, | |
| "grad_norm": 0.20162495970726013, | |
| "learning_rate": 4.8737666324624265e-05, | |
| "loss": 1.7504, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.36114911080711354, | |
| "grad_norm": 0.20931164920330048, | |
| "learning_rate": 4.8714599447043085e-05, | |
| "loss": 1.4829, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.36388508891928867, | |
| "grad_norm": 0.21997657418251038, | |
| "learning_rate": 4.869132927957007e-05, | |
| "loss": 1.5546, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.36662106703146374, | |
| "grad_norm": 0.20648987591266632, | |
| "learning_rate": 4.866785602168528e-05, | |
| "loss": 1.6716, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3693570451436389, | |
| "grad_norm": 0.20821620523929596, | |
| "learning_rate": 4.864417987460975e-05, | |
| "loss": 1.6197, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.37209302325581395, | |
| "grad_norm": 0.21084287762641907, | |
| "learning_rate": 4.862030104130375e-05, | |
| "loss": 1.6454, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3748290013679891, | |
| "grad_norm": 0.21337808668613434, | |
| "learning_rate": 4.859621972646503e-05, | |
| "loss": 1.5733, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.37756497948016415, | |
| "grad_norm": 0.21093016862869263, | |
| "learning_rate": 4.857193613652711e-05, | |
| "loss": 1.6716, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3803009575923393, | |
| "grad_norm": 0.20863549411296844, | |
| "learning_rate": 4.854745047965745e-05, | |
| "loss": 1.6117, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.38303693570451436, | |
| "grad_norm": 0.20271432399749756, | |
| "learning_rate": 4.852276296575573e-05, | |
| "loss": 1.5765, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3857729138166895, | |
| "grad_norm": 0.20297355949878693, | |
| "learning_rate": 4.849787380645201e-05, | |
| "loss": 1.7296, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.38850889192886456, | |
| "grad_norm": 0.20244932174682617, | |
| "learning_rate": 4.847278321510491e-05, | |
| "loss": 1.7651, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3912448700410397, | |
| "grad_norm": 0.20252294838428497, | |
| "learning_rate": 4.844749140679982e-05, | |
| "loss": 1.6427, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.39398084815321477, | |
| "grad_norm": 0.20163066685199738, | |
| "learning_rate": 4.842199859834702e-05, | |
| "loss": 1.6398, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3967168262653899, | |
| "grad_norm": 0.21187250316143036, | |
| "learning_rate": 4.839630500827982e-05, | |
| "loss": 1.6173, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.399452804377565, | |
| "grad_norm": 0.18655481934547424, | |
| "learning_rate": 4.837041085685273e-05, | |
| "loss": 1.5184, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.4021887824897401, | |
| "grad_norm": 0.21056880056858063, | |
| "learning_rate": 4.834431636603953e-05, | |
| "loss": 1.5687, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.4049247606019152, | |
| "grad_norm": 0.2020365595817566, | |
| "learning_rate": 4.831802175953138e-05, | |
| "loss": 1.6445, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4076607387140903, | |
| "grad_norm": 0.18952035903930664, | |
| "learning_rate": 4.8291527262734886e-05, | |
| "loss": 1.7945, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4103967168262654, | |
| "grad_norm": 0.1896515041589737, | |
| "learning_rate": 4.826483310277021e-05, | |
| "loss": 1.546, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4131326949384405, | |
| "grad_norm": 0.20328114926815033, | |
| "learning_rate": 4.8237939508469075e-05, | |
| "loss": 1.5621, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.4158686730506156, | |
| "grad_norm": 0.18790675699710846, | |
| "learning_rate": 4.821084671037285e-05, | |
| "loss": 1.6609, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4186046511627907, | |
| "grad_norm": 0.20515787601470947, | |
| "learning_rate": 4.8183554940730524e-05, | |
| "loss": 1.671, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4213406292749658, | |
| "grad_norm": 0.19960254430770874, | |
| "learning_rate": 4.815606443349675e-05, | |
| "loss": 1.4732, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4240766073871409, | |
| "grad_norm": 0.21313494443893433, | |
| "learning_rate": 4.812837542432984e-05, | |
| "loss": 1.7094, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.426812585499316, | |
| "grad_norm": 0.1988086998462677, | |
| "learning_rate": 4.8100488150589704e-05, | |
| "loss": 1.6241, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.42954856361149113, | |
| "grad_norm": 0.1885240077972412, | |
| "learning_rate": 4.80724028513359e-05, | |
| "loss": 1.64, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4322845417236662, | |
| "grad_norm": 0.19793474674224854, | |
| "learning_rate": 4.8044119767325455e-05, | |
| "loss": 1.6615, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.43502051983584133, | |
| "grad_norm": 0.2065788209438324, | |
| "learning_rate": 4.801563914101096e-05, | |
| "loss": 1.5414, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.4377564979480164, | |
| "grad_norm": 0.19855208694934845, | |
| "learning_rate": 4.798696121653833e-05, | |
| "loss": 1.7804, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.44049247606019154, | |
| "grad_norm": 0.21048115193843842, | |
| "learning_rate": 4.795808623974485e-05, | |
| "loss": 1.6356, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4432284541723666, | |
| "grad_norm": 0.18900255858898163, | |
| "learning_rate": 4.792901445815698e-05, | |
| "loss": 1.7154, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.44596443228454175, | |
| "grad_norm": 0.21627336740493774, | |
| "learning_rate": 4.7899746120988245e-05, | |
| "loss": 1.5644, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4487004103967168, | |
| "grad_norm": 0.22913017868995667, | |
| "learning_rate": 4.7870281479137146e-05, | |
| "loss": 1.5764, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.45143638850889195, | |
| "grad_norm": 0.1932600438594818, | |
| "learning_rate": 4.784062078518496e-05, | |
| "loss": 1.7484, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.454172366621067, | |
| "grad_norm": 0.2302398383617401, | |
| "learning_rate": 4.781076429339359e-05, | |
| "loss": 1.5941, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.45690834473324216, | |
| "grad_norm": 0.2317684292793274, | |
| "learning_rate": 4.77807122597034e-05, | |
| "loss": 1.6844, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.45964432284541723, | |
| "grad_norm": 0.2009151577949524, | |
| "learning_rate": 4.775046494173098e-05, | |
| "loss": 1.735, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.46238030095759236, | |
| "grad_norm": 0.23902402818202972, | |
| "learning_rate": 4.7720022598767e-05, | |
| "loss": 1.6606, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 0.22881416976451874, | |
| "learning_rate": 4.768938549177393e-05, | |
| "loss": 1.6456, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.46785225718194257, | |
| "grad_norm": 0.1795547604560852, | |
| "learning_rate": 4.765855388338383e-05, | |
| "loss": 1.6853, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.1893799602985382, | |
| "learning_rate": 4.7627528037896106e-05, | |
| "loss": 1.5587, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.47332421340629277, | |
| "grad_norm": 0.21993231773376465, | |
| "learning_rate": 4.759630822127522e-05, | |
| "loss": 1.8, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.47606019151846785, | |
| "grad_norm": 0.2114606648683548, | |
| "learning_rate": 4.756489470114842e-05, | |
| "loss": 1.543, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.478796169630643, | |
| "grad_norm": 0.24682380259037018, | |
| "learning_rate": 4.753328774680347e-05, | |
| "loss": 1.6537, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.48153214774281805, | |
| "grad_norm": 0.21504569053649902, | |
| "learning_rate": 4.750148762918629e-05, | |
| "loss": 1.7841, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4842681258549932, | |
| "grad_norm": 0.2285577356815338, | |
| "learning_rate": 4.746949462089868e-05, | |
| "loss": 1.5911, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.48700410396716826, | |
| "grad_norm": 0.20859524607658386, | |
| "learning_rate": 4.743730899619598e-05, | |
| "loss": 1.5816, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4897400820793434, | |
| "grad_norm": 0.2234918773174286, | |
| "learning_rate": 4.740493103098468e-05, | |
| "loss": 1.5763, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.49247606019151846, | |
| "grad_norm": 0.20745696127414703, | |
| "learning_rate": 4.7372361002820085e-05, | |
| "loss": 1.4441, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4952120383036936, | |
| "grad_norm": 0.18486277759075165, | |
| "learning_rate": 4.733959919090396e-05, | |
| "loss": 1.6772, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.49794801641586867, | |
| "grad_norm": 0.21360483765602112, | |
| "learning_rate": 4.7306645876082066e-05, | |
| "loss": 1.5955, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5006839945280438, | |
| "grad_norm": 0.2167551964521408, | |
| "learning_rate": 4.727350134084182e-05, | |
| "loss": 1.6222, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5034199726402189, | |
| "grad_norm": 0.2134450525045395, | |
| "learning_rate": 4.7240165869309846e-05, | |
| "loss": 1.6059, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.506155950752394, | |
| "grad_norm": 0.21351881325244904, | |
| "learning_rate": 4.720663974724953e-05, | |
| "loss": 1.7258, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5088919288645691, | |
| "grad_norm": 0.1952323615550995, | |
| "learning_rate": 4.71729232620586e-05, | |
| "loss": 1.4591, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5116279069767442, | |
| "grad_norm": 0.2462124228477478, | |
| "learning_rate": 4.7139016702766615e-05, | |
| "loss": 1.5708, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5143638850889193, | |
| "grad_norm": 0.24391762912273407, | |
| "learning_rate": 4.7104920360032545e-05, | |
| "loss": 1.7813, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5170998632010944, | |
| "grad_norm": 0.21117964386940002, | |
| "learning_rate": 4.707063452614224e-05, | |
| "loss": 1.6948, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5198358413132695, | |
| "grad_norm": 0.20722931623458862, | |
| "learning_rate": 4.703615949500593e-05, | |
| "loss": 1.5925, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5225718194254446, | |
| "grad_norm": 0.22132059931755066, | |
| "learning_rate": 4.700149556215571e-05, | |
| "loss": 1.6448, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5253077975376197, | |
| "grad_norm": 0.22742822766304016, | |
| "learning_rate": 4.696664302474302e-05, | |
| "loss": 1.5866, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5280437756497948, | |
| "grad_norm": 0.2043454349040985, | |
| "learning_rate": 4.693160218153607e-05, | |
| "loss": 1.704, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.53077975376197, | |
| "grad_norm": 0.21721500158309937, | |
| "learning_rate": 4.68963733329173e-05, | |
| "loss": 1.6768, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.533515731874145, | |
| "grad_norm": 0.2336382120847702, | |
| "learning_rate": 4.6860956780880796e-05, | |
| "loss": 1.5906, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5362517099863201, | |
| "grad_norm": 0.2269752472639084, | |
| "learning_rate": 4.6825352829029705e-05, | |
| "loss": 1.5308, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5389876880984952, | |
| "grad_norm": 0.21256719529628754, | |
| "learning_rate": 4.6789561782573635e-05, | |
| "loss": 1.4821, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5417236662106704, | |
| "grad_norm": 0.21226170659065247, | |
| "learning_rate": 4.675358394832603e-05, | |
| "loss": 1.7818, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5444596443228454, | |
| "grad_norm": 0.21833762526512146, | |
| "learning_rate": 4.671741963470155e-05, | |
| "loss": 1.6556, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5471956224350205, | |
| "grad_norm": 0.21411390602588654, | |
| "learning_rate": 4.668106915171341e-05, | |
| "loss": 1.5892, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5499316005471956, | |
| "grad_norm": 0.22407633066177368, | |
| "learning_rate": 4.664453281097075e-05, | |
| "loss": 1.6843, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5526675786593708, | |
| "grad_norm": 0.20786328613758087, | |
| "learning_rate": 4.660781092567593e-05, | |
| "loss": 1.6561, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5554035567715458, | |
| "grad_norm": 0.2268221080303192, | |
| "learning_rate": 4.657090381062189e-05, | |
| "loss": 1.585, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5581395348837209, | |
| "grad_norm": 0.2261151373386383, | |
| "learning_rate": 4.6533811782189385e-05, | |
| "loss": 1.6033, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.560875512995896, | |
| "grad_norm": 0.19799037277698517, | |
| "learning_rate": 4.649653515834436e-05, | |
| "loss": 1.6294, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5636114911080712, | |
| "grad_norm": 0.20902524888515472, | |
| "learning_rate": 4.645907425863514e-05, | |
| "loss": 1.7012, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5663474692202463, | |
| "grad_norm": 0.2055850476026535, | |
| "learning_rate": 4.642142940418973e-05, | |
| "loss": 1.5587, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5690834473324213, | |
| "grad_norm": 0.2205619066953659, | |
| "learning_rate": 4.638360091771309e-05, | |
| "loss": 1.7727, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5718194254445964, | |
| "grad_norm": 0.25723960995674133, | |
| "learning_rate": 4.6345589123484314e-05, | |
| "loss": 1.6037, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5745554035567716, | |
| "grad_norm": 0.24420645833015442, | |
| "learning_rate": 4.630739434735387e-05, | |
| "loss": 1.7479, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5772913816689467, | |
| "grad_norm": 0.24823623895645142, | |
| "learning_rate": 4.626901691674083e-05, | |
| "loss": 1.7085, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5800273597811217, | |
| "grad_norm": 0.1966015100479126, | |
| "learning_rate": 4.623045716063002e-05, | |
| "loss": 1.544, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5827633378932968, | |
| "grad_norm": 0.22418932616710663, | |
| "learning_rate": 4.6191715409569244e-05, | |
| "loss": 1.6949, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.585499316005472, | |
| "grad_norm": 0.24894970655441284, | |
| "learning_rate": 4.6152791995666445e-05, | |
| "loss": 1.5838, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.230922132730484, | |
| "learning_rate": 4.61136872525868e-05, | |
| "loss": 1.6267, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5909712722298222, | |
| "grad_norm": 0.22752411663532257, | |
| "learning_rate": 4.6074401515549934e-05, | |
| "loss": 1.4752, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5937072503419972, | |
| "grad_norm": 0.22325444221496582, | |
| "learning_rate": 4.6034935121327025e-05, | |
| "loss": 1.7513, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5964432284541724, | |
| "grad_norm": 0.21255116164684296, | |
| "learning_rate": 4.599528840823787e-05, | |
| "loss": 1.642, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5991792065663475, | |
| "grad_norm": 0.24686329066753387, | |
| "learning_rate": 4.5955461716148065e-05, | |
| "loss": 1.6138, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6019151846785226, | |
| "grad_norm": 0.20993772149085999, | |
| "learning_rate": 4.5915455386466014e-05, | |
| "loss": 1.7554, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6046511627906976, | |
| "grad_norm": 0.2285137176513672, | |
| "learning_rate": 4.587526976214006e-05, | |
| "loss": 1.6073, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6073871409028728, | |
| "grad_norm": 0.22348423302173615, | |
| "learning_rate": 4.5834905187655526e-05, | |
| "loss": 1.6707, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6101231190150479, | |
| "grad_norm": 0.23014281690120697, | |
| "learning_rate": 4.5794362009031735e-05, | |
| "loss": 1.4968, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.612859097127223, | |
| "grad_norm": 0.23900607228279114, | |
| "learning_rate": 4.575364057381909e-05, | |
| "loss": 1.6317, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.615595075239398, | |
| "grad_norm": 0.24715475738048553, | |
| "learning_rate": 4.571274123109606e-05, | |
| "loss": 1.5805, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6183310533515732, | |
| "grad_norm": 0.22451570630073547, | |
| "learning_rate": 4.5671664331466205e-05, | |
| "loss": 1.5956, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6210670314637483, | |
| "grad_norm": 0.22505944967269897, | |
| "learning_rate": 4.5630410227055154e-05, | |
| "loss": 1.64, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6238030095759234, | |
| "grad_norm": 0.24184450507164001, | |
| "learning_rate": 4.5588979271507625e-05, | |
| "loss": 1.6055, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6265389876880985, | |
| "grad_norm": 0.22158478200435638, | |
| "learning_rate": 4.5547371819984344e-05, | |
| "loss": 1.5691, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6292749658002736, | |
| "grad_norm": 0.23034054040908813, | |
| "learning_rate": 4.5505588229159025e-05, | |
| "loss": 1.6157, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6320109439124487, | |
| "grad_norm": 0.240454763174057, | |
| "learning_rate": 4.5463628857215306e-05, | |
| "loss": 1.6337, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6347469220246238, | |
| "grad_norm": 0.2200327068567276, | |
| "learning_rate": 4.5421494063843695e-05, | |
| "loss": 1.4941, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6374829001367989, | |
| "grad_norm": 0.23234665393829346, | |
| "learning_rate": 4.537918421023848e-05, | |
| "loss": 1.5992, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.640218878248974, | |
| "grad_norm": 0.23742978274822235, | |
| "learning_rate": 4.53366996590946e-05, | |
| "loss": 1.481, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6429548563611491, | |
| "grad_norm": 0.22323279082775116, | |
| "learning_rate": 4.5294040774604576e-05, | |
| "loss": 1.5291, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6456908344733242, | |
| "grad_norm": 0.21182994544506073, | |
| "learning_rate": 4.525120792245538e-05, | |
| "loss": 1.5151, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6484268125854993, | |
| "grad_norm": 0.24394573271274567, | |
| "learning_rate": 4.5208201469825304e-05, | |
| "loss": 1.6732, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6511627906976745, | |
| "grad_norm": 0.20500163733959198, | |
| "learning_rate": 4.516502178538079e-05, | |
| "loss": 1.7066, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6538987688098495, | |
| "grad_norm": 0.2329857349395752, | |
| "learning_rate": 4.512166923927329e-05, | |
| "loss": 1.4977, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6566347469220246, | |
| "grad_norm": 0.23349729180335999, | |
| "learning_rate": 4.507814420313608e-05, | |
| "loss": 1.5837, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6593707250341997, | |
| "grad_norm": 0.24870243668556213, | |
| "learning_rate": 4.503444705008107e-05, | |
| "loss": 1.6073, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6621067031463749, | |
| "grad_norm": 0.2594373822212219, | |
| "learning_rate": 4.4990578154695676e-05, | |
| "loss": 1.6752, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.66484268125855, | |
| "grad_norm": 0.2545240819454193, | |
| "learning_rate": 4.4946537893039476e-05, | |
| "loss": 1.5185, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.667578659370725, | |
| "grad_norm": 0.24223731458187103, | |
| "learning_rate": 4.4902326642641095e-05, | |
| "loss": 1.7599, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6703146374829001, | |
| "grad_norm": 0.23293475806713104, | |
| "learning_rate": 4.485794478249493e-05, | |
| "loss": 1.6919, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6730506155950753, | |
| "grad_norm": 0.2452557235956192, | |
| "learning_rate": 4.481339269305792e-05, | |
| "loss": 1.4946, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6757865937072504, | |
| "grad_norm": 0.23280148208141327, | |
| "learning_rate": 4.4768670756246237e-05, | |
| "loss": 1.548, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6785225718194254, | |
| "grad_norm": 0.254730224609375, | |
| "learning_rate": 4.4723779355432056e-05, | |
| "loss": 1.6227, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6812585499316005, | |
| "grad_norm": 0.25781765580177307, | |
| "learning_rate": 4.4678718875440276e-05, | |
| "loss": 1.6403, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6839945280437757, | |
| "grad_norm": 0.2513294816017151, | |
| "learning_rate": 4.463348970254518e-05, | |
| "loss": 1.5416, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6839945280437757, | |
| "eval_loss": 1.6065112352371216, | |
| "eval_runtime": 13.3947, | |
| "eval_samples_per_second": 11.497, | |
| "eval_steps_per_second": 11.497, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6867305061559508, | |
| "grad_norm": 0.26413506269454956, | |
| "learning_rate": 4.458809222446717e-05, | |
| "loss": 1.4294, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6894664842681258, | |
| "grad_norm": 0.24414490163326263, | |
| "learning_rate": 4.454252683036939e-05, | |
| "loss": 1.6371, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6922024623803009, | |
| "grad_norm": 0.23543910682201385, | |
| "learning_rate": 4.449679391085444e-05, | |
| "loss": 1.6226, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6949384404924761, | |
| "grad_norm": 0.24758242070674896, | |
| "learning_rate": 4.445089385796099e-05, | |
| "loss": 1.7229, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 0.2550957500934601, | |
| "learning_rate": 4.4404827065160455e-05, | |
| "loss": 1.6636, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.7004103967168263, | |
| "grad_norm": 0.22393615543842316, | |
| "learning_rate": 4.4358593927353585e-05, | |
| "loss": 1.5795, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7031463748290013, | |
| "grad_norm": 0.22478732466697693, | |
| "learning_rate": 4.431219484086712e-05, | |
| "loss": 1.6236, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 0.21901032328605652, | |
| "learning_rate": 4.4265630203450355e-05, | |
| "loss": 1.5929, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7086183310533516, | |
| "grad_norm": 0.2183765321969986, | |
| "learning_rate": 4.421890041427174e-05, | |
| "loss": 1.6372, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.7113543091655267, | |
| "grad_norm": 0.23780032992362976, | |
| "learning_rate": 4.417200587391547e-05, | |
| "loss": 1.4413, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7140902872777017, | |
| "grad_norm": 0.22141356766223907, | |
| "learning_rate": 4.412494698437806e-05, | |
| "loss": 1.6594, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7168262653898769, | |
| "grad_norm": 0.2270331084728241, | |
| "learning_rate": 4.407772414906487e-05, | |
| "loss": 1.6701, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.719562243502052, | |
| "grad_norm": 0.24480143189430237, | |
| "learning_rate": 4.403033777278666e-05, | |
| "loss": 1.4914, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7222982216142271, | |
| "grad_norm": 0.23042482137680054, | |
| "learning_rate": 4.3982788261756114e-05, | |
| "loss": 1.6845, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7250341997264022, | |
| "grad_norm": 0.23995645344257355, | |
| "learning_rate": 4.393507602358438e-05, | |
| "loss": 1.658, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7277701778385773, | |
| "grad_norm": 0.25735434889793396, | |
| "learning_rate": 4.388720146727756e-05, | |
| "loss": 1.4613, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7305061559507524, | |
| "grad_norm": 0.27036193013191223, | |
| "learning_rate": 4.3839165003233195e-05, | |
| "loss": 1.5341, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7332421340629275, | |
| "grad_norm": 0.2285039722919464, | |
| "learning_rate": 4.3790967043236754e-05, | |
| "loss": 1.453, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7359781121751026, | |
| "grad_norm": 0.2619383931159973, | |
| "learning_rate": 4.374260800045812e-05, | |
| "loss": 1.4931, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7387140902872777, | |
| "grad_norm": 0.22855600714683533, | |
| "learning_rate": 4.369408828944801e-05, | |
| "loss": 1.7458, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7414500683994528, | |
| "grad_norm": 0.23299537599086761, | |
| "learning_rate": 4.364540832613449e-05, | |
| "loss": 1.5653, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7441860465116279, | |
| "grad_norm": 0.25914594531059265, | |
| "learning_rate": 4.35965685278193e-05, | |
| "loss": 1.6876, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.746922024623803, | |
| "grad_norm": 0.2430303692817688, | |
| "learning_rate": 4.3547569313174404e-05, | |
| "loss": 1.5761, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7496580027359782, | |
| "grad_norm": 0.23968341946601868, | |
| "learning_rate": 4.34984111022383e-05, | |
| "loss": 1.5577, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7523939808481532, | |
| "grad_norm": 0.25374993681907654, | |
| "learning_rate": 4.3449094316412485e-05, | |
| "loss": 1.6936, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7551299589603283, | |
| "grad_norm": 0.25317367911338806, | |
| "learning_rate": 4.339961937845779e-05, | |
| "loss": 1.6957, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7578659370725034, | |
| "grad_norm": 0.231050044298172, | |
| "learning_rate": 4.3349986712490795e-05, | |
| "loss": 1.5386, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7606019151846786, | |
| "grad_norm": 0.23644736409187317, | |
| "learning_rate": 4.330019674398019e-05, | |
| "loss": 1.6485, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7633378932968536, | |
| "grad_norm": 0.24971966445446014, | |
| "learning_rate": 4.32502498997431e-05, | |
| "loss": 1.6196, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7660738714090287, | |
| "grad_norm": 0.22572147846221924, | |
| "learning_rate": 4.3200146607941444e-05, | |
| "loss": 1.6654, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7688098495212038, | |
| "grad_norm": 0.256597101688385, | |
| "learning_rate": 4.3149887298078276e-05, | |
| "loss": 1.6552, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.771545827633379, | |
| "grad_norm": 0.226451575756073, | |
| "learning_rate": 4.3099472400994084e-05, | |
| "loss": 1.6164, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.774281805745554, | |
| "grad_norm": 0.2507132291793823, | |
| "learning_rate": 4.3048902348863116e-05, | |
| "loss": 1.555, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7770177838577291, | |
| "grad_norm": 0.23009182512760162, | |
| "learning_rate": 4.2998177575189644e-05, | |
| "loss": 1.4741, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7797537619699042, | |
| "grad_norm": 0.24451418220996857, | |
| "learning_rate": 4.294729851480429e-05, | |
| "loss": 1.5704, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7824897400820794, | |
| "grad_norm": 0.2464141994714737, | |
| "learning_rate": 4.2896265603860246e-05, | |
| "loss": 1.6357, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7852257181942545, | |
| "grad_norm": 0.24296295642852783, | |
| "learning_rate": 4.28450792798296e-05, | |
| "loss": 1.5492, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7879616963064295, | |
| "grad_norm": 0.2681036591529846, | |
| "learning_rate": 4.279373998149954e-05, | |
| "loss": 1.6973, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7906976744186046, | |
| "grad_norm": 0.23149123787879944, | |
| "learning_rate": 4.2742248148968576e-05, | |
| "loss": 1.6626, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7934336525307798, | |
| "grad_norm": 0.22642168402671814, | |
| "learning_rate": 4.269060422364284e-05, | |
| "loss": 1.7018, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7961696306429549, | |
| "grad_norm": 0.23424457013607025, | |
| "learning_rate": 4.263880864823221e-05, | |
| "loss": 1.6973, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.79890560875513, | |
| "grad_norm": 0.24797867238521576, | |
| "learning_rate": 4.2586861866746606e-05, | |
| "loss": 1.6631, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.801641586867305, | |
| "grad_norm": 0.24017377197742462, | |
| "learning_rate": 4.2534764324492115e-05, | |
| "loss": 1.692, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.8043775649794802, | |
| "grad_norm": 0.22952371835708618, | |
| "learning_rate": 4.248251646806719e-05, | |
| "loss": 1.3692, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8071135430916553, | |
| "grad_norm": 0.2534586787223816, | |
| "learning_rate": 4.243011874535886e-05, | |
| "loss": 1.7223, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8098495212038304, | |
| "grad_norm": 0.23179998993873596, | |
| "learning_rate": 4.237757160553883e-05, | |
| "loss": 1.6688, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8125854993160054, | |
| "grad_norm": 0.21562454104423523, | |
| "learning_rate": 4.2324875499059693e-05, | |
| "loss": 1.5706, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8153214774281806, | |
| "grad_norm": 0.30047473311424255, | |
| "learning_rate": 4.227203087765099e-05, | |
| "loss": 1.5275, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.8180574555403557, | |
| "grad_norm": 0.23342545330524445, | |
| "learning_rate": 4.221903819431543e-05, | |
| "loss": 1.6569, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8207934336525308, | |
| "grad_norm": 0.2696247100830078, | |
| "learning_rate": 4.216589790332495e-05, | |
| "loss": 1.5803, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 0.24961510300636292, | |
| "learning_rate": 4.21126104602168e-05, | |
| "loss": 1.4535, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.826265389876881, | |
| "grad_norm": 0.24784626066684723, | |
| "learning_rate": 4.205917632178972e-05, | |
| "loss": 1.6035, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8290013679890561, | |
| "grad_norm": 0.22941957414150238, | |
| "learning_rate": 4.200559594609994e-05, | |
| "loss": 1.6502, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8317373461012312, | |
| "grad_norm": 0.23505181074142456, | |
| "learning_rate": 4.195186979245728e-05, | |
| "loss": 1.7577, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8344733242134063, | |
| "grad_norm": 0.2516261041164398, | |
| "learning_rate": 4.189799832142126e-05, | |
| "loss": 1.4947, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8372093023255814, | |
| "grad_norm": 0.28713610768318176, | |
| "learning_rate": 4.1843981994797075e-05, | |
| "loss": 1.8651, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8399452804377565, | |
| "grad_norm": 0.2537023425102234, | |
| "learning_rate": 4.178982127563169e-05, | |
| "loss": 1.3971, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8426812585499316, | |
| "grad_norm": 0.2575843930244446, | |
| "learning_rate": 4.173551662820985e-05, | |
| "loss": 1.7023, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8454172366621067, | |
| "grad_norm": 0.2560933828353882, | |
| "learning_rate": 4.168106851805009e-05, | |
| "loss": 1.5333, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8481532147742818, | |
| "grad_norm": 0.22800202667713165, | |
| "learning_rate": 4.16264774119008e-05, | |
| "loss": 1.5548, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8508891928864569, | |
| "grad_norm": 0.2175893485546112, | |
| "learning_rate": 4.1571743777736116e-05, | |
| "loss": 1.7173, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.853625170998632, | |
| "grad_norm": 0.23807293176651, | |
| "learning_rate": 4.151686808475204e-05, | |
| "loss": 1.5548, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8563611491108071, | |
| "grad_norm": 0.26670193672180176, | |
| "learning_rate": 4.1461850803362314e-05, | |
| "loss": 1.5273, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8590971272229823, | |
| "grad_norm": 0.2579667866230011, | |
| "learning_rate": 4.140669240519442e-05, | |
| "loss": 1.6407, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8618331053351573, | |
| "grad_norm": 0.23136496543884277, | |
| "learning_rate": 4.135139336308559e-05, | |
| "loss": 1.5134, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8645690834473324, | |
| "grad_norm": 0.2494489699602127, | |
| "learning_rate": 4.129595415107864e-05, | |
| "loss": 1.5429, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8673050615595075, | |
| "grad_norm": 0.2552199363708496, | |
| "learning_rate": 4.124037524441803e-05, | |
| "loss": 1.5348, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8700410396716827, | |
| "grad_norm": 0.2449236959218979, | |
| "learning_rate": 4.118465711954569e-05, | |
| "loss": 1.7689, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8727770177838577, | |
| "grad_norm": 0.2517363429069519, | |
| "learning_rate": 4.112880025409701e-05, | |
| "loss": 1.6258, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8755129958960328, | |
| "grad_norm": 0.27004796266555786, | |
| "learning_rate": 4.107280512689668e-05, | |
| "loss": 1.5855, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8782489740082079, | |
| "grad_norm": 0.27461642026901245, | |
| "learning_rate": 4.101667221795465e-05, | |
| "loss": 1.57, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8809849521203831, | |
| "grad_norm": 0.2513730525970459, | |
| "learning_rate": 4.0960402008461955e-05, | |
| "loss": 1.5509, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8837209302325582, | |
| "grad_norm": 0.2727186977863312, | |
| "learning_rate": 4.090399498078664e-05, | |
| "loss": 1.5905, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8864569083447332, | |
| "grad_norm": 0.24590528011322021, | |
| "learning_rate": 4.084745161846961e-05, | |
| "loss": 1.5459, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8891928864569083, | |
| "grad_norm": 0.22626182436943054, | |
| "learning_rate": 4.079077240622043e-05, | |
| "loss": 1.6127, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8919288645690835, | |
| "grad_norm": 0.24243368208408356, | |
| "learning_rate": 4.0733957829913296e-05, | |
| "loss": 1.5687, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8946648426812586, | |
| "grad_norm": 0.2647876739501953, | |
| "learning_rate": 4.0677008376582716e-05, | |
| "loss": 1.6761, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8974008207934336, | |
| "grad_norm": 0.2398100048303604, | |
| "learning_rate": 4.061992453441946e-05, | |
| "loss": 1.5666, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9001367989056087, | |
| "grad_norm": 0.2463439702987671, | |
| "learning_rate": 4.056270679276631e-05, | |
| "loss": 1.7682, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9028727770177839, | |
| "grad_norm": 0.23695454001426697, | |
| "learning_rate": 4.050535564211388e-05, | |
| "loss": 1.4589, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.905608755129959, | |
| "grad_norm": 0.24376732110977173, | |
| "learning_rate": 4.044787157409642e-05, | |
| "loss": 1.4797, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.908344733242134, | |
| "grad_norm": 0.24585182964801788, | |
| "learning_rate": 4.039025508148758e-05, | |
| "loss": 1.5843, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.9110807113543091, | |
| "grad_norm": 0.266337126493454, | |
| "learning_rate": 4.0332506658196225e-05, | |
| "loss": 1.7075, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.9138166894664843, | |
| "grad_norm": 0.2664279043674469, | |
| "learning_rate": 4.027462679926215e-05, | |
| "loss": 1.6063, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9165526675786594, | |
| "grad_norm": 0.2796095013618469, | |
| "learning_rate": 4.021661600085189e-05, | |
| "loss": 1.7072, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.9192886456908345, | |
| "grad_norm": 0.26439887285232544, | |
| "learning_rate": 4.01584747602544e-05, | |
| "loss": 1.5579, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9220246238030095, | |
| "grad_norm": 0.2440110445022583, | |
| "learning_rate": 4.010020357587687e-05, | |
| "loss": 1.5581, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.9247606019151847, | |
| "grad_norm": 0.2567618787288666, | |
| "learning_rate": 4.00418029472404e-05, | |
| "loss": 1.7241, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9274965800273598, | |
| "grad_norm": 0.26454323530197144, | |
| "learning_rate": 3.9983273374975726e-05, | |
| "loss": 1.5839, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 0.2620810866355896, | |
| "learning_rate": 3.9924615360818934e-05, | |
| "loss": 1.7599, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.93296853625171, | |
| "grad_norm": 0.2683962285518646, | |
| "learning_rate": 3.986582940760717e-05, | |
| "loss": 1.6423, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9357045143638851, | |
| "grad_norm": 0.22277259826660156, | |
| "learning_rate": 3.9806916019274306e-05, | |
| "loss": 1.546, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9384404924760602, | |
| "grad_norm": 0.26338738203048706, | |
| "learning_rate": 3.9747875700846646e-05, | |
| "loss": 1.5739, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.27578940987586975, | |
| "learning_rate": 3.968870895843858e-05, | |
| "loss": 1.6281, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9439124487004104, | |
| "grad_norm": 0.24065442383289337, | |
| "learning_rate": 3.962941629924824e-05, | |
| "loss": 1.4456, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9466484268125855, | |
| "grad_norm": 0.25176844000816345, | |
| "learning_rate": 3.956999823155315e-05, | |
| "loss": 1.6577, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9493844049247606, | |
| "grad_norm": 0.2668481767177582, | |
| "learning_rate": 3.951045526470592e-05, | |
| "loss": 1.5013, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9521203830369357, | |
| "grad_norm": 0.27450481057167053, | |
| "learning_rate": 3.94507879091298e-05, | |
| "loss": 1.5217, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9548563611491108, | |
| "grad_norm": 0.2651226818561554, | |
| "learning_rate": 3.939099667631438e-05, | |
| "loss": 1.5877, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.957592339261286, | |
| "grad_norm": 0.27575191855430603, | |
| "learning_rate": 3.933108207881112e-05, | |
| "loss": 1.5836, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.960328317373461, | |
| "grad_norm": 0.248700350522995, | |
| "learning_rate": 3.927104463022906e-05, | |
| "loss": 1.6258, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9630642954856361, | |
| "grad_norm": 0.27306076884269714, | |
| "learning_rate": 3.921088484523032e-05, | |
| "loss": 1.5255, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9658002735978112, | |
| "grad_norm": 0.2685610055923462, | |
| "learning_rate": 3.9150603239525765e-05, | |
| "loss": 1.607, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9685362517099864, | |
| "grad_norm": 0.2934316396713257, | |
| "learning_rate": 3.909020032987051e-05, | |
| "loss": 1.5252, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9712722298221614, | |
| "grad_norm": 0.2768978774547577, | |
| "learning_rate": 3.902967663405956e-05, | |
| "loss": 1.6291, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9740082079343365, | |
| "grad_norm": 0.29954272508621216, | |
| "learning_rate": 3.8969032670923346e-05, | |
| "loss": 1.481, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9767441860465116, | |
| "grad_norm": 0.26432037353515625, | |
| "learning_rate": 3.890826896032326e-05, | |
| "loss": 1.8236, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9794801641586868, | |
| "grad_norm": 0.2719402611255646, | |
| "learning_rate": 3.884738602314719e-05, | |
| "loss": 1.626, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9822161422708618, | |
| "grad_norm": 0.2573629915714264, | |
| "learning_rate": 3.8786384381305123e-05, | |
| "loss": 1.4863, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9849521203830369, | |
| "grad_norm": 0.27962571382522583, | |
| "learning_rate": 3.87252645577246e-05, | |
| "loss": 1.567, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.987688098495212, | |
| "grad_norm": 0.3235297203063965, | |
| "learning_rate": 3.866402707634624e-05, | |
| "loss": 1.5815, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9904240766073872, | |
| "grad_norm": 0.24766239523887634, | |
| "learning_rate": 3.8602672462119294e-05, | |
| "loss": 1.6327, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9931600547195623, | |
| "grad_norm": 0.2470933198928833, | |
| "learning_rate": 3.85412012409971e-05, | |
| "loss": 1.626, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9958960328317373, | |
| "grad_norm": 0.2527347207069397, | |
| "learning_rate": 3.847961393993261e-05, | |
| "loss": 1.5373, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9986320109439124, | |
| "grad_norm": 0.24633722007274628, | |
| "learning_rate": 3.8417911086873834e-05, | |
| "loss": 1.5994, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.0013679890560876, | |
| "grad_norm": 0.2634814381599426, | |
| "learning_rate": 3.835609321075934e-05, | |
| "loss": 1.5801, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.0041039671682626, | |
| "grad_norm": 0.2554261386394501, | |
| "learning_rate": 3.8294160841513715e-05, | |
| "loss": 1.6, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.0068399452804377, | |
| "grad_norm": 0.23322933912277222, | |
| "learning_rate": 3.823211451004304e-05, | |
| "loss": 1.4199, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.009575923392613, | |
| "grad_norm": 0.23709669709205627, | |
| "learning_rate": 3.816995474823028e-05, | |
| "loss": 1.5409, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.012311901504788, | |
| "grad_norm": 0.2459351122379303, | |
| "learning_rate": 3.8107682088930794e-05, | |
| "loss": 1.5137, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.015047879616963, | |
| "grad_norm": 0.2492617666721344, | |
| "learning_rate": 3.8045297065967744e-05, | |
| "loss": 1.6054, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0177838577291383, | |
| "grad_norm": 0.2558859884738922, | |
| "learning_rate": 3.798280021412749e-05, | |
| "loss": 1.6189, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0205198358413132, | |
| "grad_norm": 0.23855186998844147, | |
| "learning_rate": 3.792019206915504e-05, | |
| "loss": 1.7069, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.0232558139534884, | |
| "grad_norm": 0.2665524482727051, | |
| "learning_rate": 3.7857473167749435e-05, | |
| "loss": 1.5505, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.0259917920656634, | |
| "grad_norm": 0.24079938232898712, | |
| "learning_rate": 3.779464404755919e-05, | |
| "loss": 1.4712, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.0287277701778386, | |
| "grad_norm": 0.2564024329185486, | |
| "learning_rate": 3.773170524717763e-05, | |
| "loss": 1.5464, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.0314637482900137, | |
| "grad_norm": 0.23460035026073456, | |
| "learning_rate": 3.766865730613828e-05, | |
| "loss": 1.6146, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.0341997264021887, | |
| "grad_norm": 0.24386173486709595, | |
| "learning_rate": 3.760550076491031e-05, | |
| "loss": 1.4695, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.036935704514364, | |
| "grad_norm": 0.29760661721229553, | |
| "learning_rate": 3.754223616489379e-05, | |
| "loss": 1.3848, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.039671682626539, | |
| "grad_norm": 0.243236243724823, | |
| "learning_rate": 3.7478864048415136e-05, | |
| "loss": 1.4215, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.042407660738714, | |
| "grad_norm": 0.259069561958313, | |
| "learning_rate": 3.7415384958722445e-05, | |
| "loss": 1.5352, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.0451436388508892, | |
| "grad_norm": 0.281246542930603, | |
| "learning_rate": 3.735179943998081e-05, | |
| "loss": 1.6266, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0478796169630642, | |
| "grad_norm": 0.2552565634250641, | |
| "learning_rate": 3.728810803726767e-05, | |
| "loss": 1.6849, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.0506155950752394, | |
| "grad_norm": 0.25851351022720337, | |
| "learning_rate": 3.7224311296568134e-05, | |
| "loss": 1.4284, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0533515731874146, | |
| "grad_norm": 0.2506335973739624, | |
| "learning_rate": 3.716040976477033e-05, | |
| "loss": 1.4498, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0560875512995895, | |
| "grad_norm": 0.3301680088043213, | |
| "learning_rate": 3.709640398966067e-05, | |
| "loss": 1.5649, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "grad_norm": 0.2851655185222626, | |
| "learning_rate": 3.703229451991918e-05, | |
| "loss": 1.6324, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.06155950752394, | |
| "grad_norm": 0.27497461438179016, | |
| "learning_rate": 3.69680819051148e-05, | |
| "loss": 1.5628, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0642954856361149, | |
| "grad_norm": 0.2582625448703766, | |
| "learning_rate": 3.6903766695700656e-05, | |
| "loss": 1.5979, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.06703146374829, | |
| "grad_norm": 0.2679811418056488, | |
| "learning_rate": 3.6839349443009364e-05, | |
| "loss": 1.51, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.069767441860465, | |
| "grad_norm": 0.2523844242095947, | |
| "learning_rate": 3.677483069924827e-05, | |
| "loss": 1.6217, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0725034199726402, | |
| "grad_norm": 0.23916080594062805, | |
| "learning_rate": 3.671021101749476e-05, | |
| "loss": 1.6128, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0752393980848154, | |
| "grad_norm": 0.2704644203186035, | |
| "learning_rate": 3.664549095169148e-05, | |
| "loss": 1.5524, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.0779753761969904, | |
| "grad_norm": 0.27870067954063416, | |
| "learning_rate": 3.6580671056641616e-05, | |
| "loss": 1.6693, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0807113543091655, | |
| "grad_norm": 0.2483988106250763, | |
| "learning_rate": 3.6515751888004113e-05, | |
| "loss": 1.4771, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.0834473324213407, | |
| "grad_norm": 0.2584916949272156, | |
| "learning_rate": 3.645073400228895e-05, | |
| "loss": 1.5099, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0861833105335157, | |
| "grad_norm": 0.24778081476688385, | |
| "learning_rate": 3.6385617956852286e-05, | |
| "loss": 1.6118, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0889192886456909, | |
| "grad_norm": 0.2260492891073227, | |
| "learning_rate": 3.632040430989181e-05, | |
| "loss": 1.5512, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0916552667578658, | |
| "grad_norm": 0.2751753330230713, | |
| "learning_rate": 3.6255093620441834e-05, | |
| "loss": 1.559, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.094391244870041, | |
| "grad_norm": 0.2648756206035614, | |
| "learning_rate": 3.618968644836859e-05, | |
| "loss": 1.5666, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0971272229822162, | |
| "grad_norm": 0.24056974053382874, | |
| "learning_rate": 3.612418335436536e-05, | |
| "loss": 1.5169, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0998632010943912, | |
| "grad_norm": 0.23621565103530884, | |
| "learning_rate": 3.605858489994771e-05, | |
| "loss": 1.7854, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1025991792065664, | |
| "grad_norm": 0.25893279910087585, | |
| "learning_rate": 3.5992891647448696e-05, | |
| "loss": 1.6353, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.1053351573187415, | |
| "grad_norm": 0.2806542217731476, | |
| "learning_rate": 3.592710416001398e-05, | |
| "loss": 1.7623, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1080711354309165, | |
| "grad_norm": 0.29038006067276, | |
| "learning_rate": 3.586122300159707e-05, | |
| "loss": 1.4457, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.1108071135430917, | |
| "grad_norm": 0.27859950065612793, | |
| "learning_rate": 3.5795248736954426e-05, | |
| "loss": 1.4407, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1135430916552667, | |
| "grad_norm": 0.2583613991737366, | |
| "learning_rate": 3.5729181931640674e-05, | |
| "loss": 1.7205, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.1162790697674418, | |
| "grad_norm": 0.25556913018226624, | |
| "learning_rate": 3.5663023152003705e-05, | |
| "loss": 1.5662, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.119015047879617, | |
| "grad_norm": 0.26569557189941406, | |
| "learning_rate": 3.559677296517987e-05, | |
| "loss": 1.5788, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.121751025991792, | |
| "grad_norm": 0.2778777778148651, | |
| "learning_rate": 3.5530431939089084e-05, | |
| "loss": 1.747, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1244870041039672, | |
| "grad_norm": 0.29031357169151306, | |
| "learning_rate": 3.546400064242997e-05, | |
| "loss": 1.6342, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.1272229822161424, | |
| "grad_norm": 0.26415035128593445, | |
| "learning_rate": 3.5397479644674964e-05, | |
| "loss": 1.5624, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1299589603283173, | |
| "grad_norm": 0.2662270963191986, | |
| "learning_rate": 3.533086951606549e-05, | |
| "loss": 1.6745, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.1326949384404925, | |
| "grad_norm": 0.2567162811756134, | |
| "learning_rate": 3.5264170827607004e-05, | |
| "loss": 1.6727, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1354309165526675, | |
| "grad_norm": 0.28177884221076965, | |
| "learning_rate": 3.519738415106413e-05, | |
| "loss": 1.4554, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.1381668946648427, | |
| "grad_norm": 0.277425616979599, | |
| "learning_rate": 3.513051005895576e-05, | |
| "loss": 1.5096, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1409028727770179, | |
| "grad_norm": 0.2886553108692169, | |
| "learning_rate": 3.506354912455016e-05, | |
| "loss": 1.6723, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.1436388508891928, | |
| "grad_norm": 0.245221808552742, | |
| "learning_rate": 3.499650192186001e-05, | |
| "loss": 1.4508, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.146374829001368, | |
| "grad_norm": 0.31830281019210815, | |
| "learning_rate": 3.492936902563754e-05, | |
| "loss": 1.4235, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.1491108071135432, | |
| "grad_norm": 0.2477853149175644, | |
| "learning_rate": 3.486215101136954e-05, | |
| "loss": 1.4991, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1518467852257182, | |
| "grad_norm": 0.2735631465911865, | |
| "learning_rate": 3.47948484552725e-05, | |
| "loss": 1.5141, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.1545827633378933, | |
| "grad_norm": 0.263138085603714, | |
| "learning_rate": 3.47274619342876e-05, | |
| "loss": 1.7006, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1573187414500685, | |
| "grad_norm": 0.2716856598854065, | |
| "learning_rate": 3.465999202607583e-05, | |
| "loss": 1.6627, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.1600547195622435, | |
| "grad_norm": 0.26602792739868164, | |
| "learning_rate": 3.459243930901297e-05, | |
| "loss": 1.5501, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 0.27663204073905945, | |
| "learning_rate": 3.452480436218471e-05, | |
| "loss": 1.7229, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.1655266757865936, | |
| "grad_norm": 0.3033023178577423, | |
| "learning_rate": 3.4457087765381584e-05, | |
| "loss": 1.5789, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.1682626538987688, | |
| "grad_norm": 0.2735162675380707, | |
| "learning_rate": 3.43892900990941e-05, | |
| "loss": 1.6559, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.170998632010944, | |
| "grad_norm": 0.26795685291290283, | |
| "learning_rate": 3.432141194450772e-05, | |
| "loss": 1.4425, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.173734610123119, | |
| "grad_norm": 0.2699303925037384, | |
| "learning_rate": 3.425345388349786e-05, | |
| "loss": 1.5429, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 0.2614208459854126, | |
| "learning_rate": 3.418541649862494e-05, | |
| "loss": 1.5916, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1792065663474691, | |
| "grad_norm": 0.30162835121154785, | |
| "learning_rate": 3.4117300373129376e-05, | |
| "loss": 1.4801, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.1819425444596443, | |
| "grad_norm": 0.2973942458629608, | |
| "learning_rate": 3.404910609092655e-05, | |
| "loss": 1.5476, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.1846785225718195, | |
| "grad_norm": 0.26698416471481323, | |
| "learning_rate": 3.3980834236601853e-05, | |
| "loss": 1.6382, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.1874145006839945, | |
| "grad_norm": 0.3085285723209381, | |
| "learning_rate": 3.391248539540565e-05, | |
| "loss": 1.5059, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1901504787961696, | |
| "grad_norm": 0.25455278158187866, | |
| "learning_rate": 3.384406015324826e-05, | |
| "loss": 1.6612, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1928864569083448, | |
| "grad_norm": 0.2725888192653656, | |
| "learning_rate": 3.3775559096694933e-05, | |
| "loss": 1.5598, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1956224350205198, | |
| "grad_norm": 0.34251880645751953, | |
| "learning_rate": 3.370698281296083e-05, | |
| "loss": 1.5973, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.198358413132695, | |
| "grad_norm": 0.2710118889808655, | |
| "learning_rate": 3.363833188990599e-05, | |
| "loss": 1.4116, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2010943912448702, | |
| "grad_norm": 0.2622835636138916, | |
| "learning_rate": 3.3569606916030294e-05, | |
| "loss": 1.4546, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.2038303693570451, | |
| "grad_norm": 0.2685026228427887, | |
| "learning_rate": 3.350080848046839e-05, | |
| "loss": 1.5789, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2065663474692203, | |
| "grad_norm": 0.2792280912399292, | |
| "learning_rate": 3.343193717298469e-05, | |
| "loss": 1.5287, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.2093023255813953, | |
| "grad_norm": 0.3190619647502899, | |
| "learning_rate": 3.3362993583968264e-05, | |
| "loss": 1.5803, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2120383036935705, | |
| "grad_norm": 0.27128350734710693, | |
| "learning_rate": 3.329397830442784e-05, | |
| "loss": 1.5371, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.2147742818057456, | |
| "grad_norm": 0.2853277325630188, | |
| "learning_rate": 3.322489192598665e-05, | |
| "loss": 1.5739, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2175102599179206, | |
| "grad_norm": 0.2518289387226105, | |
| "learning_rate": 3.3155735040877465e-05, | |
| "loss": 1.6408, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.2202462380300958, | |
| "grad_norm": 0.297750324010849, | |
| "learning_rate": 3.308650824193744e-05, | |
| "loss": 1.6255, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.2229822161422708, | |
| "grad_norm": 0.2988694906234741, | |
| "learning_rate": 3.301721212260306e-05, | |
| "loss": 1.5215, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.225718194254446, | |
| "grad_norm": 0.2906099557876587, | |
| "learning_rate": 3.294784727690503e-05, | |
| "loss": 1.5124, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2284541723666211, | |
| "grad_norm": 0.28081414103507996, | |
| "learning_rate": 3.2878414299463225e-05, | |
| "loss": 1.6691, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.231190150478796, | |
| "grad_norm": 0.26118987798690796, | |
| "learning_rate": 3.280891378548156e-05, | |
| "loss": 1.524, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.2339261285909713, | |
| "grad_norm": 0.2828025817871094, | |
| "learning_rate": 3.273934633074291e-05, | |
| "loss": 1.7315, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.2366621067031465, | |
| "grad_norm": 0.27713146805763245, | |
| "learning_rate": 3.2669712531603966e-05, | |
| "loss": 1.534, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.2393980848153214, | |
| "grad_norm": 0.30562645196914673, | |
| "learning_rate": 3.2600012984990165e-05, | |
| "loss": 1.4404, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.2421340629274966, | |
| "grad_norm": 0.30407342314720154, | |
| "learning_rate": 3.2530248288390555e-05, | |
| "loss": 1.5327, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.2448700410396718, | |
| "grad_norm": 0.3026122450828552, | |
| "learning_rate": 3.246041903985264e-05, | |
| "loss": 1.4441, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.2476060191518468, | |
| "grad_norm": 0.3250129520893097, | |
| "learning_rate": 3.2390525837977334e-05, | |
| "loss": 1.661, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.250341997264022, | |
| "grad_norm": 0.29971036314964294, | |
| "learning_rate": 3.232056928191376e-05, | |
| "loss": 1.5989, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.253077975376197, | |
| "grad_norm": 0.296903133392334, | |
| "learning_rate": 3.225054997135413e-05, | |
| "loss": 1.5724, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.255813953488372, | |
| "grad_norm": 0.31808045506477356, | |
| "learning_rate": 3.218046850652862e-05, | |
| "loss": 1.6791, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.2585499316005473, | |
| "grad_norm": 0.2797161340713501, | |
| "learning_rate": 3.211032548820019e-05, | |
| "loss": 1.4345, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2612859097127223, | |
| "grad_norm": 0.3660871386528015, | |
| "learning_rate": 3.20401215176595e-05, | |
| "loss": 1.5669, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.2640218878248974, | |
| "grad_norm": 0.30877435207366943, | |
| "learning_rate": 3.196985719671968e-05, | |
| "loss": 1.5406, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2667578659370724, | |
| "grad_norm": 0.29524242877960205, | |
| "learning_rate": 3.1899533127711215e-05, | |
| "loss": 1.3333, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2694938440492476, | |
| "grad_norm": 0.3187881112098694, | |
| "learning_rate": 3.182914991347677e-05, | |
| "loss": 1.5237, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2722298221614228, | |
| "grad_norm": 0.2810283303260803, | |
| "learning_rate": 3.1758708157366036e-05, | |
| "loss": 1.4334, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2749658002735977, | |
| "grad_norm": 0.27640587091445923, | |
| "learning_rate": 3.168820846323053e-05, | |
| "loss": 1.5078, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.277701778385773, | |
| "grad_norm": 0.2678714692592621, | |
| "learning_rate": 3.161765143541843e-05, | |
| "loss": 1.439, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.2804377564979479, | |
| "grad_norm": 0.315186470746994, | |
| "learning_rate": 3.154703767876942e-05, | |
| "loss": 1.4901, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.283173734610123, | |
| "grad_norm": 0.32562458515167236, | |
| "learning_rate": 3.1476367798609475e-05, | |
| "loss": 1.4822, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.2859097127222983, | |
| "grad_norm": 0.31798624992370605, | |
| "learning_rate": 3.1405642400745664e-05, | |
| "loss": 1.5145, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.2886456908344734, | |
| "grad_norm": 0.29922667145729065, | |
| "learning_rate": 3.133486209146099e-05, | |
| "loss": 1.4984, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.2913816689466484, | |
| "grad_norm": 0.3259302079677582, | |
| "learning_rate": 3.12640274775092e-05, | |
| "loss": 1.5048, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "grad_norm": 0.2870180010795593, | |
| "learning_rate": 3.119313916610948e-05, | |
| "loss": 1.52, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.2968536251709986, | |
| "grad_norm": 0.3135444223880768, | |
| "learning_rate": 3.112219776494142e-05, | |
| "loss": 1.494, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.2995896032831737, | |
| "grad_norm": 0.29159173369407654, | |
| "learning_rate": 3.105120388213966e-05, | |
| "loss": 1.7019, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.302325581395349, | |
| "grad_norm": 0.31548362970352173, | |
| "learning_rate": 3.098015812628875e-05, | |
| "loss": 1.6465, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.305061559507524, | |
| "grad_norm": 0.2820277214050293, | |
| "learning_rate": 3.090906110641791e-05, | |
| "loss": 1.5705, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.307797537619699, | |
| "grad_norm": 0.2773056924343109, | |
| "learning_rate": 3.083791343199582e-05, | |
| "loss": 1.785, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.310533515731874, | |
| "grad_norm": 0.2650894522666931, | |
| "learning_rate": 3.0766715712925384e-05, | |
| "loss": 1.6026, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.3132694938440492, | |
| "grad_norm": 0.2996635138988495, | |
| "learning_rate": 3.06954685595385e-05, | |
| "loss": 1.489, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3160054719562244, | |
| "grad_norm": 0.3386262059211731, | |
| "learning_rate": 3.062417258259084e-05, | |
| "loss": 1.538, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.3187414500683994, | |
| "grad_norm": 0.30783751606941223, | |
| "learning_rate": 3.055282839325661e-05, | |
| "loss": 1.42, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.3214774281805746, | |
| "grad_norm": 0.28174683451652527, | |
| "learning_rate": 3.0481436603123292e-05, | |
| "loss": 1.5068, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.3242134062927495, | |
| "grad_norm": 0.2959563732147217, | |
| "learning_rate": 3.0409997824186453e-05, | |
| "loss": 1.4343, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.3269493844049247, | |
| "grad_norm": 0.24625587463378906, | |
| "learning_rate": 3.0338512668844443e-05, | |
| "loss": 1.5942, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.3296853625171, | |
| "grad_norm": 0.3001210689544678, | |
| "learning_rate": 3.0266981749893157e-05, | |
| "loss": 1.5715, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.332421340629275, | |
| "grad_norm": 0.2976323962211609, | |
| "learning_rate": 3.0195405680520828e-05, | |
| "loss": 1.4471, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.33515731874145, | |
| "grad_norm": 0.3303026854991913, | |
| "learning_rate": 3.012378507430269e-05, | |
| "loss": 1.6357, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.3378932968536252, | |
| "grad_norm": 0.308095246553421, | |
| "learning_rate": 3.005212054519579e-05, | |
| "loss": 1.6527, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.3406292749658002, | |
| "grad_norm": 0.2990988790988922, | |
| "learning_rate": 2.99804127075337e-05, | |
| "loss": 1.5795, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.3433652530779754, | |
| "grad_norm": 0.29910552501678467, | |
| "learning_rate": 2.9908662176021225e-05, | |
| "loss": 1.6597, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.3461012311901506, | |
| "grad_norm": 0.28705233335494995, | |
| "learning_rate": 2.9836869565729176e-05, | |
| "loss": 1.4978, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.3488372093023255, | |
| "grad_norm": 0.33388686180114746, | |
| "learning_rate": 2.9765035492089072e-05, | |
| "loss": 1.4049, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.3515731874145007, | |
| "grad_norm": 0.28467613458633423, | |
| "learning_rate": 2.9693160570887873e-05, | |
| "loss": 1.4809, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.3543091655266757, | |
| "grad_norm": 0.2524206340312958, | |
| "learning_rate": 2.9621245418262694e-05, | |
| "loss": 1.5561, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.3570451436388509, | |
| "grad_norm": 0.27135151624679565, | |
| "learning_rate": 2.954929065069554e-05, | |
| "loss": 1.5686, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.359781121751026, | |
| "grad_norm": 0.31753405928611755, | |
| "learning_rate": 2.9477296885007988e-05, | |
| "loss": 1.4994, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.362517099863201, | |
| "grad_norm": 0.274442583322525, | |
| "learning_rate": 2.9405264738355946e-05, | |
| "loss": 1.5835, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.3652530779753762, | |
| "grad_norm": 0.3164355754852295, | |
| "learning_rate": 2.9333194828224316e-05, | |
| "loss": 1.4077, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.3679890560875512, | |
| "grad_norm": 0.3447605073451996, | |
| "learning_rate": 2.926108777242172e-05, | |
| "loss": 1.4768, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3679890560875512, | |
| "eval_loss": 1.573856234550476, | |
| "eval_runtime": 13.7722, | |
| "eval_samples_per_second": 11.182, | |
| "eval_steps_per_second": 11.182, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3707250341997264, | |
| "grad_norm": 0.31595364212989807, | |
| "learning_rate": 2.9188944189075235e-05, | |
| "loss": 1.6381, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.3734610123119015, | |
| "grad_norm": 0.28893882036209106, | |
| "learning_rate": 2.9116764696625033e-05, | |
| "loss": 1.6677, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.3761969904240767, | |
| "grad_norm": 0.27313435077667236, | |
| "learning_rate": 2.9044549913819124e-05, | |
| "loss": 1.5746, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.3789329685362517, | |
| "grad_norm": 0.2701267600059509, | |
| "learning_rate": 2.897230045970804e-05, | |
| "loss": 1.2618, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.3816689466484269, | |
| "grad_norm": 0.30766165256500244, | |
| "learning_rate": 2.890001695363953e-05, | |
| "loss": 1.4354, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.3844049247606018, | |
| "grad_norm": 0.34688156843185425, | |
| "learning_rate": 2.8827700015253246e-05, | |
| "loss": 1.4171, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.387140902872777, | |
| "grad_norm": 0.28550222516059875, | |
| "learning_rate": 2.875535026447543e-05, | |
| "loss": 1.6484, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.3898768809849522, | |
| "grad_norm": 0.2944300174713135, | |
| "learning_rate": 2.868296832151361e-05, | |
| "loss": 1.6701, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.3926128590971272, | |
| "grad_norm": 0.2670783996582031, | |
| "learning_rate": 2.8610554806851264e-05, | |
| "loss": 1.5159, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 0.2730109989643097, | |
| "learning_rate": 2.853811034124253e-05, | |
| "loss": 1.463, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.3980848153214773, | |
| "grad_norm": 0.2924908995628357, | |
| "learning_rate": 2.8465635545706858e-05, | |
| "loss": 1.6673, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.4008207934336525, | |
| "grad_norm": 0.2832358777523041, | |
| "learning_rate": 2.8393131041523702e-05, | |
| "loss": 1.4224, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.4035567715458277, | |
| "grad_norm": 0.3116619884967804, | |
| "learning_rate": 2.8320597450227186e-05, | |
| "loss": 1.4651, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.4062927496580027, | |
| "grad_norm": 0.2656640410423279, | |
| "learning_rate": 2.824803539360078e-05, | |
| "loss": 1.5117, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4090287277701778, | |
| "grad_norm": 0.321814626455307, | |
| "learning_rate": 2.8175445493671972e-05, | |
| "loss": 1.5859, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 0.3401627540588379, | |
| "learning_rate": 2.8102828372706926e-05, | |
| "loss": 1.6612, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.414500683994528, | |
| "grad_norm": 0.3001568019390106, | |
| "learning_rate": 2.803018465320515e-05, | |
| "loss": 1.655, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.4172366621067032, | |
| "grad_norm": 0.27258241176605225, | |
| "learning_rate": 2.795751495789418e-05, | |
| "loss": 1.6612, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.4199726402188784, | |
| "grad_norm": 0.2985587418079376, | |
| "learning_rate": 2.7884819909724224e-05, | |
| "loss": 1.452, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.4227086183310533, | |
| "grad_norm": 0.2565891146659851, | |
| "learning_rate": 2.78121001318628e-05, | |
| "loss": 1.6304, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.4254445964432285, | |
| "grad_norm": 0.29893383383750916, | |
| "learning_rate": 2.7739356247689446e-05, | |
| "loss": 1.4655, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.4281805745554035, | |
| "grad_norm": 0.28050941228866577, | |
| "learning_rate": 2.7666588880790335e-05, | |
| "loss": 1.5134, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.4309165526675787, | |
| "grad_norm": 0.29267653822898865, | |
| "learning_rate": 2.759379865495294e-05, | |
| "loss": 1.4355, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.4336525307797539, | |
| "grad_norm": 0.279461145401001, | |
| "learning_rate": 2.752098619416069e-05, | |
| "loss": 1.5606, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.4363885088919288, | |
| "grad_norm": 0.260345995426178, | |
| "learning_rate": 2.7448152122587634e-05, | |
| "loss": 1.702, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.439124487004104, | |
| "grad_norm": 0.2932511270046234, | |
| "learning_rate": 2.7375297064593063e-05, | |
| "loss": 1.3368, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.441860465116279, | |
| "grad_norm": 0.3288111686706543, | |
| "learning_rate": 2.730242164471616e-05, | |
| "loss": 1.4919, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.4445964432284542, | |
| "grad_norm": 0.27260783314704895, | |
| "learning_rate": 2.7229526487670676e-05, | |
| "loss": 1.5306, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.4473324213406293, | |
| "grad_norm": 0.2748279273509979, | |
| "learning_rate": 2.7156612218339544e-05, | |
| "loss": 1.5238, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.4500683994528043, | |
| "grad_norm": 0.26095664501190186, | |
| "learning_rate": 2.708367946176956e-05, | |
| "loss": 1.5456, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.4528043775649795, | |
| "grad_norm": 0.3169284164905548, | |
| "learning_rate": 2.701072884316595e-05, | |
| "loss": 1.6298, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.4555403556771545, | |
| "grad_norm": 0.2977512776851654, | |
| "learning_rate": 2.6937760987887112e-05, | |
| "loss": 1.4394, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.4582763337893296, | |
| "grad_norm": 0.2777084708213806, | |
| "learning_rate": 2.6864776521439166e-05, | |
| "loss": 1.5062, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.4610123119015048, | |
| "grad_norm": 0.29830804467201233, | |
| "learning_rate": 2.6791776069470658e-05, | |
| "loss": 1.5056, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.46374829001368, | |
| "grad_norm": 0.35224923491477966, | |
| "learning_rate": 2.6718760257767135e-05, | |
| "loss": 1.4127, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.466484268125855, | |
| "grad_norm": 0.31385019421577454, | |
| "learning_rate": 2.6645729712245832e-05, | |
| "loss": 1.6469, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.4692202462380302, | |
| "grad_norm": 0.3666173815727234, | |
| "learning_rate": 2.6572685058950298e-05, | |
| "loss": 1.6698, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.4719562243502051, | |
| "grad_norm": 0.29228919744491577, | |
| "learning_rate": 2.649962692404499e-05, | |
| "loss": 1.5971, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.4746922024623803, | |
| "grad_norm": 0.2931617796421051, | |
| "learning_rate": 2.6426555933809954e-05, | |
| "loss": 1.4914, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.4774281805745555, | |
| "grad_norm": 0.2743561863899231, | |
| "learning_rate": 2.635347271463544e-05, | |
| "loss": 1.5475, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4801641586867305, | |
| "grad_norm": 0.3290240466594696, | |
| "learning_rate": 2.6280377893016516e-05, | |
| "loss": 1.4155, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.4829001367989056, | |
| "grad_norm": 0.30161550641059875, | |
| "learning_rate": 2.6207272095547718e-05, | |
| "loss": 1.4564, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.4856361149110806, | |
| "grad_norm": 0.3175734877586365, | |
| "learning_rate": 2.613415594891767e-05, | |
| "loss": 1.5312, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.4883720930232558, | |
| "grad_norm": 0.3023424744606018, | |
| "learning_rate": 2.606103007990371e-05, | |
| "loss": 1.4688, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.491108071135431, | |
| "grad_norm": 0.3181714415550232, | |
| "learning_rate": 2.5987895115366516e-05, | |
| "loss": 1.738, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.493844049247606, | |
| "grad_norm": 0.2678375244140625, | |
| "learning_rate": 2.5914751682244748e-05, | |
| "loss": 1.5491, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.4965800273597811, | |
| "grad_norm": 0.3041532039642334, | |
| "learning_rate": 2.5841600407549642e-05, | |
| "loss": 1.6654, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.499316005471956, | |
| "grad_norm": 0.30127188563346863, | |
| "learning_rate": 2.5768441918359692e-05, | |
| "loss": 1.5671, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5020519835841313, | |
| "grad_norm": 0.34145042300224304, | |
| "learning_rate": 2.5695276841815186e-05, | |
| "loss": 1.6131, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.5047879616963065, | |
| "grad_norm": 0.2628389000892639, | |
| "learning_rate": 2.562210580511291e-05, | |
| "loss": 1.7636, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5075239398084817, | |
| "grad_norm": 0.2784722149372101, | |
| "learning_rate": 2.5548929435500758e-05, | |
| "loss": 1.4967, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.5102599179206566, | |
| "grad_norm": 0.29418709874153137, | |
| "learning_rate": 2.547574836027231e-05, | |
| "loss": 1.5771, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5129958960328316, | |
| "grad_norm": 0.29850900173187256, | |
| "learning_rate": 2.54025632067615e-05, | |
| "loss": 1.4134, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.5157318741450068, | |
| "grad_norm": 0.29349079728126526, | |
| "learning_rate": 2.5329374602337215e-05, | |
| "loss": 1.4498, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.518467852257182, | |
| "grad_norm": 0.27603679895401, | |
| "learning_rate": 2.525618317439793e-05, | |
| "loss": 1.5103, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.5212038303693571, | |
| "grad_norm": 0.29642152786254883, | |
| "learning_rate": 2.518298955036632e-05, | |
| "loss": 1.5597, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.523939808481532, | |
| "grad_norm": 0.3032752573490143, | |
| "learning_rate": 2.5109794357683885e-05, | |
| "loss": 1.5697, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.5266757865937073, | |
| "grad_norm": 0.3295765519142151, | |
| "learning_rate": 2.503659822380558e-05, | |
| "loss": 1.6343, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "grad_norm": 0.28324607014656067, | |
| "learning_rate": 2.496340177619442e-05, | |
| "loss": 1.551, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.5321477428180574, | |
| "grad_norm": 0.2895617187023163, | |
| "learning_rate": 2.489020564231612e-05, | |
| "loss": 1.6563, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.5348837209302326, | |
| "grad_norm": 0.3268161714076996, | |
| "learning_rate": 2.4817010449633688e-05, | |
| "loss": 1.6045, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.5376196990424078, | |
| "grad_norm": 0.27146685123443604, | |
| "learning_rate": 2.474381682560208e-05, | |
| "loss": 1.5727, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.5403556771545828, | |
| "grad_norm": 0.27629193663597107, | |
| "learning_rate": 2.4670625397662787e-05, | |
| "loss": 1.4352, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.5430916552667577, | |
| "grad_norm": 0.27017953991889954, | |
| "learning_rate": 2.4597436793238506e-05, | |
| "loss": 1.646, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.545827633378933, | |
| "grad_norm": 0.289787232875824, | |
| "learning_rate": 2.45242516397277e-05, | |
| "loss": 1.7107, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.548563611491108, | |
| "grad_norm": 0.2762577533721924, | |
| "learning_rate": 2.4451070564499245e-05, | |
| "loss": 1.5694, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.5512995896032833, | |
| "grad_norm": 0.2959858179092407, | |
| "learning_rate": 2.4377894194887095e-05, | |
| "loss": 1.639, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.5540355677154583, | |
| "grad_norm": 0.28595441579818726, | |
| "learning_rate": 2.4304723158184827e-05, | |
| "loss": 1.3091, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.5567715458276332, | |
| "grad_norm": 0.3006284236907959, | |
| "learning_rate": 2.4231558081640314e-05, | |
| "loss": 1.569, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.5595075239398084, | |
| "grad_norm": 0.2800249457359314, | |
| "learning_rate": 2.415839959245036e-05, | |
| "loss": 1.5124, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5622435020519836, | |
| "grad_norm": 0.3035907745361328, | |
| "learning_rate": 2.4085248317755254e-05, | |
| "loss": 1.5785, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.5649794801641588, | |
| "grad_norm": 0.3111293315887451, | |
| "learning_rate": 2.4012104884633486e-05, | |
| "loss": 1.5544, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.5677154582763337, | |
| "grad_norm": 0.31877052783966064, | |
| "learning_rate": 2.39389699200963e-05, | |
| "loss": 1.482, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.570451436388509, | |
| "grad_norm": 0.3056713342666626, | |
| "learning_rate": 2.386584405108233e-05, | |
| "loss": 1.5234, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.573187414500684, | |
| "grad_norm": 0.289650022983551, | |
| "learning_rate": 2.3792727904452285e-05, | |
| "loss": 1.398, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.575923392612859, | |
| "grad_norm": 0.28617268800735474, | |
| "learning_rate": 2.3719622106983486e-05, | |
| "loss": 1.5721, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5786593707250343, | |
| "grad_norm": 0.29906001687049866, | |
| "learning_rate": 2.3646527285364565e-05, | |
| "loss": 1.3543, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.5813953488372094, | |
| "grad_norm": 0.2876405417919159, | |
| "learning_rate": 2.3573444066190052e-05, | |
| "loss": 1.5365, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.5841313269493844, | |
| "grad_norm": 0.3173394799232483, | |
| "learning_rate": 2.3500373075955022e-05, | |
| "loss": 1.4437, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.5868673050615594, | |
| "grad_norm": 0.2820620834827423, | |
| "learning_rate": 2.342731494104971e-05, | |
| "loss": 1.5992, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5896032831737346, | |
| "grad_norm": 0.3694467842578888, | |
| "learning_rate": 2.3354270287754174e-05, | |
| "loss": 1.5443, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.5923392612859097, | |
| "grad_norm": 0.2960888147354126, | |
| "learning_rate": 2.328123974223288e-05, | |
| "loss": 1.5173, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.595075239398085, | |
| "grad_norm": 0.3080427348613739, | |
| "learning_rate": 2.3208223930529347e-05, | |
| "loss": 1.749, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.59781121751026, | |
| "grad_norm": 0.29521963000297546, | |
| "learning_rate": 2.313522347856084e-05, | |
| "loss": 1.522, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6005471956224349, | |
| "grad_norm": 0.32432520389556885, | |
| "learning_rate": 2.306223901211289e-05, | |
| "loss": 1.6394, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.60328317373461, | |
| "grad_norm": 0.326926052570343, | |
| "learning_rate": 2.2989271156834057e-05, | |
| "loss": 1.5092, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.6060191518467852, | |
| "grad_norm": 0.2793523967266083, | |
| "learning_rate": 2.291632053823045e-05, | |
| "loss": 1.5266, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.6087551299589604, | |
| "grad_norm": 0.3044006824493408, | |
| "learning_rate": 2.2843387781660452e-05, | |
| "loss": 1.7137, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.6114911080711354, | |
| "grad_norm": 0.33133581280708313, | |
| "learning_rate": 2.2770473512329333e-05, | |
| "loss": 1.5494, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.6142270861833106, | |
| "grad_norm": 0.30066078901290894, | |
| "learning_rate": 2.269757835528385e-05, | |
| "loss": 1.387, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.6169630642954855, | |
| "grad_norm": 0.29674389958381653, | |
| "learning_rate": 2.2624702935406943e-05, | |
| "loss": 1.6985, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.6196990424076607, | |
| "grad_norm": 0.2950797975063324, | |
| "learning_rate": 2.255184787741237e-05, | |
| "loss": 1.558, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.622435020519836, | |
| "grad_norm": 0.32414302229881287, | |
| "learning_rate": 2.2479013805839318e-05, | |
| "loss": 1.4547, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.625170998632011, | |
| "grad_norm": 0.2893849015235901, | |
| "learning_rate": 2.240620134504707e-05, | |
| "loss": 1.6205, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.30795803666114807, | |
| "learning_rate": 2.233341111920967e-05, | |
| "loss": 1.4725, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.630642954856361, | |
| "grad_norm": 0.28829425573349, | |
| "learning_rate": 2.226064375231056e-05, | |
| "loss": 1.5676, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.6333789329685362, | |
| "grad_norm": 0.32631853222846985, | |
| "learning_rate": 2.2187899868137206e-05, | |
| "loss": 1.447, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.6361149110807114, | |
| "grad_norm": 0.2999882996082306, | |
| "learning_rate": 2.211518009027579e-05, | |
| "loss": 1.4184, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.6388508891928866, | |
| "grad_norm": 0.302869975566864, | |
| "learning_rate": 2.204248504210582e-05, | |
| "loss": 1.5356, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.6415868673050615, | |
| "grad_norm": 0.32066816091537476, | |
| "learning_rate": 2.1969815346794857e-05, | |
| "loss": 1.507, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.6443228454172365, | |
| "grad_norm": 0.26394781470298767, | |
| "learning_rate": 2.189717162729309e-05, | |
| "loss": 1.519, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 0.28778427839279175, | |
| "learning_rate": 2.182455450632803e-05, | |
| "loss": 1.6243, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.6497948016415869, | |
| "grad_norm": 0.28757721185684204, | |
| "learning_rate": 2.1751964606399224e-05, | |
| "loss": 1.6119, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.652530779753762, | |
| "grad_norm": 0.32660529017448425, | |
| "learning_rate": 2.167940254977282e-05, | |
| "loss": 1.481, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.655266757865937, | |
| "grad_norm": 0.2857491075992584, | |
| "learning_rate": 2.1606868958476304e-05, | |
| "loss": 1.4287, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.6580027359781122, | |
| "grad_norm": 0.3346758782863617, | |
| "learning_rate": 2.1534364454293148e-05, | |
| "loss": 1.6915, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.6607387140902872, | |
| "grad_norm": 0.2858891189098358, | |
| "learning_rate": 2.146188965875747e-05, | |
| "loss": 1.4857, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.6634746922024624, | |
| "grad_norm": 0.2926919758319855, | |
| "learning_rate": 2.1389445193148742e-05, | |
| "loss": 1.5348, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.6662106703146375, | |
| "grad_norm": 0.3735974133014679, | |
| "learning_rate": 2.1317031678486402e-05, | |
| "loss": 1.697, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.6689466484268127, | |
| "grad_norm": 0.3130210340023041, | |
| "learning_rate": 2.124464973552457e-05, | |
| "loss": 1.6426, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.6716826265389877, | |
| "grad_norm": 0.36122313141822815, | |
| "learning_rate": 2.117229998474676e-05, | |
| "loss": 1.7516, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.6744186046511627, | |
| "grad_norm": 0.2955617904663086, | |
| "learning_rate": 2.109998304636048e-05, | |
| "loss": 1.4681, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.6771545827633378, | |
| "grad_norm": 0.3331190347671509, | |
| "learning_rate": 2.1027699540291965e-05, | |
| "loss": 1.5334, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.679890560875513, | |
| "grad_norm": 0.39551764726638794, | |
| "learning_rate": 2.0955450086180882e-05, | |
| "loss": 1.497, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.6826265389876882, | |
| "grad_norm": 0.2910989224910736, | |
| "learning_rate": 2.088323530337498e-05, | |
| "loss": 1.4593, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.6853625170998632, | |
| "grad_norm": 0.2927062511444092, | |
| "learning_rate": 2.0811055810924768e-05, | |
| "loss": 1.5019, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.6880984952120381, | |
| "grad_norm": 0.2986646592617035, | |
| "learning_rate": 2.0738912227578283e-05, | |
| "loss": 1.5167, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.6908344733242133, | |
| "grad_norm": 0.2710409462451935, | |
| "learning_rate": 2.0666805171775687e-05, | |
| "loss": 1.5491, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.6935704514363885, | |
| "grad_norm": 0.32555902004241943, | |
| "learning_rate": 2.0594735261644056e-05, | |
| "loss": 1.5945, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.6963064295485637, | |
| "grad_norm": 0.3129582703113556, | |
| "learning_rate": 2.0522703114992018e-05, | |
| "loss": 1.5388, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6990424076607387, | |
| "grad_norm": 0.29060521721839905, | |
| "learning_rate": 2.0450709349304463e-05, | |
| "loss": 1.5335, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.7017783857729138, | |
| "grad_norm": 0.2991231381893158, | |
| "learning_rate": 2.0378754581737308e-05, | |
| "loss": 1.5857, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.7045143638850888, | |
| "grad_norm": 0.3089081346988678, | |
| "learning_rate": 2.0306839429112136e-05, | |
| "loss": 1.509, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.707250341997264, | |
| "grad_norm": 0.30709123611450195, | |
| "learning_rate": 2.023496450791093e-05, | |
| "loss": 1.5935, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.7099863201094392, | |
| "grad_norm": 0.29340359568595886, | |
| "learning_rate": 2.0163130434270833e-05, | |
| "loss": 1.547, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.7127222982216144, | |
| "grad_norm": 0.3518681824207306, | |
| "learning_rate": 2.009133782397879e-05, | |
| "loss": 1.513, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.7154582763337893, | |
| "grad_norm": 0.31759360432624817, | |
| "learning_rate": 2.0019587292466306e-05, | |
| "loss": 1.5252, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.7181942544459643, | |
| "grad_norm": 0.2931315302848816, | |
| "learning_rate": 1.9947879454804216e-05, | |
| "loss": 1.7602, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.7209302325581395, | |
| "grad_norm": 0.2938210368156433, | |
| "learning_rate": 1.9876214925697323e-05, | |
| "loss": 1.5154, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.7236662106703147, | |
| "grad_norm": 0.28195831179618835, | |
| "learning_rate": 1.980459431947918e-05, | |
| "loss": 1.4129, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.7264021887824899, | |
| "grad_norm": 0.31024137139320374, | |
| "learning_rate": 1.973301825010685e-05, | |
| "loss": 1.5008, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.7291381668946648, | |
| "grad_norm": 0.3060285747051239, | |
| "learning_rate": 1.9661487331155563e-05, | |
| "loss": 1.5469, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.7318741450068398, | |
| "grad_norm": 0.38896477222442627, | |
| "learning_rate": 1.9590002175813553e-05, | |
| "loss": 1.6018, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.734610123119015, | |
| "grad_norm": 0.32051512598991394, | |
| "learning_rate": 1.9518563396876717e-05, | |
| "loss": 1.5078, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.7373461012311902, | |
| "grad_norm": 0.2726178467273712, | |
| "learning_rate": 1.9447171606743398e-05, | |
| "loss": 1.4586, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.7400820793433653, | |
| "grad_norm": 0.2772533595561981, | |
| "learning_rate": 1.9375827417409165e-05, | |
| "loss": 1.5022, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.7428180574555403, | |
| "grad_norm": 0.30202561616897583, | |
| "learning_rate": 1.9304531440461506e-05, | |
| "loss": 1.5036, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.7455540355677155, | |
| "grad_norm": 0.2932077944278717, | |
| "learning_rate": 1.923328428707461e-05, | |
| "loss": 1.5541, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.7482900136798905, | |
| "grad_norm": 0.3096188008785248, | |
| "learning_rate": 1.916208656800418e-05, | |
| "loss": 1.3744, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.7510259917920656, | |
| "grad_norm": 0.282226026058197, | |
| "learning_rate": 1.9090938893582088e-05, | |
| "loss": 1.6306, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7537619699042408, | |
| "grad_norm": 0.27213621139526367, | |
| "learning_rate": 1.9019841873711255e-05, | |
| "loss": 1.4953, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.756497948016416, | |
| "grad_norm": 0.30166521668434143, | |
| "learning_rate": 1.8948796117860348e-05, | |
| "loss": 1.5158, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.759233926128591, | |
| "grad_norm": 0.27089768648147583, | |
| "learning_rate": 1.8877802235058585e-05, | |
| "loss": 1.4954, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.761969904240766, | |
| "grad_norm": 0.31477153301239014, | |
| "learning_rate": 1.8806860833890528e-05, | |
| "loss": 1.5449, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 0.2903262674808502, | |
| "learning_rate": 1.8735972522490818e-05, | |
| "loss": 1.4129, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.7674418604651163, | |
| "grad_norm": 0.3140583038330078, | |
| "learning_rate": 1.8665137908539004e-05, | |
| "loss": 1.6374, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.7701778385772915, | |
| "grad_norm": 0.29897943139076233, | |
| "learning_rate": 1.859435759925434e-05, | |
| "loss": 1.605, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.7729138166894665, | |
| "grad_norm": 0.2993355989456177, | |
| "learning_rate": 1.8523632201390537e-05, | |
| "loss": 1.495, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.7756497948016414, | |
| "grad_norm": 0.3301627039909363, | |
| "learning_rate": 1.845296232123058e-05, | |
| "loss": 1.6064, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.7783857729138166, | |
| "grad_norm": 0.30024030804634094, | |
| "learning_rate": 1.8382348564581574e-05, | |
| "loss": 1.3258, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.7811217510259918, | |
| "grad_norm": 0.3279036283493042, | |
| "learning_rate": 1.8311791536769483e-05, | |
| "loss": 1.4348, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.783857729138167, | |
| "grad_norm": 0.3231453001499176, | |
| "learning_rate": 1.8241291842633966e-05, | |
| "loss": 1.4327, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.786593707250342, | |
| "grad_norm": 0.3033272624015808, | |
| "learning_rate": 1.817085008652324e-05, | |
| "loss": 1.4668, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.7893296853625171, | |
| "grad_norm": 0.28523799777030945, | |
| "learning_rate": 1.810046687228879e-05, | |
| "loss": 1.5058, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.792065663474692, | |
| "grad_norm": 0.35666465759277344, | |
| "learning_rate": 1.803014280328033e-05, | |
| "loss": 1.4551, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.7948016415868673, | |
| "grad_norm": 0.33736082911491394, | |
| "learning_rate": 1.795987848234051e-05, | |
| "loss": 1.2991, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.7975376196990425, | |
| "grad_norm": 0.3194032311439514, | |
| "learning_rate": 1.7889674511799812e-05, | |
| "loss": 1.4764, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.8002735978112177, | |
| "grad_norm": 0.28460192680358887, | |
| "learning_rate": 1.7819531493471392e-05, | |
| "loss": 1.4962, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.8030095759233926, | |
| "grad_norm": 0.3380921483039856, | |
| "learning_rate": 1.7749450028645875e-05, | |
| "loss": 1.4772, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.8057455540355676, | |
| "grad_norm": 0.32514598965644836, | |
| "learning_rate": 1.7679430718086243e-05, | |
| "loss": 1.6421, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.8084815321477428, | |
| "grad_norm": 0.293314129114151, | |
| "learning_rate": 1.7609474162022665e-05, | |
| "loss": 1.5151, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.811217510259918, | |
| "grad_norm": 0.3001095950603485, | |
| "learning_rate": 1.753958096014737e-05, | |
| "loss": 1.4422, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.8139534883720931, | |
| "grad_norm": 0.3403547704219818, | |
| "learning_rate": 1.7469751711609454e-05, | |
| "loss": 1.6122, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.816689466484268, | |
| "grad_norm": 0.3033615052700043, | |
| "learning_rate": 1.739998701500984e-05, | |
| "loss": 1.596, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.819425444596443, | |
| "grad_norm": 0.34662926197052, | |
| "learning_rate": 1.7330287468396033e-05, | |
| "loss": 1.5458, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.8221614227086183, | |
| "grad_norm": 0.32165685296058655, | |
| "learning_rate": 1.7260653669257093e-05, | |
| "loss": 1.5433, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.8248974008207934, | |
| "grad_norm": 0.32157987356185913, | |
| "learning_rate": 1.7191086214518447e-05, | |
| "loss": 1.6236, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.8276333789329686, | |
| "grad_norm": 0.3016352355480194, | |
| "learning_rate": 1.712158570053678e-05, | |
| "loss": 1.3893, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.8303693570451436, | |
| "grad_norm": 0.40459707379341125, | |
| "learning_rate": 1.7052152723094976e-05, | |
| "loss": 1.3767, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.8331053351573188, | |
| "grad_norm": 0.34724652767181396, | |
| "learning_rate": 1.698278787739695e-05, | |
| "loss": 1.6067, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.8358413132694937, | |
| "grad_norm": 0.3703734874725342, | |
| "learning_rate": 1.6913491758062557e-05, | |
| "loss": 1.4538, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.838577291381669, | |
| "grad_norm": 0.31886354088783264, | |
| "learning_rate": 1.6844264959122534e-05, | |
| "loss": 1.6776, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.841313269493844, | |
| "grad_norm": 0.32411274313926697, | |
| "learning_rate": 1.6775108074013356e-05, | |
| "loss": 1.5405, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.8440492476060193, | |
| "grad_norm": 0.32398101687431335, | |
| "learning_rate": 1.670602169557217e-05, | |
| "loss": 1.5429, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.8467852257181943, | |
| "grad_norm": 0.3220042288303375, | |
| "learning_rate": 1.663700641603174e-05, | |
| "loss": 1.5339, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.8495212038303692, | |
| "grad_norm": 0.2694465219974518, | |
| "learning_rate": 1.6568062827015317e-05, | |
| "loss": 1.5641, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.8522571819425444, | |
| "grad_norm": 0.3089315593242645, | |
| "learning_rate": 1.6499191519531614e-05, | |
| "loss": 1.3319, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.8549931600547196, | |
| "grad_norm": 0.3302357792854309, | |
| "learning_rate": 1.643039308396971e-05, | |
| "loss": 1.478, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.8577291381668948, | |
| "grad_norm": 0.30908071994781494, | |
| "learning_rate": 1.6361668110094007e-05, | |
| "loss": 1.5557, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.2804611027240753, | |
| "learning_rate": 1.6293017187039174e-05, | |
| "loss": 1.6756, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.8632010943912447, | |
| "grad_norm": 0.3693443238735199, | |
| "learning_rate": 1.6224440903305076e-05, | |
| "loss": 1.5825, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.86593707250342, | |
| "grad_norm": 0.33380305767059326, | |
| "learning_rate": 1.615593984675174e-05, | |
| "loss": 1.5312, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.868673050615595, | |
| "grad_norm": 0.3250941038131714, | |
| "learning_rate": 1.6087514604594353e-05, | |
| "loss": 1.5642, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.8714090287277703, | |
| "grad_norm": 0.3749421238899231, | |
| "learning_rate": 1.6019165763398152e-05, | |
| "loss": 1.6141, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.8741450068399452, | |
| "grad_norm": 0.32209932804107666, | |
| "learning_rate": 1.5950893909073453e-05, | |
| "loss": 1.4791, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.8768809849521204, | |
| "grad_norm": 0.34071722626686096, | |
| "learning_rate": 1.5882699626870633e-05, | |
| "loss": 1.6126, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.8796169630642954, | |
| "grad_norm": 0.2984413802623749, | |
| "learning_rate": 1.5814583501375064e-05, | |
| "loss": 1.5191, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 0.33248278498649597, | |
| "learning_rate": 1.574654611650214e-05, | |
| "loss": 1.6346, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8850889192886457, | |
| "grad_norm": 0.308098703622818, | |
| "learning_rate": 1.567858805549229e-05, | |
| "loss": 1.5521, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.887824897400821, | |
| "grad_norm": 0.2899650037288666, | |
| "learning_rate": 1.56107099009059e-05, | |
| "loss": 1.3675, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.890560875512996, | |
| "grad_norm": 0.33524468541145325, | |
| "learning_rate": 1.5542912234618422e-05, | |
| "loss": 1.6501, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.8932968536251709, | |
| "grad_norm": 0.3770296275615692, | |
| "learning_rate": 1.54751956378153e-05, | |
| "loss": 1.5518, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.896032831737346, | |
| "grad_norm": 0.3348696529865265, | |
| "learning_rate": 1.540756069098702e-05, | |
| "loss": 1.4388, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.8987688098495212, | |
| "grad_norm": 0.37193214893341064, | |
| "learning_rate": 1.5340007973924176e-05, | |
| "loss": 1.499, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.9015047879616964, | |
| "grad_norm": 0.33301231265068054, | |
| "learning_rate": 1.5272538065712403e-05, | |
| "loss": 1.6687, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.9042407660738714, | |
| "grad_norm": 0.33597227931022644, | |
| "learning_rate": 1.5205151544727509e-05, | |
| "loss": 1.589, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.9069767441860463, | |
| "grad_norm": 0.3002086281776428, | |
| "learning_rate": 1.5137848988630465e-05, | |
| "loss": 1.7462, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.9097127222982215, | |
| "grad_norm": 0.30558449029922485, | |
| "learning_rate": 1.5070630974362473e-05, | |
| "loss": 1.6179, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.9124487004103967, | |
| "grad_norm": 0.314813494682312, | |
| "learning_rate": 1.5003498078139988e-05, | |
| "loss": 1.6029, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.915184678522572, | |
| "grad_norm": 0.31106236577033997, | |
| "learning_rate": 1.4936450875449845e-05, | |
| "loss": 1.5462, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.9179206566347469, | |
| "grad_norm": 0.32419320940971375, | |
| "learning_rate": 1.4869489941044235e-05, | |
| "loss": 1.632, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.920656634746922, | |
| "grad_norm": 0.28728801012039185, | |
| "learning_rate": 1.4802615848935875e-05, | |
| "loss": 1.5625, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.923392612859097, | |
| "grad_norm": 0.35819682478904724, | |
| "learning_rate": 1.4735829172393007e-05, | |
| "loss": 1.416, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.9261285909712722, | |
| "grad_norm": 0.3097285032272339, | |
| "learning_rate": 1.4669130483934512e-05, | |
| "loss": 1.4333, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.9288645690834474, | |
| "grad_norm": 0.31058430671691895, | |
| "learning_rate": 1.4602520355325039e-05, | |
| "loss": 1.5175, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.9316005471956226, | |
| "grad_norm": 0.3093342185020447, | |
| "learning_rate": 1.4535999357570046e-05, | |
| "loss": 1.4105, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.9343365253077975, | |
| "grad_norm": 0.3064606189727783, | |
| "learning_rate": 1.4469568060910915e-05, | |
| "loss": 1.6593, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.9370725034199725, | |
| "grad_norm": 0.3192756772041321, | |
| "learning_rate": 1.4403227034820139e-05, | |
| "loss": 1.5919, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.9398084815321477, | |
| "grad_norm": 0.30623388290405273, | |
| "learning_rate": 1.4336976847996303e-05, | |
| "loss": 1.5053, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.9425444596443229, | |
| "grad_norm": 0.3014221787452698, | |
| "learning_rate": 1.4270818068359336e-05, | |
| "loss": 1.4876, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.945280437756498, | |
| "grad_norm": 0.34191763401031494, | |
| "learning_rate": 1.420475126304558e-05, | |
| "loss": 1.435, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.948016415868673, | |
| "grad_norm": 0.33173930644989014, | |
| "learning_rate": 1.4138776998402927e-05, | |
| "loss": 1.5467, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.950752393980848, | |
| "grad_norm": 0.3360328674316406, | |
| "learning_rate": 1.4072895839986023e-05, | |
| "loss": 1.4525, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.9534883720930232, | |
| "grad_norm": 0.2906378209590912, | |
| "learning_rate": 1.4007108352551313e-05, | |
| "loss": 1.5788, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.9562243502051984, | |
| "grad_norm": 0.3077907860279083, | |
| "learning_rate": 1.3941415100052293e-05, | |
| "loss": 1.4087, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.9589603283173735, | |
| "grad_norm": 0.2951776385307312, | |
| "learning_rate": 1.387581664563465e-05, | |
| "loss": 1.4699, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.9616963064295485, | |
| "grad_norm": 0.29453444480895996, | |
| "learning_rate": 1.3810313551631426e-05, | |
| "loss": 1.6203, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.9644322845417237, | |
| "grad_norm": 0.3803625702857971, | |
| "learning_rate": 1.3744906379558165e-05, | |
| "loss": 1.6514, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.9671682626538987, | |
| "grad_norm": 0.3245275318622589, | |
| "learning_rate": 1.3679595690108193e-05, | |
| "loss": 1.5904, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.9699042407660738, | |
| "grad_norm": 0.3239131271839142, | |
| "learning_rate": 1.3614382043147725e-05, | |
| "loss": 1.5117, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.972640218878249, | |
| "grad_norm": 0.3001211881637573, | |
| "learning_rate": 1.3549265997711057e-05, | |
| "loss": 1.5985, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.9753761969904242, | |
| "grad_norm": 0.29681622982025146, | |
| "learning_rate": 1.3484248111995892e-05, | |
| "loss": 1.3762, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.9781121751025992, | |
| "grad_norm": 0.38813284039497375, | |
| "learning_rate": 1.3419328943358392e-05, | |
| "loss": 1.548, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.9808481532147741, | |
| "grad_norm": 0.29337620735168457, | |
| "learning_rate": 1.3354509048308527e-05, | |
| "loss": 1.4466, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.9835841313269493, | |
| "grad_norm": 0.3134080171585083, | |
| "learning_rate": 1.328978898250525e-05, | |
| "loss": 1.523, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.9863201094391245, | |
| "grad_norm": 0.32457372546195984, | |
| "learning_rate": 1.3225169300751738e-05, | |
| "loss": 1.4018, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.9890560875512997, | |
| "grad_norm": 0.30359819531440735, | |
| "learning_rate": 1.3160650556990644e-05, | |
| "loss": 1.4209, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.9917920656634747, | |
| "grad_norm": 0.3376854360103607, | |
| "learning_rate": 1.3096233304299346e-05, | |
| "loss": 1.4914, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.9945280437756496, | |
| "grad_norm": 0.3038989305496216, | |
| "learning_rate": 1.30319180948852e-05, | |
| "loss": 1.4995, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.9972640218878248, | |
| "grad_norm": 0.2987303137779236, | |
| "learning_rate": 1.2967705480080819e-05, | |
| "loss": 1.5415, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.36076483130455017, | |
| "learning_rate": 1.2903596010339338e-05, | |
| "loss": 1.3103, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.002735978112175, | |
| "grad_norm": 0.329522043466568, | |
| "learning_rate": 1.2839590235229668e-05, | |
| "loss": 1.7937, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.0054719562243504, | |
| "grad_norm": 0.33667078614234924, | |
| "learning_rate": 1.2775688703431871e-05, | |
| "loss": 1.333, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.008207934336525, | |
| "grad_norm": 0.30493631958961487, | |
| "learning_rate": 1.2711891962732342e-05, | |
| "loss": 1.3868, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.0109439124487003, | |
| "grad_norm": 0.3139844536781311, | |
| "learning_rate": 1.26482005600192e-05, | |
| "loss": 1.4361, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.0136798905608755, | |
| "grad_norm": 0.30529195070266724, | |
| "learning_rate": 1.258461504127756e-05, | |
| "loss": 1.5679, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.0164158686730507, | |
| "grad_norm": 0.3701570928096771, | |
| "learning_rate": 1.252113595158487e-05, | |
| "loss": 1.7087, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.019151846785226, | |
| "grad_norm": 0.3016231060028076, | |
| "learning_rate": 1.245776383510622e-05, | |
| "loss": 1.5592, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.0218878248974006, | |
| "grad_norm": 0.29768821597099304, | |
| "learning_rate": 1.2394499235089699e-05, | |
| "loss": 1.4926, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.024623803009576, | |
| "grad_norm": 0.3342791497707367, | |
| "learning_rate": 1.2331342693861716e-05, | |
| "loss": 1.4944, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.027359781121751, | |
| "grad_norm": 0.30851346254348755, | |
| "learning_rate": 1.2268294752822376e-05, | |
| "loss": 1.7049, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.030095759233926, | |
| "grad_norm": 0.3023451864719391, | |
| "learning_rate": 1.2205355952440817e-05, | |
| "loss": 1.5705, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.0328317373461013, | |
| "grad_norm": 0.3253107964992523, | |
| "learning_rate": 1.2142526832250561e-05, | |
| "loss": 1.4404, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.0355677154582765, | |
| "grad_norm": 0.33139538764953613, | |
| "learning_rate": 1.2079807930844977e-05, | |
| "loss": 1.4355, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.0383036935704513, | |
| "grad_norm": 0.3191182613372803, | |
| "learning_rate": 1.2017199785872523e-05, | |
| "loss": 1.428, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.0410396716826265, | |
| "grad_norm": 0.33677810430526733, | |
| "learning_rate": 1.1954702934032267e-05, | |
| "loss": 1.4214, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.0437756497948016, | |
| "grad_norm": 0.31797662377357483, | |
| "learning_rate": 1.1892317911069212e-05, | |
| "loss": 1.5024, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.046511627906977, | |
| "grad_norm": 0.30894604325294495, | |
| "learning_rate": 1.183004525176973e-05, | |
| "loss": 1.5084, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.049247606019152, | |
| "grad_norm": 0.30327004194259644, | |
| "learning_rate": 1.176788548995697e-05, | |
| "loss": 1.5586, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.0519835841313268, | |
| "grad_norm": 0.31576216220855713, | |
| "learning_rate": 1.1705839158486284e-05, | |
| "loss": 1.5315, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.0519835841313268, | |
| "eval_loss": 1.5589618682861328, | |
| "eval_runtime": 12.9752, | |
| "eval_samples_per_second": 11.869, | |
| "eval_steps_per_second": 11.869, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.054719562243502, | |
| "grad_norm": 0.3079953193664551, | |
| "learning_rate": 1.1643906789240664e-05, | |
| "loss": 1.6127, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.057455540355677, | |
| "grad_norm": 0.30269429087638855, | |
| "learning_rate": 1.1582088913126172e-05, | |
| "loss": 1.5077, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.0601915184678523, | |
| "grad_norm": 0.34395742416381836, | |
| "learning_rate": 1.1520386060067401e-05, | |
| "loss": 1.4675, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.0629274965800275, | |
| "grad_norm": 0.3478207588195801, | |
| "learning_rate": 1.1458798759002897e-05, | |
| "loss": 1.5474, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.0656634746922027, | |
| "grad_norm": 0.3087264895439148, | |
| "learning_rate": 1.139732753788072e-05, | |
| "loss": 1.4078, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.0683994528043774, | |
| "grad_norm": 0.2971005141735077, | |
| "learning_rate": 1.1335972923653774e-05, | |
| "loss": 1.6057, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.0711354309165526, | |
| "grad_norm": 0.30974239110946655, | |
| "learning_rate": 1.1274735442275402e-05, | |
| "loss": 1.4899, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.073871409028728, | |
| "grad_norm": 0.3504452705383301, | |
| "learning_rate": 1.121361561869488e-05, | |
| "loss": 1.5413, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.076607387140903, | |
| "grad_norm": 0.3584803640842438, | |
| "learning_rate": 1.1152613976852804e-05, | |
| "loss": 1.5634, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.079343365253078, | |
| "grad_norm": 0.3279834985733032, | |
| "learning_rate": 1.1091731039676754e-05, | |
| "loss": 1.4428, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.082079343365253, | |
| "grad_norm": 0.2989148199558258, | |
| "learning_rate": 1.1030967329076658e-05, | |
| "loss": 1.3392, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.084815321477428, | |
| "grad_norm": 0.3270657956600189, | |
| "learning_rate": 1.0970323365940444e-05, | |
| "loss": 1.4114, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.0875512995896033, | |
| "grad_norm": 0.31774088740348816, | |
| "learning_rate": 1.0909799670129497e-05, | |
| "loss": 1.4251, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.0902872777017785, | |
| "grad_norm": 0.3060950040817261, | |
| "learning_rate": 1.0849396760474246e-05, | |
| "loss": 1.4517, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 0.2948378026485443, | |
| "learning_rate": 1.078911515476968e-05, | |
| "loss": 1.4872, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.0957592339261284, | |
| "grad_norm": 0.2836145758628845, | |
| "learning_rate": 1.0728955369770941e-05, | |
| "loss": 1.4987, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.0984952120383036, | |
| "grad_norm": 0.33373308181762695, | |
| "learning_rate": 1.0668917921188885e-05, | |
| "loss": 1.4806, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.1012311901504788, | |
| "grad_norm": 0.29727426171302795, | |
| "learning_rate": 1.060900332368562e-05, | |
| "loss": 1.506, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.103967168262654, | |
| "grad_norm": 0.3088816702365875, | |
| "learning_rate": 1.0549212090870203e-05, | |
| "loss": 1.7262, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.106703146374829, | |
| "grad_norm": 0.37396734952926636, | |
| "learning_rate": 1.0489544735294088e-05, | |
| "loss": 1.4311, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.109439124487004, | |
| "grad_norm": 0.3265590965747833, | |
| "learning_rate": 1.0430001768446856e-05, | |
| "loss": 1.3106, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.112175102599179, | |
| "grad_norm": 0.3493903577327728, | |
| "learning_rate": 1.0370583700751774e-05, | |
| "loss": 1.6644, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.1149110807113543, | |
| "grad_norm": 0.32931625843048096, | |
| "learning_rate": 1.0311291041561428e-05, | |
| "loss": 1.448, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 0.3126463294029236, | |
| "learning_rate": 1.0252124299153353e-05, | |
| "loss": 1.5439, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.1203830369357046, | |
| "grad_norm": 0.3198404014110565, | |
| "learning_rate": 1.0193083980725696e-05, | |
| "loss": 1.4068, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.12311901504788, | |
| "grad_norm": 0.3446153998374939, | |
| "learning_rate": 1.0134170592392836e-05, | |
| "loss": 1.4597, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.1258549931600546, | |
| "grad_norm": 0.34225985407829285, | |
| "learning_rate": 1.007538463918107e-05, | |
| "loss": 1.5849, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.1285909712722297, | |
| "grad_norm": 0.3094581663608551, | |
| "learning_rate": 1.0016726625024287e-05, | |
| "loss": 1.5853, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.131326949384405, | |
| "grad_norm": 0.27853038907051086, | |
| "learning_rate": 9.9581970527596e-06, | |
| "loss": 1.5799, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.13406292749658, | |
| "grad_norm": 0.33034032583236694, | |
| "learning_rate": 9.899796424123136e-06, | |
| "loss": 1.456, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.1367989056087553, | |
| "grad_norm": 0.3268153965473175, | |
| "learning_rate": 9.841525239745605e-06, | |
| "loss": 1.3883, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.13953488372093, | |
| "grad_norm": 0.32485464215278625, | |
| "learning_rate": 9.783383999148118e-06, | |
| "loss": 1.4159, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.1422708618331052, | |
| "grad_norm": 0.29737138748168945, | |
| "learning_rate": 9.72537320073785e-06, | |
| "loss": 1.5254, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.1450068399452804, | |
| "grad_norm": 0.2851496934890747, | |
| "learning_rate": 9.667493341803777e-06, | |
| "loss": 1.3903, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.1477428180574556, | |
| "grad_norm": 0.32795658707618713, | |
| "learning_rate": 9.60974491851242e-06, | |
| "loss": 1.5147, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.150478796169631, | |
| "grad_norm": 0.33503639698028564, | |
| "learning_rate": 9.552128425903586e-06, | |
| "loss": 1.4495, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.153214774281806, | |
| "grad_norm": 0.31696316599845886, | |
| "learning_rate": 9.494644357886124e-06, | |
| "loss": 1.5787, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.1559507523939807, | |
| "grad_norm": 0.321809321641922, | |
| "learning_rate": 9.437293207233695e-06, | |
| "loss": 1.5817, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.158686730506156, | |
| "grad_norm": 0.29343757033348083, | |
| "learning_rate": 9.380075465580552e-06, | |
| "loss": 1.5604, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.161422708618331, | |
| "grad_norm": 0.4285193383693695, | |
| "learning_rate": 9.322991623417285e-06, | |
| "loss": 1.4387, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.1641586867305063, | |
| "grad_norm": 0.29187867045402527, | |
| "learning_rate": 9.266042170086717e-06, | |
| "loss": 1.4871, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.1668946648426815, | |
| "grad_norm": 0.2987869679927826, | |
| "learning_rate": 9.209227593779573e-06, | |
| "loss": 1.3972, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.169630642954856, | |
| "grad_norm": 0.29556939005851746, | |
| "learning_rate": 9.152548381530405e-06, | |
| "loss": 1.4057, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.1723666210670314, | |
| "grad_norm": 0.32454240322113037, | |
| "learning_rate": 9.096005019213363e-06, | |
| "loss": 1.5509, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.1751025991792066, | |
| "grad_norm": 0.30358558893203735, | |
| "learning_rate": 9.039597991538043e-06, | |
| "loss": 1.5466, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.1778385772913817, | |
| "grad_norm": 0.3079793155193329, | |
| "learning_rate": 8.983327782045359e-06, | |
| "loss": 1.8167, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.180574555403557, | |
| "grad_norm": 0.3283805847167969, | |
| "learning_rate": 8.927194873103322e-06, | |
| "loss": 1.5464, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.1833105335157317, | |
| "grad_norm": 0.3089551031589508, | |
| "learning_rate": 8.871199745902997e-06, | |
| "loss": 1.4067, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.186046511627907, | |
| "grad_norm": 0.3624851405620575, | |
| "learning_rate": 8.815342880454311e-06, | |
| "loss": 1.4652, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.188782489740082, | |
| "grad_norm": 0.31375181674957275, | |
| "learning_rate": 8.75962475558198e-06, | |
| "loss": 1.4584, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.1915184678522572, | |
| "grad_norm": 0.36298835277557373, | |
| "learning_rate": 8.704045848921358e-06, | |
| "loss": 1.6755, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.1942544459644324, | |
| "grad_norm": 0.3472815155982971, | |
| "learning_rate": 8.648606636914416e-06, | |
| "loss": 1.4749, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.196990424076607, | |
| "grad_norm": 0.30575889348983765, | |
| "learning_rate": 8.593307594805586e-06, | |
| "loss": 1.616, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.1997264021887823, | |
| "grad_norm": 0.33051928877830505, | |
| "learning_rate": 8.53814919663769e-06, | |
| "loss": 1.4458, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.2024623803009575, | |
| "grad_norm": 0.31675076484680176, | |
| "learning_rate": 8.483131915247968e-06, | |
| "loss": 1.4981, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.2051983584131327, | |
| "grad_norm": 0.33154037594795227, | |
| "learning_rate": 8.428256222263888e-06, | |
| "loss": 1.5108, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.207934336525308, | |
| "grad_norm": 0.3389985263347626, | |
| "learning_rate": 8.373522588099211e-06, | |
| "loss": 1.4522, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.210670314637483, | |
| "grad_norm": 0.35865381360054016, | |
| "learning_rate": 8.318931481949906e-06, | |
| "loss": 1.5059, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.213406292749658, | |
| "grad_norm": 0.3100070357322693, | |
| "learning_rate": 8.264483371790156e-06, | |
| "loss": 1.441, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.216142270861833, | |
| "grad_norm": 0.3514600694179535, | |
| "learning_rate": 8.210178724368312e-06, | |
| "loss": 1.6657, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.218878248974008, | |
| "grad_norm": 0.2982495427131653, | |
| "learning_rate": 8.156018005202926e-06, | |
| "loss": 1.5945, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.2216142270861834, | |
| "grad_norm": 0.2904994487762451, | |
| "learning_rate": 8.10200167857874e-06, | |
| "loss": 1.4298, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.2243502051983586, | |
| "grad_norm": 0.3352917432785034, | |
| "learning_rate": 8.04813020754272e-06, | |
| "loss": 1.5512, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.2270861833105333, | |
| "grad_norm": 0.31548169255256653, | |
| "learning_rate": 7.994404053900076e-06, | |
| "loss": 1.5735, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.2298221614227085, | |
| "grad_norm": 0.31222277879714966, | |
| "learning_rate": 7.940823678210282e-06, | |
| "loss": 1.4862, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.2325581395348837, | |
| "grad_norm": 0.3492494225502014, | |
| "learning_rate": 7.887389539783208e-06, | |
| "loss": 1.6087, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 0.32157906889915466, | |
| "learning_rate": 7.834102096675064e-06, | |
| "loss": 1.5771, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.238030095759234, | |
| "grad_norm": 0.3391493260860443, | |
| "learning_rate": 7.780961805684575e-06, | |
| "loss": 1.5289, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.2407660738714092, | |
| "grad_norm": 0.32275089621543884, | |
| "learning_rate": 7.727969122349017e-06, | |
| "loss": 1.407, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.243502051983584, | |
| "grad_norm": 0.34808090329170227, | |
| "learning_rate": 7.675124500940317e-06, | |
| "loss": 1.4914, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.246238030095759, | |
| "grad_norm": 0.35853490233421326, | |
| "learning_rate": 7.62242839446117e-06, | |
| "loss": 1.4587, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.2489740082079344, | |
| "grad_norm": 0.298554390668869, | |
| "learning_rate": 7.569881254641142e-06, | |
| "loss": 1.52, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.2517099863201095, | |
| "grad_norm": 0.3177081048488617, | |
| "learning_rate": 7.517483531932809e-06, | |
| "loss": 1.3685, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.2544459644322847, | |
| "grad_norm": 0.30928516387939453, | |
| "learning_rate": 7.4652356755078895e-06, | |
| "loss": 1.4636, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.2571819425444595, | |
| "grad_norm": 0.3332304060459137, | |
| "learning_rate": 7.4131381332534e-06, | |
| "loss": 1.4894, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.2599179206566347, | |
| "grad_norm": 0.331177681684494, | |
| "learning_rate": 7.3611913517677875e-06, | |
| "loss": 1.5236, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.26265389876881, | |
| "grad_norm": 0.3152174949645996, | |
| "learning_rate": 7.30939577635717e-06, | |
| "loss": 1.4508, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.265389876880985, | |
| "grad_norm": 0.33167538046836853, | |
| "learning_rate": 7.257751851031428e-06, | |
| "loss": 1.4507, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.26812585499316, | |
| "grad_norm": 0.26186057925224304, | |
| "learning_rate": 7.206260018500469e-06, | |
| "loss": 1.581, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.270861833105335, | |
| "grad_norm": 0.31686386466026306, | |
| "learning_rate": 7.154920720170399e-06, | |
| "loss": 1.5677, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.27359781121751, | |
| "grad_norm": 0.32669878005981445, | |
| "learning_rate": 7.1037343961397495e-06, | |
| "loss": 1.5512, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.2763337893296853, | |
| "grad_norm": 0.3603561520576477, | |
| "learning_rate": 7.05270148519572e-06, | |
| "loss": 1.6478, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.2790697674418605, | |
| "grad_norm": 0.3097962737083435, | |
| "learning_rate": 7.001822424810359e-06, | |
| "loss": 1.5819, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.2818057455540357, | |
| "grad_norm": 0.32818931341171265, | |
| "learning_rate": 6.951097651136889e-06, | |
| "loss": 1.5053, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.2845417236662104, | |
| "grad_norm": 0.41288748383522034, | |
| "learning_rate": 6.900527599005918e-06, | |
| "loss": 1.5391, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.2872777017783856, | |
| "grad_norm": 0.3428272008895874, | |
| "learning_rate": 6.8501127019217346e-06, | |
| "loss": 1.4097, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.290013679890561, | |
| "grad_norm": 0.34903377294540405, | |
| "learning_rate": 6.799853392058561e-06, | |
| "loss": 1.3627, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.292749658002736, | |
| "grad_norm": 0.3197937309741974, | |
| "learning_rate": 6.749750100256902e-06, | |
| "loss": 1.5362, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.295485636114911, | |
| "grad_norm": 0.3300299644470215, | |
| "learning_rate": 6.699803256019815e-06, | |
| "loss": 1.6251, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.2982216142270864, | |
| "grad_norm": 0.33497533202171326, | |
| "learning_rate": 6.650013287509199e-06, | |
| "loss": 1.5388, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.300957592339261, | |
| "grad_norm": 0.36092567443847656, | |
| "learning_rate": 6.600380621542216e-06, | |
| "loss": 1.4969, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.3036935704514363, | |
| "grad_norm": 0.3416774868965149, | |
| "learning_rate": 6.550905683587513e-06, | |
| "loss": 1.5842, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.3064295485636115, | |
| "grad_norm": 0.30258703231811523, | |
| "learning_rate": 6.5015888977617016e-06, | |
| "loss": 1.3653, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.3091655266757867, | |
| "grad_norm": 0.2986971139907837, | |
| "learning_rate": 6.452430686825603e-06, | |
| "loss": 1.5223, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.311901504787962, | |
| "grad_norm": 0.32431551814079285, | |
| "learning_rate": 6.403431472180707e-06, | |
| "loss": 1.4931, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.314637482900137, | |
| "grad_norm": 0.32962799072265625, | |
| "learning_rate": 6.354591673865523e-06, | |
| "loss": 1.4833, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.317373461012312, | |
| "grad_norm": 0.36730074882507324, | |
| "learning_rate": 6.30591171055199e-06, | |
| "loss": 1.4419, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.320109439124487, | |
| "grad_norm": 0.3103657066822052, | |
| "learning_rate": 6.257391999541887e-06, | |
| "loss": 1.4914, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.322845417236662, | |
| "grad_norm": 0.3909322917461395, | |
| "learning_rate": 6.209032956763247e-06, | |
| "loss": 1.4929, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 0.346003919839859, | |
| "learning_rate": 6.160834996766815e-06, | |
| "loss": 1.3601, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.3283173734610125, | |
| "grad_norm": 0.3200609087944031, | |
| "learning_rate": 6.112798532722438e-06, | |
| "loss": 1.6096, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.3310533515731873, | |
| "grad_norm": 0.31736400723457336, | |
| "learning_rate": 6.064923976415626e-06, | |
| "loss": 1.4462, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.3337893296853625, | |
| "grad_norm": 0.2982243299484253, | |
| "learning_rate": 6.0172117382438944e-06, | |
| "loss": 1.6742, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.3365253077975376, | |
| "grad_norm": 0.36678165197372437, | |
| "learning_rate": 5.969662227213352e-06, | |
| "loss": 1.429, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.339261285909713, | |
| "grad_norm": 0.29900214076042175, | |
| "learning_rate": 5.922275850935136e-06, | |
| "loss": 1.5513, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.341997264021888, | |
| "grad_norm": 0.3283601403236389, | |
| "learning_rate": 5.875053015621943e-06, | |
| "loss": 1.7077, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.3447332421340628, | |
| "grad_norm": 0.33298107981681824, | |
| "learning_rate": 5.827994126084532e-06, | |
| "loss": 1.5916, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.347469220246238, | |
| "grad_norm": 0.3546065390110016, | |
| "learning_rate": 5.781099585728267e-06, | |
| "loss": 1.4382, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.350205198358413, | |
| "grad_norm": 0.30142465233802795, | |
| "learning_rate": 5.734369796549652e-06, | |
| "loss": 1.4578, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 0.3024732768535614, | |
| "learning_rate": 5.687805159132881e-06, | |
| "loss": 1.4102, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.3556771545827635, | |
| "grad_norm": 0.3300219774246216, | |
| "learning_rate": 5.641406072646418e-06, | |
| "loss": 1.5036, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.3584131326949382, | |
| "grad_norm": 0.37552839517593384, | |
| "learning_rate": 5.595172934839546e-06, | |
| "loss": 1.4401, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.3611491108071134, | |
| "grad_norm": 0.3124082386493683, | |
| "learning_rate": 5.549106142039018e-06, | |
| "loss": 1.3749, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.3638850889192886, | |
| "grad_norm": 0.34462398290634155, | |
| "learning_rate": 5.503206089145568e-06, | |
| "loss": 1.578, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.366621067031464, | |
| "grad_norm": 0.3301493227481842, | |
| "learning_rate": 5.4574731696306146e-06, | |
| "loss": 1.7064, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.369357045143639, | |
| "grad_norm": 0.33227112889289856, | |
| "learning_rate": 5.411907775532832e-06, | |
| "loss": 1.4069, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.3720930232558137, | |
| "grad_norm": 0.3299727439880371, | |
| "learning_rate": 5.366510297454816e-06, | |
| "loss": 1.4481, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.374829001367989, | |
| "grad_norm": 0.3437407314777374, | |
| "learning_rate": 5.321281124559727e-06, | |
| "loss": 1.3406, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.377564979480164, | |
| "grad_norm": 0.3354208469390869, | |
| "learning_rate": 5.276220644567948e-06, | |
| "loss": 1.3835, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.3803009575923393, | |
| "grad_norm": 0.29964345693588257, | |
| "learning_rate": 5.231329243753772e-06, | |
| "loss": 1.3945, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.3830369357045145, | |
| "grad_norm": 0.3642790615558624, | |
| "learning_rate": 5.186607306942085e-06, | |
| "loss": 1.3748, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.3857729138166897, | |
| "grad_norm": 0.29215675592422485, | |
| "learning_rate": 5.142055217505074e-06, | |
| "loss": 1.5209, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.3885088919288644, | |
| "grad_norm": 0.35326606035232544, | |
| "learning_rate": 5.097673357358907e-06, | |
| "loss": 1.5554, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.3912448700410396, | |
| "grad_norm": 0.3523205816745758, | |
| "learning_rate": 5.053462106960532e-06, | |
| "loss": 1.5203, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.3939808481532148, | |
| "grad_norm": 0.3213190734386444, | |
| "learning_rate": 5.00942184530433e-06, | |
| "loss": 1.4171, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.39671682626539, | |
| "grad_norm": 0.3184296488761902, | |
| "learning_rate": 4.96555294991892e-06, | |
| "loss": 1.3045, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.399452804377565, | |
| "grad_norm": 0.3270394802093506, | |
| "learning_rate": 4.921855796863933e-06, | |
| "loss": 1.5365, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.4021887824897403, | |
| "grad_norm": 0.36887189745903015, | |
| "learning_rate": 4.878330760726713e-06, | |
| "loss": 1.2823, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.404924760601915, | |
| "grad_norm": 0.3077680766582489, | |
| "learning_rate": 4.834978214619215e-06, | |
| "loss": 1.5222, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.4076607387140903, | |
| "grad_norm": 0.3177037835121155, | |
| "learning_rate": 4.791798530174699e-06, | |
| "loss": 1.4489, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.4103967168262654, | |
| "grad_norm": 0.3479926586151123, | |
| "learning_rate": 4.748792077544623e-06, | |
| "loss": 1.5376, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.4131326949384406, | |
| "grad_norm": 0.32515963912010193, | |
| "learning_rate": 4.7059592253954315e-06, | |
| "loss": 1.5803, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.415868673050616, | |
| "grad_norm": 0.33568742871284485, | |
| "learning_rate": 4.66330034090541e-06, | |
| "loss": 1.5066, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.4186046511627906, | |
| "grad_norm": 0.328945130109787, | |
| "learning_rate": 4.620815789761526e-06, | |
| "loss": 1.4279, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.4213406292749657, | |
| "grad_norm": 0.3277706503868103, | |
| "learning_rate": 4.578505936156302e-06, | |
| "loss": 1.5832, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.424076607387141, | |
| "grad_norm": 0.35384321212768555, | |
| "learning_rate": 4.5363711427847015e-06, | |
| "loss": 1.4504, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.426812585499316, | |
| "grad_norm": 0.3523906469345093, | |
| "learning_rate": 4.494411770840978e-06, | |
| "loss": 1.5453, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.4295485636114913, | |
| "grad_norm": 0.30951249599456787, | |
| "learning_rate": 4.452628180015664e-06, | |
| "loss": 1.5488, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.432284541723666, | |
| "grad_norm": 0.3258694112300873, | |
| "learning_rate": 4.411020728492374e-06, | |
| "loss": 1.4878, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.4350205198358412, | |
| "grad_norm": 0.33756595849990845, | |
| "learning_rate": 4.3695897729448485e-06, | |
| "loss": 1.4415, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.4377564979480164, | |
| "grad_norm": 0.3134118318557739, | |
| "learning_rate": 4.328335668533806e-06, | |
| "loss": 1.3717, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.4404924760601916, | |
| "grad_norm": 0.2975947856903076, | |
| "learning_rate": 4.2872587689039484e-06, | |
| "loss": 1.5299, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.443228454172367, | |
| "grad_norm": 0.3447892963886261, | |
| "learning_rate": 4.246359426180918e-06, | |
| "loss": 1.4769, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.4459644322845415, | |
| "grad_norm": 0.32196173071861267, | |
| "learning_rate": 4.20563799096827e-06, | |
| "loss": 1.4507, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.4487004103967167, | |
| "grad_norm": 0.30560824275016785, | |
| "learning_rate": 4.165094812344478e-06, | |
| "loss": 1.6074, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.451436388508892, | |
| "grad_norm": 0.3304429352283478, | |
| "learning_rate": 4.124730237859939e-06, | |
| "loss": 1.5368, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.454172366621067, | |
| "grad_norm": 0.33104217052459717, | |
| "learning_rate": 4.0845446135339945e-06, | |
| "loss": 1.4162, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.4569083447332423, | |
| "grad_norm": 0.3179687261581421, | |
| "learning_rate": 4.0445382838519365e-06, | |
| "loss": 1.6278, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.459644322845417, | |
| "grad_norm": 0.3108776807785034, | |
| "learning_rate": 4.004711591762133e-06, | |
| "loss": 1.6218, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.462380300957592, | |
| "grad_norm": 0.3062944710254669, | |
| "learning_rate": 3.965064878672983e-06, | |
| "loss": 1.4816, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.4651162790697674, | |
| "grad_norm": 0.3266476094722748, | |
| "learning_rate": 3.925598484450066e-06, | |
| "loss": 1.846, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.4678522571819426, | |
| "grad_norm": 0.344200998544693, | |
| "learning_rate": 3.886312747413204e-06, | |
| "loss": 1.3894, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 0.315949410200119, | |
| "learning_rate": 3.847208004333561e-06, | |
| "loss": 1.4174, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.473324213406293, | |
| "grad_norm": 0.3364209234714508, | |
| "learning_rate": 3.8082845904307525e-06, | |
| "loss": 1.4093, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.4760601915184677, | |
| "grad_norm": 0.3172418475151062, | |
| "learning_rate": 3.7695428393699854e-06, | |
| "loss": 1.3446, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.478796169630643, | |
| "grad_norm": 0.3134661316871643, | |
| "learning_rate": 3.730983083259179e-06, | |
| "loss": 1.507, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.481532147742818, | |
| "grad_norm": 0.3210635185241699, | |
| "learning_rate": 3.6926056526461334e-06, | |
| "loss": 1.3344, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.4842681258549932, | |
| "grad_norm": 0.32122674584388733, | |
| "learning_rate": 3.6544108765156933e-06, | |
| "loss": 1.5514, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.4870041039671684, | |
| "grad_norm": 0.36177971959114075, | |
| "learning_rate": 3.6163990822869088e-06, | |
| "loss": 1.5981, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.4897400820793436, | |
| "grad_norm": 0.3207845389842987, | |
| "learning_rate": 3.578570595810274e-06, | |
| "loss": 1.5989, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.4924760601915183, | |
| "grad_norm": 0.32466524839401245, | |
| "learning_rate": 3.540925741364873e-06, | |
| "loss": 1.483, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.4952120383036935, | |
| "grad_norm": 0.31727364659309387, | |
| "learning_rate": 3.5034648416556486e-06, | |
| "loss": 1.5308, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.4979480164158687, | |
| "grad_norm": 0.3401485085487366, | |
| "learning_rate": 3.4661882178106176e-06, | |
| "loss": 1.6316, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.500683994528044, | |
| "grad_norm": 0.3355841338634491, | |
| "learning_rate": 3.429096189378114e-06, | |
| "loss": 1.5952, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.503419972640219, | |
| "grad_norm": 0.3085014522075653, | |
| "learning_rate": 3.392189074324073e-06, | |
| "loss": 1.6774, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.506155950752394, | |
| "grad_norm": 0.3420393466949463, | |
| "learning_rate": 3.355467189029257e-06, | |
| "loss": 1.4805, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.508891928864569, | |
| "grad_norm": 0.3142971098423004, | |
| "learning_rate": 3.3189308482865917e-06, | |
| "loss": 1.4958, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.511627906976744, | |
| "grad_norm": 0.37307271361351013, | |
| "learning_rate": 3.2825803652984516e-06, | |
| "loss": 1.4541, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.5143638850889194, | |
| "grad_norm": 0.33256828784942627, | |
| "learning_rate": 3.2464160516739755e-06, | |
| "loss": 1.4565, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.5170998632010946, | |
| "grad_norm": 0.29588747024536133, | |
| "learning_rate": 3.210438217426365e-06, | |
| "loss": 1.4852, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.5198358413132693, | |
| "grad_norm": 0.314485102891922, | |
| "learning_rate": 3.1746471709702964e-06, | |
| "loss": 1.3962, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.5225718194254445, | |
| "grad_norm": 0.30730655789375305, | |
| "learning_rate": 3.1390432191192115e-06, | |
| "loss": 1.5221, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.5253077975376197, | |
| "grad_norm": 0.34515947103500366, | |
| "learning_rate": 3.1036266670827014e-06, | |
| "loss": 1.4745, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.528043775649795, | |
| "grad_norm": 0.3266810476779938, | |
| "learning_rate": 3.068397818463936e-06, | |
| "loss": 1.4541, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.53077975376197, | |
| "grad_norm": 0.2986440062522888, | |
| "learning_rate": 3.033356975256979e-06, | |
| "loss": 1.5795, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.533515731874145, | |
| "grad_norm": 0.337341845035553, | |
| "learning_rate": 2.9985044378442933e-06, | |
| "loss": 1.5815, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.53625170998632, | |
| "grad_norm": 0.3632700741291046, | |
| "learning_rate": 2.963840504994075e-06, | |
| "loss": 1.54, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.538987688098495, | |
| "grad_norm": 0.3714698553085327, | |
| "learning_rate": 2.9293654738577647e-06, | |
| "loss": 1.5019, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.5417236662106704, | |
| "grad_norm": 0.3200991153717041, | |
| "learning_rate": 2.8950796399674573e-06, | |
| "loss": 1.4587, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.5444596443228455, | |
| "grad_norm": 0.3523624539375305, | |
| "learning_rate": 2.860983297233388e-06, | |
| "loss": 1.4111, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.5471956224350203, | |
| "grad_norm": 0.3558140993118286, | |
| "learning_rate": 2.8270767379414087e-06, | |
| "loss": 1.3241, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.5499316005471955, | |
| "grad_norm": 0.3104929029941559, | |
| "learning_rate": 2.7933602527504738e-06, | |
| "loss": 1.6253, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.5526675786593707, | |
| "grad_norm": 0.33675897121429443, | |
| "learning_rate": 2.7598341306901643e-06, | |
| "loss": 1.5365, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.555403556771546, | |
| "grad_norm": 0.39364495873451233, | |
| "learning_rate": 2.726498659158183e-06, | |
| "loss": 1.5522, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 0.31920477747917175, | |
| "learning_rate": 2.693354123917943e-06, | |
| "loss": 1.5633, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.5608755129958958, | |
| "grad_norm": 0.37531107664108276, | |
| "learning_rate": 2.660400809096045e-06, | |
| "loss": 1.3603, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.5636114911080714, | |
| "grad_norm": 0.31020471453666687, | |
| "learning_rate": 2.6276389971799153e-06, | |
| "loss": 1.3862, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.566347469220246, | |
| "grad_norm": 0.3524358570575714, | |
| "learning_rate": 2.595068969015327e-06, | |
| "loss": 1.5923, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.5690834473324213, | |
| "grad_norm": 0.3156653940677643, | |
| "learning_rate": 2.562691003804024e-06, | |
| "loss": 1.6537, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.5718194254445965, | |
| "grad_norm": 0.3592538833618164, | |
| "learning_rate": 2.5305053791013194e-06, | |
| "loss": 1.3821, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.5745554035567717, | |
| "grad_norm": 0.3512822091579437, | |
| "learning_rate": 2.498512370813716e-06, | |
| "loss": 1.5147, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.577291381668947, | |
| "grad_norm": 0.31111329793930054, | |
| "learning_rate": 2.466712253196535e-06, | |
| "loss": 1.443, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.5800273597811216, | |
| "grad_norm": 0.31163808703422546, | |
| "learning_rate": 2.4351052988515783e-06, | |
| "loss": 1.5938, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.582763337893297, | |
| "grad_norm": 0.34021854400634766, | |
| "learning_rate": 2.403691778724787e-06, | |
| "loss": 1.4879, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.585499316005472, | |
| "grad_norm": 0.30090874433517456, | |
| "learning_rate": 2.3724719621038923e-06, | |
| "loss": 1.5188, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": 0.2972038984298706, | |
| "learning_rate": 2.341446116616172e-06, | |
| "loss": 1.4422, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.5909712722298224, | |
| "grad_norm": 0.360759973526001, | |
| "learning_rate": 2.310614508226078e-06, | |
| "loss": 1.4662, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.593707250341997, | |
| "grad_norm": 0.33597201108932495, | |
| "learning_rate": 2.2799774012330076e-06, | |
| "loss": 1.4008, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.5964432284541723, | |
| "grad_norm": 0.3385523855686188, | |
| "learning_rate": 2.2495350582690254e-06, | |
| "loss": 1.4652, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.5991792065663475, | |
| "grad_norm": 0.3732840418815613, | |
| "learning_rate": 2.219287740296605e-06, | |
| "loss": 1.3399, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.6019151846785227, | |
| "grad_norm": 0.37210366129875183, | |
| "learning_rate": 2.1892357066064128e-06, | |
| "loss": 1.3226, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.604651162790698, | |
| "grad_norm": 0.33239394426345825, | |
| "learning_rate": 2.1593792148150437e-06, | |
| "loss": 1.3563, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.6073871409028726, | |
| "grad_norm": 0.3127799332141876, | |
| "learning_rate": 2.1297185208628585e-06, | |
| "loss": 1.5005, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.610123119015048, | |
| "grad_norm": 0.3832361698150635, | |
| "learning_rate": 2.10025387901176e-06, | |
| "loss": 1.3859, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.612859097127223, | |
| "grad_norm": 0.3399605453014374, | |
| "learning_rate": 2.0709855418430317e-06, | |
| "loss": 1.559, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.615595075239398, | |
| "grad_norm": 0.36757224798202515, | |
| "learning_rate": 2.0419137602551516e-06, | |
| "loss": 1.475, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.6183310533515733, | |
| "grad_norm": 0.3125869929790497, | |
| "learning_rate": 2.013038783461674e-06, | |
| "loss": 1.4586, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.621067031463748, | |
| "grad_norm": 0.31474483013153076, | |
| "learning_rate": 1.9843608589890513e-06, | |
| "loss": 1.4757, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.6238030095759233, | |
| "grad_norm": 0.3160763382911682, | |
| "learning_rate": 1.9558802326745424e-06, | |
| "loss": 1.5655, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.6265389876880985, | |
| "grad_norm": 0.41808021068573, | |
| "learning_rate": 1.927597148664112e-06, | |
| "loss": 1.4522, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.6292749658002736, | |
| "grad_norm": 0.38478416204452515, | |
| "learning_rate": 1.8995118494102903e-06, | |
| "loss": 1.5056, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.632010943912449, | |
| "grad_norm": 0.33798423409461975, | |
| "learning_rate": 1.8716245756701694e-06, | |
| "loss": 1.5478, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.6347469220246236, | |
| "grad_norm": 0.34396031498908997, | |
| "learning_rate": 1.843935566503252e-06, | |
| "loss": 1.438, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.6374829001367988, | |
| "grad_norm": 0.317111998796463, | |
| "learning_rate": 1.816445059269481e-06, | |
| "loss": 1.4886, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.640218878248974, | |
| "grad_norm": 0.314860075712204, | |
| "learning_rate": 1.7891532896271547e-06, | |
| "loss": 1.5923, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.642954856361149, | |
| "grad_norm": 0.29904037714004517, | |
| "learning_rate": 1.7620604915309257e-06, | |
| "loss": 1.3853, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.6456908344733243, | |
| "grad_norm": 0.34991443157196045, | |
| "learning_rate": 1.7351668972297924e-06, | |
| "loss": 1.5588, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.648426812585499, | |
| "grad_norm": 0.3276471197605133, | |
| "learning_rate": 1.7084727372651155e-06, | |
| "loss": 1.4885, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.6511627906976747, | |
| "grad_norm": 0.3197418749332428, | |
| "learning_rate": 1.6819782404686263e-06, | |
| "loss": 1.5063, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.6538987688098494, | |
| "grad_norm": 0.3408433198928833, | |
| "learning_rate": 1.655683633960467e-06, | |
| "loss": 1.4564, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.6566347469220246, | |
| "grad_norm": 0.33302435278892517, | |
| "learning_rate": 1.62958914314727e-06, | |
| "loss": 1.3688, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.6593707250342, | |
| "grad_norm": 0.34161651134490967, | |
| "learning_rate": 1.6036949917201783e-06, | |
| "loss": 1.5104, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.662106703146375, | |
| "grad_norm": 0.33324992656707764, | |
| "learning_rate": 1.578001401652987e-06, | |
| "loss": 1.4552, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.66484268125855, | |
| "grad_norm": 0.30797359347343445, | |
| "learning_rate": 1.5525085932001832e-06, | |
| "loss": 1.5157, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.667578659370725, | |
| "grad_norm": 0.3368433117866516, | |
| "learning_rate": 1.5272167848950913e-06, | |
| "loss": 1.435, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.6703146374829, | |
| "grad_norm": 0.3299857974052429, | |
| "learning_rate": 1.5021261935479924e-06, | |
| "loss": 1.5031, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.6730506155950753, | |
| "grad_norm": 0.29794323444366455, | |
| "learning_rate": 1.4772370342442694e-06, | |
| "loss": 1.4679, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.6757865937072505, | |
| "grad_norm": 0.31903737783432007, | |
| "learning_rate": 1.4525495203425498e-06, | |
| "loss": 1.6308, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.6785225718194257, | |
| "grad_norm": 0.3376205861568451, | |
| "learning_rate": 1.428063863472895e-06, | |
| "loss": 1.5566, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.6812585499316004, | |
| "grad_norm": 0.3032618761062622, | |
| "learning_rate": 1.4037802735349743e-06, | |
| "loss": 1.4632, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.6839945280437756, | |
| "grad_norm": 0.33116862177848816, | |
| "learning_rate": 1.3796989586962544e-06, | |
| "loss": 1.5811, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.6867305061559508, | |
| "grad_norm": 0.34618982672691345, | |
| "learning_rate": 1.3558201253902531e-06, | |
| "loss": 1.3734, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.689466484268126, | |
| "grad_norm": 0.3325488567352295, | |
| "learning_rate": 1.3321439783147233e-06, | |
| "loss": 1.4946, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.692202462380301, | |
| "grad_norm": 0.32722416520118713, | |
| "learning_rate": 1.3086707204299414e-06, | |
| "loss": 1.5425, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.694938440492476, | |
| "grad_norm": 0.3376059830188751, | |
| "learning_rate": 1.2854005529569224e-06, | |
| "loss": 1.4031, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.697674418604651, | |
| "grad_norm": 0.3256185054779053, | |
| "learning_rate": 1.2623336753757348e-06, | |
| "loss": 1.493, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.7004103967168263, | |
| "grad_norm": 0.3338906168937683, | |
| "learning_rate": 1.2394702854237744e-06, | |
| "loss": 1.5289, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.7031463748290014, | |
| "grad_norm": 0.31372472643852234, | |
| "learning_rate": 1.2168105790940603e-06, | |
| "loss": 1.5584, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": 0.3929591774940491, | |
| "learning_rate": 1.1943547506335723e-06, | |
| "loss": 1.482, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.7086183310533514, | |
| "grad_norm": 0.3206530809402466, | |
| "learning_rate": 1.1721029925415738e-06, | |
| "loss": 1.5406, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.7113543091655266, | |
| "grad_norm": 0.32885223627090454, | |
| "learning_rate": 1.15005549556797e-06, | |
| "loss": 1.3643, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.7140902872777017, | |
| "grad_norm": 0.3618469536304474, | |
| "learning_rate": 1.1282124487116518e-06, | |
| "loss": 1.5104, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.716826265389877, | |
| "grad_norm": 0.31617462635040283, | |
| "learning_rate": 1.106574039218919e-06, | |
| "loss": 1.4661, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.719562243502052, | |
| "grad_norm": 0.31909239292144775, | |
| "learning_rate": 1.0851404525818204e-06, | |
| "loss": 1.5708, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.722298221614227, | |
| "grad_norm": 0.3482041358947754, | |
| "learning_rate": 1.0639118725366049e-06, | |
| "loss": 1.4782, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.725034199726402, | |
| "grad_norm": 0.3372204601764679, | |
| "learning_rate": 1.0428884810621403e-06, | |
| "loss": 1.4833, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.7277701778385772, | |
| "grad_norm": 0.32383978366851807, | |
| "learning_rate": 1.0220704583783242e-06, | |
| "loss": 1.5989, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.7305061559507524, | |
| "grad_norm": 0.3657885491847992, | |
| "learning_rate": 1.0014579829445842e-06, | |
| "loss": 1.6592, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.7332421340629276, | |
| "grad_norm": 0.3399549722671509, | |
| "learning_rate": 9.810512314583075e-07, | |
| "loss": 1.3902, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.7359781121751023, | |
| "grad_norm": 0.3272568881511688, | |
| "learning_rate": 9.60850378853348e-07, | |
| "loss": 1.3684, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.7359781121751023, | |
| "eval_loss": 1.5569868087768555, | |
| "eval_runtime": 13.8259, | |
| "eval_samples_per_second": 11.138, | |
| "eval_steps_per_second": 11.138, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.738714090287278, | |
| "grad_norm": 0.3006269037723541, | |
| "learning_rate": 9.408555982985262e-07, | |
| "loss": 1.4081, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.7414500683994527, | |
| "grad_norm": 0.32424142956733704, | |
| "learning_rate": 9.210670611961375e-07, | |
| "loss": 1.47, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.744186046511628, | |
| "grad_norm": 0.3788749873638153, | |
| "learning_rate": 9.014849371804829e-07, | |
| "loss": 1.4683, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.746922024623803, | |
| "grad_norm": 0.3551234006881714, | |
| "learning_rate": 8.821093941164233e-07, | |
| "loss": 1.6791, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.7496580027359783, | |
| "grad_norm": 0.38576632738113403, | |
| "learning_rate": 8.629405980979332e-07, | |
| "loss": 1.49, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.7523939808481535, | |
| "grad_norm": 0.3681055009365082, | |
| "learning_rate": 8.439787134466692e-07, | |
| "loss": 1.4691, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.755129958960328, | |
| "grad_norm": 0.3119199872016907, | |
| "learning_rate": 8.252239027105924e-07, | |
| "loss": 1.5351, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.7578659370725034, | |
| "grad_norm": 0.31998804211616516, | |
| "learning_rate": 8.066763266625282e-07, | |
| "loss": 1.468, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.7606019151846786, | |
| "grad_norm": 0.3693805932998657, | |
| "learning_rate": 7.883361442988374e-07, | |
| "loss": 1.5278, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.7633378932968538, | |
| "grad_norm": 0.3538669943809509, | |
| "learning_rate": 7.702035128380053e-07, | |
| "loss": 1.5411, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.766073871409029, | |
| "grad_norm": 0.3684946596622467, | |
| "learning_rate": 7.522785877193378e-07, | |
| "loss": 1.4696, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.7688098495212037, | |
| "grad_norm": 0.28718748688697815, | |
| "learning_rate": 7.345615226016011e-07, | |
| "loss": 1.5301, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.771545827633379, | |
| "grad_norm": 0.33110928535461426, | |
| "learning_rate": 7.170524693617064e-07, | |
| "loss": 1.6021, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.774281805745554, | |
| "grad_norm": 0.31040212512016296, | |
| "learning_rate": 6.997515780934244e-07, | |
| "loss": 1.4569, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.7770177838577292, | |
| "grad_norm": 0.3069436848163605, | |
| "learning_rate": 6.826589971060837e-07, | |
| "loss": 1.4963, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.7797537619699044, | |
| "grad_norm": 0.3085753321647644, | |
| "learning_rate": 6.657748729233026e-07, | |
| "loss": 1.6278, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.782489740082079, | |
| "grad_norm": 0.31628352403640747, | |
| "learning_rate": 6.490993502817289e-07, | |
| "loss": 1.4166, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.7852257181942544, | |
| "grad_norm": 0.32792040705680847, | |
| "learning_rate": 6.32632572129821e-07, | |
| "loss": 1.4757, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.7879616963064295, | |
| "grad_norm": 0.34112995862960815, | |
| "learning_rate": 6.163746796265885e-07, | |
| "loss": 1.4505, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 0.3526972830295563, | |
| "learning_rate": 6.003258121404148e-07, | |
| "loss": 1.4417, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.79343365253078, | |
| "grad_norm": 0.3196633458137512, | |
| "learning_rate": 5.844861072478336e-07, | |
| "loss": 1.5024, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.7961696306429547, | |
| "grad_norm": 0.3368687033653259, | |
| "learning_rate": 5.688557007323708e-07, | |
| "loss": 1.6036, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.79890560875513, | |
| "grad_norm": 0.33089619874954224, | |
| "learning_rate": 5.534347265833711e-07, | |
| "loss": 1.5716, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.801641586867305, | |
| "grad_norm": 0.35482943058013916, | |
| "learning_rate": 5.382233169948541e-07, | |
| "loss": 1.5263, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.80437756497948, | |
| "grad_norm": 0.3278489112854004, | |
| "learning_rate": 5.232216023643711e-07, | |
| "loss": 1.4273, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.8071135430916554, | |
| "grad_norm": 0.3070161044597626, | |
| "learning_rate": 5.084297112919051e-07, | |
| "loss": 1.7237, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.80984952120383, | |
| "grad_norm": 0.3581262528896332, | |
| "learning_rate": 4.938477705787508e-07, | |
| "loss": 1.4228, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.8125854993160053, | |
| "grad_norm": 0.3500869572162628, | |
| "learning_rate": 4.794759052264259e-07, | |
| "loss": 1.6744, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.8153214774281805, | |
| "grad_norm": 0.3327549397945404, | |
| "learning_rate": 4.653142384356324e-07, | |
| "loss": 1.3299, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.8180574555403557, | |
| "grad_norm": 0.32055315375328064, | |
| "learning_rate": 4.513628916051532e-07, | |
| "loss": 1.5822, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.820793433652531, | |
| "grad_norm": 0.35483717918395996, | |
| "learning_rate": 4.376219843308438e-07, | |
| "loss": 1.4478, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 0.3212404251098633, | |
| "learning_rate": 4.2409163440459697e-07, | |
| "loss": 1.6226, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.8262653898768813, | |
| "grad_norm": 0.3329022526741028, | |
| "learning_rate": 4.107719578133301e-07, | |
| "loss": 1.5192, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.829001367989056, | |
| "grad_norm": 0.3651573061943054, | |
| "learning_rate": 3.9766306873801064e-07, | |
| "loss": 1.5169, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.831737346101231, | |
| "grad_norm": 0.3816221356391907, | |
| "learning_rate": 3.8476507955263475e-07, | |
| "loss": 1.39, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.8344733242134064, | |
| "grad_norm": 0.35796451568603516, | |
| "learning_rate": 3.720781008233143e-07, | |
| "loss": 1.4573, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.8372093023255816, | |
| "grad_norm": 0.3537936210632324, | |
| "learning_rate": 3.5960224130728857e-07, | |
| "loss": 1.5344, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.8399452804377567, | |
| "grad_norm": 0.35659074783325195, | |
| "learning_rate": 3.473376079520224e-07, | |
| "loss": 1.3814, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.8426812585499315, | |
| "grad_norm": 0.31420376896858215, | |
| "learning_rate": 3.352843058942623e-07, | |
| "loss": 1.4956, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.8454172366621067, | |
| "grad_norm": 0.31859591603279114, | |
| "learning_rate": 3.2344243845915945e-07, | |
| "loss": 1.6171, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.848153214774282, | |
| "grad_norm": 0.30240893363952637, | |
| "learning_rate": 3.118121071593677e-07, | |
| "loss": 1.4719, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.850889192886457, | |
| "grad_norm": 0.3180094361305237, | |
| "learning_rate": 3.003934116941776e-07, | |
| "loss": 1.4596, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.853625170998632, | |
| "grad_norm": 0.3070809841156006, | |
| "learning_rate": 2.891864499486724e-07, | |
| "loss": 1.5858, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.856361149110807, | |
| "grad_norm": 0.3325824737548828, | |
| "learning_rate": 2.7819131799285967e-07, | |
| "loss": 1.5769, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.859097127222982, | |
| "grad_norm": 0.3298075199127197, | |
| "learning_rate": 2.6740811008089393e-07, | |
| "loss": 1.5655, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.8618331053351573, | |
| "grad_norm": 0.35839757323265076, | |
| "learning_rate": 2.568369186502162e-07, | |
| "loss": 1.3463, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.8645690834473325, | |
| "grad_norm": 0.3779064118862152, | |
| "learning_rate": 2.464778343208074e-07, | |
| "loss": 1.4612, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.8673050615595077, | |
| "grad_norm": 0.32933345437049866, | |
| "learning_rate": 2.36330945894378e-07, | |
| "loss": 1.4131, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.8700410396716824, | |
| "grad_norm": 0.3202325999736786, | |
| "learning_rate": 2.2639634035363234e-07, | |
| "loss": 1.5189, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.8727770177838576, | |
| "grad_norm": 0.3435043394565582, | |
| "learning_rate": 2.1667410286149992e-07, | |
| "loss": 1.4026, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.875512995896033, | |
| "grad_norm": 0.33565640449523926, | |
| "learning_rate": 2.071643167604248e-07, | |
| "loss": 1.4982, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.878248974008208, | |
| "grad_norm": 0.31919005513191223, | |
| "learning_rate": 1.9786706357163842e-07, | |
| "loss": 1.6241, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.880984952120383, | |
| "grad_norm": 0.33048737049102783, | |
| "learning_rate": 1.8878242299446848e-07, | |
| "loss": 1.586, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.883720930232558, | |
| "grad_norm": 0.31295284628868103, | |
| "learning_rate": 1.7991047290565056e-07, | |
| "loss": 1.5296, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.886456908344733, | |
| "grad_norm": 0.30597612261772156, | |
| "learning_rate": 1.712512893586593e-07, | |
| "loss": 1.4865, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.8891928864569083, | |
| "grad_norm": 0.3064374327659607, | |
| "learning_rate": 1.6280494658307e-07, | |
| "loss": 1.5211, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.8919288645690835, | |
| "grad_norm": 0.3949945867061615, | |
| "learning_rate": 1.5457151698390347e-07, | |
| "loss": 1.5316, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.8946648426812587, | |
| "grad_norm": 0.4148292541503906, | |
| "learning_rate": 1.4655107114101007e-07, | |
| "loss": 1.3986, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.8974008207934334, | |
| "grad_norm": 0.32118478417396545, | |
| "learning_rate": 1.387436778084783e-07, | |
| "loss": 1.7486, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.9001367989056086, | |
| "grad_norm": 0.34837549924850464, | |
| "learning_rate": 1.3114940391403263e-07, | |
| "loss": 1.3545, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.902872777017784, | |
| "grad_norm": 0.331442266702652, | |
| "learning_rate": 1.2376831455845893e-07, | |
| "loss": 1.5355, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.905608755129959, | |
| "grad_norm": 0.3617335855960846, | |
| "learning_rate": 1.1660047301505495e-07, | |
| "loss": 1.5907, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.908344733242134, | |
| "grad_norm": 0.32971206307411194, | |
| "learning_rate": 1.0964594072907786e-07, | |
| "loss": 1.6133, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.911080711354309, | |
| "grad_norm": 0.34654051065444946, | |
| "learning_rate": 1.0290477731722814e-07, | |
| "loss": 1.7007, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.9138166894664845, | |
| "grad_norm": 0.3612649738788605, | |
| "learning_rate": 9.637704056713049e-08, | |
| "loss": 1.2852, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.9165526675786593, | |
| "grad_norm": 0.2997490465641022, | |
| "learning_rate": 9.006278643683696e-08, | |
| "loss": 1.5689, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.9192886456908345, | |
| "grad_norm": 0.3305412828922272, | |
| "learning_rate": 8.396206905436355e-08, | |
| "loss": 1.5476, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.9220246238030096, | |
| "grad_norm": 0.36031877994537354, | |
| "learning_rate": 7.807494071720156e-08, | |
| "loss": 1.6777, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.924760601915185, | |
| "grad_norm": 0.3095710873603821, | |
| "learning_rate": 7.24014518918903e-08, | |
| "loss": 1.589, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.92749658002736, | |
| "grad_norm": 0.30806005001068115, | |
| "learning_rate": 6.694165121356732e-08, | |
| "loss": 1.4103, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.9302325581395348, | |
| "grad_norm": 0.36173051595687866, | |
| "learning_rate": 6.169558548556887e-08, | |
| "loss": 1.3676, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.93296853625171, | |
| "grad_norm": 0.2915642559528351, | |
| "learning_rate": 5.6663299679018955e-08, | |
| "loss": 1.4523, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.935704514363885, | |
| "grad_norm": 0.3511941730976105, | |
| "learning_rate": 5.1844836932438134e-08, | |
| "loss": 1.5194, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.9384404924760603, | |
| "grad_norm": 0.3125559389591217, | |
| "learning_rate": 4.7240238551385376e-08, | |
| "loss": 1.4604, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 0.3312918245792389, | |
| "learning_rate": 4.284954400810004e-08, | |
| "loss": 1.4807, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.9439124487004102, | |
| "grad_norm": 0.3593026101589203, | |
| "learning_rate": 3.86727909411605e-08, | |
| "loss": 1.7197, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.9466484268125854, | |
| "grad_norm": 0.3443048298358917, | |
| "learning_rate": 3.471001515516215e-08, | |
| "loss": 1.6237, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.9493844049247606, | |
| "grad_norm": 0.32331955432891846, | |
| "learning_rate": 3.096125062041488e-08, | |
| "loss": 1.5434, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.952120383036936, | |
| "grad_norm": 0.3328414261341095, | |
| "learning_rate": 2.7426529472648898e-08, | |
| "loss": 1.6007, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.954856361149111, | |
| "grad_norm": 0.31277886033058167, | |
| "learning_rate": 2.4105882012734336e-08, | |
| "loss": 1.6117, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.9575923392612857, | |
| "grad_norm": 0.32368844747543335, | |
| "learning_rate": 2.0999336706434282e-08, | |
| "loss": 1.4795, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.960328317373461, | |
| "grad_norm": 0.3160129487514496, | |
| "learning_rate": 1.810692018414384e-08, | |
| "loss": 1.5325, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.963064295485636, | |
| "grad_norm": 0.3517950773239136, | |
| "learning_rate": 1.5428657240676437e-08, | |
| "loss": 1.4103, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.9658002735978113, | |
| "grad_norm": 0.36211562156677246, | |
| "learning_rate": 1.2964570835047318e-08, | |
| "loss": 1.6729, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.9685362517099865, | |
| "grad_norm": 0.3121832609176636, | |
| "learning_rate": 1.0714682090270933e-08, | |
| "loss": 1.3272, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.971272229822161, | |
| "grad_norm": 0.33123496174812317, | |
| "learning_rate": 8.679010293183298e-09, | |
| "loss": 1.506, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.9740082079343364, | |
| "grad_norm": 0.32456517219543457, | |
| "learning_rate": 6.857572894281017e-09, | |
| "loss": 1.5943, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.9767441860465116, | |
| "grad_norm": 0.3651670813560486, | |
| "learning_rate": 5.250385507568623e-09, | |
| "loss": 1.4488, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.9794801641586868, | |
| "grad_norm": 0.3096288740634918, | |
| "learning_rate": 3.8574619104170265e-09, | |
| "loss": 1.4775, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.982216142270862, | |
| "grad_norm": 0.33430325984954834, | |
| "learning_rate": 2.67881404346082e-09, | |
| "loss": 1.5276, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.9849521203830367, | |
| "grad_norm": 0.2988739609718323, | |
| "learning_rate": 1.7144520104844798e-09, | |
| "loss": 1.4893, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.987688098495212, | |
| "grad_norm": 0.35142961144447327, | |
| "learning_rate": 9.643840783363224e-10, | |
| "loss": 1.4864, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.990424076607387, | |
| "grad_norm": 0.2971309721469879, | |
| "learning_rate": 4.2861667686744377e-10, | |
| "loss": 1.6171, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.9931600547195623, | |
| "grad_norm": 0.36940792202949524, | |
| "learning_rate": 1.0715439885677913e-10, | |
| "loss": 1.5264, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.9958960328317374, | |
| "grad_norm": 0.31959155201911926, | |
| "learning_rate": 0.0, | |
| "loss": 1.585, | |
| "step": 1095 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1095, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0313245522312704e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |