| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998983843105376, | |
| "eval_steps": 500, | |
| "global_step": 1230, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000812925515699624, | |
| "grad_norm": 34.804351806640625, | |
| "learning_rate": 5.405405405405406e-07, | |
| "loss": 2.022, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001625851031399248, | |
| "grad_norm": 35.288848876953125, | |
| "learning_rate": 1.0810810810810812e-06, | |
| "loss": 2.1055, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.002438776547098872, | |
| "grad_norm": 37.58893585205078, | |
| "learning_rate": 1.6216216216216219e-06, | |
| "loss": 2.0685, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.003251702062798496, | |
| "grad_norm": 28.51118278503418, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 2.0364, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00406462757849812, | |
| "grad_norm": 24.874475479125977, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 1.9688, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.004877553094197744, | |
| "grad_norm": 12.156012535095215, | |
| "learning_rate": 3.2432432432432437e-06, | |
| "loss": 1.8677, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.005690478609897368, | |
| "grad_norm": 7.017012119293213, | |
| "learning_rate": 3.7837837837837844e-06, | |
| "loss": 1.7774, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.006503404125596992, | |
| "grad_norm": 11.024828910827637, | |
| "learning_rate": 4.324324324324325e-06, | |
| "loss": 1.8042, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.007316329641296616, | |
| "grad_norm": 6.988280296325684, | |
| "learning_rate": 4.864864864864866e-06, | |
| "loss": 1.7973, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.00812925515699624, | |
| "grad_norm": 8.541196823120117, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 1.7946, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008942180672695864, | |
| "grad_norm": 7.084593772888184, | |
| "learning_rate": 5.945945945945947e-06, | |
| "loss": 1.8178, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.009755106188395488, | |
| "grad_norm": 5.755589962005615, | |
| "learning_rate": 6.486486486486487e-06, | |
| "loss": 1.7748, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.010568031704095112, | |
| "grad_norm": 4.855886459350586, | |
| "learning_rate": 7.027027027027028e-06, | |
| "loss": 1.6665, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.011380957219794737, | |
| "grad_norm": 5.280701160430908, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 1.7226, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.01219388273549436, | |
| "grad_norm": 4.513389587402344, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 1.7219, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.013006808251193984, | |
| "grad_norm": 4.92287015914917, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 1.697, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.013819733766893608, | |
| "grad_norm": 4.488801002502441, | |
| "learning_rate": 9.189189189189191e-06, | |
| "loss": 1.6584, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.014632659282593233, | |
| "grad_norm": 3.736851930618286, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 1.6752, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.015445584798292857, | |
| "grad_norm": 3.7089431285858154, | |
| "learning_rate": 1.027027027027027e-05, | |
| "loss": 1.602, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.01625851031399248, | |
| "grad_norm": 3.9821619987487793, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 1.6492, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.017071435829692106, | |
| "grad_norm": 3.72698974609375, | |
| "learning_rate": 1.1351351351351352e-05, | |
| "loss": 1.6893, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.017884361345391727, | |
| "grad_norm": 3.0124993324279785, | |
| "learning_rate": 1.1891891891891894e-05, | |
| "loss": 1.5879, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01869728686109135, | |
| "grad_norm": 9.361907005310059, | |
| "learning_rate": 1.2432432432432433e-05, | |
| "loss": 1.6172, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.019510212376790976, | |
| "grad_norm": 3.431147813796997, | |
| "learning_rate": 1.2972972972972975e-05, | |
| "loss": 1.6354, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0203231378924906, | |
| "grad_norm": 3.3041067123413086, | |
| "learning_rate": 1.3513513513513515e-05, | |
| "loss": 1.5998, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.021136063408190225, | |
| "grad_norm": 3.4122121334075928, | |
| "learning_rate": 1.4054054054054055e-05, | |
| "loss": 1.5737, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02194898892388985, | |
| "grad_norm": 3.538844585418701, | |
| "learning_rate": 1.4594594594594596e-05, | |
| "loss": 1.5737, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.022761914439589474, | |
| "grad_norm": 3.245887041091919, | |
| "learning_rate": 1.5135135135135138e-05, | |
| "loss": 1.5893, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.023574839955289098, | |
| "grad_norm": 3.7787671089172363, | |
| "learning_rate": 1.5675675675675676e-05, | |
| "loss": 1.5923, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.02438776547098872, | |
| "grad_norm": 3.557563066482544, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 1.5906, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.025200690986688343, | |
| "grad_norm": 3.1536169052124023, | |
| "learning_rate": 1.6756756756756757e-05, | |
| "loss": 1.5976, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.026013616502387968, | |
| "grad_norm": 3.060678005218506, | |
| "learning_rate": 1.72972972972973e-05, | |
| "loss": 1.5239, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.026826542018087592, | |
| "grad_norm": 3.0163331031799316, | |
| "learning_rate": 1.783783783783784e-05, | |
| "loss": 1.5703, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.027639467533787217, | |
| "grad_norm": 3.0648066997528076, | |
| "learning_rate": 1.8378378378378383e-05, | |
| "loss": 1.5421, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02845239304948684, | |
| "grad_norm": 2.8359413146972656, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 1.5698, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.029265318565186466, | |
| "grad_norm": 3.0256259441375732, | |
| "learning_rate": 1.9459459459459463e-05, | |
| "loss": 1.5258, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03007824408088609, | |
| "grad_norm": 3.305952548980713, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5979, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.030891169596585714, | |
| "grad_norm": 2.7958834171295166, | |
| "learning_rate": 1.99999653272242e-05, | |
| "loss": 1.5065, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.031704095112285335, | |
| "grad_norm": 3.515479564666748, | |
| "learning_rate": 1.9999861309137232e-05, | |
| "loss": 1.4837, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.03251702062798496, | |
| "grad_norm": 2.7845990657806396, | |
| "learning_rate": 1.999968794646042e-05, | |
| "loss": 1.5634, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.033329946143684584, | |
| "grad_norm": 3.0540645122528076, | |
| "learning_rate": 1.9999445240395953e-05, | |
| "loss": 1.5001, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.03414287165938421, | |
| "grad_norm": 3.059220790863037, | |
| "learning_rate": 1.9999133192626893e-05, | |
| "loss": 1.502, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.03495579717508383, | |
| "grad_norm": 2.594452142715454, | |
| "learning_rate": 1.9998751805317152e-05, | |
| "loss": 1.5245, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.035768722690783454, | |
| "grad_norm": 3.0076844692230225, | |
| "learning_rate": 1.999830108111148e-05, | |
| "loss": 1.5032, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03658164820648308, | |
| "grad_norm": 2.9521396160125732, | |
| "learning_rate": 1.999778102313545e-05, | |
| "loss": 1.5381, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0373945737221827, | |
| "grad_norm": 3.280303478240967, | |
| "learning_rate": 1.999719163499543e-05, | |
| "loss": 1.5478, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.03820749923788233, | |
| "grad_norm": 2.9089877605438232, | |
| "learning_rate": 1.999653292077857e-05, | |
| "loss": 1.4783, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.03902042475358195, | |
| "grad_norm": 5.1869635581970215, | |
| "learning_rate": 1.999580488505276e-05, | |
| "loss": 1.5067, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.03983335026928158, | |
| "grad_norm": 3.053921699523926, | |
| "learning_rate": 1.9995007532866594e-05, | |
| "loss": 1.503, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0406462757849812, | |
| "grad_norm": 2.952059507369995, | |
| "learning_rate": 1.9994140869749366e-05, | |
| "loss": 1.4579, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04145920130068083, | |
| "grad_norm": 2.609379291534424, | |
| "learning_rate": 1.9993204901710995e-05, | |
| "loss": 1.4679, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.04227212681638045, | |
| "grad_norm": 3.41717267036438, | |
| "learning_rate": 1.9992199635241997e-05, | |
| "loss": 1.5197, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.04308505233208007, | |
| "grad_norm": 2.8707101345062256, | |
| "learning_rate": 1.999112507731346e-05, | |
| "loss": 1.5074, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0438979778477797, | |
| "grad_norm": 3.325697660446167, | |
| "learning_rate": 1.9989981235376956e-05, | |
| "loss": 1.427, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04471090336347932, | |
| "grad_norm": 2.7196686267852783, | |
| "learning_rate": 1.9988768117364526e-05, | |
| "loss": 1.4868, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.04552382887917895, | |
| "grad_norm": 2.9488351345062256, | |
| "learning_rate": 1.9987485731688595e-05, | |
| "loss": 1.5011, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.04633675439487857, | |
| "grad_norm": 2.7776849269866943, | |
| "learning_rate": 1.998613408724195e-05, | |
| "loss": 1.4664, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.047149679910578196, | |
| "grad_norm": 2.719594717025757, | |
| "learning_rate": 1.998471319339763e-05, | |
| "loss": 1.4905, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.04796260542627782, | |
| "grad_norm": 2.8028323650360107, | |
| "learning_rate": 1.9983223060008908e-05, | |
| "loss": 1.4754, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.04877553094197744, | |
| "grad_norm": 3.1789817810058594, | |
| "learning_rate": 1.9981663697409203e-05, | |
| "loss": 1.4618, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.049588456457677066, | |
| "grad_norm": 3.077449321746826, | |
| "learning_rate": 1.998003511641199e-05, | |
| "loss": 1.453, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.05040138197337669, | |
| "grad_norm": 2.960418939590454, | |
| "learning_rate": 1.997833732831076e-05, | |
| "loss": 1.4564, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.051214307489076315, | |
| "grad_norm": 5.316094875335693, | |
| "learning_rate": 1.9976570344878916e-05, | |
| "loss": 1.4711, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.052027233004775936, | |
| "grad_norm": 3.257415771484375, | |
| "learning_rate": 1.9974734178369702e-05, | |
| "loss": 1.4606, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.052840158520475564, | |
| "grad_norm": 3.0437912940979004, | |
| "learning_rate": 1.997282884151612e-05, | |
| "loss": 1.5075, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.053653084036175185, | |
| "grad_norm": 3.3059332370758057, | |
| "learning_rate": 1.9970854347530828e-05, | |
| "loss": 1.484, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05446600955187481, | |
| "grad_norm": 4.510897636413574, | |
| "learning_rate": 1.9968810710106065e-05, | |
| "loss": 1.5091, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.05527893506757443, | |
| "grad_norm": 3.3621528148651123, | |
| "learning_rate": 1.9966697943413548e-05, | |
| "loss": 1.4603, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.056091860583274054, | |
| "grad_norm": 2.878563642501831, | |
| "learning_rate": 1.9964516062104377e-05, | |
| "loss": 1.4438, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.05690478609897368, | |
| "grad_norm": 2.8587141036987305, | |
| "learning_rate": 1.996226508130892e-05, | |
| "loss": 1.441, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0577177116146733, | |
| "grad_norm": 3.2675728797912598, | |
| "learning_rate": 1.995994501663674e-05, | |
| "loss": 1.4515, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.05853063713037293, | |
| "grad_norm": 3.018068790435791, | |
| "learning_rate": 1.995755588417644e-05, | |
| "loss": 1.4499, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.05934356264607255, | |
| "grad_norm": 3.715628147125244, | |
| "learning_rate": 1.99550977004956e-05, | |
| "loss": 1.4624, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.06015648816177218, | |
| "grad_norm": 2.7632699012756348, | |
| "learning_rate": 1.9952570482640628e-05, | |
| "loss": 1.4437, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0609694136774718, | |
| "grad_norm": 3.3581650257110596, | |
| "learning_rate": 1.9949974248136655e-05, | |
| "loss": 1.4865, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06178233919317143, | |
| "grad_norm": 3.090432643890381, | |
| "learning_rate": 1.9947309014987414e-05, | |
| "loss": 1.4416, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06259526470887104, | |
| "grad_norm": 3.3709418773651123, | |
| "learning_rate": 1.9944574801675106e-05, | |
| "loss": 1.4184, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.06340819022457067, | |
| "grad_norm": 3.6959853172302246, | |
| "learning_rate": 1.9941771627160287e-05, | |
| "loss": 1.4694, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0642211157402703, | |
| "grad_norm": 3.2907724380493164, | |
| "learning_rate": 1.9938899510881732e-05, | |
| "loss": 1.4121, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.06503404125596993, | |
| "grad_norm": 2.7885124683380127, | |
| "learning_rate": 1.9935958472756283e-05, | |
| "loss": 1.4033, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06584696677166954, | |
| "grad_norm": 2.8771262168884277, | |
| "learning_rate": 1.993294853317873e-05, | |
| "loss": 1.4466, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.06665989228736917, | |
| "grad_norm": 3.572303056716919, | |
| "learning_rate": 1.9929869713021668e-05, | |
| "loss": 1.3854, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0674728178030688, | |
| "grad_norm": 2.636934757232666, | |
| "learning_rate": 1.9926722033635343e-05, | |
| "loss": 1.4186, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.06828574331876842, | |
| "grad_norm": 3.1140427589416504, | |
| "learning_rate": 1.9923505516847514e-05, | |
| "loss": 1.424, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.06909866883446804, | |
| "grad_norm": 2.808480739593506, | |
| "learning_rate": 1.9920220184963296e-05, | |
| "loss": 1.4744, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.06991159435016767, | |
| "grad_norm": 3.118234872817993, | |
| "learning_rate": 1.9916866060764994e-05, | |
| "loss": 1.4277, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0707245198658673, | |
| "grad_norm": 4.0702033042907715, | |
| "learning_rate": 1.991344316751198e-05, | |
| "loss": 1.4236, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.07153744538156691, | |
| "grad_norm": 2.938345193862915, | |
| "learning_rate": 1.9909951528940485e-05, | |
| "loss": 1.4119, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.07235037089726654, | |
| "grad_norm": 2.960853338241577, | |
| "learning_rate": 1.990639116926348e-05, | |
| "loss": 1.471, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.07316329641296616, | |
| "grad_norm": 3.146742343902588, | |
| "learning_rate": 1.9902762113170467e-05, | |
| "loss": 1.4751, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07397622192866579, | |
| "grad_norm": 3.3954169750213623, | |
| "learning_rate": 1.989906438582734e-05, | |
| "loss": 1.467, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.0747891474443654, | |
| "grad_norm": 2.9790520668029785, | |
| "learning_rate": 1.9895298012876192e-05, | |
| "loss": 1.507, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07560207296006503, | |
| "grad_norm": 2.577925682067871, | |
| "learning_rate": 1.9891463020435144e-05, | |
| "loss": 1.4728, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.07641499847576466, | |
| "grad_norm": 3.437133550643921, | |
| "learning_rate": 1.9887559435098162e-05, | |
| "loss": 1.4472, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.07722792399146428, | |
| "grad_norm": 2.806886911392212, | |
| "learning_rate": 1.9883587283934875e-05, | |
| "loss": 1.4497, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0780408495071639, | |
| "grad_norm": 2.703793525695801, | |
| "learning_rate": 1.9879546594490383e-05, | |
| "loss": 1.4643, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.07885377502286353, | |
| "grad_norm": 3.2830615043640137, | |
| "learning_rate": 1.987543739478507e-05, | |
| "loss": 1.4162, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.07966670053856316, | |
| "grad_norm": 2.5376830101013184, | |
| "learning_rate": 1.987125971331441e-05, | |
| "loss": 1.494, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.08047962605426277, | |
| "grad_norm": 2.532893180847168, | |
| "learning_rate": 1.9867013579048765e-05, | |
| "loss": 1.4575, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0812925515699624, | |
| "grad_norm": 2.838155508041382, | |
| "learning_rate": 1.9862699021433186e-05, | |
| "loss": 1.4007, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08210547708566203, | |
| "grad_norm": 2.5777368545532227, | |
| "learning_rate": 1.9858316070387208e-05, | |
| "loss": 1.4213, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.08291840260136166, | |
| "grad_norm": 2.634209394454956, | |
| "learning_rate": 1.9853864756304654e-05, | |
| "loss": 1.4544, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08373132811706127, | |
| "grad_norm": 2.9893202781677246, | |
| "learning_rate": 1.9849345110053405e-05, | |
| "loss": 1.4361, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.0845442536327609, | |
| "grad_norm": 2.668808698654175, | |
| "learning_rate": 1.984475716297519e-05, | |
| "loss": 1.4267, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08535717914846053, | |
| "grad_norm": 3.2199463844299316, | |
| "learning_rate": 1.984010094688539e-05, | |
| "loss": 1.4731, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08617010466416014, | |
| "grad_norm": 2.746006965637207, | |
| "learning_rate": 1.9835376494072788e-05, | |
| "loss": 1.385, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.08698303017985977, | |
| "grad_norm": 2.955232620239258, | |
| "learning_rate": 1.9830583837299363e-05, | |
| "loss": 1.3984, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.0877959556955594, | |
| "grad_norm": 5.357511520385742, | |
| "learning_rate": 1.9825723009800058e-05, | |
| "loss": 1.4562, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.08860888121125902, | |
| "grad_norm": 2.5583655834198, | |
| "learning_rate": 1.9820794045282553e-05, | |
| "loss": 1.4222, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.08942180672695864, | |
| "grad_norm": 2.6951992511749268, | |
| "learning_rate": 1.9815796977927015e-05, | |
| "loss": 1.4697, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09023473224265827, | |
| "grad_norm": 2.714019775390625, | |
| "learning_rate": 1.9810731842385892e-05, | |
| "loss": 1.4696, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.0910476577583579, | |
| "grad_norm": 3.2998311519622803, | |
| "learning_rate": 1.9805598673783644e-05, | |
| "loss": 1.4034, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.09186058327405751, | |
| "grad_norm": 7.7324652671813965, | |
| "learning_rate": 1.980039750771651e-05, | |
| "loss": 1.4697, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.09267350878975714, | |
| "grad_norm": 2.6200242042541504, | |
| "learning_rate": 1.9795128380252263e-05, | |
| "loss": 1.451, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09348643430545676, | |
| "grad_norm": 2.937061071395874, | |
| "learning_rate": 1.978979132792996e-05, | |
| "loss": 1.4348, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.09429935982115639, | |
| "grad_norm": 3.087085247039795, | |
| "learning_rate": 1.9784386387759684e-05, | |
| "loss": 1.4271, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.095112285336856, | |
| "grad_norm": 2.6796271800994873, | |
| "learning_rate": 1.977891359722229e-05, | |
| "loss": 1.4933, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.09592521085255563, | |
| "grad_norm": 3.4506633281707764, | |
| "learning_rate": 1.9773372994269147e-05, | |
| "loss": 1.427, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.09673813636825526, | |
| "grad_norm": 2.532562732696533, | |
| "learning_rate": 1.976776461732187e-05, | |
| "loss": 1.436, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.09755106188395488, | |
| "grad_norm": 3.619605541229248, | |
| "learning_rate": 1.976208850527206e-05, | |
| "loss": 1.4384, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0983639873996545, | |
| "grad_norm": 3.5245602130889893, | |
| "learning_rate": 1.9756344697481027e-05, | |
| "loss": 1.4303, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.09917691291535413, | |
| "grad_norm": 2.649686336517334, | |
| "learning_rate": 1.975053323377952e-05, | |
| "loss": 1.4692, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.09998983843105376, | |
| "grad_norm": 2.516016721725464, | |
| "learning_rate": 1.9744654154467468e-05, | |
| "loss": 1.4154, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.10080276394675337, | |
| "grad_norm": 2.591168165206909, | |
| "learning_rate": 1.9738707500313655e-05, | |
| "loss": 1.403, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.101615689462453, | |
| "grad_norm": 2.522486686706543, | |
| "learning_rate": 1.9732693312555492e-05, | |
| "loss": 1.4575, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.10242861497815263, | |
| "grad_norm": 2.8282413482666016, | |
| "learning_rate": 1.9726611632898693e-05, | |
| "loss": 1.377, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.10324154049385226, | |
| "grad_norm": 2.598076820373535, | |
| "learning_rate": 1.9720462503517e-05, | |
| "loss": 1.4382, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.10405446600955187, | |
| "grad_norm": 2.688178777694702, | |
| "learning_rate": 1.971424596705189e-05, | |
| "loss": 1.4132, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1048673915252515, | |
| "grad_norm": 2.7512471675872803, | |
| "learning_rate": 1.9707962066612278e-05, | |
| "loss": 1.4193, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.10568031704095113, | |
| "grad_norm": 2.8025805950164795, | |
| "learning_rate": 1.970161084577422e-05, | |
| "loss": 1.3829, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10649324255665074, | |
| "grad_norm": 2.6514623165130615, | |
| "learning_rate": 1.9695192348580606e-05, | |
| "loss": 1.4362, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.10730616807235037, | |
| "grad_norm": 2.4559547901153564, | |
| "learning_rate": 1.9688706619540863e-05, | |
| "loss": 1.4357, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.10811909358805, | |
| "grad_norm": 2.8258724212646484, | |
| "learning_rate": 1.968215370363063e-05, | |
| "loss": 1.4501, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.10893201910374962, | |
| "grad_norm": 2.8553593158721924, | |
| "learning_rate": 1.9675533646291463e-05, | |
| "loss": 1.4841, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.10974494461944924, | |
| "grad_norm": 3.625079870223999, | |
| "learning_rate": 1.9668846493430522e-05, | |
| "loss": 1.47, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.11055787013514887, | |
| "grad_norm": 2.9793193340301514, | |
| "learning_rate": 1.9662092291420233e-05, | |
| "loss": 1.3969, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1113707956508485, | |
| "grad_norm": 2.5699939727783203, | |
| "learning_rate": 1.965527108709798e-05, | |
| "loss": 1.4258, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.11218372116654811, | |
| "grad_norm": 2.7961106300354004, | |
| "learning_rate": 1.964838292776579e-05, | |
| "loss": 1.4637, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.11299664668224774, | |
| "grad_norm": 3.3331451416015625, | |
| "learning_rate": 1.9641427861189973e-05, | |
| "loss": 1.3976, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.11380957219794736, | |
| "grad_norm": 2.5645205974578857, | |
| "learning_rate": 1.963440593560083e-05, | |
| "loss": 1.409, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11462249771364699, | |
| "grad_norm": 2.5996487140655518, | |
| "learning_rate": 1.9627317199692287e-05, | |
| "loss": 1.4834, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.1154354232293466, | |
| "grad_norm": 2.9811034202575684, | |
| "learning_rate": 1.962016170262157e-05, | |
| "loss": 1.4508, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.11624834874504623, | |
| "grad_norm": 2.4133377075195312, | |
| "learning_rate": 1.961293949400888e-05, | |
| "loss": 1.4077, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.11706127426074586, | |
| "grad_norm": 2.622091770172119, | |
| "learning_rate": 1.960565062393701e-05, | |
| "loss": 1.4046, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.11787419977644548, | |
| "grad_norm": 2.857346534729004, | |
| "learning_rate": 1.9598295142951035e-05, | |
| "loss": 1.4217, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1186871252921451, | |
| "grad_norm": 2.600935220718384, | |
| "learning_rate": 1.9590873102057948e-05, | |
| "loss": 1.403, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.11950005080784473, | |
| "grad_norm": 2.820359945297241, | |
| "learning_rate": 1.9583384552726294e-05, | |
| "loss": 1.4358, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.12031297632354436, | |
| "grad_norm": 2.6272051334381104, | |
| "learning_rate": 1.957582954688584e-05, | |
| "loss": 1.4505, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.12112590183924397, | |
| "grad_norm": 2.8003182411193848, | |
| "learning_rate": 1.9568208136927177e-05, | |
| "loss": 1.3977, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.1219388273549436, | |
| "grad_norm": 3.560518264770508, | |
| "learning_rate": 1.9560520375701408e-05, | |
| "loss": 1.3992, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12275175287064323, | |
| "grad_norm": 2.6377906799316406, | |
| "learning_rate": 1.9552766316519726e-05, | |
| "loss": 1.4022, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.12356467838634286, | |
| "grad_norm": 2.7982730865478516, | |
| "learning_rate": 1.9544946013153093e-05, | |
| "loss": 1.409, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.12437760390204247, | |
| "grad_norm": 2.7179160118103027, | |
| "learning_rate": 1.9537059519831822e-05, | |
| "loss": 1.415, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.12519052941774209, | |
| "grad_norm": 2.959397554397583, | |
| "learning_rate": 1.9529106891245244e-05, | |
| "loss": 1.4296, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.12600345493344173, | |
| "grad_norm": 3.3228979110717773, | |
| "learning_rate": 1.9521088182541298e-05, | |
| "loss": 1.4282, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12681638044914134, | |
| "grad_norm": 2.763151168823242, | |
| "learning_rate": 1.951300344932616e-05, | |
| "loss": 1.3686, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.12762930596484098, | |
| "grad_norm": 2.863190174102783, | |
| "learning_rate": 1.9504852747663862e-05, | |
| "loss": 1.4227, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1284422314805406, | |
| "grad_norm": 2.889604330062866, | |
| "learning_rate": 1.9496636134075894e-05, | |
| "loss": 1.4658, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1292551569962402, | |
| "grad_norm": 3.024122476577759, | |
| "learning_rate": 1.9488353665540813e-05, | |
| "loss": 1.4081, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.13006808251193985, | |
| "grad_norm": 2.4810218811035156, | |
| "learning_rate": 1.9480005399493857e-05, | |
| "loss": 1.4296, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13088100802763947, | |
| "grad_norm": 2.643673896789551, | |
| "learning_rate": 1.9471591393826536e-05, | |
| "loss": 1.3652, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.13169393354333908, | |
| "grad_norm": 2.888829231262207, | |
| "learning_rate": 1.9463111706886234e-05, | |
| "loss": 1.4003, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.13250685905903872, | |
| "grad_norm": 2.7480149269104004, | |
| "learning_rate": 1.9454566397475813e-05, | |
| "loss": 1.4195, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.13331978457473834, | |
| "grad_norm": 2.68972110748291, | |
| "learning_rate": 1.944595552485319e-05, | |
| "loss": 1.3848, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.13413271009043795, | |
| "grad_norm": 2.8888440132141113, | |
| "learning_rate": 1.943727914873094e-05, | |
| "loss": 1.481, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1349456356061376, | |
| "grad_norm": 2.8409390449523926, | |
| "learning_rate": 1.9428537329275862e-05, | |
| "loss": 1.4176, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1357585611218372, | |
| "grad_norm": 2.4992098808288574, | |
| "learning_rate": 1.941973012710859e-05, | |
| "loss": 1.395, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.13657148663753685, | |
| "grad_norm": 4.587447166442871, | |
| "learning_rate": 1.941085760330316e-05, | |
| "loss": 1.3905, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.13738441215323646, | |
| "grad_norm": 2.4778833389282227, | |
| "learning_rate": 1.940191981938657e-05, | |
| "loss": 1.3707, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.13819733766893608, | |
| "grad_norm": 2.7843387126922607, | |
| "learning_rate": 1.9392916837338376e-05, | |
| "loss": 1.3698, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13901026318463572, | |
| "grad_norm": 2.731437921524048, | |
| "learning_rate": 1.9383848719590257e-05, | |
| "loss": 1.4358, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.13982318870033533, | |
| "grad_norm": 3.079371213912964, | |
| "learning_rate": 1.9374715529025575e-05, | |
| "loss": 1.4027, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.14063611421603495, | |
| "grad_norm": 3.6557998657226562, | |
| "learning_rate": 1.9365517328978943e-05, | |
| "loss": 1.428, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1414490397317346, | |
| "grad_norm": 2.9291248321533203, | |
| "learning_rate": 1.9356254183235785e-05, | |
| "loss": 1.4039, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.1422619652474342, | |
| "grad_norm": 2.498507499694824, | |
| "learning_rate": 1.93469261560319e-05, | |
| "loss": 1.3731, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14307489076313382, | |
| "grad_norm": 3.6117923259735107, | |
| "learning_rate": 1.9337533312053002e-05, | |
| "loss": 1.4263, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.14388781627883346, | |
| "grad_norm": 2.490755319595337, | |
| "learning_rate": 1.9328075716434287e-05, | |
| "loss": 1.4215, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.14470074179453307, | |
| "grad_norm": 2.9008986949920654, | |
| "learning_rate": 1.931855343475998e-05, | |
| "loss": 1.3968, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.14551366731023269, | |
| "grad_norm": 5.284730911254883, | |
| "learning_rate": 1.930896653306286e-05, | |
| "loss": 1.418, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.14632659282593233, | |
| "grad_norm": 2.826756000518799, | |
| "learning_rate": 1.929931507782383e-05, | |
| "loss": 1.3996, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14713951834163194, | |
| "grad_norm": 2.8084652423858643, | |
| "learning_rate": 1.9289599135971437e-05, | |
| "loss": 1.374, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.14795244385733158, | |
| "grad_norm": 2.736046075820923, | |
| "learning_rate": 1.9279818774881418e-05, | |
| "loss": 1.3687, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1487653693730312, | |
| "grad_norm": 2.7098567485809326, | |
| "learning_rate": 1.9269974062376224e-05, | |
| "loss": 1.4059, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.1495782948887308, | |
| "grad_norm": 2.7764878273010254, | |
| "learning_rate": 1.926006506672456e-05, | |
| "loss": 1.42, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.15039122040443045, | |
| "grad_norm": 2.7715649604797363, | |
| "learning_rate": 1.9250091856640895e-05, | |
| "loss": 1.4549, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.15120414592013007, | |
| "grad_norm": 2.4104158878326416, | |
| "learning_rate": 1.9240054501285015e-05, | |
| "loss": 1.4129, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.15201707143582968, | |
| "grad_norm": 2.75614595413208, | |
| "learning_rate": 1.922995307026151e-05, | |
| "loss": 1.3959, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.15282999695152932, | |
| "grad_norm": 2.813262939453125, | |
| "learning_rate": 1.921978763361931e-05, | |
| "loss": 1.4139, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.15364292246722894, | |
| "grad_norm": 2.5106594562530518, | |
| "learning_rate": 1.9209558261851194e-05, | |
| "loss": 1.3683, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.15445584798292855, | |
| "grad_norm": 2.9257330894470215, | |
| "learning_rate": 1.919926502589331e-05, | |
| "loss": 1.3387, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1552687734986282, | |
| "grad_norm": 2.5029993057250977, | |
| "learning_rate": 1.9188907997124666e-05, | |
| "loss": 1.3892, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.1560816990143278, | |
| "grad_norm": 2.6917388439178467, | |
| "learning_rate": 1.9178487247366652e-05, | |
| "loss": 1.3946, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.15689462453002745, | |
| "grad_norm": 2.7038626670837402, | |
| "learning_rate": 1.916800284888253e-05, | |
| "loss": 1.4082, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.15770755004572706, | |
| "grad_norm": 2.586545467376709, | |
| "learning_rate": 1.915745487437694e-05, | |
| "loss": 1.3431, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.15852047556142668, | |
| "grad_norm": 3.043938159942627, | |
| "learning_rate": 1.9146843396995396e-05, | |
| "loss": 1.3967, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15933340107712632, | |
| "grad_norm": 2.80709171295166, | |
| "learning_rate": 1.9136168490323772e-05, | |
| "loss": 1.3617, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.16014632659282593, | |
| "grad_norm": 5.03334903717041, | |
| "learning_rate": 1.9125430228387794e-05, | |
| "loss": 1.4326, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.16095925210852555, | |
| "grad_norm": 4.717489719390869, | |
| "learning_rate": 1.9114628685652535e-05, | |
| "loss": 1.3459, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.1617721776242252, | |
| "grad_norm": 3.0668435096740723, | |
| "learning_rate": 1.9103763937021887e-05, | |
| "loss": 1.3763, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.1625851031399248, | |
| "grad_norm": 2.712122678756714, | |
| "learning_rate": 1.909283605783805e-05, | |
| "loss": 1.3319, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16339802865562442, | |
| "grad_norm": 2.7631924152374268, | |
| "learning_rate": 1.9081845123881002e-05, | |
| "loss": 1.3641, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.16421095417132406, | |
| "grad_norm": 3.499955654144287, | |
| "learning_rate": 1.9070791211367984e-05, | |
| "loss": 1.3259, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.16502387968702367, | |
| "grad_norm": 2.913755416870117, | |
| "learning_rate": 1.9059674396952963e-05, | |
| "loss": 1.3386, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.16583680520272331, | |
| "grad_norm": 2.5671772956848145, | |
| "learning_rate": 1.90484947577261e-05, | |
| "loss": 1.3301, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.16664973071842293, | |
| "grad_norm": 3.3566508293151855, | |
| "learning_rate": 1.903725237121322e-05, | |
| "loss": 1.3375, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16746265623412254, | |
| "grad_norm": 2.3617210388183594, | |
| "learning_rate": 1.902594731537527e-05, | |
| "loss": 1.4476, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.16827558174982218, | |
| "grad_norm": 2.8202669620513916, | |
| "learning_rate": 1.901457966860779e-05, | |
| "loss": 1.334, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.1690885072655218, | |
| "grad_norm": 2.5990843772888184, | |
| "learning_rate": 1.9003149509740347e-05, | |
| "loss": 1.4321, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1699014327812214, | |
| "grad_norm": 5.9826507568359375, | |
| "learning_rate": 1.899165691803601e-05, | |
| "loss": 1.4338, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.17071435829692105, | |
| "grad_norm": 3.9570019245147705, | |
| "learning_rate": 1.8980101973190787e-05, | |
| "loss": 1.3265, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.17152728381262067, | |
| "grad_norm": 2.8985307216644287, | |
| "learning_rate": 1.896848475533309e-05, | |
| "loss": 1.3297, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.17234020932832028, | |
| "grad_norm": 3.2575559616088867, | |
| "learning_rate": 1.8956805345023145e-05, | |
| "loss": 1.4086, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.17315313484401992, | |
| "grad_norm": 3.264796733856201, | |
| "learning_rate": 1.894506382325248e-05, | |
| "loss": 1.391, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.17396606035971954, | |
| "grad_norm": 2.767975330352783, | |
| "learning_rate": 1.8933260271443313e-05, | |
| "loss": 1.3731, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.17477898587541915, | |
| "grad_norm": 2.3556087017059326, | |
| "learning_rate": 1.8921394771448032e-05, | |
| "loss": 1.3288, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1755919113911188, | |
| "grad_norm": 4.253211975097656, | |
| "learning_rate": 1.89094674055486e-05, | |
| "loss": 1.3776, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1764048369068184, | |
| "grad_norm": 3.0681605339050293, | |
| "learning_rate": 1.889747825645599e-05, | |
| "loss": 1.4169, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.17721776242251805, | |
| "grad_norm": 2.3741588592529297, | |
| "learning_rate": 1.8885427407309627e-05, | |
| "loss": 1.3392, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.17803068793821766, | |
| "grad_norm": 2.968780279159546, | |
| "learning_rate": 1.887331494167678e-05, | |
| "loss": 1.4019, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.17884361345391728, | |
| "grad_norm": 2.3684914112091064, | |
| "learning_rate": 1.8861140943552014e-05, | |
| "loss": 1.3599, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17965653896961692, | |
| "grad_norm": 3.0405993461608887, | |
| "learning_rate": 1.884890549735659e-05, | |
| "loss": 1.4245, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.18046946448531653, | |
| "grad_norm": 3.397047281265259, | |
| "learning_rate": 1.8836608687937883e-05, | |
| "loss": 1.392, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.18128239000101615, | |
| "grad_norm": 2.4693644046783447, | |
| "learning_rate": 1.8824250600568798e-05, | |
| "loss": 1.3726, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1820953155167158, | |
| "grad_norm": 6.75039005279541, | |
| "learning_rate": 1.8811831320947177e-05, | |
| "loss": 1.3473, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.1829082410324154, | |
| "grad_norm": 2.922574758529663, | |
| "learning_rate": 1.879935093519519e-05, | |
| "loss": 1.4221, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.18372116654811502, | |
| "grad_norm": 5.6719136238098145, | |
| "learning_rate": 1.878680952985877e-05, | |
| "loss": 1.3844, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.18453409206381466, | |
| "grad_norm": 2.6967201232910156, | |
| "learning_rate": 1.8774207191906976e-05, | |
| "loss": 1.344, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.18534701757951427, | |
| "grad_norm": 3.049881935119629, | |
| "learning_rate": 1.8761544008731426e-05, | |
| "loss": 1.3912, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.18615994309521391, | |
| "grad_norm": 3.1408843994140625, | |
| "learning_rate": 1.874882006814565e-05, | |
| "loss": 1.4048, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.18697286861091353, | |
| "grad_norm": 2.6653666496276855, | |
| "learning_rate": 1.8736035458384528e-05, | |
| "loss": 1.3844, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.18778579412661314, | |
| "grad_norm": 2.6866488456726074, | |
| "learning_rate": 1.8723190268103634e-05, | |
| "loss": 1.3586, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.18859871964231278, | |
| "grad_norm": 3.2653231620788574, | |
| "learning_rate": 1.8710284586378645e-05, | |
| "loss": 1.3856, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.1894116451580124, | |
| "grad_norm": 2.841388463973999, | |
| "learning_rate": 1.8697318502704734e-05, | |
| "loss": 1.3868, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.190224570673712, | |
| "grad_norm": 2.797558307647705, | |
| "learning_rate": 1.8684292106995916e-05, | |
| "loss": 1.3885, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.19103749618941165, | |
| "grad_norm": 2.915003776550293, | |
| "learning_rate": 1.8671205489584453e-05, | |
| "loss": 1.3434, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.19185042170511127, | |
| "grad_norm": 3.2142281532287598, | |
| "learning_rate": 1.865805874122021e-05, | |
| "loss": 1.3975, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.19266334722081088, | |
| "grad_norm": 3.0831453800201416, | |
| "learning_rate": 1.8644851953070045e-05, | |
| "loss": 1.367, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.19347627273651052, | |
| "grad_norm": 3.2555181980133057, | |
| "learning_rate": 1.863158521671716e-05, | |
| "loss": 1.33, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.19428919825221014, | |
| "grad_norm": 2.8768310546875, | |
| "learning_rate": 1.8618258624160465e-05, | |
| "loss": 1.3867, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.19510212376790975, | |
| "grad_norm": 2.9737942218780518, | |
| "learning_rate": 1.8604872267813954e-05, | |
| "loss": 1.3726, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1959150492836094, | |
| "grad_norm": 2.5942904949188232, | |
| "learning_rate": 1.859142624050605e-05, | |
| "loss": 1.3704, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.196727974799309, | |
| "grad_norm": 2.6901443004608154, | |
| "learning_rate": 1.8577920635478976e-05, | |
| "loss": 1.3523, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.19754090031500865, | |
| "grad_norm": 2.4508392810821533, | |
| "learning_rate": 1.8564355546388094e-05, | |
| "loss": 1.3758, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.19835382583070826, | |
| "grad_norm": 2.3041279315948486, | |
| "learning_rate": 1.855073106730126e-05, | |
| "loss": 1.3491, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.19916675134640788, | |
| "grad_norm": 2.8388736248016357, | |
| "learning_rate": 1.8537047292698175e-05, | |
| "loss": 1.3578, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.19997967686210752, | |
| "grad_norm": 3.058314085006714, | |
| "learning_rate": 1.852330431746973e-05, | |
| "loss": 1.3547, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.20079260237780713, | |
| "grad_norm": 2.881788492202759, | |
| "learning_rate": 1.8509502236917353e-05, | |
| "loss": 1.3823, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.20160552789350675, | |
| "grad_norm": 2.623408794403076, | |
| "learning_rate": 1.8495641146752322e-05, | |
| "loss": 1.4516, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2024184534092064, | |
| "grad_norm": 2.662614345550537, | |
| "learning_rate": 1.848172114309513e-05, | |
| "loss": 1.3924, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.203231378924906, | |
| "grad_norm": 2.520263671875, | |
| "learning_rate": 1.8467742322474822e-05, | |
| "loss": 1.4097, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20404430444060562, | |
| "grad_norm": 4.465703964233398, | |
| "learning_rate": 1.845370478182829e-05, | |
| "loss": 1.3645, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.20485722995630526, | |
| "grad_norm": 2.5109176635742188, | |
| "learning_rate": 1.8439608618499637e-05, | |
| "loss": 1.3238, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.20567015547200487, | |
| "grad_norm": 2.703659772872925, | |
| "learning_rate": 1.842545393023949e-05, | |
| "loss": 1.4027, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.20648308098770451, | |
| "grad_norm": 3.483933448791504, | |
| "learning_rate": 1.841124081520431e-05, | |
| "loss": 1.4167, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.20729600650340413, | |
| "grad_norm": 2.7172889709472656, | |
| "learning_rate": 1.8396969371955724e-05, | |
| "loss": 1.3017, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.20810893201910374, | |
| "grad_norm": 2.512045383453369, | |
| "learning_rate": 1.838263969945985e-05, | |
| "loss": 1.4112, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.20892185753480338, | |
| "grad_norm": 2.4449141025543213, | |
| "learning_rate": 1.836825189708659e-05, | |
| "loss": 1.3396, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.209734783050503, | |
| "grad_norm": 2.9280951023101807, | |
| "learning_rate": 1.8353806064608953e-05, | |
| "loss": 1.3461, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2105477085662026, | |
| "grad_norm": 3.962769031524658, | |
| "learning_rate": 1.833930230220236e-05, | |
| "loss": 1.3347, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.21136063408190225, | |
| "grad_norm": 3.3168771266937256, | |
| "learning_rate": 1.8324740710443955e-05, | |
| "loss": 1.3264, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.21217355959760187, | |
| "grad_norm": 2.754786252975464, | |
| "learning_rate": 1.831012139031189e-05, | |
| "loss": 1.3859, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.21298648511330148, | |
| "grad_norm": 2.5179426670074463, | |
| "learning_rate": 1.829544444318466e-05, | |
| "loss": 1.3653, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.21379941062900112, | |
| "grad_norm": 2.9228906631469727, | |
| "learning_rate": 1.8280709970840352e-05, | |
| "loss": 1.3929, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.21461233614470074, | |
| "grad_norm": 2.732806921005249, | |
| "learning_rate": 1.8265918075455985e-05, | |
| "loss": 1.3197, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.21542526166040035, | |
| "grad_norm": 2.7236287593841553, | |
| "learning_rate": 1.8251068859606777e-05, | |
| "loss": 1.3156, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2162381871761, | |
| "grad_norm": 3.677654504776001, | |
| "learning_rate": 1.823616242626542e-05, | |
| "loss": 1.3565, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2170511126917996, | |
| "grad_norm": 2.4574098587036133, | |
| "learning_rate": 1.8221198878801415e-05, | |
| "loss": 1.3802, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.21786403820749925, | |
| "grad_norm": 3.2601144313812256, | |
| "learning_rate": 1.8206178320980295e-05, | |
| "loss": 1.3606, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.21867696372319886, | |
| "grad_norm": 2.4183156490325928, | |
| "learning_rate": 1.819110085696295e-05, | |
| "loss": 1.3327, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.21948988923889848, | |
| "grad_norm": 2.6820755004882812, | |
| "learning_rate": 1.817596659130489e-05, | |
| "loss": 1.3676, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22030281475459812, | |
| "grad_norm": 2.619580030441284, | |
| "learning_rate": 1.816077562895551e-05, | |
| "loss": 1.408, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.22111574027029773, | |
| "grad_norm": 2.4499645233154297, | |
| "learning_rate": 1.814552807525738e-05, | |
| "loss": 1.3445, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.22192866578599735, | |
| "grad_norm": 2.5966873168945312, | |
| "learning_rate": 1.81302240359455e-05, | |
| "loss": 1.3354, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.222741591301697, | |
| "grad_norm": 8.227926254272461, | |
| "learning_rate": 1.8114863617146576e-05, | |
| "loss": 1.3495, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2235545168173966, | |
| "grad_norm": 5.334491729736328, | |
| "learning_rate": 1.8099446925378278e-05, | |
| "loss": 1.3845, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.22436744233309622, | |
| "grad_norm": 2.436473846435547, | |
| "learning_rate": 1.8083974067548506e-05, | |
| "loss": 1.3152, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.22518036784879586, | |
| "grad_norm": 2.4906110763549805, | |
| "learning_rate": 1.806844515095465e-05, | |
| "loss": 1.3213, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.22599329336449547, | |
| "grad_norm": 2.627547264099121, | |
| "learning_rate": 1.8052860283282832e-05, | |
| "loss": 1.3394, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.22680621888019512, | |
| "grad_norm": 3.9034616947174072, | |
| "learning_rate": 1.8037219572607177e-05, | |
| "loss": 1.2956, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.22761914439589473, | |
| "grad_norm": 2.9307639598846436, | |
| "learning_rate": 1.8021523127389066e-05, | |
| "loss": 1.3507, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22843206991159434, | |
| "grad_norm": 2.6711225509643555, | |
| "learning_rate": 1.800577105647635e-05, | |
| "loss": 1.4043, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.22924499542729398, | |
| "grad_norm": 2.9251246452331543, | |
| "learning_rate": 1.7989963469102643e-05, | |
| "loss": 1.3424, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2300579209429936, | |
| "grad_norm": 2.2818679809570312, | |
| "learning_rate": 1.797410047488653e-05, | |
| "loss": 1.334, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.2308708464586932, | |
| "grad_norm": 2.6961264610290527, | |
| "learning_rate": 1.7958182183830816e-05, | |
| "loss": 1.3411, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.23168377197439285, | |
| "grad_norm": 2.5082268714904785, | |
| "learning_rate": 1.794220870632177e-05, | |
| "loss": 1.3815, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.23249669749009247, | |
| "grad_norm": 2.6569674015045166, | |
| "learning_rate": 1.7926180153128358e-05, | |
| "loss": 1.4037, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.23330962300579208, | |
| "grad_norm": 2.559483289718628, | |
| "learning_rate": 1.791009663540146e-05, | |
| "loss": 1.333, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.23412254852149172, | |
| "grad_norm": 2.6982040405273438, | |
| "learning_rate": 1.789395826467312e-05, | |
| "loss": 1.4168, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.23493547403719134, | |
| "grad_norm": 2.414900541305542, | |
| "learning_rate": 1.7877765152855757e-05, | |
| "loss": 1.3583, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.23574839955289095, | |
| "grad_norm": 2.465045928955078, | |
| "learning_rate": 1.78615174122414e-05, | |
| "loss": 1.44, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2365613250685906, | |
| "grad_norm": 2.306795597076416, | |
| "learning_rate": 1.78452151555009e-05, | |
| "loss": 1.3215, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.2373742505842902, | |
| "grad_norm": 2.6841700077056885, | |
| "learning_rate": 1.7828858495683162e-05, | |
| "loss": 1.351, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.23818717609998985, | |
| "grad_norm": 2.4231340885162354, | |
| "learning_rate": 1.781244754621434e-05, | |
| "loss": 1.3923, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.23900010161568946, | |
| "grad_norm": 2.8300161361694336, | |
| "learning_rate": 1.779598242089707e-05, | |
| "loss": 1.3876, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.23981302713138908, | |
| "grad_norm": 2.6287200450897217, | |
| "learning_rate": 1.7779463233909677e-05, | |
| "loss": 1.3609, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.24062595264708872, | |
| "grad_norm": 2.656332015991211, | |
| "learning_rate": 1.7762890099805362e-05, | |
| "loss": 1.3538, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.24143887816278833, | |
| "grad_norm": 2.5331099033355713, | |
| "learning_rate": 1.774626313351145e-05, | |
| "loss": 1.3154, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.24225180367848795, | |
| "grad_norm": 2.8881306648254395, | |
| "learning_rate": 1.7729582450328547e-05, | |
| "loss": 1.3561, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.2430647291941876, | |
| "grad_norm": 2.4491260051727295, | |
| "learning_rate": 1.771284816592978e-05, | |
| "loss": 1.3494, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.2438776547098872, | |
| "grad_norm": 2.8161392211914062, | |
| "learning_rate": 1.7696060396359956e-05, | |
| "loss": 1.3125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.24469058022558682, | |
| "grad_norm": 2.788238048553467, | |
| "learning_rate": 1.7679219258034798e-05, | |
| "loss": 1.41, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.24550350574128646, | |
| "grad_norm": 3.0948519706726074, | |
| "learning_rate": 1.7662324867740102e-05, | |
| "loss": 1.4138, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.24631643125698607, | |
| "grad_norm": 3.617783308029175, | |
| "learning_rate": 1.7645377342630956e-05, | |
| "loss": 1.3995, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.24712935677268572, | |
| "grad_norm": 2.713531255722046, | |
| "learning_rate": 1.76283768002309e-05, | |
| "loss": 1.354, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.24794228228838533, | |
| "grad_norm": 3.9215407371520996, | |
| "learning_rate": 1.7611323358431145e-05, | |
| "loss": 1.3939, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.24875520780408494, | |
| "grad_norm": 3.519932508468628, | |
| "learning_rate": 1.759421713548971e-05, | |
| "loss": 1.3311, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.24956813331978459, | |
| "grad_norm": 3.0680055618286133, | |
| "learning_rate": 1.757705825003065e-05, | |
| "loss": 1.4131, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.25038105883548417, | |
| "grad_norm": 2.456533908843994, | |
| "learning_rate": 1.7559846821043205e-05, | |
| "loss": 1.3132, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.25119398435118384, | |
| "grad_norm": 2.6937081813812256, | |
| "learning_rate": 1.754258296788097e-05, | |
| "loss": 1.3041, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.25200690986688346, | |
| "grad_norm": 5.319806098937988, | |
| "learning_rate": 1.7525266810261096e-05, | |
| "loss": 1.3544, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.25281983538258307, | |
| "grad_norm": 2.9595742225646973, | |
| "learning_rate": 1.7507898468263422e-05, | |
| "loss": 1.3528, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.2536327608982827, | |
| "grad_norm": 4.085862636566162, | |
| "learning_rate": 1.7490478062329686e-05, | |
| "loss": 1.3314, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.2544456864139823, | |
| "grad_norm": 2.4585909843444824, | |
| "learning_rate": 1.7473005713262644e-05, | |
| "loss": 1.3622, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.25525861192968197, | |
| "grad_norm": 2.4798450469970703, | |
| "learning_rate": 1.7455481542225272e-05, | |
| "loss": 1.3804, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.2560715374453816, | |
| "grad_norm": 2.686068534851074, | |
| "learning_rate": 1.7437905670739893e-05, | |
| "loss": 1.2945, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2568844629610812, | |
| "grad_norm": 2.7424585819244385, | |
| "learning_rate": 1.7420278220687366e-05, | |
| "loss": 1.3561, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.2576973884767808, | |
| "grad_norm": 2.964237928390503, | |
| "learning_rate": 1.7402599314306207e-05, | |
| "loss": 1.3701, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.2585103139924804, | |
| "grad_norm": 2.7983458042144775, | |
| "learning_rate": 1.7384869074191777e-05, | |
| "loss": 1.3536, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.25932323950818004, | |
| "grad_norm": 2.6008524894714355, | |
| "learning_rate": 1.7367087623295394e-05, | |
| "loss": 1.3394, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.2601361650238797, | |
| "grad_norm": 2.4116249084472656, | |
| "learning_rate": 1.7349255084923517e-05, | |
| "loss": 1.3785, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2609490905395793, | |
| "grad_norm": 2.9649388790130615, | |
| "learning_rate": 1.7331371582736864e-05, | |
| "loss": 1.3779, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.26176201605527893, | |
| "grad_norm": 2.692847490310669, | |
| "learning_rate": 1.731343724074957e-05, | |
| "loss": 1.3715, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.26257494157097855, | |
| "grad_norm": 2.6246955394744873, | |
| "learning_rate": 1.7295452183328317e-05, | |
| "loss": 1.3856, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.26338786708667816, | |
| "grad_norm": 2.822334051132202, | |
| "learning_rate": 1.7277416535191478e-05, | |
| "loss": 1.3289, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.26420079260237783, | |
| "grad_norm": 2.703158378601074, | |
| "learning_rate": 1.7259330421408247e-05, | |
| "loss": 1.3447, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.26501371811807745, | |
| "grad_norm": 2.5357322692871094, | |
| "learning_rate": 1.7241193967397784e-05, | |
| "loss": 1.3414, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.26582664363377706, | |
| "grad_norm": 2.7839202880859375, | |
| "learning_rate": 1.7223007298928322e-05, | |
| "loss": 1.3725, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.2666395691494767, | |
| "grad_norm": 2.6645684242248535, | |
| "learning_rate": 1.7204770542116326e-05, | |
| "loss": 1.3163, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.2674524946651763, | |
| "grad_norm": 4.677945137023926, | |
| "learning_rate": 1.7186483823425582e-05, | |
| "loss": 1.3583, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.2682654201808759, | |
| "grad_norm": 2.948094367980957, | |
| "learning_rate": 1.7168147269666357e-05, | |
| "loss": 1.3643, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26907834569657557, | |
| "grad_norm": 2.5047991275787354, | |
| "learning_rate": 1.714976100799449e-05, | |
| "loss": 1.3542, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.2698912712122752, | |
| "grad_norm": 2.680239677429199, | |
| "learning_rate": 1.713132516591053e-05, | |
| "loss": 1.3204, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.2707041967279748, | |
| "grad_norm": 2.703165054321289, | |
| "learning_rate": 1.7112839871258838e-05, | |
| "loss": 1.3467, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2715171222436744, | |
| "grad_norm": 2.5855846405029297, | |
| "learning_rate": 1.7094305252226713e-05, | |
| "loss": 1.3807, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.272330047759374, | |
| "grad_norm": 2.8401761054992676, | |
| "learning_rate": 1.7075721437343488e-05, | |
| "loss": 1.4032, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2731429732750737, | |
| "grad_norm": 2.727287530899048, | |
| "learning_rate": 1.705708855547966e-05, | |
| "loss": 1.3416, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2739558987907733, | |
| "grad_norm": 2.9767589569091797, | |
| "learning_rate": 1.7038406735845967e-05, | |
| "loss": 1.3062, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2747688243064729, | |
| "grad_norm": 2.6532137393951416, | |
| "learning_rate": 1.7019676107992523e-05, | |
| "loss": 1.3717, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.27558174982217254, | |
| "grad_norm": 5.618951797485352, | |
| "learning_rate": 1.70008968018079e-05, | |
| "loss": 1.4021, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.27639467533787215, | |
| "grad_norm": 2.75219464302063, | |
| "learning_rate": 1.6982068947518235e-05, | |
| "loss": 1.3345, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27720760085357177, | |
| "grad_norm": 2.7771074771881104, | |
| "learning_rate": 1.6963192675686312e-05, | |
| "loss": 1.3613, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.27802052636927144, | |
| "grad_norm": 2.4822003841400146, | |
| "learning_rate": 1.694426811721069e-05, | |
| "loss": 1.3465, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.27883345188497105, | |
| "grad_norm": 2.684894323348999, | |
| "learning_rate": 1.6925295403324758e-05, | |
| "loss": 1.337, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.27964637740067066, | |
| "grad_norm": 2.804255962371826, | |
| "learning_rate": 1.6906274665595854e-05, | |
| "loss": 1.2862, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2804593029163703, | |
| "grad_norm": 2.7327306270599365, | |
| "learning_rate": 1.688720603592432e-05, | |
| "loss": 1.3826, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2812722284320699, | |
| "grad_norm": 3.0967769622802734, | |
| "learning_rate": 1.6868089646542632e-05, | |
| "loss": 1.3406, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.28208515394776956, | |
| "grad_norm": 2.4972376823425293, | |
| "learning_rate": 1.6848925630014445e-05, | |
| "loss": 1.3315, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.2828980794634692, | |
| "grad_norm": 22.60991668701172, | |
| "learning_rate": 1.6829714119233688e-05, | |
| "loss": 1.3325, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2837110049791688, | |
| "grad_norm": 3.207625389099121, | |
| "learning_rate": 1.6810455247423634e-05, | |
| "loss": 1.3926, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2845239304948684, | |
| "grad_norm": 2.6568946838378906, | |
| "learning_rate": 1.6791149148136003e-05, | |
| "loss": 1.3464, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.285336856010568, | |
| "grad_norm": 2.9483156204223633, | |
| "learning_rate": 1.677179595525e-05, | |
| "loss": 1.2875, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.28614978152626763, | |
| "grad_norm": 2.841442584991455, | |
| "learning_rate": 1.675239580297141e-05, | |
| "loss": 1.3441, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2869627070419673, | |
| "grad_norm": 3.3877551555633545, | |
| "learning_rate": 1.6732948825831657e-05, | |
| "loss": 1.3662, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.2877756325576669, | |
| "grad_norm": 2.9442946910858154, | |
| "learning_rate": 1.671345515868688e-05, | |
| "loss": 1.3075, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.28858855807336653, | |
| "grad_norm": 2.672950029373169, | |
| "learning_rate": 1.6693914936716983e-05, | |
| "loss": 1.2982, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.28940148358906614, | |
| "grad_norm": 2.7699198722839355, | |
| "learning_rate": 1.6674328295424723e-05, | |
| "loss": 1.3331, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.29021440910476576, | |
| "grad_norm": 2.578444719314575, | |
| "learning_rate": 1.6654695370634738e-05, | |
| "loss": 1.3768, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.29102733462046537, | |
| "grad_norm": 2.748466968536377, | |
| "learning_rate": 1.6635016298492628e-05, | |
| "loss": 1.3108, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.29184026013616504, | |
| "grad_norm": 2.818321943283081, | |
| "learning_rate": 1.6615291215464005e-05, | |
| "loss": 1.2586, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.29265318565186466, | |
| "grad_norm": 3.6742396354675293, | |
| "learning_rate": 1.6595520258333545e-05, | |
| "loss": 1.3112, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.29346611116756427, | |
| "grad_norm": 2.999140977859497, | |
| "learning_rate": 1.657570356420404e-05, | |
| "loss": 1.2923, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.2942790366832639, | |
| "grad_norm": 2.704463481903076, | |
| "learning_rate": 1.6555841270495456e-05, | |
| "loss": 1.3329, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2950919621989635, | |
| "grad_norm": 3.2639801502227783, | |
| "learning_rate": 1.6535933514943955e-05, | |
| "loss": 1.3215, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.29590488771466317, | |
| "grad_norm": 3.2200841903686523, | |
| "learning_rate": 1.6515980435600965e-05, | |
| "loss": 1.3792, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.2967178132303628, | |
| "grad_norm": 2.9226245880126953, | |
| "learning_rate": 1.6495982170832224e-05, | |
| "loss": 1.3565, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2975307387460624, | |
| "grad_norm": 3.096405029296875, | |
| "learning_rate": 1.6475938859316795e-05, | |
| "loss": 1.3857, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.298343664261762, | |
| "grad_norm": 2.7694365978240967, | |
| "learning_rate": 1.6455850640046134e-05, | |
| "loss": 1.3782, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.2991565897774616, | |
| "grad_norm": 3.011751890182495, | |
| "learning_rate": 1.6435717652323097e-05, | |
| "loss": 1.3426, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.29996951529316124, | |
| "grad_norm": 2.7828853130340576, | |
| "learning_rate": 1.6415540035761008e-05, | |
| "loss": 1.3429, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.3007824408088609, | |
| "grad_norm": 2.5543785095214844, | |
| "learning_rate": 1.639531793028265e-05, | |
| "loss": 1.3768, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3015953663245605, | |
| "grad_norm": 2.8462271690368652, | |
| "learning_rate": 1.637505147611934e-05, | |
| "loss": 1.3203, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.30240829184026013, | |
| "grad_norm": 2.404257297515869, | |
| "learning_rate": 1.6354740813809917e-05, | |
| "loss": 1.3693, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.30322121735595975, | |
| "grad_norm": 2.674553394317627, | |
| "learning_rate": 1.6334386084199787e-05, | |
| "loss": 1.3518, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.30403414287165936, | |
| "grad_norm": 2.4954397678375244, | |
| "learning_rate": 1.631398742843995e-05, | |
| "loss": 1.3669, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.30484706838735903, | |
| "grad_norm": 3.333721876144409, | |
| "learning_rate": 1.629354498798601e-05, | |
| "loss": 1.3358, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.30565999390305865, | |
| "grad_norm": 2.859560966491699, | |
| "learning_rate": 1.627305890459719e-05, | |
| "loss": 1.3334, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.30647291941875826, | |
| "grad_norm": 2.8346803188323975, | |
| "learning_rate": 1.625252932033538e-05, | |
| "loss": 1.3366, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.3072858449344579, | |
| "grad_norm": 2.64909029006958, | |
| "learning_rate": 1.6231956377564095e-05, | |
| "loss": 1.3398, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3080987704501575, | |
| "grad_norm": 3.935067653656006, | |
| "learning_rate": 1.621134021894756e-05, | |
| "loss": 1.2953, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3089116959658571, | |
| "grad_norm": 5.056494235992432, | |
| "learning_rate": 1.619068098744965e-05, | |
| "loss": 1.3245, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.30972462148155677, | |
| "grad_norm": 2.9668800830841064, | |
| "learning_rate": 1.6169978826332955e-05, | |
| "loss": 1.3199, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3105375469972564, | |
| "grad_norm": 2.6101276874542236, | |
| "learning_rate": 1.6149233879157747e-05, | |
| "loss": 1.3317, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.311350472512956, | |
| "grad_norm": 2.677374839782715, | |
| "learning_rate": 1.6128446289781012e-05, | |
| "loss": 1.304, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.3121633980286556, | |
| "grad_norm": 4.049331188201904, | |
| "learning_rate": 1.610761620235543e-05, | |
| "loss": 1.3241, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.31297632354435523, | |
| "grad_norm": 2.566908836364746, | |
| "learning_rate": 1.60867437613284e-05, | |
| "loss": 1.3392, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3137892490600549, | |
| "grad_norm": 2.550367832183838, | |
| "learning_rate": 1.6065829111441e-05, | |
| "loss": 1.3274, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3146021745757545, | |
| "grad_norm": 4.543491363525391, | |
| "learning_rate": 1.6044872397727037e-05, | |
| "loss": 1.2993, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.3154151000914541, | |
| "grad_norm": 2.8900489807128906, | |
| "learning_rate": 1.6023873765511993e-05, | |
| "loss": 1.3274, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.31622802560715374, | |
| "grad_norm": 2.4930450916290283, | |
| "learning_rate": 1.6002833360412044e-05, | |
| "loss": 1.3074, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.31704095112285335, | |
| "grad_norm": 3.0221235752105713, | |
| "learning_rate": 1.5981751328333036e-05, | |
| "loss": 1.3077, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.31785387663855297, | |
| "grad_norm": 3.0569851398468018, | |
| "learning_rate": 1.5960627815469486e-05, | |
| "loss": 1.3705, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.31866680215425264, | |
| "grad_norm": 7.261632442474365, | |
| "learning_rate": 1.5939462968303554e-05, | |
| "loss": 1.3564, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.31947972766995225, | |
| "grad_norm": 3.0555789470672607, | |
| "learning_rate": 1.5918256933604047e-05, | |
| "loss": 1.3451, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.32029265318565187, | |
| "grad_norm": 3.360779047012329, | |
| "learning_rate": 1.589700985842538e-05, | |
| "loss": 1.2764, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3211055787013515, | |
| "grad_norm": 2.9022507667541504, | |
| "learning_rate": 1.5875721890106574e-05, | |
| "loss": 1.3424, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3219185042170511, | |
| "grad_norm": 5.119380474090576, | |
| "learning_rate": 1.5854393176270205e-05, | |
| "loss": 1.3392, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.32273142973275076, | |
| "grad_norm": 2.7554409503936768, | |
| "learning_rate": 1.5833023864821427e-05, | |
| "loss": 1.3762, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.3235443552484504, | |
| "grad_norm": 2.553323984146118, | |
| "learning_rate": 1.5811614103946905e-05, | |
| "loss": 1.3066, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.32435728076415, | |
| "grad_norm": 3.514381170272827, | |
| "learning_rate": 1.5790164042113805e-05, | |
| "loss": 1.3575, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.3251702062798496, | |
| "grad_norm": 2.89054012298584, | |
| "learning_rate": 1.576867382806877e-05, | |
| "loss": 1.3106, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3259831317955492, | |
| "grad_norm": 2.9955763816833496, | |
| "learning_rate": 1.5747143610836873e-05, | |
| "loss": 1.3634, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.32679605731124883, | |
| "grad_norm": 3.175438404083252, | |
| "learning_rate": 1.5725573539720592e-05, | |
| "loss": 1.2876, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3276089828269485, | |
| "grad_norm": 2.6269116401672363, | |
| "learning_rate": 1.570396376429877e-05, | |
| "loss": 1.342, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.3284219083426481, | |
| "grad_norm": 2.900568962097168, | |
| "learning_rate": 1.5682314434425593e-05, | |
| "loss": 1.3133, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.32923483385834773, | |
| "grad_norm": 2.6711323261260986, | |
| "learning_rate": 1.5660625700229526e-05, | |
| "loss": 1.2702, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.33004775937404734, | |
| "grad_norm": 2.8045928478240967, | |
| "learning_rate": 1.5638897712112303e-05, | |
| "loss": 1.3336, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.33086068488974696, | |
| "grad_norm": 2.9632303714752197, | |
| "learning_rate": 1.561713062074785e-05, | |
| "loss": 1.3546, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.33167361040544663, | |
| "grad_norm": 2.5156984329223633, | |
| "learning_rate": 1.5595324577081265e-05, | |
| "loss": 1.3587, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.33248653592114624, | |
| "grad_norm": 2.6634364128112793, | |
| "learning_rate": 1.5573479732327758e-05, | |
| "loss": 1.3317, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.33329946143684586, | |
| "grad_norm": 4.38008451461792, | |
| "learning_rate": 1.555159623797161e-05, | |
| "loss": 1.3078, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.33411238695254547, | |
| "grad_norm": 3.089078903198242, | |
| "learning_rate": 1.552967424576512e-05, | |
| "loss": 1.328, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.3349253124682451, | |
| "grad_norm": 2.9011247158050537, | |
| "learning_rate": 1.5507713907727557e-05, | |
| "loss": 1.349, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3357382379839447, | |
| "grad_norm": 2.431152582168579, | |
| "learning_rate": 1.5485715376144087e-05, | |
| "loss": 1.383, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.33655116349964437, | |
| "grad_norm": 2.6097633838653564, | |
| "learning_rate": 1.5463678803564753e-05, | |
| "loss": 1.3414, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.337364089015344, | |
| "grad_norm": 2.9973533153533936, | |
| "learning_rate": 1.5441604342803374e-05, | |
| "loss": 1.3359, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3381770145310436, | |
| "grad_norm": 2.849950075149536, | |
| "learning_rate": 1.5419492146936518e-05, | |
| "loss": 1.3378, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.3389899400467432, | |
| "grad_norm": 2.600947856903076, | |
| "learning_rate": 1.5397342369302425e-05, | |
| "loss": 1.3411, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.3398028655624428, | |
| "grad_norm": 2.946190595626831, | |
| "learning_rate": 1.5375155163499953e-05, | |
| "loss": 1.2981, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.34061579107814244, | |
| "grad_norm": 3.5300893783569336, | |
| "learning_rate": 1.5352930683387502e-05, | |
| "loss": 1.3717, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.3414287165938421, | |
| "grad_norm": 2.342288017272949, | |
| "learning_rate": 1.5330669083081956e-05, | |
| "loss": 1.2734, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3422416421095417, | |
| "grad_norm": 3.7037856578826904, | |
| "learning_rate": 1.5308370516957617e-05, | |
| "loss": 1.3402, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.34305456762524134, | |
| "grad_norm": 2.5814309120178223, | |
| "learning_rate": 1.528603513964511e-05, | |
| "loss": 1.3207, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.34386749314094095, | |
| "grad_norm": 2.4542317390441895, | |
| "learning_rate": 1.5263663106030347e-05, | |
| "loss": 1.3257, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.34468041865664056, | |
| "grad_norm": 2.689870595932007, | |
| "learning_rate": 1.5241254571253433e-05, | |
| "loss": 1.3105, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.34549334417234023, | |
| "grad_norm": 2.900061845779419, | |
| "learning_rate": 1.5218809690707583e-05, | |
| "loss": 1.3113, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.34630626968803985, | |
| "grad_norm": 2.7165238857269287, | |
| "learning_rate": 1.5196328620038059e-05, | |
| "loss": 1.335, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.34711919520373946, | |
| "grad_norm": 2.3893747329711914, | |
| "learning_rate": 1.5173811515141083e-05, | |
| "loss": 1.3062, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.3479321207194391, | |
| "grad_norm": 2.568575143814087, | |
| "learning_rate": 1.5151258532162771e-05, | |
| "loss": 1.3338, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.3487450462351387, | |
| "grad_norm": 3.406301736831665, | |
| "learning_rate": 1.5128669827498024e-05, | |
| "loss": 1.3189, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.3495579717508383, | |
| "grad_norm": 2.752307653427124, | |
| "learning_rate": 1.5106045557789453e-05, | |
| "loss": 1.331, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.350370897266538, | |
| "grad_norm": 2.570742130279541, | |
| "learning_rate": 1.5083385879926309e-05, | |
| "loss": 1.2887, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.3511838227822376, | |
| "grad_norm": 2.4754555225372314, | |
| "learning_rate": 1.5060690951043385e-05, | |
| "loss": 1.3432, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3519967482979372, | |
| "grad_norm": 3.853609561920166, | |
| "learning_rate": 1.5037960928519902e-05, | |
| "loss": 1.3625, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.3528096738136368, | |
| "grad_norm": 2.6506130695343018, | |
| "learning_rate": 1.501519596997847e-05, | |
| "loss": 1.2797, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.35362259932933643, | |
| "grad_norm": 2.8529601097106934, | |
| "learning_rate": 1.499239623328394e-05, | |
| "loss": 1.2868, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.3544355248450361, | |
| "grad_norm": 4.091727256774902, | |
| "learning_rate": 1.4969561876542348e-05, | |
| "loss": 1.2648, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3552484503607357, | |
| "grad_norm": 2.5217483043670654, | |
| "learning_rate": 1.4946693058099802e-05, | |
| "loss": 1.2792, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.3560613758764353, | |
| "grad_norm": 3.035297155380249, | |
| "learning_rate": 1.4923789936541378e-05, | |
| "loss": 1.3267, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.35687430139213494, | |
| "grad_norm": 4.371755599975586, | |
| "learning_rate": 1.4900852670690044e-05, | |
| "loss": 1.3114, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.35768722690783455, | |
| "grad_norm": 2.904101610183716, | |
| "learning_rate": 1.487788141960553e-05, | |
| "loss": 1.3716, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35850015242353417, | |
| "grad_norm": 2.663241147994995, | |
| "learning_rate": 1.4854876342583246e-05, | |
| "loss": 1.3269, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.35931307793923384, | |
| "grad_norm": 2.626646041870117, | |
| "learning_rate": 1.4831837599153165e-05, | |
| "loss": 1.3077, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.36012600345493345, | |
| "grad_norm": 2.8876073360443115, | |
| "learning_rate": 1.4808765349078729e-05, | |
| "loss": 1.2807, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.36093892897063307, | |
| "grad_norm": 2.5428106784820557, | |
| "learning_rate": 1.4785659752355724e-05, | |
| "loss": 1.3242, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.3617518544863327, | |
| "grad_norm": 2.7515244483947754, | |
| "learning_rate": 1.4762520969211186e-05, | |
| "loss": 1.3356, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.3625647800020323, | |
| "grad_norm": 2.771684408187866, | |
| "learning_rate": 1.4739349160102285e-05, | |
| "loss": 1.3255, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.36337770551773196, | |
| "grad_norm": 2.7270543575286865, | |
| "learning_rate": 1.4716144485715209e-05, | |
| "loss": 1.2797, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.3641906310334316, | |
| "grad_norm": 3.5211868286132812, | |
| "learning_rate": 1.4692907106964051e-05, | |
| "loss": 1.3098, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3650035565491312, | |
| "grad_norm": 5.923196315765381, | |
| "learning_rate": 1.4669637184989696e-05, | |
| "loss": 1.3212, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3658164820648308, | |
| "grad_norm": 2.50697922706604, | |
| "learning_rate": 1.4646334881158704e-05, | |
| "loss": 1.3195, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3666294075805304, | |
| "grad_norm": 3.3721578121185303, | |
| "learning_rate": 1.4623000357062184e-05, | |
| "loss": 1.2747, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.36744233309623003, | |
| "grad_norm": 2.429243803024292, | |
| "learning_rate": 1.459963377451468e-05, | |
| "loss": 1.3122, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.3682552586119297, | |
| "grad_norm": 4.240250587463379, | |
| "learning_rate": 1.457623529555305e-05, | |
| "loss": 1.3447, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3690681841276293, | |
| "grad_norm": 2.631667137145996, | |
| "learning_rate": 1.4552805082435333e-05, | |
| "loss": 1.3171, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.36988110964332893, | |
| "grad_norm": 2.906388521194458, | |
| "learning_rate": 1.4529343297639638e-05, | |
| "loss": 1.3193, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.37069403515902855, | |
| "grad_norm": 3.047884464263916, | |
| "learning_rate": 1.4505850103863007e-05, | |
| "loss": 1.3181, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.37150696067472816, | |
| "grad_norm": 2.3922433853149414, | |
| "learning_rate": 1.448232566402028e-05, | |
| "loss": 1.3203, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.37231988619042783, | |
| "grad_norm": 3.278813123703003, | |
| "learning_rate": 1.4458770141242992e-05, | |
| "loss": 1.3309, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.37313281170612744, | |
| "grad_norm": 2.7148866653442383, | |
| "learning_rate": 1.4435183698878212e-05, | |
| "loss": 1.3408, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.37394573722182706, | |
| "grad_norm": 2.913823366165161, | |
| "learning_rate": 1.4411566500487425e-05, | |
| "loss": 1.3426, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.37475866273752667, | |
| "grad_norm": 2.435643196105957, | |
| "learning_rate": 1.4387918709845395e-05, | |
| "loss": 1.3357, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.3755715882532263, | |
| "grad_norm": 2.6099560260772705, | |
| "learning_rate": 1.4364240490939032e-05, | |
| "loss": 1.3013, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.3763845137689259, | |
| "grad_norm": 2.7896599769592285, | |
| "learning_rate": 1.4340532007966252e-05, | |
| "loss": 1.3284, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.37719743928462557, | |
| "grad_norm": 2.857205867767334, | |
| "learning_rate": 1.4316793425334836e-05, | |
| "loss": 1.2926, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.3780103648003252, | |
| "grad_norm": 2.4580750465393066, | |
| "learning_rate": 1.4293024907661295e-05, | |
| "loss": 1.3926, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3788232903160248, | |
| "grad_norm": 2.6340065002441406, | |
| "learning_rate": 1.4269226619769727e-05, | |
| "loss": 1.3315, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3796362158317244, | |
| "grad_norm": 3.416398525238037, | |
| "learning_rate": 1.424539872669067e-05, | |
| "loss": 1.2822, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.380449141347424, | |
| "grad_norm": 2.4222054481506348, | |
| "learning_rate": 1.4221541393659966e-05, | |
| "loss": 1.2894, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.38126206686312364, | |
| "grad_norm": 2.797074794769287, | |
| "learning_rate": 1.4197654786117604e-05, | |
| "loss": 1.3519, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.3820749923788233, | |
| "grad_norm": 2.563831329345703, | |
| "learning_rate": 1.4173739069706586e-05, | |
| "loss": 1.3474, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3828879178945229, | |
| "grad_norm": 2.4004971981048584, | |
| "learning_rate": 1.414979441027176e-05, | |
| "loss": 1.3007, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.38370084341022254, | |
| "grad_norm": 2.532390594482422, | |
| "learning_rate": 1.4125820973858693e-05, | |
| "loss": 1.2613, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.38451376892592215, | |
| "grad_norm": 2.5733683109283447, | |
| "learning_rate": 1.41018189267125e-05, | |
| "loss": 1.3212, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.38532669444162176, | |
| "grad_norm": 2.710106134414673, | |
| "learning_rate": 1.4077788435276701e-05, | |
| "loss": 1.3235, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.38613961995732143, | |
| "grad_norm": 2.996795892715454, | |
| "learning_rate": 1.4053729666192067e-05, | |
| "loss": 1.3722, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.38695254547302105, | |
| "grad_norm": 2.4392545223236084, | |
| "learning_rate": 1.4029642786295452e-05, | |
| "loss": 1.3706, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.38776547098872066, | |
| "grad_norm": 2.6843369007110596, | |
| "learning_rate": 1.400552796261866e-05, | |
| "loss": 1.3382, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3885783965044203, | |
| "grad_norm": 2.405515193939209, | |
| "learning_rate": 1.3981385362387268e-05, | |
| "loss": 1.316, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.3893913220201199, | |
| "grad_norm": 2.425203800201416, | |
| "learning_rate": 1.3957215153019463e-05, | |
| "loss": 1.3578, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.3902042475358195, | |
| "grad_norm": 2.5134634971618652, | |
| "learning_rate": 1.3933017502124897e-05, | |
| "loss": 1.3531, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3910171730515192, | |
| "grad_norm": 2.4274141788482666, | |
| "learning_rate": 1.3908792577503514e-05, | |
| "loss": 1.3705, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.3918300985672188, | |
| "grad_norm": 2.881443500518799, | |
| "learning_rate": 1.3884540547144393e-05, | |
| "loss": 1.3196, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3926430240829184, | |
| "grad_norm": 2.5505170822143555, | |
| "learning_rate": 1.3860261579224574e-05, | |
| "loss": 1.3221, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.393455949598618, | |
| "grad_norm": 2.5604939460754395, | |
| "learning_rate": 1.3835955842107897e-05, | |
| "loss": 1.2565, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.39426887511431763, | |
| "grad_norm": 2.8203351497650146, | |
| "learning_rate": 1.3811623504343845e-05, | |
| "loss": 1.323, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3950818006300173, | |
| "grad_norm": 3.9116978645324707, | |
| "learning_rate": 1.378726473466635e-05, | |
| "loss": 1.3188, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.3958947261457169, | |
| "grad_norm": 2.918548822402954, | |
| "learning_rate": 1.3762879701992642e-05, | |
| "loss": 1.337, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.3967076516614165, | |
| "grad_norm": 3.048039674758911, | |
| "learning_rate": 1.373846857542208e-05, | |
| "loss": 1.3379, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.39752057717711614, | |
| "grad_norm": 2.6825406551361084, | |
| "learning_rate": 1.3714031524234965e-05, | |
| "loss": 1.3096, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.39833350269281576, | |
| "grad_norm": 2.5955066680908203, | |
| "learning_rate": 1.3689568717891381e-05, | |
| "loss": 1.2947, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.39914642820851537, | |
| "grad_norm": 2.5204849243164062, | |
| "learning_rate": 1.3665080326029997e-05, | |
| "loss": 1.2852, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.39995935372421504, | |
| "grad_norm": 3.158151865005493, | |
| "learning_rate": 1.364056651846693e-05, | |
| "loss": 1.3323, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.40077227923991465, | |
| "grad_norm": 2.787951946258545, | |
| "learning_rate": 1.3616027465194525e-05, | |
| "loss": 1.325, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.40158520475561427, | |
| "grad_norm": 3.462423324584961, | |
| "learning_rate": 1.35914633363802e-05, | |
| "loss": 1.2689, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.4023981302713139, | |
| "grad_norm": 3.3612263202667236, | |
| "learning_rate": 1.356687430236526e-05, | |
| "loss": 1.2846, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.4032110557870135, | |
| "grad_norm": 2.521135091781616, | |
| "learning_rate": 1.3542260533663723e-05, | |
| "loss": 1.2845, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.40402398130271316, | |
| "grad_norm": 2.702359914779663, | |
| "learning_rate": 1.351762220096112e-05, | |
| "loss": 1.2982, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.4048369068184128, | |
| "grad_norm": 2.928270101547241, | |
| "learning_rate": 1.3492959475113332e-05, | |
| "loss": 1.2878, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.4056498323341124, | |
| "grad_norm": 2.491701126098633, | |
| "learning_rate": 1.3468272527145388e-05, | |
| "loss": 1.2913, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.406462757849812, | |
| "grad_norm": 2.8777735233306885, | |
| "learning_rate": 1.3443561528250295e-05, | |
| "loss": 1.328, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4072756833655116, | |
| "grad_norm": 3.4918212890625, | |
| "learning_rate": 1.3418826649787834e-05, | |
| "loss": 1.3415, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.40808860888121123, | |
| "grad_norm": 2.6940505504608154, | |
| "learning_rate": 1.3394068063283387e-05, | |
| "loss": 1.3017, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.4089015343969109, | |
| "grad_norm": 3.9722023010253906, | |
| "learning_rate": 1.3369285940426737e-05, | |
| "loss": 1.3161, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.4097144599126105, | |
| "grad_norm": 2.6105010509490967, | |
| "learning_rate": 1.334448045307088e-05, | |
| "loss": 1.2853, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.41052738542831013, | |
| "grad_norm": 4.094304084777832, | |
| "learning_rate": 1.331965177323084e-05, | |
| "loss": 1.3059, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.41134031094400975, | |
| "grad_norm": 2.5570600032806396, | |
| "learning_rate": 1.3294800073082464e-05, | |
| "loss": 1.2957, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.41215323645970936, | |
| "grad_norm": 2.60870099067688, | |
| "learning_rate": 1.3269925524961237e-05, | |
| "loss": 1.2887, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.41296616197540903, | |
| "grad_norm": 2.3958325386047363, | |
| "learning_rate": 1.3245028301361086e-05, | |
| "loss": 1.3207, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.41377908749110864, | |
| "grad_norm": 2.718470811843872, | |
| "learning_rate": 1.3220108574933185e-05, | |
| "loss": 1.2884, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.41459201300680826, | |
| "grad_norm": 2.9990408420562744, | |
| "learning_rate": 1.3195166518484748e-05, | |
| "loss": 1.3104, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.41540493852250787, | |
| "grad_norm": 3.256333589553833, | |
| "learning_rate": 1.317020230497784e-05, | |
| "loss": 1.2586, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.4162178640382075, | |
| "grad_norm": 3.0497708320617676, | |
| "learning_rate": 1.3145216107528178e-05, | |
| "loss": 1.2946, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.4170307895539071, | |
| "grad_norm": 2.6696412563323975, | |
| "learning_rate": 1.3120208099403926e-05, | |
| "loss": 1.3413, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.41784371506960677, | |
| "grad_norm": 2.592937469482422, | |
| "learning_rate": 1.3095178454024496e-05, | |
| "loss": 1.2827, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4186566405853064, | |
| "grad_norm": 2.450669288635254, | |
| "learning_rate": 1.3070127344959348e-05, | |
| "loss": 1.2505, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.419469566101006, | |
| "grad_norm": 4.529777526855469, | |
| "learning_rate": 1.3045054945926775e-05, | |
| "loss": 1.3001, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.4202824916167056, | |
| "grad_norm": 3.2491648197174072, | |
| "learning_rate": 1.3019961430792711e-05, | |
| "loss": 1.2932, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.4210954171324052, | |
| "grad_norm": 3.3505818843841553, | |
| "learning_rate": 1.2994846973569524e-05, | |
| "loss": 1.3516, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4219083426481049, | |
| "grad_norm": 3.5476715564727783, | |
| "learning_rate": 1.2969711748414804e-05, | |
| "loss": 1.2834, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.4227212681638045, | |
| "grad_norm": 2.738903522491455, | |
| "learning_rate": 1.2944555929630152e-05, | |
| "loss": 1.2978, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4235341936795041, | |
| "grad_norm": 2.5854766368865967, | |
| "learning_rate": 1.2919379691659979e-05, | |
| "loss": 1.293, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.42434711919520374, | |
| "grad_norm": 3.76955246925354, | |
| "learning_rate": 1.2894183209090304e-05, | |
| "loss": 1.2517, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.42516004471090335, | |
| "grad_norm": 2.566361904144287, | |
| "learning_rate": 1.2868966656647522e-05, | |
| "loss": 1.3295, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.42597297022660296, | |
| "grad_norm": 2.7477164268493652, | |
| "learning_rate": 1.2843730209197203e-05, | |
| "loss": 1.3067, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.42678589574230263, | |
| "grad_norm": 3.0560967922210693, | |
| "learning_rate": 1.2818474041742885e-05, | |
| "loss": 1.2951, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.42759882125800225, | |
| "grad_norm": 2.9634625911712646, | |
| "learning_rate": 1.2793198329424858e-05, | |
| "loss": 1.268, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.42841174677370186, | |
| "grad_norm": 2.8108301162719727, | |
| "learning_rate": 1.2767903247518945e-05, | |
| "loss": 1.3319, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.4292246722894015, | |
| "grad_norm": 3.85799241065979, | |
| "learning_rate": 1.2742588971435276e-05, | |
| "loss": 1.3764, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4300375978051011, | |
| "grad_norm": 2.564434766769409, | |
| "learning_rate": 1.2717255676717106e-05, | |
| "loss": 1.2854, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.4308505233208007, | |
| "grad_norm": 5.098544597625732, | |
| "learning_rate": 1.2691903539039563e-05, | |
| "loss": 1.3143, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4316634488365004, | |
| "grad_norm": 7.195343017578125, | |
| "learning_rate": 1.2666532734208437e-05, | |
| "loss": 1.3026, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.4324763743522, | |
| "grad_norm": 2.743298053741455, | |
| "learning_rate": 1.264114343815898e-05, | |
| "loss": 1.3124, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4332892998678996, | |
| "grad_norm": 3.183859348297119, | |
| "learning_rate": 1.2615735826954664e-05, | |
| "loss": 1.3132, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.4341022253835992, | |
| "grad_norm": 7.095142364501953, | |
| "learning_rate": 1.2590310076785974e-05, | |
| "loss": 1.2599, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.43491515089929883, | |
| "grad_norm": 2.91894268989563, | |
| "learning_rate": 1.256486636396917e-05, | |
| "loss": 1.3251, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.4357280764149985, | |
| "grad_norm": 2.931509494781494, | |
| "learning_rate": 1.2539404864945087e-05, | |
| "loss": 1.3347, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.4365410019306981, | |
| "grad_norm": 2.4552268981933594, | |
| "learning_rate": 1.2513925756277894e-05, | |
| "loss": 1.3469, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.43735392744639773, | |
| "grad_norm": 2.846196174621582, | |
| "learning_rate": 1.2488429214653871e-05, | |
| "loss": 1.2654, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.43816685296209734, | |
| "grad_norm": 3.494403600692749, | |
| "learning_rate": 1.24629154168802e-05, | |
| "loss": 1.2688, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.43897977847779696, | |
| "grad_norm": 3.00067138671875, | |
| "learning_rate": 1.2437384539883715e-05, | |
| "loss": 1.2865, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.43979270399349657, | |
| "grad_norm": 3.0412096977233887, | |
| "learning_rate": 1.2411836760709686e-05, | |
| "loss": 1.269, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.44060562950919624, | |
| "grad_norm": 2.3580715656280518, | |
| "learning_rate": 1.2386272256520606e-05, | |
| "loss": 1.2752, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.44141855502489585, | |
| "grad_norm": 9.030720710754395, | |
| "learning_rate": 1.2360691204594937e-05, | |
| "loss": 1.3074, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.44223148054059547, | |
| "grad_norm": 3.970172882080078, | |
| "learning_rate": 1.2335093782325889e-05, | |
| "loss": 1.3117, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.4430444060562951, | |
| "grad_norm": 2.8179943561553955, | |
| "learning_rate": 1.2309480167220203e-05, | |
| "loss": 1.3196, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.4438573315719947, | |
| "grad_norm": 2.9376232624053955, | |
| "learning_rate": 1.2283850536896907e-05, | |
| "loss": 1.2614, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.44467025708769436, | |
| "grad_norm": 2.811709403991699, | |
| "learning_rate": 1.2258205069086082e-05, | |
| "loss": 1.2666, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.445483182603394, | |
| "grad_norm": 3.060638427734375, | |
| "learning_rate": 1.2232543941627641e-05, | |
| "loss": 1.2891, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.4462961081190936, | |
| "grad_norm": 2.581530809402466, | |
| "learning_rate": 1.2206867332470091e-05, | |
| "loss": 1.2875, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.4471090336347932, | |
| "grad_norm": 2.588129997253418, | |
| "learning_rate": 1.2181175419669293e-05, | |
| "loss": 1.2964, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4479219591504928, | |
| "grad_norm": 3.0943429470062256, | |
| "learning_rate": 1.215546838138723e-05, | |
| "loss": 1.29, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.44873488466619244, | |
| "grad_norm": 2.960190534591675, | |
| "learning_rate": 1.212974639589078e-05, | |
| "loss": 1.2812, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.4495478101818921, | |
| "grad_norm": 2.7364282608032227, | |
| "learning_rate": 1.2104009641550472e-05, | |
| "loss": 1.2783, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.4503607356975917, | |
| "grad_norm": 2.509277105331421, | |
| "learning_rate": 1.2078258296839245e-05, | |
| "loss": 1.2859, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.45117366121329133, | |
| "grad_norm": 2.769371747970581, | |
| "learning_rate": 1.2052492540331218e-05, | |
| "loss": 1.2866, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.45198658672899095, | |
| "grad_norm": 3.057968854904175, | |
| "learning_rate": 1.2026712550700457e-05, | |
| "loss": 1.3051, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.45279951224469056, | |
| "grad_norm": 3.4182374477386475, | |
| "learning_rate": 1.200091850671972e-05, | |
| "loss": 1.3266, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.45361243776039023, | |
| "grad_norm": 2.6871426105499268, | |
| "learning_rate": 1.1975110587259222e-05, | |
| "loss": 1.2596, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.45442536327608984, | |
| "grad_norm": 3.463675022125244, | |
| "learning_rate": 1.1949288971285411e-05, | |
| "loss": 1.2767, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.45523828879178946, | |
| "grad_norm": 2.8260090351104736, | |
| "learning_rate": 1.1923453837859706e-05, | |
| "loss": 1.2734, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4560512143074891, | |
| "grad_norm": 2.6161341667175293, | |
| "learning_rate": 1.1897605366137264e-05, | |
| "loss": 1.2377, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.4568641398231887, | |
| "grad_norm": 2.847534418106079, | |
| "learning_rate": 1.1871743735365735e-05, | |
| "loss": 1.3128, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.4576770653388883, | |
| "grad_norm": 3.116063117980957, | |
| "learning_rate": 1.1845869124884027e-05, | |
| "loss": 1.3114, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.45848999085458797, | |
| "grad_norm": 3.2849061489105225, | |
| "learning_rate": 1.1819981714121054e-05, | |
| "loss": 1.2761, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4593029163702876, | |
| "grad_norm": 2.484531879425049, | |
| "learning_rate": 1.1794081682594491e-05, | |
| "loss": 1.2978, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.4601158418859872, | |
| "grad_norm": 3.111940383911133, | |
| "learning_rate": 1.176816920990954e-05, | |
| "loss": 1.2928, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4609287674016868, | |
| "grad_norm": 3.063422918319702, | |
| "learning_rate": 1.174224447575767e-05, | |
| "loss": 1.3137, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.4617416929173864, | |
| "grad_norm": 4.031757831573486, | |
| "learning_rate": 1.171630765991538e-05, | |
| "loss": 1.2986, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4625546184330861, | |
| "grad_norm": 2.650336980819702, | |
| "learning_rate": 1.169035894224295e-05, | |
| "loss": 1.3328, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.4633675439487857, | |
| "grad_norm": 2.574526309967041, | |
| "learning_rate": 1.1664398502683194e-05, | |
| "loss": 1.3078, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.4641804694644853, | |
| "grad_norm": 2.3674449920654297, | |
| "learning_rate": 1.1638426521260211e-05, | |
| "loss": 1.2819, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.46499339498018494, | |
| "grad_norm": 2.8870980739593506, | |
| "learning_rate": 1.1612443178078138e-05, | |
| "loss": 1.2661, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.46580632049588455, | |
| "grad_norm": 2.4961047172546387, | |
| "learning_rate": 1.1586448653319908e-05, | |
| "loss": 1.3042, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.46661924601158417, | |
| "grad_norm": 2.6196508407592773, | |
| "learning_rate": 1.156044312724598e-05, | |
| "loss": 1.2306, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.46743217152728384, | |
| "grad_norm": 2.7249913215637207, | |
| "learning_rate": 1.153442678019311e-05, | |
| "loss": 1.3095, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.46824509704298345, | |
| "grad_norm": 2.9108643531799316, | |
| "learning_rate": 1.1508399792573095e-05, | |
| "loss": 1.2513, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.46905802255868306, | |
| "grad_norm": 2.7690494060516357, | |
| "learning_rate": 1.1482362344871514e-05, | |
| "loss": 1.3445, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.4698709480743827, | |
| "grad_norm": 3.629122734069824, | |
| "learning_rate": 1.1456314617646482e-05, | |
| "loss": 1.2616, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.4706838735900823, | |
| "grad_norm": 2.6831417083740234, | |
| "learning_rate": 1.1430256791527406e-05, | |
| "loss": 1.2786, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.4714967991057819, | |
| "grad_norm": 2.5316171646118164, | |
| "learning_rate": 1.1404189047213716e-05, | |
| "loss": 1.3195, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4723097246214816, | |
| "grad_norm": 4.602120399475098, | |
| "learning_rate": 1.137811156547362e-05, | |
| "loss": 1.2378, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.4731226501371812, | |
| "grad_norm": 2.5073766708374023, | |
| "learning_rate": 1.1352024527142855e-05, | |
| "loss": 1.2426, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.4739355756528808, | |
| "grad_norm": 2.5561444759368896, | |
| "learning_rate": 1.1325928113123431e-05, | |
| "loss": 1.318, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.4747485011685804, | |
| "grad_norm": 2.8386447429656982, | |
| "learning_rate": 1.129982250438237e-05, | |
| "loss": 1.2529, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.47556142668428003, | |
| "grad_norm": 2.3654778003692627, | |
| "learning_rate": 1.1273707881950445e-05, | |
| "loss": 1.2822, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4763743521999797, | |
| "grad_norm": 3.125446081161499, | |
| "learning_rate": 1.1247584426920962e-05, | |
| "loss": 1.3588, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4771872777156793, | |
| "grad_norm": 3.600827217102051, | |
| "learning_rate": 1.1221452320448449e-05, | |
| "loss": 1.3023, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.47800020323137893, | |
| "grad_norm": 3.858783483505249, | |
| "learning_rate": 1.1195311743747445e-05, | |
| "loss": 1.2784, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.47881312874707854, | |
| "grad_norm": 2.841679334640503, | |
| "learning_rate": 1.116916287809122e-05, | |
| "loss": 1.3084, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.47962605426277816, | |
| "grad_norm": 2.9722323417663574, | |
| "learning_rate": 1.1143005904810527e-05, | |
| "loss": 1.2983, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.48043897977847777, | |
| "grad_norm": 2.560037136077881, | |
| "learning_rate": 1.1116841005292339e-05, | |
| "loss": 1.3175, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.48125190529417744, | |
| "grad_norm": 3.1770455837249756, | |
| "learning_rate": 1.1090668360978589e-05, | |
| "loss": 1.2603, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.48206483080987705, | |
| "grad_norm": 2.4485607147216797, | |
| "learning_rate": 1.106448815336493e-05, | |
| "loss": 1.2792, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.48287775632557667, | |
| "grad_norm": 3.7001748085021973, | |
| "learning_rate": 1.1038300563999455e-05, | |
| "loss": 1.2846, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.4836906818412763, | |
| "grad_norm": 2.6942710876464844, | |
| "learning_rate": 1.1012105774481446e-05, | |
| "loss": 1.2864, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.4845036073569759, | |
| "grad_norm": 2.5104377269744873, | |
| "learning_rate": 1.0985903966460115e-05, | |
| "loss": 1.256, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.48531653287267557, | |
| "grad_norm": 2.4864704608917236, | |
| "learning_rate": 1.0959695321633346e-05, | |
| "loss": 1.2838, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.4861294583883752, | |
| "grad_norm": 3.2645606994628906, | |
| "learning_rate": 1.0933480021746432e-05, | |
| "loss": 1.2966, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.4869423839040748, | |
| "grad_norm": 28.041383743286133, | |
| "learning_rate": 1.0907258248590816e-05, | |
| "loss": 1.2513, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.4877553094197744, | |
| "grad_norm": 2.736785888671875, | |
| "learning_rate": 1.0881030184002827e-05, | |
| "loss": 1.3217, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.488568234935474, | |
| "grad_norm": 4.294330596923828, | |
| "learning_rate": 1.0854796009862434e-05, | |
| "loss": 1.3007, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.48938116045117364, | |
| "grad_norm": 2.629371404647827, | |
| "learning_rate": 1.0828555908091958e-05, | |
| "loss": 1.2884, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.4901940859668733, | |
| "grad_norm": 3.166304588317871, | |
| "learning_rate": 1.0802310060654832e-05, | |
| "loss": 1.3127, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4910070114825729, | |
| "grad_norm": 2.5344200134277344, | |
| "learning_rate": 1.0776058649554336e-05, | |
| "loss": 1.249, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.49181993699827253, | |
| "grad_norm": 3.2902913093566895, | |
| "learning_rate": 1.0749801856832325e-05, | |
| "loss": 1.2341, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.49263286251397215, | |
| "grad_norm": 2.5863964557647705, | |
| "learning_rate": 1.0723539864567983e-05, | |
| "loss": 1.3534, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.49344578802967176, | |
| "grad_norm": 3.1407294273376465, | |
| "learning_rate": 1.0697272854876537e-05, | |
| "loss": 1.2452, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.49425871354537143, | |
| "grad_norm": 2.339702844619751, | |
| "learning_rate": 1.0671001009908015e-05, | |
| "loss": 1.2597, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.49507163906107104, | |
| "grad_norm": 2.5861027240753174, | |
| "learning_rate": 1.0644724511845976e-05, | |
| "loss": 1.304, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.49588456457677066, | |
| "grad_norm": 2.6124143600463867, | |
| "learning_rate": 1.0618443542906251e-05, | |
| "loss": 1.2333, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4966974900924703, | |
| "grad_norm": 2.53468918800354, | |
| "learning_rate": 1.059215828533566e-05, | |
| "loss": 1.2587, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.4975104156081699, | |
| "grad_norm": 5.205654621124268, | |
| "learning_rate": 1.0565868921410776e-05, | |
| "loss": 1.2758, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.4983233411238695, | |
| "grad_norm": 3.3307433128356934, | |
| "learning_rate": 1.0539575633436645e-05, | |
| "loss": 1.3197, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.49913626663956917, | |
| "grad_norm": 2.4654664993286133, | |
| "learning_rate": 1.0513278603745523e-05, | |
| "loss": 1.2733, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.4999491921552688, | |
| "grad_norm": 2.5150272846221924, | |
| "learning_rate": 1.0486978014695606e-05, | |
| "loss": 1.2841, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5007621176709683, | |
| "grad_norm": 2.660186767578125, | |
| "learning_rate": 1.0460674048669783e-05, | |
| "loss": 1.3007, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.501575043186668, | |
| "grad_norm": 2.7415716648101807, | |
| "learning_rate": 1.0434366888074363e-05, | |
| "loss": 1.2974, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.5023879687023677, | |
| "grad_norm": 2.479142427444458, | |
| "learning_rate": 1.0408056715337797e-05, | |
| "loss": 1.301, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5032008942180672, | |
| "grad_norm": 2.4590210914611816, | |
| "learning_rate": 1.0381743712909424e-05, | |
| "loss": 1.2253, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.5040138197337669, | |
| "grad_norm": 2.4704954624176025, | |
| "learning_rate": 1.0355428063258224e-05, | |
| "loss": 1.1927, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5048267452494665, | |
| "grad_norm": 3.5037641525268555, | |
| "learning_rate": 1.0329109948871512e-05, | |
| "loss": 1.2727, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.5056396707651661, | |
| "grad_norm": 2.6537327766418457, | |
| "learning_rate": 1.0302789552253702e-05, | |
| "loss": 1.2295, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5064525962808658, | |
| "grad_norm": 3.4443886280059814, | |
| "learning_rate": 1.0276467055925044e-05, | |
| "loss": 1.2403, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.5072655217965654, | |
| "grad_norm": 4.377493858337402, | |
| "learning_rate": 1.0250142642420335e-05, | |
| "loss": 1.2667, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.508078447312265, | |
| "grad_norm": 2.712472677230835, | |
| "learning_rate": 1.0223816494287675e-05, | |
| "loss": 1.3323, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.5088913728279646, | |
| "grad_norm": 2.922093152999878, | |
| "learning_rate": 1.0197488794087188e-05, | |
| "loss": 1.2713, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5097042983436643, | |
| "grad_norm": 11.951809883117676, | |
| "learning_rate": 1.0171159724389766e-05, | |
| "loss": 1.2997, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.5105172238593639, | |
| "grad_norm": 2.5700554847717285, | |
| "learning_rate": 1.0144829467775794e-05, | |
| "loss": 1.261, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5113301493750635, | |
| "grad_norm": 2.6800413131713867, | |
| "learning_rate": 1.0118498206833886e-05, | |
| "loss": 1.3292, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.5121430748907632, | |
| "grad_norm": 4.24453592300415, | |
| "learning_rate": 1.0092166124159628e-05, | |
| "loss": 1.3281, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5129560004064627, | |
| "grad_norm": 2.7513749599456787, | |
| "learning_rate": 1.0065833402354302e-05, | |
| "loss": 1.2944, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.5137689259221624, | |
| "grad_norm": 2.610588788986206, | |
| "learning_rate": 1.003950022402361e-05, | |
| "loss": 1.3129, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5145818514378621, | |
| "grad_norm": 2.949564218521118, | |
| "learning_rate": 1.0013166771776441e-05, | |
| "loss": 1.2961, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.5153947769535616, | |
| "grad_norm": 2.5617198944091797, | |
| "learning_rate": 9.986833228223562e-06, | |
| "loss": 1.2898, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5162077024692613, | |
| "grad_norm": 2.779733896255493, | |
| "learning_rate": 9.96049977597639e-06, | |
| "loss": 1.2988, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5170206279849608, | |
| "grad_norm": 2.8505136966705322, | |
| "learning_rate": 9.934166597645703e-06, | |
| "loss": 1.2652, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5178335535006605, | |
| "grad_norm": 2.847262144088745, | |
| "learning_rate": 9.907833875840374e-06, | |
| "loss": 1.3076, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5186464790163601, | |
| "grad_norm": 4.957255840301514, | |
| "learning_rate": 9.881501793166117e-06, | |
| "loss": 1.214, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5194594045320597, | |
| "grad_norm": 2.7829556465148926, | |
| "learning_rate": 9.85517053222421e-06, | |
| "loss": 1.2379, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.5202723300477594, | |
| "grad_norm": 2.7060935497283936, | |
| "learning_rate": 9.82884027561024e-06, | |
| "loss": 1.3016, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.521085255563459, | |
| "grad_norm": 6.336554527282715, | |
| "learning_rate": 9.802511205912815e-06, | |
| "loss": 1.269, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5218981810791586, | |
| "grad_norm": 3.0378448963165283, | |
| "learning_rate": 9.776183505712327e-06, | |
| "loss": 1.317, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5227111065948582, | |
| "grad_norm": 5.806065082550049, | |
| "learning_rate": 9.749857357579667e-06, | |
| "loss": 1.3165, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.5235240321105579, | |
| "grad_norm": 2.7738869190216064, | |
| "learning_rate": 9.723532944074961e-06, | |
| "loss": 1.2835, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5243369576262575, | |
| "grad_norm": 2.6603453159332275, | |
| "learning_rate": 9.6972104477463e-06, | |
| "loss": 1.2673, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5251498831419571, | |
| "grad_norm": 2.9316189289093018, | |
| "learning_rate": 9.670890051128493e-06, | |
| "loss": 1.249, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5259628086576568, | |
| "grad_norm": 2.8541407585144043, | |
| "learning_rate": 9.644571936741778e-06, | |
| "loss": 1.2835, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.5267757341733563, | |
| "grad_norm": 2.6935575008392334, | |
| "learning_rate": 9.618256287090576e-06, | |
| "loss": 1.2859, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.527588659689056, | |
| "grad_norm": 3.057039260864258, | |
| "learning_rate": 9.591943284662206e-06, | |
| "loss": 1.2538, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.5284015852047557, | |
| "grad_norm": 3.2430379390716553, | |
| "learning_rate": 9.56563311192564e-06, | |
| "loss": 1.294, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5292145107204552, | |
| "grad_norm": 2.378072500228882, | |
| "learning_rate": 9.53932595133022e-06, | |
| "loss": 1.2793, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.5300274362361549, | |
| "grad_norm": 3.2185440063476562, | |
| "learning_rate": 9.513021985304399e-06, | |
| "loss": 1.2868, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5308403617518545, | |
| "grad_norm": 3.272632122039795, | |
| "learning_rate": 9.486721396254484e-06, | |
| "loss": 1.2128, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.5316532872675541, | |
| "grad_norm": 3.163884401321411, | |
| "learning_rate": 9.460424366563355e-06, | |
| "loss": 1.2962, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5324662127832538, | |
| "grad_norm": 3.096857786178589, | |
| "learning_rate": 9.434131078589224e-06, | |
| "loss": 1.2575, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5332791382989533, | |
| "grad_norm": 2.711069107055664, | |
| "learning_rate": 9.407841714664343e-06, | |
| "loss": 1.2969, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.534092063814653, | |
| "grad_norm": 4.4655866622924805, | |
| "learning_rate": 9.381556457093752e-06, | |
| "loss": 1.2229, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.5349049893303526, | |
| "grad_norm": 2.7365305423736572, | |
| "learning_rate": 9.355275488154025e-06, | |
| "loss": 1.285, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5357179148460522, | |
| "grad_norm": 3.4264895915985107, | |
| "learning_rate": 9.32899899009199e-06, | |
| "loss": 1.3222, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.5365308403617518, | |
| "grad_norm": 2.9572296142578125, | |
| "learning_rate": 9.30272714512347e-06, | |
| "loss": 1.2771, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5373437658774515, | |
| "grad_norm": 3.124464988708496, | |
| "learning_rate": 9.276460135432019e-06, | |
| "loss": 1.2362, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.5381566913931511, | |
| "grad_norm": 3.484861373901367, | |
| "learning_rate": 9.250198143167675e-06, | |
| "loss": 1.2624, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5389696169088507, | |
| "grad_norm": 3.191455602645874, | |
| "learning_rate": 9.223941350445666e-06, | |
| "loss": 1.3271, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.5397825424245504, | |
| "grad_norm": 3.055478572845459, | |
| "learning_rate": 9.19768993934517e-06, | |
| "loss": 1.2476, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5405954679402499, | |
| "grad_norm": 2.8661985397338867, | |
| "learning_rate": 9.171444091908046e-06, | |
| "loss": 1.2575, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5414083934559496, | |
| "grad_norm": 3.042300224304199, | |
| "learning_rate": 9.145203990137571e-06, | |
| "loss": 1.2472, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5422213189716493, | |
| "grad_norm": 3.324767827987671, | |
| "learning_rate": 9.118969815997174e-06, | |
| "loss": 1.2608, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.5430342444873488, | |
| "grad_norm": 2.8374948501586914, | |
| "learning_rate": 9.092741751409186e-06, | |
| "loss": 1.2865, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.5438471700030485, | |
| "grad_norm": 3.3593552112579346, | |
| "learning_rate": 9.06651997825357e-06, | |
| "loss": 1.2746, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.544660095518748, | |
| "grad_norm": 3.2432382106781006, | |
| "learning_rate": 9.040304678366658e-06, | |
| "loss": 1.2864, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5454730210344477, | |
| "grad_norm": 2.890409469604492, | |
| "learning_rate": 9.014096033539889e-06, | |
| "loss": 1.2685, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.5462859465501474, | |
| "grad_norm": 3.0769150257110596, | |
| "learning_rate": 8.987894225518556e-06, | |
| "loss": 1.2701, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.547098872065847, | |
| "grad_norm": 3.453287363052368, | |
| "learning_rate": 8.961699436000548e-06, | |
| "loss": 1.2218, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.5479117975815466, | |
| "grad_norm": 3.1950011253356934, | |
| "learning_rate": 8.93551184663507e-06, | |
| "loss": 1.2267, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.5487247230972462, | |
| "grad_norm": 3.445006847381592, | |
| "learning_rate": 8.909331639021414e-06, | |
| "loss": 1.283, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5495376486129459, | |
| "grad_norm": 2.5453741550445557, | |
| "learning_rate": 8.883158994707666e-06, | |
| "loss": 1.3102, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5503505741286454, | |
| "grad_norm": 4.167499542236328, | |
| "learning_rate": 8.856994095189477e-06, | |
| "loss": 1.2881, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.5511634996443451, | |
| "grad_norm": 2.6888363361358643, | |
| "learning_rate": 8.830837121908783e-06, | |
| "loss": 1.2332, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5519764251600447, | |
| "grad_norm": 2.9484667778015137, | |
| "learning_rate": 8.804688256252557e-06, | |
| "loss": 1.2676, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.5527893506757443, | |
| "grad_norm": 2.5477519035339355, | |
| "learning_rate": 8.778547679551555e-06, | |
| "loss": 1.2956, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.553602276191444, | |
| "grad_norm": 2.3307385444641113, | |
| "learning_rate": 8.75241557307904e-06, | |
| "loss": 1.3021, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.5544152017071435, | |
| "grad_norm": 3.1104202270507812, | |
| "learning_rate": 8.726292118049555e-06, | |
| "loss": 1.2861, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5552281272228432, | |
| "grad_norm": 3.2731287479400635, | |
| "learning_rate": 8.700177495617635e-06, | |
| "loss": 1.33, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.5560410527385429, | |
| "grad_norm": 2.923478364944458, | |
| "learning_rate": 8.674071886876572e-06, | |
| "loss": 1.2946, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5568539782542424, | |
| "grad_norm": 3.1030538082122803, | |
| "learning_rate": 8.647975472857148e-06, | |
| "loss": 1.2481, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5576669037699421, | |
| "grad_norm": 2.6904759407043457, | |
| "learning_rate": 8.621888434526382e-06, | |
| "loss": 1.2637, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5584798292856417, | |
| "grad_norm": 3.6781442165374756, | |
| "learning_rate": 8.595810952786289e-06, | |
| "loss": 1.2875, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.5592927548013413, | |
| "grad_norm": 4.897818565368652, | |
| "learning_rate": 8.569743208472594e-06, | |
| "loss": 1.2804, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.560105680317041, | |
| "grad_norm": 2.9090828895568848, | |
| "learning_rate": 8.543685382353518e-06, | |
| "loss": 1.2817, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.5609186058327406, | |
| "grad_norm": 3.3284378051757812, | |
| "learning_rate": 8.51763765512849e-06, | |
| "loss": 1.2928, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5617315313484402, | |
| "grad_norm": 3.440209150314331, | |
| "learning_rate": 8.491600207426907e-06, | |
| "loss": 1.2667, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.5625444568641398, | |
| "grad_norm": 3.1297762393951416, | |
| "learning_rate": 8.465573219806893e-06, | |
| "loss": 1.2752, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5633573823798395, | |
| "grad_norm": 3.460277795791626, | |
| "learning_rate": 8.439556872754025e-06, | |
| "loss": 1.2611, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.5641703078955391, | |
| "grad_norm": 2.6390557289123535, | |
| "learning_rate": 8.413551346680095e-06, | |
| "loss": 1.2339, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5649832334112387, | |
| "grad_norm": 2.365945339202881, | |
| "learning_rate": 8.38755682192186e-06, | |
| "loss": 1.2333, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5657961589269384, | |
| "grad_norm": 3.140129804611206, | |
| "learning_rate": 8.36157347873979e-06, | |
| "loss": 1.2614, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5666090844426379, | |
| "grad_norm": 4.027166366577148, | |
| "learning_rate": 8.335601497316809e-06, | |
| "loss": 1.263, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.5674220099583376, | |
| "grad_norm": 2.6872942447662354, | |
| "learning_rate": 8.309641057757052e-06, | |
| "loss": 1.2479, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5682349354740371, | |
| "grad_norm": 2.575493574142456, | |
| "learning_rate": 8.283692340084623e-06, | |
| "loss": 1.2818, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.5690478609897368, | |
| "grad_norm": 2.6429176330566406, | |
| "learning_rate": 8.257755524242333e-06, | |
| "loss": 1.2921, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5698607865054365, | |
| "grad_norm": 4.695654392242432, | |
| "learning_rate": 8.231830790090461e-06, | |
| "loss": 1.2046, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.570673712021136, | |
| "grad_norm": 2.4642715454101562, | |
| "learning_rate": 8.205918317405508e-06, | |
| "loss": 1.3013, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5714866375368357, | |
| "grad_norm": 2.567474842071533, | |
| "learning_rate": 8.18001828587895e-06, | |
| "loss": 1.3458, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.5722995630525353, | |
| "grad_norm": 2.934668779373169, | |
| "learning_rate": 8.154130875115978e-06, | |
| "loss": 1.2804, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5731124885682349, | |
| "grad_norm": 2.669285297393799, | |
| "learning_rate": 8.12825626463427e-06, | |
| "loss": 1.2329, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5739254140839346, | |
| "grad_norm": 2.7390220165252686, | |
| "learning_rate": 8.102394633862743e-06, | |
| "loss": 1.2177, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5747383395996342, | |
| "grad_norm": 3.19964861869812, | |
| "learning_rate": 8.0765461621403e-06, | |
| "loss": 1.2625, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.5755512651153338, | |
| "grad_norm": 2.753469705581665, | |
| "learning_rate": 8.050711028714589e-06, | |
| "loss": 1.2357, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5763641906310334, | |
| "grad_norm": 3.3288702964782715, | |
| "learning_rate": 8.02488941274078e-06, | |
| "loss": 1.217, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.5771771161467331, | |
| "grad_norm": 2.808100700378418, | |
| "learning_rate": 7.999081493280283e-06, | |
| "loss": 1.3156, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5779900416624327, | |
| "grad_norm": 2.8736870288848877, | |
| "learning_rate": 7.973287449299545e-06, | |
| "loss": 1.3122, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.5788029671781323, | |
| "grad_norm": 6.863023281097412, | |
| "learning_rate": 7.947507459668784e-06, | |
| "loss": 1.2218, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.579615892693832, | |
| "grad_norm": 4.454842567443848, | |
| "learning_rate": 7.921741703160758e-06, | |
| "loss": 1.1918, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.5804288182095315, | |
| "grad_norm": 2.4465959072113037, | |
| "learning_rate": 7.895990358449533e-06, | |
| "loss": 1.2705, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.5812417437252312, | |
| "grad_norm": 3.3625428676605225, | |
| "learning_rate": 7.87025360410922e-06, | |
| "loss": 1.2644, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5820546692409307, | |
| "grad_norm": 2.846947431564331, | |
| "learning_rate": 7.844531618612772e-06, | |
| "loss": 1.2612, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.5828675947566304, | |
| "grad_norm": 3.332118034362793, | |
| "learning_rate": 7.81882458033071e-06, | |
| "loss": 1.2597, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.5836805202723301, | |
| "grad_norm": 2.646106719970703, | |
| "learning_rate": 7.79313266752991e-06, | |
| "loss": 1.2613, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.5844934457880296, | |
| "grad_norm": 2.8592135906219482, | |
| "learning_rate": 7.767456058372362e-06, | |
| "loss": 1.282, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.5853063713037293, | |
| "grad_norm": 2.748481035232544, | |
| "learning_rate": 7.741794930913922e-06, | |
| "loss": 1.2869, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5861192968194289, | |
| "grad_norm": 2.8134074211120605, | |
| "learning_rate": 7.7161494631031e-06, | |
| "loss": 1.3079, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.5869322223351285, | |
| "grad_norm": 3.059119939804077, | |
| "learning_rate": 7.690519832779799e-06, | |
| "loss": 1.2705, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.5877451478508282, | |
| "grad_norm": 2.6439130306243896, | |
| "learning_rate": 7.664906217674115e-06, | |
| "loss": 1.2413, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.5885580733665278, | |
| "grad_norm": 2.812056303024292, | |
| "learning_rate": 7.639308795405066e-06, | |
| "loss": 1.2543, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5893709988822274, | |
| "grad_norm": 3.2603330612182617, | |
| "learning_rate": 7.613727743479395e-06, | |
| "loss": 1.2442, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.590183924397927, | |
| "grad_norm": 2.544433116912842, | |
| "learning_rate": 7.588163239290316e-06, | |
| "loss": 1.3034, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.5909968499136267, | |
| "grad_norm": 4.0246262550354, | |
| "learning_rate": 7.562615460116289e-06, | |
| "loss": 1.3188, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.5918097754293263, | |
| "grad_norm": 4.249239444732666, | |
| "learning_rate": 7.537084583119802e-06, | |
| "loss": 1.3091, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5926227009450259, | |
| "grad_norm": 2.7686362266540527, | |
| "learning_rate": 7.511570785346129e-06, | |
| "loss": 1.2449, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.5934356264607256, | |
| "grad_norm": 2.8529245853424072, | |
| "learning_rate": 7.486074243722109e-06, | |
| "loss": 1.2392, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5942485519764251, | |
| "grad_norm": 3.073486328125, | |
| "learning_rate": 7.460595135054916e-06, | |
| "loss": 1.2848, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.5950614774921248, | |
| "grad_norm": 3.365366220474243, | |
| "learning_rate": 7.435133636030831e-06, | |
| "loss": 1.2912, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5958744030078245, | |
| "grad_norm": 2.4938106536865234, | |
| "learning_rate": 7.4096899232140295e-06, | |
| "loss": 1.2965, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.596687328523524, | |
| "grad_norm": 2.9927473068237305, | |
| "learning_rate": 7.384264173045339e-06, | |
| "loss": 1.2748, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5975002540392237, | |
| "grad_norm": 7.3427205085754395, | |
| "learning_rate": 7.358856561841021e-06, | |
| "loss": 1.2457, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5983131795549232, | |
| "grad_norm": 3.274311065673828, | |
| "learning_rate": 7.333467265791563e-06, | |
| "loss": 1.2225, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.5991261050706229, | |
| "grad_norm": 4.503856658935547, | |
| "learning_rate": 7.308096460960441e-06, | |
| "loss": 1.2603, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5999390305863225, | |
| "grad_norm": 3.6017913818359375, | |
| "learning_rate": 7.282744323282895e-06, | |
| "loss": 1.2278, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.6007519561020221, | |
| "grad_norm": 3.0930585861206055, | |
| "learning_rate": 7.2574110285647244e-06, | |
| "loss": 1.2649, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.6015648816177218, | |
| "grad_norm": 2.6793737411499023, | |
| "learning_rate": 7.232096752481061e-06, | |
| "loss": 1.215, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6023778071334214, | |
| "grad_norm": 3.0066819190979004, | |
| "learning_rate": 7.206801670575145e-06, | |
| "loss": 1.2953, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.603190732649121, | |
| "grad_norm": 3.2586004734039307, | |
| "learning_rate": 7.181525958257116e-06, | |
| "loss": 1.1988, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.6040036581648206, | |
| "grad_norm": 3.186267375946045, | |
| "learning_rate": 7.156269790802801e-06, | |
| "loss": 1.2425, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.6048165836805203, | |
| "grad_norm": 3.919509172439575, | |
| "learning_rate": 7.131033343352483e-06, | |
| "loss": 1.3432, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6056295091962199, | |
| "grad_norm": 3.8313186168670654, | |
| "learning_rate": 7.105816790909699e-06, | |
| "loss": 1.2491, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.6064424347119195, | |
| "grad_norm": 2.7689011096954346, | |
| "learning_rate": 7.080620308340024e-06, | |
| "loss": 1.2673, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.6072553602276192, | |
| "grad_norm": 4.105691909790039, | |
| "learning_rate": 7.055444070369852e-06, | |
| "loss": 1.2688, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.6080682857433187, | |
| "grad_norm": 3.336580276489258, | |
| "learning_rate": 7.0302882515852025e-06, | |
| "loss": 1.2613, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6088812112590184, | |
| "grad_norm": 3.7272021770477295, | |
| "learning_rate": 7.005153026430476e-06, | |
| "loss": 1.1882, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.6096941367747181, | |
| "grad_norm": 4.220558166503906, | |
| "learning_rate": 6.980038569207291e-06, | |
| "loss": 1.1853, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6105070622904176, | |
| "grad_norm": 2.8943638801574707, | |
| "learning_rate": 6.954945054073228e-06, | |
| "loss": 1.2408, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.6113199878061173, | |
| "grad_norm": 2.740449905395508, | |
| "learning_rate": 6.929872655040655e-06, | |
| "loss": 1.2233, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6121329133218169, | |
| "grad_norm": 3.1293320655822754, | |
| "learning_rate": 6.904821545975507e-06, | |
| "loss": 1.2362, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.6129458388375165, | |
| "grad_norm": 2.9130334854125977, | |
| "learning_rate": 6.879791900596077e-06, | |
| "loss": 1.2525, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.6137587643532162, | |
| "grad_norm": 2.6800663471221924, | |
| "learning_rate": 6.854783892471823e-06, | |
| "loss": 1.2811, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.6145716898689157, | |
| "grad_norm": 2.7140908241271973, | |
| "learning_rate": 6.829797695022163e-06, | |
| "loss": 1.2693, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 2.687870740890503, | |
| "learning_rate": 6.804833481515256e-06, | |
| "loss": 1.2124, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.616197540900315, | |
| "grad_norm": 3.170487880706787, | |
| "learning_rate": 6.7798914250668154e-06, | |
| "loss": 1.2373, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.6170104664160146, | |
| "grad_norm": 2.6142961978912354, | |
| "learning_rate": 6.7549716986389146e-06, | |
| "loss": 1.2527, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.6178233919317142, | |
| "grad_norm": 3.4092085361480713, | |
| "learning_rate": 6.730074475038766e-06, | |
| "loss": 1.2401, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6186363174474139, | |
| "grad_norm": 3.256838083267212, | |
| "learning_rate": 6.7051999269175405e-06, | |
| "loss": 1.1863, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.6194492429631135, | |
| "grad_norm": 2.8312947750091553, | |
| "learning_rate": 6.680348226769162e-06, | |
| "loss": 1.241, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.6202621684788131, | |
| "grad_norm": 2.799750804901123, | |
| "learning_rate": 6.655519546929121e-06, | |
| "loss": 1.2601, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.6210750939945128, | |
| "grad_norm": 3.188913106918335, | |
| "learning_rate": 6.630714059573267e-06, | |
| "loss": 1.2719, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6218880195102123, | |
| "grad_norm": 5.547321796417236, | |
| "learning_rate": 6.6059319367166165e-06, | |
| "loss": 1.2307, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.622700945025912, | |
| "grad_norm": 3.2380361557006836, | |
| "learning_rate": 6.581173350212169e-06, | |
| "loss": 1.2125, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6235138705416117, | |
| "grad_norm": 2.61883282661438, | |
| "learning_rate": 6.55643847174971e-06, | |
| "loss": 1.2556, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.6243267960573112, | |
| "grad_norm": 3.0079920291900635, | |
| "learning_rate": 6.531727472854617e-06, | |
| "loss": 1.2761, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6251397215730109, | |
| "grad_norm": 3.995910882949829, | |
| "learning_rate": 6.507040524886672e-06, | |
| "loss": 1.302, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.6259526470887105, | |
| "grad_norm": 2.7787578105926514, | |
| "learning_rate": 6.482377799038882e-06, | |
| "loss": 1.2249, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6267655726044101, | |
| "grad_norm": 3.6458895206451416, | |
| "learning_rate": 6.45773946633628e-06, | |
| "loss": 1.2833, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.6275784981201098, | |
| "grad_norm": 2.9308435916900635, | |
| "learning_rate": 6.4331256976347434e-06, | |
| "loss": 1.309, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6283914236358094, | |
| "grad_norm": 3.7917234897613525, | |
| "learning_rate": 6.408536663619803e-06, | |
| "loss": 1.2996, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.629204349151509, | |
| "grad_norm": 8.85531234741211, | |
| "learning_rate": 6.383972534805478e-06, | |
| "loss": 1.2499, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6300172746672086, | |
| "grad_norm": 4.16661262512207, | |
| "learning_rate": 6.359433481533074e-06, | |
| "loss": 1.1928, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6308302001829083, | |
| "grad_norm": 3.6679298877716064, | |
| "learning_rate": 6.3349196739700024e-06, | |
| "loss": 1.2917, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6316431256986078, | |
| "grad_norm": 3.2031593322753906, | |
| "learning_rate": 6.310431282108622e-06, | |
| "loss": 1.2926, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.6324560512143075, | |
| "grad_norm": 2.7538363933563232, | |
| "learning_rate": 6.2859684757650365e-06, | |
| "loss": 1.2634, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6332689767300071, | |
| "grad_norm": 3.4906575679779053, | |
| "learning_rate": 6.261531424577923e-06, | |
| "loss": 1.2711, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.6340819022457067, | |
| "grad_norm": 3.4287617206573486, | |
| "learning_rate": 6.2371202980073596e-06, | |
| "loss": 1.2412, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6348948277614064, | |
| "grad_norm": 3.5826241970062256, | |
| "learning_rate": 6.212735265333655e-06, | |
| "loss": 1.1782, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.6357077532771059, | |
| "grad_norm": 3.369983673095703, | |
| "learning_rate": 6.188376495656156e-06, | |
| "loss": 1.2628, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6365206787928056, | |
| "grad_norm": 3.6163413524627686, | |
| "learning_rate": 6.164044157892102e-06, | |
| "loss": 1.3304, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.6373336043085053, | |
| "grad_norm": 2.6903252601623535, | |
| "learning_rate": 6.13973842077543e-06, | |
| "loss": 1.2458, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6381465298242048, | |
| "grad_norm": 3.919074296951294, | |
| "learning_rate": 6.11545945285561e-06, | |
| "loss": 1.253, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6389594553399045, | |
| "grad_norm": 2.9155240058898926, | |
| "learning_rate": 6.091207422496489e-06, | |
| "loss": 1.2661, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6397723808556041, | |
| "grad_norm": 3.2426347732543945, | |
| "learning_rate": 6.066982497875109e-06, | |
| "loss": 1.2556, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.6405853063713037, | |
| "grad_norm": 3.078899383544922, | |
| "learning_rate": 6.042784846980542e-06, | |
| "loss": 1.2572, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.6413982318870034, | |
| "grad_norm": 3.3044381141662598, | |
| "learning_rate": 6.018614637612733e-06, | |
| "loss": 1.2301, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.642211157402703, | |
| "grad_norm": 2.8474955558776855, | |
| "learning_rate": 5.99447203738134e-06, | |
| "loss": 1.2042, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6430240829184026, | |
| "grad_norm": 2.9787845611572266, | |
| "learning_rate": 5.9703572137045495e-06, | |
| "loss": 1.2608, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.6438370084341022, | |
| "grad_norm": 3.380209445953369, | |
| "learning_rate": 5.946270333807937e-06, | |
| "loss": 1.2973, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6446499339498019, | |
| "grad_norm": 2.81736421585083, | |
| "learning_rate": 5.922211564723302e-06, | |
| "loss": 1.2791, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.6454628594655015, | |
| "grad_norm": 2.9054102897644043, | |
| "learning_rate": 5.898181073287504e-06, | |
| "loss": 1.2692, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6462757849812011, | |
| "grad_norm": 3.2480154037475586, | |
| "learning_rate": 5.87417902614131e-06, | |
| "loss": 1.311, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6470887104969008, | |
| "grad_norm": 2.8822832107543945, | |
| "learning_rate": 5.850205589728239e-06, | |
| "loss": 1.2528, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6479016360126003, | |
| "grad_norm": 2.8832008838653564, | |
| "learning_rate": 5.826260930293417e-06, | |
| "loss": 1.2631, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.6487145615283, | |
| "grad_norm": 3.547271490097046, | |
| "learning_rate": 5.802345213882396e-06, | |
| "loss": 1.2543, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6495274870439995, | |
| "grad_norm": 9.93248176574707, | |
| "learning_rate": 5.778458606340037e-06, | |
| "loss": 1.3218, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.6503404125596992, | |
| "grad_norm": 4.664019584655762, | |
| "learning_rate": 5.754601273309333e-06, | |
| "loss": 1.2487, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6511533380753989, | |
| "grad_norm": 3.191390037536621, | |
| "learning_rate": 5.730773380230276e-06, | |
| "loss": 1.1966, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.6519662635910984, | |
| "grad_norm": 3.228309392929077, | |
| "learning_rate": 5.70697509233871e-06, | |
| "loss": 1.2556, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6527791891067981, | |
| "grad_norm": 3.1456098556518555, | |
| "learning_rate": 5.683206574665165e-06, | |
| "loss": 1.2308, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.6535921146224977, | |
| "grad_norm": 2.800039052963257, | |
| "learning_rate": 5.6594679920337514e-06, | |
| "loss": 1.2599, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6544050401381973, | |
| "grad_norm": 2.9048550128936768, | |
| "learning_rate": 5.635759509060969e-06, | |
| "loss": 1.2707, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.655217965653897, | |
| "grad_norm": 4.015383720397949, | |
| "learning_rate": 5.612081290154607e-06, | |
| "loss": 1.1853, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6560308911695966, | |
| "grad_norm": 2.6166458129882812, | |
| "learning_rate": 5.58843349951258e-06, | |
| "loss": 1.2589, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.6568438166852962, | |
| "grad_norm": 4.735121726989746, | |
| "learning_rate": 5.564816301121792e-06, | |
| "loss": 1.2395, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6576567422009958, | |
| "grad_norm": 3.5069589614868164, | |
| "learning_rate": 5.541229858757011e-06, | |
| "loss": 1.2888, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.6584696677166955, | |
| "grad_norm": 2.354539394378662, | |
| "learning_rate": 5.517674335979721e-06, | |
| "loss": 1.1898, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6592825932323951, | |
| "grad_norm": 3.2337725162506104, | |
| "learning_rate": 5.494149896136998e-06, | |
| "loss": 1.311, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.6600955187480947, | |
| "grad_norm": 2.6511757373809814, | |
| "learning_rate": 5.470656702360367e-06, | |
| "loss": 1.2788, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6609084442637944, | |
| "grad_norm": 3.772780179977417, | |
| "learning_rate": 5.447194917564671e-06, | |
| "loss": 1.2211, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.6617213697794939, | |
| "grad_norm": 2.540316581726074, | |
| "learning_rate": 5.423764704446954e-06, | |
| "loss": 1.2647, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6625342952951936, | |
| "grad_norm": 2.792747735977173, | |
| "learning_rate": 5.400366225485326e-06, | |
| "loss": 1.2184, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6633472208108933, | |
| "grad_norm": 3.32261061668396, | |
| "learning_rate": 5.376999642937817e-06, | |
| "loss": 1.2727, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.6641601463265928, | |
| "grad_norm": 4.128072738647461, | |
| "learning_rate": 5.353665118841296e-06, | |
| "loss": 1.2718, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.6649730718422925, | |
| "grad_norm": 2.9913909435272217, | |
| "learning_rate": 5.330362815010306e-06, | |
| "loss": 1.2698, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.665785997357992, | |
| "grad_norm": 2.9993457794189453, | |
| "learning_rate": 5.307092893035951e-06, | |
| "loss": 1.2447, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.6665989228736917, | |
| "grad_norm": 2.801236629486084, | |
| "learning_rate": 5.2838555142847925e-06, | |
| "loss": 1.209, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6674118483893913, | |
| "grad_norm": 3.982821464538574, | |
| "learning_rate": 5.260650839897719e-06, | |
| "loss": 1.3099, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.6682247739050909, | |
| "grad_norm": 2.9553382396698, | |
| "learning_rate": 5.237479030788817e-06, | |
| "loss": 1.2652, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6690376994207906, | |
| "grad_norm": 3.233414888381958, | |
| "learning_rate": 5.214340247644278e-06, | |
| "loss": 1.2256, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.6698506249364902, | |
| "grad_norm": 3.1418299674987793, | |
| "learning_rate": 5.191234650921273e-06, | |
| "loss": 1.2225, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6706635504521898, | |
| "grad_norm": 2.8071773052215576, | |
| "learning_rate": 5.168162400846835e-06, | |
| "loss": 1.3381, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6714764759678894, | |
| "grad_norm": 3.2606897354125977, | |
| "learning_rate": 5.145123657416759e-06, | |
| "loss": 1.2671, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.6722894014835891, | |
| "grad_norm": 2.5103461742401123, | |
| "learning_rate": 5.122118580394473e-06, | |
| "loss": 1.2349, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.6731023269992887, | |
| "grad_norm": 2.882448196411133, | |
| "learning_rate": 5.099147329309959e-06, | |
| "loss": 1.2466, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6739152525149883, | |
| "grad_norm": 3.0320730209350586, | |
| "learning_rate": 5.076210063458622e-06, | |
| "loss": 1.2157, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.674728178030688, | |
| "grad_norm": 3.285125970840454, | |
| "learning_rate": 5.0533069419002e-06, | |
| "loss": 1.3087, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6755411035463875, | |
| "grad_norm": 3.9807510375976562, | |
| "learning_rate": 5.030438123457655e-06, | |
| "loss": 1.2153, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.6763540290620872, | |
| "grad_norm": 3.12975811958313, | |
| "learning_rate": 5.007603766716063e-06, | |
| "loss": 1.2064, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.6771669545777869, | |
| "grad_norm": 2.9132258892059326, | |
| "learning_rate": 4.984804030021533e-06, | |
| "loss": 1.2132, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.6779798800934864, | |
| "grad_norm": 2.872042417526245, | |
| "learning_rate": 4.962039071480102e-06, | |
| "loss": 1.2618, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.6787928056091861, | |
| "grad_norm": 3.7190613746643066, | |
| "learning_rate": 4.939309048956622e-06, | |
| "loss": 1.2482, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6796057311248856, | |
| "grad_norm": 5.171625137329102, | |
| "learning_rate": 4.9166141200736885e-06, | |
| "loss": 1.2848, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.6804186566405853, | |
| "grad_norm": 3.5912961959838867, | |
| "learning_rate": 4.89395444221055e-06, | |
| "loss": 1.2525, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.6812315821562849, | |
| "grad_norm": 3.9113729000091553, | |
| "learning_rate": 4.871330172501979e-06, | |
| "loss": 1.2444, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.6820445076719845, | |
| "grad_norm": 5.135432720184326, | |
| "learning_rate": 4.848741467837228e-06, | |
| "loss": 1.2189, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.6828574331876842, | |
| "grad_norm": 3.0934841632843018, | |
| "learning_rate": 4.826188484858918e-06, | |
| "loss": 1.2357, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6836703587033838, | |
| "grad_norm": 3.951188325881958, | |
| "learning_rate": 4.803671379961945e-06, | |
| "loss": 1.2539, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.6844832842190834, | |
| "grad_norm": 6.205260753631592, | |
| "learning_rate": 4.781190309292421e-06, | |
| "loss": 1.2537, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.685296209734783, | |
| "grad_norm": 4.493546485900879, | |
| "learning_rate": 4.758745428746569e-06, | |
| "loss": 1.252, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.6861091352504827, | |
| "grad_norm": 4.0202436447143555, | |
| "learning_rate": 4.736336893969652e-06, | |
| "loss": 1.1887, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.6869220607661823, | |
| "grad_norm": 2.65285587310791, | |
| "learning_rate": 4.7139648603548925e-06, | |
| "loss": 1.2612, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6877349862818819, | |
| "grad_norm": 3.629551410675049, | |
| "learning_rate": 4.691629483042387e-06, | |
| "loss": 1.2411, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.6885479117975816, | |
| "grad_norm": 3.20709228515625, | |
| "learning_rate": 4.669330916918043e-06, | |
| "loss": 1.1949, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.6893608373132811, | |
| "grad_norm": 3.19427752494812, | |
| "learning_rate": 4.647069316612502e-06, | |
| "loss": 1.2134, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.6901737628289808, | |
| "grad_norm": 3.6364243030548096, | |
| "learning_rate": 4.624844836500052e-06, | |
| "loss": 1.2915, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.6909866883446805, | |
| "grad_norm": 3.5689237117767334, | |
| "learning_rate": 4.60265763069758e-06, | |
| "loss": 1.2234, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.69179961386038, | |
| "grad_norm": 3.1175014972686768, | |
| "learning_rate": 4.580507853063487e-06, | |
| "loss": 1.1833, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.6926125393760797, | |
| "grad_norm": 2.945756196975708, | |
| "learning_rate": 4.5583956571966295e-06, | |
| "loss": 1.2231, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.6934254648917793, | |
| "grad_norm": 4.729986667633057, | |
| "learning_rate": 4.5363211964352524e-06, | |
| "loss": 1.2578, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.6942383904074789, | |
| "grad_norm": 2.7775003910064697, | |
| "learning_rate": 4.514284623855915e-06, | |
| "loss": 1.2678, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.6950513159231786, | |
| "grad_norm": 4.027686595916748, | |
| "learning_rate": 4.4922860922724466e-06, | |
| "loss": 1.1692, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6958642414388782, | |
| "grad_norm": 3.3442118167877197, | |
| "learning_rate": 4.470325754234881e-06, | |
| "loss": 1.2515, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.6966771669545778, | |
| "grad_norm": 3.197281837463379, | |
| "learning_rate": 4.448403762028391e-06, | |
| "loss": 1.2789, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.6974900924702774, | |
| "grad_norm": 3.1467063426971436, | |
| "learning_rate": 4.426520267672244e-06, | |
| "loss": 1.2498, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.698303017985977, | |
| "grad_norm": 8.657835960388184, | |
| "learning_rate": 4.40467542291874e-06, | |
| "loss": 1.2149, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.6991159435016766, | |
| "grad_norm": 5.045658111572266, | |
| "learning_rate": 4.382869379252152e-06, | |
| "loss": 1.2143, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6999288690173763, | |
| "grad_norm": 3.543026924133301, | |
| "learning_rate": 4.361102287887698e-06, | |
| "loss": 1.2727, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.700741794533076, | |
| "grad_norm": 3.2592012882232666, | |
| "learning_rate": 4.339374299770477e-06, | |
| "loss": 1.2528, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7015547200487755, | |
| "grad_norm": 3.284749984741211, | |
| "learning_rate": 4.31768556557441e-06, | |
| "loss": 1.1814, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.7023676455644752, | |
| "grad_norm": 2.9172427654266357, | |
| "learning_rate": 4.296036235701235e-06, | |
| "loss": 1.2536, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.7031805710801747, | |
| "grad_norm": 8.07040023803711, | |
| "learning_rate": 4.274426460279412e-06, | |
| "loss": 1.2113, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.7039934965958744, | |
| "grad_norm": 3.0349769592285156, | |
| "learning_rate": 4.252856389163128e-06, | |
| "loss": 1.2279, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.7048064221115741, | |
| "grad_norm": 2.7983269691467285, | |
| "learning_rate": 4.231326171931231e-06, | |
| "loss": 1.2585, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.7056193476272736, | |
| "grad_norm": 3.153099775314331, | |
| "learning_rate": 4.209835957886196e-06, | |
| "loss": 1.2576, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.7064322731429733, | |
| "grad_norm": 3.4303712844848633, | |
| "learning_rate": 4.188385896053098e-06, | |
| "loss": 1.2569, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.7072451986586729, | |
| "grad_norm": 3.310842990875244, | |
| "learning_rate": 4.166976135178575e-06, | |
| "loss": 1.2162, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7080581241743725, | |
| "grad_norm": 3.982365846633911, | |
| "learning_rate": 4.1456068237297964e-06, | |
| "loss": 1.2409, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.7088710496900722, | |
| "grad_norm": 3.0641191005706787, | |
| "learning_rate": 4.124278109893432e-06, | |
| "loss": 1.2563, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.7096839752057718, | |
| "grad_norm": 2.9682273864746094, | |
| "learning_rate": 4.10299014157462e-06, | |
| "loss": 1.1857, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.7104969007214714, | |
| "grad_norm": 6.076914310455322, | |
| "learning_rate": 4.0817430663959536e-06, | |
| "loss": 1.2108, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.711309826237171, | |
| "grad_norm": 8.528678894042969, | |
| "learning_rate": 4.06053703169645e-06, | |
| "loss": 1.2185, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.7121227517528707, | |
| "grad_norm": 3.4424145221710205, | |
| "learning_rate": 4.039372184530521e-06, | |
| "loss": 1.2461, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.7129356772685703, | |
| "grad_norm": 3.1624224185943604, | |
| "learning_rate": 4.0182486716669656e-06, | |
| "loss": 1.2282, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.7137486027842699, | |
| "grad_norm": 4.986435890197754, | |
| "learning_rate": 3.9971666395879605e-06, | |
| "loss": 1.2048, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.7145615282999696, | |
| "grad_norm": 3.537174701690674, | |
| "learning_rate": 3.9761262344880096e-06, | |
| "loss": 1.2752, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.7153744538156691, | |
| "grad_norm": 2.7389779090881348, | |
| "learning_rate": 3.9551276022729644e-06, | |
| "loss": 1.2434, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7161873793313688, | |
| "grad_norm": 3.5238423347473145, | |
| "learning_rate": 3.9341708885590034e-06, | |
| "loss": 1.2409, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.7170003048470683, | |
| "grad_norm": 3.9080941677093506, | |
| "learning_rate": 3.913256238671607e-06, | |
| "loss": 1.2019, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.717813230362768, | |
| "grad_norm": 4.038003921508789, | |
| "learning_rate": 3.89238379764457e-06, | |
| "loss": 1.2212, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.7186261558784677, | |
| "grad_norm": 3.344622850418091, | |
| "learning_rate": 3.871553710218988e-06, | |
| "loss": 1.2067, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.7194390813941672, | |
| "grad_norm": 3.5090816020965576, | |
| "learning_rate": 3.850766120842252e-06, | |
| "loss": 1.2171, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7202520069098669, | |
| "grad_norm": 3.003899335861206, | |
| "learning_rate": 3.830021173667048e-06, | |
| "loss": 1.2371, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.7210649324255665, | |
| "grad_norm": 3.3116228580474854, | |
| "learning_rate": 3.809319012550352e-06, | |
| "loss": 1.2123, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.7218778579412661, | |
| "grad_norm": 3.532245397567749, | |
| "learning_rate": 3.788659781052444e-06, | |
| "loss": 1.2629, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.7226907834569658, | |
| "grad_norm": 4.061065196990967, | |
| "learning_rate": 3.7680436224359084e-06, | |
| "loss": 1.174, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.7235037089726654, | |
| "grad_norm": 3.3992788791656494, | |
| "learning_rate": 3.747470679664624e-06, | |
| "loss": 1.2209, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.724316634488365, | |
| "grad_norm": 3.4010937213897705, | |
| "learning_rate": 3.7269410954028107e-06, | |
| "loss": 1.2426, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.7251295600040646, | |
| "grad_norm": 2.854327917098999, | |
| "learning_rate": 3.706455012013994e-06, | |
| "loss": 1.1932, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7259424855197643, | |
| "grad_norm": 3.451002836227417, | |
| "learning_rate": 3.6860125715600513e-06, | |
| "loss": 1.253, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.7267554110354639, | |
| "grad_norm": 3.123344898223877, | |
| "learning_rate": 3.665613915800217e-06, | |
| "loss": 1.2187, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7275683365511635, | |
| "grad_norm": 3.021973133087158, | |
| "learning_rate": 3.6452591861900886e-06, | |
| "loss": 1.2165, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7283812620668632, | |
| "grad_norm": 3.234985589981079, | |
| "learning_rate": 3.6249485238806637e-06, | |
| "loss": 1.212, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7291941875825627, | |
| "grad_norm": 3.7146785259246826, | |
| "learning_rate": 3.6046820697173514e-06, | |
| "loss": 1.2697, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.7300071130982624, | |
| "grad_norm": 3.134507417678833, | |
| "learning_rate": 3.5844599642389965e-06, | |
| "loss": 1.2433, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7308200386139619, | |
| "grad_norm": 2.9155194759368896, | |
| "learning_rate": 3.564282347676903e-06, | |
| "loss": 1.2403, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.7316329641296616, | |
| "grad_norm": 3.148232936859131, | |
| "learning_rate": 3.54414935995387e-06, | |
| "loss": 1.2575, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7324458896453613, | |
| "grad_norm": 2.685274124145508, | |
| "learning_rate": 3.524061140683206e-06, | |
| "loss": 1.2124, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.7332588151610608, | |
| "grad_norm": 3.4557571411132812, | |
| "learning_rate": 3.5040178291677816e-06, | |
| "loss": 1.2105, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7340717406767605, | |
| "grad_norm": 2.8230202198028564, | |
| "learning_rate": 3.4840195643990383e-06, | |
| "loss": 1.1745, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.7348846661924601, | |
| "grad_norm": 3.311697483062744, | |
| "learning_rate": 3.464066485056048e-06, | |
| "loss": 1.222, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7356975917081597, | |
| "grad_norm": 3.2953929901123047, | |
| "learning_rate": 3.444158729504549e-06, | |
| "loss": 1.2688, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7365105172238594, | |
| "grad_norm": 3.3319778442382812, | |
| "learning_rate": 3.4242964357959597e-06, | |
| "loss": 1.2539, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.737323442739559, | |
| "grad_norm": 3.124361753463745, | |
| "learning_rate": 3.4044797416664564e-06, | |
| "loss": 1.2527, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.7381363682552586, | |
| "grad_norm": 2.9690327644348145, | |
| "learning_rate": 3.3847087845359996e-06, | |
| "loss": 1.2722, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.7389492937709582, | |
| "grad_norm": 5.119561672210693, | |
| "learning_rate": 3.364983701507376e-06, | |
| "loss": 1.2233, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.7397622192866579, | |
| "grad_norm": 2.818423271179199, | |
| "learning_rate": 3.3453046293652657e-06, | |
| "loss": 1.2438, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7405751448023575, | |
| "grad_norm": 3.0988523960113525, | |
| "learning_rate": 3.3256717045752794e-06, | |
| "loss": 1.223, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.7413880703180571, | |
| "grad_norm": 3.082066297531128, | |
| "learning_rate": 3.3060850632830167e-06, | |
| "loss": 1.244, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.7422009958337568, | |
| "grad_norm": 2.944265127182007, | |
| "learning_rate": 3.286544841313126e-06, | |
| "loss": 1.2308, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.7430139213494563, | |
| "grad_norm": 3.608762502670288, | |
| "learning_rate": 3.2670511741683475e-06, | |
| "loss": 1.2018, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.743826846865156, | |
| "grad_norm": 3.958385705947876, | |
| "learning_rate": 3.2476041970285945e-06, | |
| "loss": 1.2136, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7446397723808557, | |
| "grad_norm": 2.9133267402648926, | |
| "learning_rate": 3.2282040447500063e-06, | |
| "loss": 1.2649, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.7454526978965552, | |
| "grad_norm": 3.8698244094848633, | |
| "learning_rate": 3.208850851863998e-06, | |
| "loss": 1.2265, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.7462656234122549, | |
| "grad_norm": 4.550247669219971, | |
| "learning_rate": 3.189544752576369e-06, | |
| "loss": 1.2046, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7470785489279544, | |
| "grad_norm": 2.9886014461517334, | |
| "learning_rate": 3.1702858807663175e-06, | |
| "loss": 1.2812, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.7478914744436541, | |
| "grad_norm": 3.3736209869384766, | |
| "learning_rate": 3.151074369985556e-06, | |
| "loss": 1.2482, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7487043999593537, | |
| "grad_norm": 2.7061290740966797, | |
| "learning_rate": 3.131910353457369e-06, | |
| "loss": 1.2474, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.7495173254750533, | |
| "grad_norm": 4.058886528015137, | |
| "learning_rate": 3.112793964075681e-06, | |
| "loss": 1.1897, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.750330250990753, | |
| "grad_norm": 3.3311798572540283, | |
| "learning_rate": 3.0937253344041507e-06, | |
| "loss": 1.2129, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.7511431765064526, | |
| "grad_norm": 3.2716569900512695, | |
| "learning_rate": 3.074704596675242e-06, | |
| "loss": 1.1763, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7519561020221522, | |
| "grad_norm": 3.360356569290161, | |
| "learning_rate": 3.055731882789311e-06, | |
| "loss": 1.2771, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7527690275378518, | |
| "grad_norm": 3.9494638442993164, | |
| "learning_rate": 3.0368073243136874e-06, | |
| "loss": 1.2551, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7535819530535515, | |
| "grad_norm": 3.3180434703826904, | |
| "learning_rate": 3.0179310524817707e-06, | |
| "loss": 1.245, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.7543948785692511, | |
| "grad_norm": 4.963752746582031, | |
| "learning_rate": 2.9991031981921026e-06, | |
| "loss": 1.2266, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7552078040849507, | |
| "grad_norm": 3.1220555305480957, | |
| "learning_rate": 2.9803238920074784e-06, | |
| "loss": 1.2057, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.7560207296006504, | |
| "grad_norm": 2.8764801025390625, | |
| "learning_rate": 2.961593264154038e-06, | |
| "loss": 1.2157, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7568336551163499, | |
| "grad_norm": 2.682791233062744, | |
| "learning_rate": 2.9429114445203423e-06, | |
| "loss": 1.1899, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.7576465806320496, | |
| "grad_norm": 5.8080878257751465, | |
| "learning_rate": 2.924278562656514e-06, | |
| "loss": 1.1661, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7584595061477493, | |
| "grad_norm": 3.5146303176879883, | |
| "learning_rate": 2.90569474777329e-06, | |
| "loss": 1.2712, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.7592724316634488, | |
| "grad_norm": 3.092174530029297, | |
| "learning_rate": 2.8871601287411634e-06, | |
| "loss": 1.2297, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7600853571791485, | |
| "grad_norm": 2.807847499847412, | |
| "learning_rate": 2.8686748340894744e-06, | |
| "loss": 1.2369, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.760898282694848, | |
| "grad_norm": 2.8753178119659424, | |
| "learning_rate": 2.850238992005514e-06, | |
| "loss": 1.2812, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7617112082105477, | |
| "grad_norm": 4.227181434631348, | |
| "learning_rate": 2.8318527303336465e-06, | |
| "loss": 1.2143, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.7625241337262473, | |
| "grad_norm": 3.921201229095459, | |
| "learning_rate": 2.81351617657442e-06, | |
| "loss": 1.2446, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.763337059241947, | |
| "grad_norm": 3.164557695388794, | |
| "learning_rate": 2.795229457883678e-06, | |
| "loss": 1.2085, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.7641499847576466, | |
| "grad_norm": 3.0904717445373535, | |
| "learning_rate": 2.7769927010716814e-06, | |
| "loss": 1.2436, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7649629102733462, | |
| "grad_norm": 9.615850448608398, | |
| "learning_rate": 2.7588060326022205e-06, | |
| "loss": 1.2179, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.7657758357890458, | |
| "grad_norm": 7.9210357666015625, | |
| "learning_rate": 2.740669578591755e-06, | |
| "loss": 1.1704, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7665887613047454, | |
| "grad_norm": 3.03359055519104, | |
| "learning_rate": 2.7225834648085282e-06, | |
| "loss": 1.1919, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.7674016868204451, | |
| "grad_norm": 3.331894636154175, | |
| "learning_rate": 2.7045478166716843e-06, | |
| "loss": 1.2297, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.7682146123361447, | |
| "grad_norm": 2.9995782375335693, | |
| "learning_rate": 2.6865627592504295e-06, | |
| "loss": 1.1936, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7690275378518443, | |
| "grad_norm": 11.267196655273438, | |
| "learning_rate": 2.668628417263137e-06, | |
| "loss": 1.2385, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.769840463367544, | |
| "grad_norm": 4.058920383453369, | |
| "learning_rate": 2.6507449150764852e-06, | |
| "loss": 1.2078, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.7706533888832435, | |
| "grad_norm": 2.8774616718292236, | |
| "learning_rate": 2.632912376704607e-06, | |
| "loss": 1.2585, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7714663143989432, | |
| "grad_norm": 3.4053540229797363, | |
| "learning_rate": 2.615130925808228e-06, | |
| "loss": 1.2739, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.7722792399146429, | |
| "grad_norm": 3.0022501945495605, | |
| "learning_rate": 2.597400685693795e-06, | |
| "loss": 1.2136, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7730921654303424, | |
| "grad_norm": 3.6466481685638428, | |
| "learning_rate": 2.5797217793126373e-06, | |
| "loss": 1.3104, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.7739050909460421, | |
| "grad_norm": 4.021648406982422, | |
| "learning_rate": 2.5620943292601074e-06, | |
| "loss": 1.2621, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.7747180164617417, | |
| "grad_norm": 2.996817111968994, | |
| "learning_rate": 2.5445184577747305e-06, | |
| "loss": 1.2194, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.7755309419774413, | |
| "grad_norm": 3.8881189823150635, | |
| "learning_rate": 2.52699428673736e-06, | |
| "loss": 1.2516, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.776343867493141, | |
| "grad_norm": 3.279557228088379, | |
| "learning_rate": 2.5095219376703183e-06, | |
| "loss": 1.2116, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7771567930088406, | |
| "grad_norm": 3.1030569076538086, | |
| "learning_rate": 2.4921015317365794e-06, | |
| "loss": 1.2902, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.7779697185245402, | |
| "grad_norm": 3.7724967002868652, | |
| "learning_rate": 2.4747331897389103e-06, | |
| "loss": 1.2783, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.7787826440402398, | |
| "grad_norm": 2.808138132095337, | |
| "learning_rate": 2.4574170321190305e-06, | |
| "loss": 1.2191, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.7795955695559394, | |
| "grad_norm": 2.6033871173858643, | |
| "learning_rate": 2.440153178956798e-06, | |
| "loss": 1.2282, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.780408495071639, | |
| "grad_norm": 2.870957612991333, | |
| "learning_rate": 2.42294174996935e-06, | |
| "loss": 1.2118, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7812214205873387, | |
| "grad_norm": 2.913543462753296, | |
| "learning_rate": 2.40578286451029e-06, | |
| "loss": 1.2352, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.7820343461030383, | |
| "grad_norm": 3.7069716453552246, | |
| "learning_rate": 2.38867664156886e-06, | |
| "loss": 1.2218, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.7828472716187379, | |
| "grad_norm": 4.073693752288818, | |
| "learning_rate": 2.3716231997691007e-06, | |
| "loss": 1.1997, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.7836601971344376, | |
| "grad_norm": 2.7815756797790527, | |
| "learning_rate": 2.3546226573690444e-06, | |
| "loss": 1.1898, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.7844731226501371, | |
| "grad_norm": 3.2033910751342773, | |
| "learning_rate": 2.3376751322599e-06, | |
| "loss": 1.2575, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7852860481658368, | |
| "grad_norm": 2.805227518081665, | |
| "learning_rate": 2.320780741965206e-06, | |
| "loss": 1.221, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.7860989736815365, | |
| "grad_norm": 2.747638463973999, | |
| "learning_rate": 2.3039396036400463e-06, | |
| "loss": 1.2199, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.786911899197236, | |
| "grad_norm": 2.758178234100342, | |
| "learning_rate": 2.287151834070226e-06, | |
| "loss": 1.1847, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.7877248247129357, | |
| "grad_norm": 3.467595338821411, | |
| "learning_rate": 2.2704175496714552e-06, | |
| "loss": 1.2456, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.7885377502286353, | |
| "grad_norm": 5.487158298492432, | |
| "learning_rate": 2.2537368664885527e-06, | |
| "loss": 1.2061, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7893506757443349, | |
| "grad_norm": 3.063075542449951, | |
| "learning_rate": 2.2371099001946385e-06, | |
| "loss": 1.264, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.7901636012600346, | |
| "grad_norm": 2.6598317623138428, | |
| "learning_rate": 2.2205367660903267e-06, | |
| "loss": 1.1971, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.7909765267757342, | |
| "grad_norm": 3.249379873275757, | |
| "learning_rate": 2.2040175791029305e-06, | |
| "loss": 1.2442, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.7917894522914338, | |
| "grad_norm": 3.2312817573547363, | |
| "learning_rate": 2.187552453785662e-06, | |
| "loss": 1.1871, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.7926023778071334, | |
| "grad_norm": 3.060171604156494, | |
| "learning_rate": 2.1711415043168395e-06, | |
| "loss": 1.2198, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.793415303322833, | |
| "grad_norm": 3.2674033641815186, | |
| "learning_rate": 2.1547848444991025e-06, | |
| "loss": 1.2343, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.7942282288385327, | |
| "grad_norm": 3.822357654571533, | |
| "learning_rate": 2.138482587758605e-06, | |
| "loss": 1.1876, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.7950411543542323, | |
| "grad_norm": 3.4773342609405518, | |
| "learning_rate": 2.1222348471442477e-06, | |
| "loss": 1.1976, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.795854079869932, | |
| "grad_norm": 3.8379478454589844, | |
| "learning_rate": 2.1060417353268845e-06, | |
| "loss": 1.198, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.7966670053856315, | |
| "grad_norm": 4.963233470916748, | |
| "learning_rate": 2.0899033645985423e-06, | |
| "loss": 1.2991, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7974799309013312, | |
| "grad_norm": 3.4560701847076416, | |
| "learning_rate": 2.073819846871646e-06, | |
| "loss": 1.1936, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.7982928564170307, | |
| "grad_norm": 2.69124698638916, | |
| "learning_rate": 2.0577912936782317e-06, | |
| "loss": 1.1708, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.7991057819327304, | |
| "grad_norm": 2.973618268966675, | |
| "learning_rate": 2.041817816169187e-06, | |
| "loss": 1.2535, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.7999187074484301, | |
| "grad_norm": 3.1709506511688232, | |
| "learning_rate": 2.025899525113474e-06, | |
| "loss": 1.2015, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.8007316329641296, | |
| "grad_norm": 2.750272274017334, | |
| "learning_rate": 2.010036530897359e-06, | |
| "loss": 1.2677, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.8015445584798293, | |
| "grad_norm": 2.7218148708343506, | |
| "learning_rate": 1.9942289435236506e-06, | |
| "loss": 1.2679, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.8023574839955289, | |
| "grad_norm": 3.0237209796905518, | |
| "learning_rate": 1.978476872610939e-06, | |
| "loss": 1.2425, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.8031704095112285, | |
| "grad_norm": 4.8593363761901855, | |
| "learning_rate": 1.962780427392823e-06, | |
| "loss": 1.2754, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.8039833350269282, | |
| "grad_norm": 4.2402544021606445, | |
| "learning_rate": 1.9471397167171714e-06, | |
| "loss": 1.1841, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.8047962605426278, | |
| "grad_norm": 2.8616418838500977, | |
| "learning_rate": 1.931554849045355e-06, | |
| "loss": 1.1712, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8056091860583274, | |
| "grad_norm": 3.0303030014038086, | |
| "learning_rate": 1.916025932451493e-06, | |
| "loss": 1.2217, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.806422111574027, | |
| "grad_norm": 3.096165180206299, | |
| "learning_rate": 1.9005530746217238e-06, | |
| "loss": 1.1515, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.8072350370897267, | |
| "grad_norm": 5.142411231994629, | |
| "learning_rate": 1.8851363828534253e-06, | |
| "loss": 1.167, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.8080479626054263, | |
| "grad_norm": 3.1720876693725586, | |
| "learning_rate": 1.869775964054501e-06, | |
| "loss": 1.1896, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.8088608881211259, | |
| "grad_norm": 3.833009719848633, | |
| "learning_rate": 1.8544719247426224e-06, | |
| "loss": 1.2517, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.8096738136368256, | |
| "grad_norm": 3.188974618911743, | |
| "learning_rate": 1.8392243710444911e-06, | |
| "loss": 1.2795, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.8104867391525251, | |
| "grad_norm": 3.601663589477539, | |
| "learning_rate": 1.8240334086951117e-06, | |
| "loss": 1.2366, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.8112996646682248, | |
| "grad_norm": 3.1258544921875, | |
| "learning_rate": 1.8088991430370506e-06, | |
| "loss": 1.2002, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.8121125901839243, | |
| "grad_norm": 2.71299409866333, | |
| "learning_rate": 1.7938216790197071e-06, | |
| "loss": 1.2609, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.812925515699624, | |
| "grad_norm": 3.2866601943969727, | |
| "learning_rate": 1.77880112119859e-06, | |
| "loss": 1.2571, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8137384412153237, | |
| "grad_norm": 3.1053292751312256, | |
| "learning_rate": 1.7638375737345804e-06, | |
| "loss": 1.2316, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.8145513667310232, | |
| "grad_norm": 2.839862823486328, | |
| "learning_rate": 1.7489311403932274e-06, | |
| "loss": 1.2464, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.8153642922467229, | |
| "grad_norm": 2.750040292739868, | |
| "learning_rate": 1.7340819245440166e-06, | |
| "loss": 1.2639, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.8161772177624225, | |
| "grad_norm": 3.918286085128784, | |
| "learning_rate": 1.7192900291596493e-06, | |
| "loss": 1.2379, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.8169901432781221, | |
| "grad_norm": 3.579942226409912, | |
| "learning_rate": 1.7045555568153415e-06, | |
| "loss": 1.1943, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.8178030687938218, | |
| "grad_norm": 3.2873690128326416, | |
| "learning_rate": 1.6898786096881104e-06, | |
| "loss": 1.2457, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.8186159943095214, | |
| "grad_norm": 2.721126079559326, | |
| "learning_rate": 1.6752592895560493e-06, | |
| "loss": 1.2681, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.819428919825221, | |
| "grad_norm": 2.9273929595947266, | |
| "learning_rate": 1.6606976977976408e-06, | |
| "loss": 1.1985, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.8202418453409206, | |
| "grad_norm": 3.6816606521606445, | |
| "learning_rate": 1.6461939353910494e-06, | |
| "loss": 1.2128, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.8210547708566203, | |
| "grad_norm": 2.8991682529449463, | |
| "learning_rate": 1.631748102913412e-06, | |
| "loss": 1.224, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8218676963723199, | |
| "grad_norm": 3.2517406940460205, | |
| "learning_rate": 1.6173603005401505e-06, | |
| "loss": 1.1936, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.8226806218880195, | |
| "grad_norm": 3.0502426624298096, | |
| "learning_rate": 1.6030306280442764e-06, | |
| "loss": 1.2555, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.8234935474037192, | |
| "grad_norm": 3.2694664001464844, | |
| "learning_rate": 1.588759184795694e-06, | |
| "loss": 1.2643, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.8243064729194187, | |
| "grad_norm": 2.9429259300231934, | |
| "learning_rate": 1.574546069760514e-06, | |
| "loss": 1.2221, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.8251193984351184, | |
| "grad_norm": 3.2481369972229004, | |
| "learning_rate": 1.5603913815003634e-06, | |
| "loss": 1.1949, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8259323239508181, | |
| "grad_norm": 3.006603717803955, | |
| "learning_rate": 1.5462952181717117e-06, | |
| "loss": 1.1593, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.8267452494665176, | |
| "grad_norm": 2.8126094341278076, | |
| "learning_rate": 1.532257677525183e-06, | |
| "loss": 1.2094, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.8275581749822173, | |
| "grad_norm": 3.258910894393921, | |
| "learning_rate": 1.5182788569048689e-06, | |
| "loss": 1.1524, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.8283711004979168, | |
| "grad_norm": 3.097121477127075, | |
| "learning_rate": 1.5043588532476827e-06, | |
| "loss": 1.2063, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.8291840260136165, | |
| "grad_norm": 3.5429606437683105, | |
| "learning_rate": 1.49049776308265e-06, | |
| "loss": 1.1579, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8299969515293161, | |
| "grad_norm": 3.0676991939544678, | |
| "learning_rate": 1.476695682530268e-06, | |
| "loss": 1.2063, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.8308098770450157, | |
| "grad_norm": 3.191493272781372, | |
| "learning_rate": 1.4629527073018267e-06, | |
| "loss": 1.2724, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8316228025607154, | |
| "grad_norm": 4.181521415710449, | |
| "learning_rate": 1.449268932698743e-06, | |
| "loss": 1.2627, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.832435728076415, | |
| "grad_norm": 3.7330870628356934, | |
| "learning_rate": 1.4356444536119085e-06, | |
| "loss": 1.1875, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8332486535921146, | |
| "grad_norm": 3.5213124752044678, | |
| "learning_rate": 1.422079364521024e-06, | |
| "loss": 1.2345, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8340615791078142, | |
| "grad_norm": 3.672848701477051, | |
| "learning_rate": 1.4085737594939497e-06, | |
| "loss": 1.2451, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8348745046235139, | |
| "grad_norm": 3.2613043785095215, | |
| "learning_rate": 1.3951277321860468e-06, | |
| "loss": 1.261, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.8356874301392135, | |
| "grad_norm": 3.1444427967071533, | |
| "learning_rate": 1.381741375839537e-06, | |
| "loss": 1.2205, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.8365003556549131, | |
| "grad_norm": 3.7306652069091797, | |
| "learning_rate": 1.3684147832828409e-06, | |
| "loss": 1.2343, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.8373132811706128, | |
| "grad_norm": 3.6698615550994873, | |
| "learning_rate": 1.355148046929956e-06, | |
| "loss": 1.2195, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8381262066863123, | |
| "grad_norm": 4.807132244110107, | |
| "learning_rate": 1.3419412587797908e-06, | |
| "loss": 1.1946, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.838939132202012, | |
| "grad_norm": 3.0877437591552734, | |
| "learning_rate": 1.3287945104155487e-06, | |
| "loss": 1.1901, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.8397520577177117, | |
| "grad_norm": 6.123032093048096, | |
| "learning_rate": 1.3157078930040856e-06, | |
| "loss": 1.2338, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.8405649832334112, | |
| "grad_norm": 3.8207807540893555, | |
| "learning_rate": 1.3026814972952674e-06, | |
| "loss": 1.2064, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8413779087491109, | |
| "grad_norm": 3.591054916381836, | |
| "learning_rate": 1.2897154136213542e-06, | |
| "loss": 1.248, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8421908342648105, | |
| "grad_norm": 3.14103364944458, | |
| "learning_rate": 1.2768097318963701e-06, | |
| "loss": 1.2247, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.8430037597805101, | |
| "grad_norm": 3.2605819702148438, | |
| "learning_rate": 1.2639645416154744e-06, | |
| "loss": 1.2265, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.8438166852962098, | |
| "grad_norm": 3.2860848903656006, | |
| "learning_rate": 1.2511799318543493e-06, | |
| "loss": 1.2083, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8446296108119093, | |
| "grad_norm": 3.6271586418151855, | |
| "learning_rate": 1.2384559912685768e-06, | |
| "loss": 1.2562, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.845442536327609, | |
| "grad_norm": 3.0439271926879883, | |
| "learning_rate": 1.2257928080930236e-06, | |
| "loss": 1.1838, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8462554618433086, | |
| "grad_norm": 2.9285664558410645, | |
| "learning_rate": 1.2131904701412345e-06, | |
| "loss": 1.2271, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.8470683873590082, | |
| "grad_norm": 4.422233581542969, | |
| "learning_rate": 1.2006490648048118e-06, | |
| "loss": 1.2218, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8478813128747078, | |
| "grad_norm": 3.193469524383545, | |
| "learning_rate": 1.1881686790528279e-06, | |
| "loss": 1.2167, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.8486942383904075, | |
| "grad_norm": 2.9041225910186768, | |
| "learning_rate": 1.1757493994312052e-06, | |
| "loss": 1.1652, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8495071639061071, | |
| "grad_norm": 2.902376890182495, | |
| "learning_rate": 1.1633913120621188e-06, | |
| "loss": 1.209, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8503200894218067, | |
| "grad_norm": 2.7561545372009277, | |
| "learning_rate": 1.151094502643414e-06, | |
| "loss": 1.2105, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8511330149375064, | |
| "grad_norm": 3.4532971382141113, | |
| "learning_rate": 1.1388590564479895e-06, | |
| "loss": 1.2457, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.8519459404532059, | |
| "grad_norm": 4.540160179138184, | |
| "learning_rate": 1.1266850583232224e-06, | |
| "loss": 1.1941, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8527588659689056, | |
| "grad_norm": 2.99617075920105, | |
| "learning_rate": 1.1145725926903772e-06, | |
| "loss": 1.2138, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.8535717914846053, | |
| "grad_norm": 3.2309064865112305, | |
| "learning_rate": 1.1025217435440116e-06, | |
| "loss": 1.2373, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8543847170003048, | |
| "grad_norm": 2.7454960346221924, | |
| "learning_rate": 1.0905325944514034e-06, | |
| "loss": 1.2473, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.8551976425160045, | |
| "grad_norm": 8.090238571166992, | |
| "learning_rate": 1.078605228551971e-06, | |
| "loss": 1.2342, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.8560105680317041, | |
| "grad_norm": 3.7213146686553955, | |
| "learning_rate": 1.0667397285566893e-06, | |
| "loss": 1.2232, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.8568234935474037, | |
| "grad_norm": 3.4427578449249268, | |
| "learning_rate": 1.0549361767475241e-06, | |
| "loss": 1.2474, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8576364190631034, | |
| "grad_norm": 3.212726593017578, | |
| "learning_rate": 1.0431946549768567e-06, | |
| "loss": 1.2727, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.858449344578803, | |
| "grad_norm": 3.895224094390869, | |
| "learning_rate": 1.0315152446669142e-06, | |
| "loss": 1.2451, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8592622700945026, | |
| "grad_norm": 2.8261964321136475, | |
| "learning_rate": 1.019898026809214e-06, | |
| "loss": 1.2416, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.8600751956102022, | |
| "grad_norm": 3.2146759033203125, | |
| "learning_rate": 1.0083430819639962e-06, | |
| "loss": 1.2258, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8608881211259019, | |
| "grad_norm": 5.239031791687012, | |
| "learning_rate": 9.968504902596566e-07, | |
| "loss": 1.2089, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.8617010466416014, | |
| "grad_norm": 3.1487622261047363, | |
| "learning_rate": 9.85420331392214e-07, | |
| "loss": 1.2445, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8625139721573011, | |
| "grad_norm": 3.5642974376678467, | |
| "learning_rate": 9.74052684624731e-07, | |
| "loss": 1.2724, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.8633268976730007, | |
| "grad_norm": 3.3064541816711426, | |
| "learning_rate": 9.62747628786782e-07, | |
| "loss": 1.235, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8641398231887003, | |
| "grad_norm": 2.7583703994750977, | |
| "learning_rate": 9.515052422739035e-07, | |
| "loss": 1.1864, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.8649527487044, | |
| "grad_norm": 2.8002755641937256, | |
| "learning_rate": 9.403256030470386e-07, | |
| "loss": 1.1888, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8657656742200995, | |
| "grad_norm": 4.0211710929870605, | |
| "learning_rate": 9.292087886320166e-07, | |
| "loss": 1.2513, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8665785997357992, | |
| "grad_norm": 3.937668561935425, | |
| "learning_rate": 9.181548761189996e-07, | |
| "loss": 1.2111, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.8673915252514989, | |
| "grad_norm": 31.291566848754883, | |
| "learning_rate": 9.071639421619527e-07, | |
| "loss": 1.2234, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.8682044507671984, | |
| "grad_norm": 4.150018692016602, | |
| "learning_rate": 8.962360629781164e-07, | |
| "loss": 1.2205, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.8690173762828981, | |
| "grad_norm": 2.8017213344573975, | |
| "learning_rate": 8.853713143474685e-07, | |
| "loss": 1.27, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.8698303017985977, | |
| "grad_norm": 2.9798476696014404, | |
| "learning_rate": 8.745697716122081e-07, | |
| "loss": 1.2169, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8706432273142973, | |
| "grad_norm": 4.344991683959961, | |
| "learning_rate": 8.638315096762318e-07, | |
| "loss": 1.2217, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.871456152829997, | |
| "grad_norm": 2.9421257972717285, | |
| "learning_rate": 8.531566030046035e-07, | |
| "loss": 1.2399, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.8722690783456966, | |
| "grad_norm": 3.4676921367645264, | |
| "learning_rate": 8.425451256230588e-07, | |
| "loss": 1.1957, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.8730820038613962, | |
| "grad_norm": 3.2855141162872314, | |
| "learning_rate": 8.319971511174718e-07, | |
| "loss": 1.2399, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.8738949293770958, | |
| "grad_norm": 2.990471839904785, | |
| "learning_rate": 8.215127526333499e-07, | |
| "loss": 1.2787, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8747078548927955, | |
| "grad_norm": 3.183928966522217, | |
| "learning_rate": 8.110920028753355e-07, | |
| "loss": 1.1831, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.8755207804084951, | |
| "grad_norm": 2.8277997970581055, | |
| "learning_rate": 8.007349741066939e-07, | |
| "loss": 1.248, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.8763337059241947, | |
| "grad_norm": 2.7392983436584473, | |
| "learning_rate": 7.904417381488083e-07, | |
| "loss": 1.23, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.8771466314398944, | |
| "grad_norm": 5.617170333862305, | |
| "learning_rate": 7.802123663806938e-07, | |
| "loss": 1.2267, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.8779595569555939, | |
| "grad_norm": 2.906653642654419, | |
| "learning_rate": 7.700469297384927e-07, | |
| "loss": 1.2245, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8787724824712936, | |
| "grad_norm": 2.7728428840637207, | |
| "learning_rate": 7.599454987149868e-07, | |
| "loss": 1.2131, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.8795854079869931, | |
| "grad_norm": 2.683861017227173, | |
| "learning_rate": 7.499081433591071e-07, | |
| "loss": 1.1936, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.8803983335026928, | |
| "grad_norm": 2.6362993717193604, | |
| "learning_rate": 7.399349332754458e-07, | |
| "loss": 1.2169, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.8812112590183925, | |
| "grad_norm": 3.3068742752075195, | |
| "learning_rate": 7.300259376237795e-07, | |
| "loss": 1.2098, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.882024184534092, | |
| "grad_norm": 2.825416326522827, | |
| "learning_rate": 7.201812251185869e-07, | |
| "loss": 1.2543, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8828371100497917, | |
| "grad_norm": 3.172919750213623, | |
| "learning_rate": 7.104008640285642e-07, | |
| "loss": 1.1768, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.8836500355654913, | |
| "grad_norm": 3.052677869796753, | |
| "learning_rate": 7.006849221761736e-07, | |
| "loss": 1.2068, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.8844629610811909, | |
| "grad_norm": 2.8510589599609375, | |
| "learning_rate": 6.910334669371433e-07, | |
| "loss": 1.2043, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.8852758865968906, | |
| "grad_norm": 3.4369497299194336, | |
| "learning_rate": 6.814465652400237e-07, | |
| "loss": 1.2467, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.8860888121125902, | |
| "grad_norm": 2.667567491531372, | |
| "learning_rate": 6.719242835657147e-07, | |
| "loss": 1.2594, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8869017376282898, | |
| "grad_norm": 2.983642816543579, | |
| "learning_rate": 6.62466687947001e-07, | |
| "loss": 1.2199, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.8877146631439894, | |
| "grad_norm": 3.583439350128174, | |
| "learning_rate": 6.530738439681017e-07, | |
| "loss": 1.1827, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.8885275886596891, | |
| "grad_norm": 4.706247806549072, | |
| "learning_rate": 6.437458167642164e-07, | |
| "loss": 1.2292, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.8893405141753887, | |
| "grad_norm": 4.394626140594482, | |
| "learning_rate": 6.344826710210584e-07, | |
| "loss": 1.2975, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.8901534396910883, | |
| "grad_norm": 4.5692572593688965, | |
| "learning_rate": 6.252844709744255e-07, | |
| "loss": 1.1853, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.890966365206788, | |
| "grad_norm": 3.4114434719085693, | |
| "learning_rate": 6.161512804097436e-07, | |
| "loss": 1.2067, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.8917792907224875, | |
| "grad_norm": 7.298144340515137, | |
| "learning_rate": 6.070831626616236e-07, | |
| "loss": 1.2149, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.8925922162381872, | |
| "grad_norm": 2.7437572479248047, | |
| "learning_rate": 5.980801806134318e-07, | |
| "loss": 1.2002, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.8934051417538869, | |
| "grad_norm": 3.101397752761841, | |
| "learning_rate": 5.891423966968413e-07, | |
| "loss": 1.2594, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.8942180672695864, | |
| "grad_norm": 3.186479091644287, | |
| "learning_rate": 5.80269872891408e-07, | |
| "loss": 1.1895, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8950309927852861, | |
| "grad_norm": 3.5605878829956055, | |
| "learning_rate": 5.714626707241411e-07, | |
| "loss": 1.1804, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.8958439183009856, | |
| "grad_norm": 3.0213913917541504, | |
| "learning_rate": 5.627208512690641e-07, | |
| "loss": 1.2619, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.8966568438166853, | |
| "grad_norm": 3.0476791858673096, | |
| "learning_rate": 5.5404447514681e-07, | |
| "loss": 1.1429, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.8974697693323849, | |
| "grad_norm": 2.9802823066711426, | |
| "learning_rate": 5.45433602524188e-07, | |
| "loss": 1.2353, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.8982826948480845, | |
| "grad_norm": 3.168029308319092, | |
| "learning_rate": 5.368882931137675e-07, | |
| "loss": 1.1771, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8990956203637842, | |
| "grad_norm": 2.8624963760375977, | |
| "learning_rate": 5.284086061734672e-07, | |
| "loss": 1.1929, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.8999085458794838, | |
| "grad_norm": 3.3826193809509277, | |
| "learning_rate": 5.199946005061462e-07, | |
| "loss": 1.1379, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.9007214713951834, | |
| "grad_norm": 3.2084782123565674, | |
| "learning_rate": 5.116463344591893e-07, | |
| "loss": 1.1694, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.901534396910883, | |
| "grad_norm": 3.6624932289123535, | |
| "learning_rate": 5.033638659241102e-07, | |
| "loss": 1.219, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.9023473224265827, | |
| "grad_norm": 3.2314536571502686, | |
| "learning_rate": 4.951472523361401e-07, | |
| "loss": 1.2457, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.9031602479422823, | |
| "grad_norm": 3.1179494857788086, | |
| "learning_rate": 4.869965506738416e-07, | |
| "loss": 1.232, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.9039731734579819, | |
| "grad_norm": 2.875725030899048, | |
| "learning_rate": 4.789118174587071e-07, | |
| "loss": 1.2515, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.9047860989736816, | |
| "grad_norm": 2.5742199420928955, | |
| "learning_rate": 4.7089310875475856e-07, | |
| "loss": 1.2554, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.9055990244893811, | |
| "grad_norm": 3.2250759601593018, | |
| "learning_rate": 4.6294048016817917e-07, | |
| "loss": 1.2281, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.9064119500050808, | |
| "grad_norm": 2.866562843322754, | |
| "learning_rate": 4.550539868469106e-07, | |
| "loss": 1.2559, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.9072248755207805, | |
| "grad_norm": 2.9703938961029053, | |
| "learning_rate": 4.4723368348027375e-07, | |
| "loss": 1.307, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.90803780103648, | |
| "grad_norm": 3.0078420639038086, | |
| "learning_rate": 4.394796242985933e-07, | |
| "loss": 1.2285, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.9088507265521797, | |
| "grad_norm": 3.0581750869750977, | |
| "learning_rate": 4.317918630728235e-07, | |
| "loss": 1.1751, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.9096636520678792, | |
| "grad_norm": 4.224788188934326, | |
| "learning_rate": 4.241704531141633e-07, | |
| "loss": 1.155, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.9104765775835789, | |
| "grad_norm": 3.2800920009613037, | |
| "learning_rate": 4.166154472737061e-07, | |
| "loss": 1.199, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9112895030992785, | |
| "grad_norm": 5.579473495483398, | |
| "learning_rate": 4.091268979420537e-07, | |
| "loss": 1.1558, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.9121024286149781, | |
| "grad_norm": 3.660987615585327, | |
| "learning_rate": 4.0170485704896453e-07, | |
| "loss": 1.2258, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.9129153541306778, | |
| "grad_norm": 11.064430236816406, | |
| "learning_rate": 3.943493760629924e-07, | |
| "loss": 1.1699, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.9137282796463774, | |
| "grad_norm": 4.9747138023376465, | |
| "learning_rate": 3.8706050599112363e-07, | |
| "loss": 1.2415, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.914541205162077, | |
| "grad_norm": 3.7896888256073, | |
| "learning_rate": 3.798382973784298e-07, | |
| "loss": 1.2221, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.9153541306777766, | |
| "grad_norm": 3.383769989013672, | |
| "learning_rate": 3.7268280030771655e-07, | |
| "loss": 1.196, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.9161670561934763, | |
| "grad_norm": 3.491272211074829, | |
| "learning_rate": 3.655940643991718e-07, | |
| "loss": 1.1786, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.9169799817091759, | |
| "grad_norm": 3.1759097576141357, | |
| "learning_rate": 3.585721388100283e-07, | |
| "loss": 1.1696, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.9177929072248755, | |
| "grad_norm": 2.7568089962005615, | |
| "learning_rate": 3.516170722342127e-07, | |
| "loss": 1.1703, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.9186058327405752, | |
| "grad_norm": 2.992725372314453, | |
| "learning_rate": 3.4472891290201927e-07, | |
| "loss": 1.1739, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9194187582562747, | |
| "grad_norm": 4.317306041717529, | |
| "learning_rate": 3.3790770857976995e-07, | |
| "loss": 1.184, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.9202316837719744, | |
| "grad_norm": 3.9048075675964355, | |
| "learning_rate": 3.3115350656948043e-07, | |
| "loss": 1.2651, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.9210446092876741, | |
| "grad_norm": 3.3990674018859863, | |
| "learning_rate": 3.2446635370853686e-07, | |
| "loss": 1.205, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.9218575348033736, | |
| "grad_norm": 4.0517754554748535, | |
| "learning_rate": 3.1784629636937404e-07, | |
| "loss": 1.1996, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.9226704603190733, | |
| "grad_norm": 3.340564489364624, | |
| "learning_rate": 3.1129338045914004e-07, | |
| "loss": 1.2215, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.9234833858347729, | |
| "grad_norm": 3.5760183334350586, | |
| "learning_rate": 3.0480765141939316e-07, | |
| "loss": 1.2191, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.9242963113504725, | |
| "grad_norm": 2.8496994972229004, | |
| "learning_rate": 2.9838915422578e-07, | |
| "loss": 1.2217, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.9251092368661722, | |
| "grad_norm": 3.025475025177002, | |
| "learning_rate": 2.920379333877221e-07, | |
| "loss": 1.2332, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.9259221623818717, | |
| "grad_norm": 4.238699436187744, | |
| "learning_rate": 2.8575403294811123e-07, | |
| "loss": 1.2223, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.9267350878975714, | |
| "grad_norm": 2.9650015830993652, | |
| "learning_rate": 2.795374964830022e-07, | |
| "loss": 1.2149, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.927548013413271, | |
| "grad_norm": 2.731064796447754, | |
| "learning_rate": 2.733883671013082e-07, | |
| "loss": 1.2116, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.9283609389289706, | |
| "grad_norm": 4.153676986694336, | |
| "learning_rate": 2.673066874445096e-07, | |
| "loss": 1.1189, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.9291738644446702, | |
| "grad_norm": 3.843541383743286, | |
| "learning_rate": 2.612924996863453e-07, | |
| "loss": 1.1933, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.9299867899603699, | |
| "grad_norm": 3.0720019340515137, | |
| "learning_rate": 2.5534584553253526e-07, | |
| "loss": 1.1859, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.9307997154760695, | |
| "grad_norm": 3.4368112087249756, | |
| "learning_rate": 2.494667662204797e-07, | |
| "loss": 1.22, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9316126409917691, | |
| "grad_norm": 2.524754285812378, | |
| "learning_rate": 2.436553025189758e-07, | |
| "loss": 1.2561, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9324255665074688, | |
| "grad_norm": 3.2625484466552734, | |
| "learning_rate": 2.3791149472794373e-07, | |
| "loss": 1.2026, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.9332384920231683, | |
| "grad_norm": 3.4842891693115234, | |
| "learning_rate": 2.3223538267813317e-07, | |
| "loss": 1.234, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.934051417538868, | |
| "grad_norm": 2.9896857738494873, | |
| "learning_rate": 2.2662700573085505e-07, | |
| "loss": 1.2008, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.9348643430545677, | |
| "grad_norm": 3.3465092182159424, | |
| "learning_rate": 2.2108640277771153e-07, | |
| "loss": 1.2392, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9356772685702672, | |
| "grad_norm": 2.6980130672454834, | |
| "learning_rate": 2.156136122403174e-07, | |
| "loss": 1.2083, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.9364901940859669, | |
| "grad_norm": 3.4942784309387207, | |
| "learning_rate": 2.1020867207004026e-07, | |
| "loss": 1.2232, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9373031196016665, | |
| "grad_norm": 2.874210834503174, | |
| "learning_rate": 2.048716197477374e-07, | |
| "loss": 1.2447, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.9381160451173661, | |
| "grad_norm": 3.429757833480835, | |
| "learning_rate": 1.996024922834905e-07, | |
| "loss": 1.1562, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9389289706330658, | |
| "grad_norm": 2.96549654006958, | |
| "learning_rate": 1.9440132621635687e-07, | |
| "loss": 1.2543, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.9397418961487654, | |
| "grad_norm": 3.1660540103912354, | |
| "learning_rate": 1.8926815761410867e-07, | |
| "loss": 1.1931, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.940554821664465, | |
| "grad_norm": 2.848574161529541, | |
| "learning_rate": 1.8420302207298623e-07, | |
| "loss": 1.1837, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.9413677471801646, | |
| "grad_norm": 4.005343437194824, | |
| "learning_rate": 1.792059547174507e-07, | |
| "loss": 1.2423, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9421806726958643, | |
| "grad_norm": 2.7809975147247314, | |
| "learning_rate": 1.7427699019994415e-07, | |
| "loss": 1.1665, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.9429935982115638, | |
| "grad_norm": 4.211681365966797, | |
| "learning_rate": 1.6941616270063854e-07, | |
| "loss": 1.2526, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9438065237272635, | |
| "grad_norm": 4.117452144622803, | |
| "learning_rate": 1.6462350592721498e-07, | |
| "loss": 1.1957, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.9446194492429631, | |
| "grad_norm": 2.9959964752197266, | |
| "learning_rate": 1.5989905311461274e-07, | |
| "loss": 1.2342, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.9454323747586627, | |
| "grad_norm": 3.091280460357666, | |
| "learning_rate": 1.5524283702481158e-07, | |
| "loss": 1.2168, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.9462453002743624, | |
| "grad_norm": 4.000481128692627, | |
| "learning_rate": 1.5065488994659983e-07, | |
| "loss": 1.2206, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9470582257900619, | |
| "grad_norm": 3.2974343299865723, | |
| "learning_rate": 1.461352436953478e-07, | |
| "loss": 1.1955, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9478711513057616, | |
| "grad_norm": 3.589606285095215, | |
| "learning_rate": 1.4168392961279254e-07, | |
| "loss": 1.1277, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9486840768214613, | |
| "grad_norm": 3.071859121322632, | |
| "learning_rate": 1.3730097856681668e-07, | |
| "loss": 1.1837, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.9494970023371608, | |
| "grad_norm": 3.4584462642669678, | |
| "learning_rate": 1.329864209512377e-07, | |
| "loss": 1.249, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9503099278528605, | |
| "grad_norm": 4.1693434715271, | |
| "learning_rate": 1.2874028668559247e-07, | |
| "loss": 1.2234, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.9511228533685601, | |
| "grad_norm": 3.1776278018951416, | |
| "learning_rate": 1.245626052149318e-07, | |
| "loss": 1.2047, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9519357788842597, | |
| "grad_norm": 3.347137689590454, | |
| "learning_rate": 1.2045340550961958e-07, | |
| "loss": 1.2995, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.9527487043999594, | |
| "grad_norm": 3.2806451320648193, | |
| "learning_rate": 1.164127160651285e-07, | |
| "loss": 1.1546, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.953561629915659, | |
| "grad_norm": 4.498492240905762, | |
| "learning_rate": 1.1244056490184008e-07, | |
| "loss": 1.2469, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.9543745554313586, | |
| "grad_norm": 3.0195493698120117, | |
| "learning_rate": 1.0853697956485942e-07, | |
| "loss": 1.2373, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.9551874809470582, | |
| "grad_norm": 4.176177501678467, | |
| "learning_rate": 1.0470198712381086e-07, | |
| "loss": 1.2486, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9560004064627579, | |
| "grad_norm": 3.222987413406372, | |
| "learning_rate": 1.009356141726614e-07, | |
| "loss": 1.1905, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.9568133319784575, | |
| "grad_norm": 2.6555376052856445, | |
| "learning_rate": 9.723788682953539e-08, | |
| "loss": 1.1666, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.9576262574941571, | |
| "grad_norm": 4.015134334564209, | |
| "learning_rate": 9.360883073652238e-08, | |
| "loss": 1.2675, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9584391830098568, | |
| "grad_norm": 3.029994487762451, | |
| "learning_rate": 9.004847105951509e-08, | |
| "loss": 1.1977, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.9592521085255563, | |
| "grad_norm": 2.7363007068634033, | |
| "learning_rate": 8.655683248802282e-08, | |
| "loss": 1.2359, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.960065034041256, | |
| "grad_norm": 4.360199451446533, | |
| "learning_rate": 8.313393923500613e-08, | |
| "loss": 1.2099, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.9608779595569555, | |
| "grad_norm": 2.9082043170928955, | |
| "learning_rate": 7.977981503670795e-08, | |
| "loss": 1.2632, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.9616908850726552, | |
| "grad_norm": 3.0049242973327637, | |
| "learning_rate": 7.64944831524872e-08, | |
| "loss": 1.2128, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.9625038105883549, | |
| "grad_norm": 2.9180142879486084, | |
| "learning_rate": 7.327796636465767e-08, | |
| "loss": 1.2075, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.9633167361040544, | |
| "grad_norm": 2.8545587062835693, | |
| "learning_rate": 7.01302869783338e-08, | |
| "loss": 1.1809, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9641296616197541, | |
| "grad_norm": 3.2359890937805176, | |
| "learning_rate": 6.705146682127184e-08, | |
| "loss": 1.2404, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.9649425871354537, | |
| "grad_norm": 7.442730903625488, | |
| "learning_rate": 6.404152724371892e-08, | |
| "loss": 1.2081, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.9657555126511533, | |
| "grad_norm": 2.9155330657958984, | |
| "learning_rate": 6.110048911826871e-08, | |
| "loss": 1.1837, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.966568438166853, | |
| "grad_norm": 5.689270496368408, | |
| "learning_rate": 5.82283728397115e-08, | |
| "loss": 1.2039, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.9673813636825526, | |
| "grad_norm": 2.791161060333252, | |
| "learning_rate": 5.542519832489546e-08, | |
| "loss": 1.2032, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9681942891982522, | |
| "grad_norm": 3.127793312072754, | |
| "learning_rate": 5.269098501259007e-08, | |
| "loss": 1.2016, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.9690072147139518, | |
| "grad_norm": 2.8209614753723145, | |
| "learning_rate": 5.002575186334735e-08, | |
| "loss": 1.1624, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.9698201402296515, | |
| "grad_norm": 3.3611080646514893, | |
| "learning_rate": 4.742951735937418e-08, | |
| "loss": 1.2068, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.9706330657453511, | |
| "grad_norm": 5.118293285369873, | |
| "learning_rate": 4.490229950440239e-08, | |
| "loss": 1.2398, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.9714459912610507, | |
| "grad_norm": 9.395883560180664, | |
| "learning_rate": 4.2444115823562226e-08, | |
| "loss": 1.3143, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9722589167767504, | |
| "grad_norm": 3.1017065048217773, | |
| "learning_rate": 4.005498336326463e-08, | |
| "loss": 1.1918, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.9730718422924499, | |
| "grad_norm": 3.226966142654419, | |
| "learning_rate": 3.773491869108137e-08, | |
| "loss": 1.2046, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.9738847678081496, | |
| "grad_norm": 3.233693838119507, | |
| "learning_rate": 3.548393789562732e-08, | |
| "loss": 1.2325, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.9746976933238493, | |
| "grad_norm": 3.159299612045288, | |
| "learning_rate": 3.3302056586453916e-08, | |
| "loss": 1.1693, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.9755106188395488, | |
| "grad_norm": 2.7059924602508545, | |
| "learning_rate": 3.118928989393699e-08, | |
| "loss": 1.2422, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9763235443552485, | |
| "grad_norm": 3.511061668395996, | |
| "learning_rate": 2.9145652469174666e-08, | |
| "loss": 1.2184, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.977136469870948, | |
| "grad_norm": 4.077070236206055, | |
| "learning_rate": 2.7171158483882963e-08, | |
| "loss": 1.2309, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.9779493953866477, | |
| "grad_norm": 3.434537887573242, | |
| "learning_rate": 2.5265821630298116e-08, | |
| "loss": 1.1943, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.9787623209023473, | |
| "grad_norm": 3.698641300201416, | |
| "learning_rate": 2.3429655121085525e-08, | |
| "loss": 1.2671, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.9795752464180469, | |
| "grad_norm": 6.674719333648682, | |
| "learning_rate": 2.1662671689242076e-08, | |
| "loss": 1.1961, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.9803881719337466, | |
| "grad_norm": 4.9146952629089355, | |
| "learning_rate": 1.996488358801174e-08, | |
| "loss": 1.2345, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.9812010974494462, | |
| "grad_norm": 2.7147114276885986, | |
| "learning_rate": 1.8336302590798992e-08, | |
| "loss": 1.2118, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.9820140229651458, | |
| "grad_norm": 2.809692859649658, | |
| "learning_rate": 1.677693999109109e-08, | |
| "loss": 1.2162, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.9828269484808454, | |
| "grad_norm": 3.857846975326538, | |
| "learning_rate": 1.5286806602372583e-08, | |
| "loss": 1.1792, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.9836398739965451, | |
| "grad_norm": 3.8911325931549072, | |
| "learning_rate": 1.3865912758054267e-08, | |
| "loss": 1.2332, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.9844527995122447, | |
| "grad_norm": 3.5572190284729004, | |
| "learning_rate": 1.2514268311405452e-08, | |
| "loss": 1.2174, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.9852657250279443, | |
| "grad_norm": 3.22208833694458, | |
| "learning_rate": 1.1231882635477364e-08, | |
| "loss": 1.2146, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.986078650543644, | |
| "grad_norm": 4.469923973083496, | |
| "learning_rate": 1.0018764623045407e-08, | |
| "loss": 1.2168, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.9868915760593435, | |
| "grad_norm": 3.1559510231018066, | |
| "learning_rate": 8.874922686541442e-09, | |
| "loss": 1.2074, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.9877045015750432, | |
| "grad_norm": 2.6890878677368164, | |
| "learning_rate": 7.800364758002721e-09, | |
| "loss": 1.2358, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.9885174270907429, | |
| "grad_norm": 3.4091622829437256, | |
| "learning_rate": 6.795098289008595e-09, | |
| "loss": 1.2484, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.9893303526064424, | |
| "grad_norm": 3.0762569904327393, | |
| "learning_rate": 5.859130250636113e-09, | |
| "loss": 1.1787, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.9901432781221421, | |
| "grad_norm": 2.616163492202759, | |
| "learning_rate": 4.992467133406731e-09, | |
| "loss": 1.2092, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.9909562036378416, | |
| "grad_norm": 3.0248591899871826, | |
| "learning_rate": 4.195114947244117e-09, | |
| "loss": 1.1998, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.9917691291535413, | |
| "grad_norm": 5.664068698883057, | |
| "learning_rate": 3.4670792214297476e-09, | |
| "loss": 1.2539, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.9925820546692409, | |
| "grad_norm": 3.449087619781494, | |
| "learning_rate": 2.808365004569602e-09, | |
| "loss": 1.2463, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.9933949801849405, | |
| "grad_norm": 2.958399534225464, | |
| "learning_rate": 2.2189768645519693e-09, | |
| "loss": 1.2076, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.9942079057006402, | |
| "grad_norm": 3.4361188411712646, | |
| "learning_rate": 1.6989188885219165e-09, | |
| "loss": 1.2436, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.9950208312163398, | |
| "grad_norm": 3.0529403686523438, | |
| "learning_rate": 1.2481946828502011e-09, | |
| "loss": 1.1955, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.9958337567320394, | |
| "grad_norm": 3.090090274810791, | |
| "learning_rate": 8.668073731088467e-10, | |
| "loss": 1.1455, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.996646682247739, | |
| "grad_norm": 3.2662580013275146, | |
| "learning_rate": 5.547596040489378e-10, | |
| "loss": 1.2283, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.9974596077634387, | |
| "grad_norm": 2.7874884605407715, | |
| "learning_rate": 3.1205353958285724e-10, | |
| "loss": 1.2011, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.9982725332791383, | |
| "grad_norm": 2.9483141899108887, | |
| "learning_rate": 1.3869086276985243e-10, | |
| "loss": 1.272, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.9990854587948379, | |
| "grad_norm": 3.550588607788086, | |
| "learning_rate": 3.467277580271322e-11, | |
| "loss": 1.1665, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.9998983843105376, | |
| "grad_norm": 3.500861406326294, | |
| "learning_rate": 0.0, | |
| "loss": 1.2382, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9998983843105376, | |
| "step": 1230, | |
| "total_flos": 3.1215366383127757e+18, | |
| "train_loss": 1.3087712280149382, | |
| "train_runtime": 25084.8125, | |
| "train_samples_per_second": 6.277, | |
| "train_steps_per_second": 0.049 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1230, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 7975, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.1215366383127757e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |