| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 625, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008, |
| "grad_norm": 3.64601993560791, |
| "learning_rate": 0.0, |
| "loss": 0.8628, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.9168810844421387, |
| "learning_rate": 1.7543859649122808e-07, |
| "loss": 0.9365, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 3.702859878540039, |
| "learning_rate": 3.5087719298245616e-07, |
| "loss": 0.7746, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 3.483750581741333, |
| "learning_rate": 5.263157894736843e-07, |
| "loss": 0.7739, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.805393934249878, |
| "learning_rate": 7.017543859649123e-07, |
| "loss": 0.9568, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 3.8995630741119385, |
| "learning_rate": 8.771929824561404e-07, |
| "loss": 1.0066, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 4.119105339050293, |
| "learning_rate": 1.0526315789473685e-06, |
| "loss": 0.9942, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.3201704025268555, |
| "learning_rate": 1.2280701754385965e-06, |
| "loss": 0.9177, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 3.132570266723633, |
| "learning_rate": 1.4035087719298246e-06, |
| "loss": 0.8407, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.132612466812134, |
| "learning_rate": 1.5789473684210526e-06, |
| "loss": 0.8996, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 2.6232998371124268, |
| "learning_rate": 1.7543859649122807e-06, |
| "loss": 0.9917, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.8322850465774536, |
| "learning_rate": 1.929824561403509e-06, |
| "loss": 0.9113, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 1.9032851457595825, |
| "learning_rate": 2.105263157894737e-06, |
| "loss": 0.9269, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.8699129819869995, |
| "learning_rate": 2.280701754385965e-06, |
| "loss": 0.8689, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.572948932647705, |
| "learning_rate": 2.456140350877193e-06, |
| "loss": 0.8004, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.5191115140914917, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 0.8864, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.55618417263031, |
| "learning_rate": 2.8070175438596493e-06, |
| "loss": 0.8495, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.5098791122436523, |
| "learning_rate": 2.9824561403508774e-06, |
| "loss": 0.8463, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.6831691265106201, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.9389, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.1799376010894775, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.7804, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.1570075750350952, |
| "learning_rate": 3.5087719298245615e-06, |
| "loss": 0.7694, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.3207712173461914, |
| "learning_rate": 3.6842105263157896e-06, |
| "loss": 0.821, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.26198410987854, |
| "learning_rate": 3.859649122807018e-06, |
| "loss": 0.7909, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.279601812362671, |
| "learning_rate": 4.035087719298246e-06, |
| "loss": 0.8096, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.336991548538208, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 0.7828, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.1203796863555908, |
| "learning_rate": 4.385964912280702e-06, |
| "loss": 0.7887, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.0610651969909668, |
| "learning_rate": 4.56140350877193e-06, |
| "loss": 0.7922, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.912101149559021, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.6419, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.065405249595642, |
| "learning_rate": 4.912280701754386e-06, |
| "loss": 0.7815, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.0197738409042358, |
| "learning_rate": 5.087719298245615e-06, |
| "loss": 0.6598, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.9232926368713379, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.7246, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 3.055030345916748, |
| "learning_rate": 5.438596491228071e-06, |
| "loss": 0.7377, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.2969378232955933, |
| "learning_rate": 5.6140350877192985e-06, |
| "loss": 0.7358, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 1.0571110248565674, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 0.7021, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.090430736541748, |
| "learning_rate": 5.964912280701755e-06, |
| "loss": 0.6808, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 1.427613377571106, |
| "learning_rate": 6.140350877192983e-06, |
| "loss": 0.7467, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.0251866579055786, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.7286, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.8874351382255554, |
| "learning_rate": 6.491228070175439e-06, |
| "loss": 0.6925, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.2552199363708496, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.7958, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1221239566802979, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 0.8124, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 1.0117762088775635, |
| "learning_rate": 7.017543859649123e-06, |
| "loss": 0.6543, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.852236270904541, |
| "learning_rate": 7.192982456140352e-06, |
| "loss": 0.5704, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.8768660426139832, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.6461, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 1.0183557271957397, |
| "learning_rate": 7.5438596491228074e-06, |
| "loss": 0.6764, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.1444123983383179, |
| "learning_rate": 7.719298245614036e-06, |
| "loss": 0.707, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 1.009420394897461, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.8975, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.5204353332519531, |
| "learning_rate": 8.070175438596492e-06, |
| "loss": 0.6668, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 1.0963656902313232, |
| "learning_rate": 8.24561403508772e-06, |
| "loss": 0.6778, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.2279527187347412, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 0.8483, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.1100308895111084, |
| "learning_rate": 8.596491228070176e-06, |
| "loss": 0.7917, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.068907380104065, |
| "learning_rate": 8.771929824561405e-06, |
| "loss": 0.671, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.919391930103302, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.698, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 1.0914137363433838, |
| "learning_rate": 9.12280701754386e-06, |
| "loss": 0.6729, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.9236669540405273, |
| "learning_rate": 9.298245614035088e-06, |
| "loss": 0.7039, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.1973854303359985, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 0.7047, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.0812108516693115, |
| "learning_rate": 9.649122807017545e-06, |
| "loss": 0.7041, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.142591953277588, |
| "learning_rate": 9.824561403508772e-06, |
| "loss": 0.7425, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.9331685304641724, |
| "learning_rate": 1e-05, |
| "loss": 0.6029, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.1456042528152466, |
| "learning_rate": 9.999978367986988e-06, |
| "loss": 0.716, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.048388123512268, |
| "learning_rate": 9.999913472135126e-06, |
| "loss": 0.6665, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.0044022798538208, |
| "learning_rate": 9.999805313005946e-06, |
| "loss": 0.6526, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 1.0086668729782104, |
| "learning_rate": 9.99965389153533e-06, |
| "loss": 0.7125, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 1.088757872581482, |
| "learning_rate": 9.999459209033495e-06, |
| "loss": 0.5943, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 1.180656909942627, |
| "learning_rate": 9.999221267184993e-06, |
| "loss": 0.7177, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.8537901639938354, |
| "learning_rate": 9.998940068048688e-06, |
| "loss": 0.5641, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 1.2915085554122925, |
| "learning_rate": 9.998615614057743e-06, |
| "loss": 0.674, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.312907338142395, |
| "learning_rate": 9.998247908019594e-06, |
| "loss": 0.823, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.0484812259674072, |
| "learning_rate": 9.997836953115927e-06, |
| "loss": 0.7765, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 1.0241776704788208, |
| "learning_rate": 9.997382752902658e-06, |
| "loss": 0.8049, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.282970905303955, |
| "learning_rate": 9.996885311309892e-06, |
| "loss": 0.5718, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.8525036573410034, |
| "learning_rate": 9.996344632641895e-06, |
| "loss": 0.6602, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.2436128854751587, |
| "learning_rate": 9.995760721577053e-06, |
| "loss": 0.6208, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.9938145875930786, |
| "learning_rate": 9.995133583167833e-06, |
| "loss": 0.7188, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.9896339774131775, |
| "learning_rate": 9.994463222840748e-06, |
| "loss": 0.6339, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.9466623663902283, |
| "learning_rate": 9.993749646396286e-06, |
| "loss": 0.5896, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 1.0780235528945923, |
| "learning_rate": 9.992992860008893e-06, |
| "loss": 0.7525, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 1.3074487447738647, |
| "learning_rate": 9.99219287022689e-06, |
| "loss": 0.6707, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.9674292802810669, |
| "learning_rate": 9.991349683972435e-06, |
| "loss": 0.6817, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 1.029198169708252, |
| "learning_rate": 9.990463308541452e-06, |
| "loss": 0.7652, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1561387777328491, |
| "learning_rate": 9.989533751603578e-06, |
| "loss": 0.7302, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.9618710875511169, |
| "learning_rate": 9.988561021202083e-06, |
| "loss": 0.7046, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 1.0132874250411987, |
| "learning_rate": 9.987545125753818e-06, |
| "loss": 0.7053, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.0416361093521118, |
| "learning_rate": 9.986486074049131e-06, |
| "loss": 0.6851, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.9856860041618347, |
| "learning_rate": 9.985383875251783e-06, |
| "loss": 0.6858, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.9925334453582764, |
| "learning_rate": 9.98423853889889e-06, |
| "loss": 0.7338, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.9347037076950073, |
| "learning_rate": 9.983050074900824e-06, |
| "loss": 0.7625, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 1.2280950546264648, |
| "learning_rate": 9.98181849354113e-06, |
| "loss": 0.7229, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.9444807171821594, |
| "learning_rate": 9.980543805476447e-06, |
| "loss": 0.6652, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.0696436166763306, |
| "learning_rate": 9.979226021736396e-06, |
| "loss": 0.646, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.9732766151428223, |
| "learning_rate": 9.977865153723508e-06, |
| "loss": 0.7359, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 1.041569471359253, |
| "learning_rate": 9.976461213213104e-06, |
| "loss": 0.7169, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 1.0077918767929077, |
| "learning_rate": 9.975014212353212e-06, |
| "loss": 0.7185, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.9856661558151245, |
| "learning_rate": 9.973524163664447e-06, |
| "loss": 0.6106, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.1179556846618652, |
| "learning_rate": 9.971991080039912e-06, |
| "loss": 0.6851, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.7839189171791077, |
| "learning_rate": 9.970414974745077e-06, |
| "loss": 0.6788, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.9997370839118958, |
| "learning_rate": 9.968795861417676e-06, |
| "loss": 0.5586, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.9802690148353577, |
| "learning_rate": 9.967133754067581e-06, |
| "loss": 0.7048, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 1.0631524324417114, |
| "learning_rate": 9.965428667076687e-06, |
| "loss": 0.6596, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.9298393130302429, |
| "learning_rate": 9.963680615198774e-06, |
| "loss": 0.6907, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0759201049804688, |
| "learning_rate": 9.961889613559396e-06, |
| "loss": 0.7354, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.7959829568862915, |
| "learning_rate": 9.960055677655743e-06, |
| "loss": 0.6363, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.9710596799850464, |
| "learning_rate": 9.958178823356503e-06, |
| "loss": 0.6114, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.9413867592811584, |
| "learning_rate": 9.956259066901733e-06, |
| "loss": 0.6466, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.4074207544326782, |
| "learning_rate": 9.954296424902709e-06, |
| "loss": 0.6742, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.9660438895225525, |
| "learning_rate": 9.95229091434179e-06, |
| "loss": 0.719, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 1.3223888874053955, |
| "learning_rate": 9.950242552572272e-06, |
| "loss": 0.6541, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 1.0226150751113892, |
| "learning_rate": 9.948151357318228e-06, |
| "loss": 0.5981, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.9748033285140991, |
| "learning_rate": 9.946017346674362e-06, |
| "loss": 0.6822, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 1.3557853698730469, |
| "learning_rate": 9.943840539105853e-06, |
| "loss": 0.7028, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.009817361831665, |
| "learning_rate": 9.941620953448195e-06, |
| "loss": 0.7124, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.8992280960083008, |
| "learning_rate": 9.939358608907026e-06, |
| "loss": 0.6255, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.9794321060180664, |
| "learning_rate": 9.937053525057977e-06, |
| "loss": 0.6736, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.9605726599693298, |
| "learning_rate": 9.934705721846487e-06, |
| "loss": 0.6872, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 1.006405234336853, |
| "learning_rate": 9.932315219587641e-06, |
| "loss": 0.6675, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.9819768667221069, |
| "learning_rate": 9.92988203896599e-06, |
| "loss": 0.6566, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 1.0273972749710083, |
| "learning_rate": 9.927406201035368e-06, |
| "loss": 0.6309, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.0762145519256592, |
| "learning_rate": 9.924887727218724e-06, |
| "loss": 0.7274, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 1.00732421875, |
| "learning_rate": 9.922326639307918e-06, |
| "loss": 0.7601, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 1.0192487239837646, |
| "learning_rate": 9.919722959463545e-06, |
| "loss": 0.6605, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.9694429636001587, |
| "learning_rate": 9.917076710214739e-06, |
| "loss": 0.7889, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.139436960220337, |
| "learning_rate": 9.914387914458983e-06, |
| "loss": 0.636, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.9674109220504761, |
| "learning_rate": 9.911656595461899e-06, |
| "loss": 0.7108, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.0006988048553467, |
| "learning_rate": 9.908882776857057e-06, |
| "loss": 0.7325, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.8345931768417358, |
| "learning_rate": 9.906066482645774e-06, |
| "loss": 0.5953, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9422177076339722, |
| "learning_rate": 9.903207737196892e-06, |
| "loss": 0.7125, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 1.0644160509109497, |
| "learning_rate": 9.900306565246579e-06, |
| "loss": 0.6101, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 0.8198509812355042, |
| "learning_rate": 9.89736299189811e-06, |
| "loss": 0.5451, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.9468843340873718, |
| "learning_rate": 9.894377042621654e-06, |
| "loss": 0.6957, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 1.0026391744613647, |
| "learning_rate": 9.891348743254046e-06, |
| "loss": 0.6251, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.8523833155632019, |
| "learning_rate": 9.888278119998573e-06, |
| "loss": 0.5409, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 1.2016947269439697, |
| "learning_rate": 9.885165199424738e-06, |
| "loss": 0.5866, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.8879609107971191, |
| "learning_rate": 9.882010008468038e-06, |
| "loss": 0.5592, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 1.1025391817092896, |
| "learning_rate": 9.878812574429722e-06, |
| "loss": 0.567, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.9392365217208862, |
| "learning_rate": 9.875572924976568e-06, |
| "loss": 0.567, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.8918107748031616, |
| "learning_rate": 9.87229108814063e-06, |
| "loss": 0.4709, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 1.0703924894332886, |
| "learning_rate": 9.868967092319003e-06, |
| "loss": 0.5965, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 0.9928643703460693, |
| "learning_rate": 9.865600966273576e-06, |
| "loss": 0.5996, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 1.2148929834365845, |
| "learning_rate": 9.86219273913078e-06, |
| "loss": 0.611, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 0.9947288632392883, |
| "learning_rate": 9.858742440381343e-06, |
| "loss": 0.5379, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 1.332168459892273, |
| "learning_rate": 9.855250099880026e-06, |
| "loss": 0.7006, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 2.9835128784179688, |
| "learning_rate": 9.851715747845372e-06, |
| "loss": 0.6709, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.8667416572570801, |
| "learning_rate": 9.848139414859441e-06, |
| "loss": 0.6153, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 1.0628273487091064, |
| "learning_rate": 9.844521131867546e-06, |
| "loss": 0.6619, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 1.0005896091461182, |
| "learning_rate": 9.840860930177984e-06, |
| "loss": 0.5382, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 1.0219800472259521, |
| "learning_rate": 9.837158841461767e-06, |
| "loss": 0.6419, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.7993205189704895, |
| "learning_rate": 9.833414897752346e-06, |
| "loss": 0.499, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 0.7739636301994324, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 0.5477, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.8899552822113037, |
| "learning_rate": 9.825801575298248e-06, |
| "loss": 0.4854, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 0.8185807466506958, |
| "learning_rate": 9.821932262430164e-06, |
| "loss": 0.5946, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.9796144962310791, |
| "learning_rate": 9.818021226321502e-06, |
| "loss": 0.4688, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 0.9567016959190369, |
| "learning_rate": 9.814068500813692e-06, |
| "loss": 0.5279, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.8102734684944153, |
| "learning_rate": 9.8100741201089e-06, |
| "loss": 0.4962, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 1.0615909099578857, |
| "learning_rate": 9.806038118769724e-06, |
| "loss": 0.6779, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.9773924350738525, |
| "learning_rate": 9.801960531718898e-06, |
| "loss": 0.5115, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 1.0371609926223755, |
| "learning_rate": 9.797841394238987e-06, |
| "loss": 0.6258, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 1.1592023372650146, |
| "learning_rate": 9.793680741972084e-06, |
| "loss": 0.5728, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.8714562058448792, |
| "learning_rate": 9.789478610919508e-06, |
| "loss": 0.4935, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.9967408180236816, |
| "learning_rate": 9.785235037441473e-06, |
| "loss": 0.5136, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 0.7984183430671692, |
| "learning_rate": 9.780950058256802e-06, |
| "loss": 0.576, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.8621663451194763, |
| "learning_rate": 9.77662371044258e-06, |
| "loss": 0.5559, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 0.8362479209899902, |
| "learning_rate": 9.77225603143385e-06, |
| "loss": 0.6398, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.9502777457237244, |
| "learning_rate": 9.767847059023292e-06, |
| "loss": 0.5053, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 1.245893120765686, |
| "learning_rate": 9.763396831360884e-06, |
| "loss": 0.5379, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.9848424792289734, |
| "learning_rate": 9.75890538695358e-06, |
| "loss": 0.5523, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.9937321543693542, |
| "learning_rate": 9.75437276466497e-06, |
| "loss": 0.6296, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 1.045898199081421, |
| "learning_rate": 9.749799003714954e-06, |
| "loss": 0.6407, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 1.0084316730499268, |
| "learning_rate": 9.745184143679398e-06, |
| "loss": 0.626, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 1.1556116342544556, |
| "learning_rate": 9.74052822448978e-06, |
| "loss": 0.6494, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.9056588411331177, |
| "learning_rate": 9.735831286432869e-06, |
| "loss": 0.5156, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 1.0791850090026855, |
| "learning_rate": 9.731093370150349e-06, |
| "loss": 0.6605, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 0.8788161873817444, |
| "learning_rate": 9.72631451663849e-06, |
| "loss": 0.5369, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 1.5434342622756958, |
| "learning_rate": 9.721494767247779e-06, |
| "loss": 0.6006, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.8972302675247192, |
| "learning_rate": 9.71663416368257e-06, |
| "loss": 0.5413, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 1.001869559288025, |
| "learning_rate": 9.71173274800072e-06, |
| "loss": 0.5712, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.9920283555984497, |
| "learning_rate": 9.70679056261322e-06, |
| "loss": 0.5848, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 1.090257167816162, |
| "learning_rate": 9.70180765028384e-06, |
| "loss": 0.5776, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.7861718535423279, |
| "learning_rate": 9.696784054128749e-06, |
| "loss": 0.5172, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.9395256638526917, |
| "learning_rate": 9.691719817616148e-06, |
| "loss": 0.6224, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 0.7598348259925842, |
| "learning_rate": 9.686614984565888e-06, |
| "loss": 0.5102, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.8387635946273804, |
| "learning_rate": 9.681469599149093e-06, |
| "loss": 0.5538, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 0.779594361782074, |
| "learning_rate": 9.676283705887783e-06, |
| "loss": 0.4747, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 1.5272139310836792, |
| "learning_rate": 9.671057349654481e-06, |
| "loss": 0.5398, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 0.8969641923904419, |
| "learning_rate": 9.66579057567183e-06, |
| "loss": 0.5246, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.8700845241546631, |
| "learning_rate": 9.660483429512198e-06, |
| "loss": 0.5047, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.8970540761947632, |
| "learning_rate": 9.65513595709729e-06, |
| "loss": 0.5832, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 1.0614380836486816, |
| "learning_rate": 9.649748204697741e-06, |
| "loss": 0.5481, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 0.9343193173408508, |
| "learning_rate": 9.644320218932723e-06, |
| "loss": 0.6128, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 1.0007647275924683, |
| "learning_rate": 9.63885204676954e-06, |
| "loss": 0.5181, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 0.8584722876548767, |
| "learning_rate": 9.63334373552322e-06, |
| "loss": 0.6004, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.9545193910598755, |
| "learning_rate": 9.627795332856107e-06, |
| "loss": 0.5491, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 1.144181728363037, |
| "learning_rate": 9.622206886777448e-06, |
| "loss": 0.6506, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 1.1122850179672241, |
| "learning_rate": 9.616578445642982e-06, |
| "loss": 0.4933, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 1.247880458831787, |
| "learning_rate": 9.61091005815451e-06, |
| "loss": 0.5382, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.9312114119529724, |
| "learning_rate": 9.605201773359485e-06, |
| "loss": 0.5274, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.7915589213371277, |
| "learning_rate": 9.599453640650585e-06, |
| "loss": 0.5353, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 1.037292718887329, |
| "learning_rate": 9.59366570976528e-06, |
| "loss": 0.5941, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.8507972359657288, |
| "learning_rate": 9.587838030785413e-06, |
| "loss": 0.5754, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.770389199256897, |
| "learning_rate": 9.581970654136752e-06, |
| "loss": 0.5594, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 1.1132396459579468, |
| "learning_rate": 9.576063630588563e-06, |
| "loss": 0.6345, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.018099069595337, |
| "learning_rate": 9.570117011253173e-06, |
| "loss": 0.5882, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 1.043480634689331, |
| "learning_rate": 9.56413084758552e-06, |
| "loss": 0.5441, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.792169988155365, |
| "learning_rate": 9.55810519138271e-06, |
| "loss": 0.4992, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 0.9433432817459106, |
| "learning_rate": 9.552040094783575e-06, |
| "loss": 0.6049, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 0.8830149173736572, |
| "learning_rate": 9.545935610268213e-06, |
| "loss": 0.5478, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.8460153937339783, |
| "learning_rate": 9.53979179065754e-06, |
| "loss": 0.4875, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 2.63915753364563, |
| "learning_rate": 9.533608689112827e-06, |
| "loss": 0.5341, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 0.8142545223236084, |
| "learning_rate": 9.527386359135254e-06, |
| "loss": 0.5997, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 1.2874032258987427, |
| "learning_rate": 9.521124854565425e-06, |
| "loss": 0.5931, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.9045368432998657, |
| "learning_rate": 9.514824229582922e-06, |
| "loss": 0.5828, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.7827978730201721, |
| "learning_rate": 9.508484538705823e-06, |
| "loss": 0.5094, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 0.9156500101089478, |
| "learning_rate": 9.50210583679024e-06, |
| "loss": 0.5887, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.93426513671875, |
| "learning_rate": 9.495688179029838e-06, |
| "loss": 0.5525, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 1.0492069721221924, |
| "learning_rate": 9.48923162095536e-06, |
| "loss": 0.6795, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.9157354235649109, |
| "learning_rate": 9.482736218434144e-06, |
| "loss": 0.5795, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.876832902431488, |
| "learning_rate": 9.476202027669644e-06, |
| "loss": 0.5398, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.8842900395393372, |
| "learning_rate": 9.469629105200937e-06, |
| "loss": 0.4838, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.9871936440467834, |
| "learning_rate": 9.463017507902245e-06, |
| "loss": 0.6217, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 1.0882078409194946, |
| "learning_rate": 9.45636729298243e-06, |
| "loss": 0.5909, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 0.9433255195617676, |
| "learning_rate": 9.449678517984503e-06, |
| "loss": 0.547, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 1.105050802230835, |
| "learning_rate": 9.442951240785135e-06, |
| "loss": 0.519, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 0.8703718781471252, |
| "learning_rate": 9.436185519594145e-06, |
| "loss": 0.677, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.9973863959312439, |
| "learning_rate": 9.429381412954e-06, |
| "loss": 0.6254, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 1.0442036390304565, |
| "learning_rate": 9.422538979739307e-06, |
| "loss": 0.629, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.8797324299812317, |
| "learning_rate": 9.415658279156312e-06, |
| "loss": 0.5605, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.7664978504180908, |
| "learning_rate": 9.408739370742372e-06, |
| "loss": 0.5078, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.8313367366790771, |
| "learning_rate": 9.401782314365458e-06, |
| "loss": 0.515, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 0.8382720351219177, |
| "learning_rate": 9.39478717022362e-06, |
| "loss": 0.5904, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 0.7888085246086121, |
| "learning_rate": 9.387753998844482e-06, |
| "loss": 0.4307, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 0.8832230567932129, |
| "learning_rate": 9.380682861084703e-06, |
| "loss": 0.5782, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.9309815764427185, |
| "learning_rate": 9.37357381812946e-06, |
| "loss": 0.5872, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 0.91801917552948, |
| "learning_rate": 9.366426931491917e-06, |
| "loss": 0.5212, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.8884289264678955, |
| "learning_rate": 9.359242263012693e-06, |
| "loss": 0.5056, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.9634459018707275, |
| "learning_rate": 9.352019874859326e-06, |
| "loss": 0.4745, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.8102062940597534, |
| "learning_rate": 9.344759829525734e-06, |
| "loss": 0.5154, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.8682310581207275, |
| "learning_rate": 9.33746218983167e-06, |
| "loss": 0.6459, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 0.8204552531242371, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5901, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.8650975227355957, |
| "learning_rate": 9.32275438026711e-06, |
| "loss": 0.5927, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.9267044067382812, |
| "learning_rate": 9.315344337660422e-06, |
| "loss": 0.5608, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 0.8690655827522278, |
| "learning_rate": 9.307896955219787e-06, |
| "loss": 0.5985, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.9724792838096619, |
| "learning_rate": 9.300412297385954e-06, |
| "loss": 0.5905, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.9143782258033752, |
| "learning_rate": 9.29289042892221e-06, |
| "loss": 0.4937, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 0.8621585369110107, |
| "learning_rate": 9.285331414913816e-06, |
| "loss": 0.584, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 0.7874864339828491, |
| "learning_rate": 9.277735320767449e-06, |
| "loss": 0.4881, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.9059499502182007, |
| "learning_rate": 9.270102212210632e-06, |
| "loss": 0.4926, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.8558206558227539, |
| "learning_rate": 9.262432155291167e-06, |
| "loss": 0.5671, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 1.2835336923599243, |
| "learning_rate": 9.254725216376562e-06, |
| "loss": 0.5115, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 0.8753209710121155, |
| "learning_rate": 9.246981462153456e-06, |
| "loss": 0.4818, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.7964107394218445, |
| "learning_rate": 9.239200959627048e-06, |
| "loss": 0.5344, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.8038451671600342, |
| "learning_rate": 9.231383776120512e-06, |
| "loss": 0.5566, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8682637214660645, |
| "learning_rate": 9.223529979274411e-06, |
| "loss": 0.5996, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 0.9511418342590332, |
| "learning_rate": 9.215639637046121e-06, |
| "loss": 0.5249, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 1.1337953805923462, |
| "learning_rate": 9.207712817709237e-06, |
| "loss": 0.421, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 0.8518550395965576, |
| "learning_rate": 9.19974958985298e-06, |
| "loss": 0.4252, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.9565451741218567, |
| "learning_rate": 9.191750022381613e-06, |
| "loss": 0.3497, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.8157749176025391, |
| "learning_rate": 9.183714184513832e-06, |
| "loss": 0.3978, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 1.450162649154663, |
| "learning_rate": 9.175642145782179e-06, |
| "loss": 0.4195, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 1.2736579179763794, |
| "learning_rate": 9.16753397603243e-06, |
| "loss": 0.4481, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 0.9117834568023682, |
| "learning_rate": 9.159389745423003e-06, |
| "loss": 0.4056, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 0.8460937738418579, |
| "learning_rate": 9.151209524424333e-06, |
| "loss": 0.3456, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 1.0585182905197144, |
| "learning_rate": 9.142993383818284e-06, |
| "loss": 0.3645, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 0.949852466583252, |
| "learning_rate": 9.134741394697517e-06, |
| "loss": 0.3891, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.8298773169517517, |
| "learning_rate": 9.126453628464889e-06, |
| "loss": 0.3439, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 0.8148176670074463, |
| "learning_rate": 9.118130156832823e-06, |
| "loss": 0.4309, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.9829700589179993, |
| "learning_rate": 9.109771051822702e-06, |
| "loss": 0.3994, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.8034675121307373, |
| "learning_rate": 9.10137638576423e-06, |
| "loss": 0.4744, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.8785883188247681, |
| "learning_rate": 9.09294623129482e-06, |
| "loss": 0.3681, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 1.2200959920883179, |
| "learning_rate": 9.084480661358954e-06, |
| "loss": 0.4163, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.8462724089622498, |
| "learning_rate": 9.07597974920756e-06, |
| "loss": 0.4157, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 0.8349795341491699, |
| "learning_rate": 9.067443568397378e-06, |
| "loss": 0.45, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.7713517546653748, |
| "learning_rate": 9.058872192790314e-06, |
| "loss": 0.2785, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 0.8653733134269714, |
| "learning_rate": 9.05026569655281e-06, |
| "loss": 0.4135, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.7914355397224426, |
| "learning_rate": 9.041624154155208e-06, |
| "loss": 0.3978, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 0.8875446915626526, |
| "learning_rate": 9.032947640371086e-06, |
| "loss": 0.4148, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.8357672095298767, |
| "learning_rate": 9.02423623027663e-06, |
| "loss": 0.4515, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.7554605007171631, |
| "learning_rate": 9.01548999924997e-06, |
| "loss": 0.4099, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.9491044878959656, |
| "learning_rate": 9.006709022970547e-06, |
| "loss": 0.3829, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 0.9047715663909912, |
| "learning_rate": 8.997893377418432e-06, |
| "loss": 0.4119, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 0.8157122731208801, |
| "learning_rate": 8.98904313887369e-06, |
| "loss": 0.3683, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 0.8922542929649353, |
| "learning_rate": 8.980158383915714e-06, |
| "loss": 0.5189, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 1.1166770458221436, |
| "learning_rate": 8.971239189422555e-06, |
| "loss": 0.4893, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 0.8188707828521729, |
| "learning_rate": 8.962285632570266e-06, |
| "loss": 0.375, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.8818830251693726, |
| "learning_rate": 8.953297790832231e-06, |
| "loss": 0.4442, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 0.9548737406730652, |
| "learning_rate": 8.944275741978495e-06, |
| "loss": 0.3671, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.9230669140815735, |
| "learning_rate": 8.935219564075087e-06, |
| "loss": 0.3722, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 0.9684281945228577, |
| "learning_rate": 8.92612933548335e-06, |
| "loss": 0.5036, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 1.3175219297409058, |
| "learning_rate": 8.917005134859263e-06, |
| "loss": 0.4662, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 0.7010194659233093, |
| "learning_rate": 8.907847041152757e-06, |
| "loss": 0.3565, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.7920108437538147, |
| "learning_rate": 8.89865513360703e-06, |
| "loss": 0.4084, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 1.3468918800354004, |
| "learning_rate": 8.889429491757872e-06, |
| "loss": 0.5612, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.7971108555793762, |
| "learning_rate": 8.88017019543296e-06, |
| "loss": 0.4659, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 0.6736342906951904, |
| "learning_rate": 8.870877324751186e-06, |
| "loss": 0.3444, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.743931233882904, |
| "learning_rate": 8.861550960121946e-06, |
| "loss": 0.4217, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 0.8505037426948547, |
| "learning_rate": 8.852191182244456e-06, |
| "loss": 0.3812, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 1.2008588314056396, |
| "learning_rate": 8.842798072107055e-06, |
| "loss": 0.464, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.942729651927948, |
| "learning_rate": 8.833371710986493e-06, |
| "loss": 0.4023, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.9422355890274048, |
| "learning_rate": 8.823912180447237e-06, |
| "loss": 0.4129, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 0.79082190990448, |
| "learning_rate": 8.81441956234076e-06, |
| "loss": 0.5188, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 0.953883171081543, |
| "learning_rate": 8.804893938804839e-06, |
| "loss": 0.4497, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 0.8168990015983582, |
| "learning_rate": 8.795335392262841e-06, |
| "loss": 0.4807, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.7993202209472656, |
| "learning_rate": 8.785744005423003e-06, |
| "loss": 0.4685, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 0.7926377058029175, |
| "learning_rate": 8.77611986127773e-06, |
| "loss": 0.3851, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.8498927354812622, |
| "learning_rate": 8.766463043102864e-06, |
| "loss": 0.4263, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 0.9551386833190918, |
| "learning_rate": 8.756773634456975e-06, |
| "loss": 0.4211, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.7406211495399475, |
| "learning_rate": 8.747051719180626e-06, |
| "loss": 0.4182, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.7700343728065491, |
| "learning_rate": 8.737297381395657e-06, |
| "loss": 0.373, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.7161412835121155, |
| "learning_rate": 8.727510705504453e-06, |
| "loss": 0.4379, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 1.073433756828308, |
| "learning_rate": 8.717691776189214e-06, |
| "loss": 0.3261, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.8888611197471619, |
| "learning_rate": 8.707840678411223e-06, |
| "loss": 0.3806, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 0.8704436421394348, |
| "learning_rate": 8.69795749741011e-06, |
| "loss": 0.3873, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.943534791469574, |
| "learning_rate": 8.688042318703111e-06, |
| "loss": 0.4458, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 0.7201529145240784, |
| "learning_rate": 8.678095228084343e-06, |
| "loss": 0.4377, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 0.8133971095085144, |
| "learning_rate": 8.66811631162404e-06, |
| "loss": 0.3975, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 0.7576972842216492, |
| "learning_rate": 8.65810565566782e-06, |
| "loss": 0.3578, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.8077313303947449, |
| "learning_rate": 8.648063346835943e-06, |
| "loss": 0.4251, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.8535168766975403, |
| "learning_rate": 8.637989472022548e-06, |
| "loss": 0.3954, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.8312145471572876, |
| "learning_rate": 8.627884118394913e-06, |
| "loss": 0.358, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 0.9409812688827515, |
| "learning_rate": 8.617747373392697e-06, |
| "loss": 0.3673, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.729206919670105, |
| "learning_rate": 8.607579324727175e-06, |
| "loss": 0.4074, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 0.865915834903717, |
| "learning_rate": 8.597380060380493e-06, |
| "loss": 0.4622, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.7116239070892334, |
| "learning_rate": 8.5871496686049e-06, |
| "loss": 0.4169, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 0.9631391763687134, |
| "learning_rate": 8.576888237921983e-06, |
| "loss": 0.4627, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.8730547428131104, |
| "learning_rate": 8.566595857121902e-06, |
| "loss": 0.4204, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 0.7882906198501587, |
| "learning_rate": 8.556272615262623e-06, |
| "loss": 0.3398, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 0.9142358899116516, |
| "learning_rate": 8.545918601669147e-06, |
| "loss": 0.4639, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.9243710041046143, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.4013, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.8279150128364563, |
| "learning_rate": 8.525118617910144e-06, |
| "loss": 0.4878, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 0.9344537854194641, |
| "learning_rate": 8.514672827722824e-06, |
| "loss": 0.3711, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.8455937504768372, |
| "learning_rate": 8.504196625756166e-06, |
| "loss": 0.3921, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 0.8718138933181763, |
| "learning_rate": 8.493690102658703e-06, |
| "loss": 0.376, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.8311028480529785, |
| "learning_rate": 8.483153349341336e-06, |
| "loss": 0.4188, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 0.8838032484054565, |
| "learning_rate": 8.472586456976534e-06, |
| "loss": 0.3689, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 1.0043684244155884, |
| "learning_rate": 8.461989516997565e-06, |
| "loss": 0.4979, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 0.855467677116394, |
| "learning_rate": 8.45136262109768e-06, |
| "loss": 0.4576, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.7641392946243286, |
| "learning_rate": 8.440705861229344e-06, |
| "loss": 0.4115, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 1.2907041311264038, |
| "learning_rate": 8.430019329603423e-06, |
| "loss": 0.4438, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.8499248027801514, |
| "learning_rate": 8.41930311868839e-06, |
| "loss": 0.3627, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 0.8722188472747803, |
| "learning_rate": 8.408557321209534e-06, |
| "loss": 0.3774, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.862125039100647, |
| "learning_rate": 8.397782030148147e-06, |
| "loss": 0.3429, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 0.7485009431838989, |
| "learning_rate": 8.386977338740724e-06, |
| "loss": 0.3594, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.8689549565315247, |
| "learning_rate": 8.376143340478153e-06, |
| "loss": 0.4013, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 1.5140808820724487, |
| "learning_rate": 8.365280129104912e-06, |
| "loss": 0.5017, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.781104564666748, |
| "learning_rate": 8.354387798618254e-06, |
| "loss": 0.2975, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 0.9789696335792542, |
| "learning_rate": 8.34346644326739e-06, |
| "loss": 0.3661, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 1.0142971277236938, |
| "learning_rate": 8.332516157552684e-06, |
| "loss": 0.3785, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.7328018546104431, |
| "learning_rate": 8.321537036224822e-06, |
| "loss": 0.4422, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.7889101505279541, |
| "learning_rate": 8.310529174284004e-06, |
| "loss": 0.4195, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 1.032447099685669, |
| "learning_rate": 8.299492666979114e-06, |
| "loss": 0.4659, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.8374775648117065, |
| "learning_rate": 8.288427609806899e-06, |
| "loss": 0.4177, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 0.8811750411987305, |
| "learning_rate": 8.277334098511147e-06, |
| "loss": 0.4335, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 1.1630526781082153, |
| "learning_rate": 8.266212229081846e-06, |
| "loss": 0.4981, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.7661845684051514, |
| "learning_rate": 8.255062097754371e-06, |
| "loss": 0.383, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.8048927187919617, |
| "learning_rate": 8.243883801008632e-06, |
| "loss": 0.3572, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 0.7940247058868408, |
| "learning_rate": 8.232677435568252e-06, |
| "loss": 0.3471, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.7869440913200378, |
| "learning_rate": 8.221443098399733e-06, |
| "loss": 0.3616, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.9467028379440308, |
| "learning_rate": 8.210180886711603e-06, |
| "loss": 0.4388, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.9510444402694702, |
| "learning_rate": 8.198890897953586e-06, |
| "loss": 0.3982, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 0.7974853515625, |
| "learning_rate": 8.187573229815757e-06, |
| "loss": 0.4713, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.7801048755645752, |
| "learning_rate": 8.176227980227693e-06, |
| "loss": 0.4379, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 0.8292092084884644, |
| "learning_rate": 8.164855247357628e-06, |
| "loss": 0.4051, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 1.026355504989624, |
| "learning_rate": 8.153455129611605e-06, |
| "loss": 0.4278, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 0.9270377159118652, |
| "learning_rate": 8.142027725632622e-06, |
| "loss": 0.5052, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.790385365486145, |
| "learning_rate": 8.130573134299782e-06, |
| "loss": 0.339, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 0.9593288898468018, |
| "learning_rate": 8.119091454727427e-06, |
| "loss": 0.4492, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 1.2080198526382446, |
| "learning_rate": 8.107582786264299e-06, |
| "loss": 0.5128, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.9071283340454102, |
| "learning_rate": 8.09604722849266e-06, |
| "loss": 0.4407, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.821433424949646, |
| "learning_rate": 8.084484881227449e-06, |
| "loss": 0.4552, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 0.7334813475608826, |
| "learning_rate": 8.072895844515398e-06, |
| "loss": 0.4174, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.9713824391365051, |
| "learning_rate": 8.061280218634192e-06, |
| "loss": 0.431, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 0.7867675423622131, |
| "learning_rate": 8.049638104091575e-06, |
| "loss": 0.4379, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.8193316459655762, |
| "learning_rate": 8.037969601624495e-06, |
| "loss": 0.4465, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 0.8776450753211975, |
| "learning_rate": 8.026274812198235e-06, |
| "loss": 0.3944, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.6917450428009033, |
| "learning_rate": 8.014553837005527e-06, |
| "loss": 0.3864, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 0.94938063621521, |
| "learning_rate": 8.002806777465685e-06, |
| "loss": 0.5753, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.7153763771057129, |
| "learning_rate": 7.99103373522373e-06, |
| "loss": 0.3676, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.9804494976997375, |
| "learning_rate": 7.9792348121495e-06, |
| "loss": 0.4525, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.008, |
| "grad_norm": 0.8869629502296448, |
| "learning_rate": 7.967410110336782e-06, |
| "loss": 0.2091, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.016, |
| "grad_norm": 1.2333577871322632, |
| "learning_rate": 7.955559732102414e-06, |
| "loss": 0.3921, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.024, |
| "grad_norm": 1.1201192140579224, |
| "learning_rate": 7.943683779985412e-06, |
| "loss": 0.3377, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.032, |
| "grad_norm": 0.7908293008804321, |
| "learning_rate": 7.931782356746076e-06, |
| "loss": 0.3317, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 1.140568733215332, |
| "learning_rate": 7.919855565365102e-06, |
| "loss": 0.2545, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.048, |
| "grad_norm": 1.1746877431869507, |
| "learning_rate": 7.907903509042696e-06, |
| "loss": 0.328, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.056, |
| "grad_norm": 0.8736027479171753, |
| "learning_rate": 7.895926291197667e-06, |
| "loss": 0.2423, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.064, |
| "grad_norm": 2.1225597858428955, |
| "learning_rate": 7.883924015466554e-06, |
| "loss": 0.2731, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.072, |
| "grad_norm": 0.8059532642364502, |
| "learning_rate": 7.871896785702707e-06, |
| "loss": 0.3011, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.08, |
| "grad_norm": 0.8498049974441528, |
| "learning_rate": 7.859844705975405e-06, |
| "loss": 0.2865, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.088, |
| "grad_norm": 2.4544568061828613, |
| "learning_rate": 7.847767880568944e-06, |
| "loss": 0.2806, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.096, |
| "grad_norm": 0.7781065702438354, |
| "learning_rate": 7.835666413981744e-06, |
| "loss": 0.2607, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.104, |
| "grad_norm": 0.8441874384880066, |
| "learning_rate": 7.823540410925434e-06, |
| "loss": 0.2531, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.112, |
| "grad_norm": 0.7791491150856018, |
| "learning_rate": 7.811389976323963e-06, |
| "loss": 0.2228, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 1.0188428163528442, |
| "learning_rate": 7.799215215312667e-06, |
| "loss": 0.3195, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.128, |
| "grad_norm": 0.872238039970398, |
| "learning_rate": 7.787016233237387e-06, |
| "loss": 0.1906, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.136, |
| "grad_norm": 0.8092172145843506, |
| "learning_rate": 7.774793135653537e-06, |
| "loss": 0.2425, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.144, |
| "grad_norm": 4.553264141082764, |
| "learning_rate": 7.7625460283252e-06, |
| "loss": 0.3214, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.152, |
| "grad_norm": 0.9575441479682922, |
| "learning_rate": 7.750275017224208e-06, |
| "loss": 0.2925, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 1.2400014400482178, |
| "learning_rate": 7.737980208529232e-06, |
| "loss": 0.3511, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.168, |
| "grad_norm": 0.8100807070732117, |
| "learning_rate": 7.725661708624855e-06, |
| "loss": 0.3122, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.176, |
| "grad_norm": 0.6804743409156799, |
| "learning_rate": 7.713319624100657e-06, |
| "loss": 0.2015, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.184, |
| "grad_norm": 0.9154347777366638, |
| "learning_rate": 7.700954061750295e-06, |
| "loss": 0.3348, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.192, |
| "grad_norm": 0.92096346616745, |
| "learning_rate": 7.688565128570564e-06, |
| "loss": 0.3135, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.7802610993385315, |
| "learning_rate": 7.676152931760496e-06, |
| "loss": 0.2611, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.208, |
| "grad_norm": 0.7140606045722961, |
| "learning_rate": 7.663717578720412e-06, |
| "loss": 0.2202, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.216, |
| "grad_norm": 1.0909085273742676, |
| "learning_rate": 7.651259177050996e-06, |
| "loss": 0.3117, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.224, |
| "grad_norm": 0.8841260671615601, |
| "learning_rate": 7.638777834552372e-06, |
| "loss": 0.2847, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.232, |
| "grad_norm": 0.8838410377502441, |
| "learning_rate": 7.626273659223166e-06, |
| "loss": 0.3084, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.24, |
| "grad_norm": 1.0443016290664673, |
| "learning_rate": 7.61374675925957e-06, |
| "loss": 0.293, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.248, |
| "grad_norm": 0.8339405655860901, |
| "learning_rate": 7.601197243054411e-06, |
| "loss": 0.2314, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.2560000000000002, |
| "grad_norm": 0.8528264760971069, |
| "learning_rate": 7.588625219196208e-06, |
| "loss": 0.2275, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.2640000000000002, |
| "grad_norm": 0.7928086519241333, |
| "learning_rate": 7.576030796468233e-06, |
| "loss": 0.2629, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.2720000000000002, |
| "grad_norm": 0.7202726006507874, |
| "learning_rate": 7.563414083847573e-06, |
| "loss": 0.2481, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 0.8470425009727478, |
| "learning_rate": 7.5507751905041885e-06, |
| "loss": 0.2975, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.288, |
| "grad_norm": 1.0719774961471558, |
| "learning_rate": 7.538114225799955e-06, |
| "loss": 0.327, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.296, |
| "grad_norm": 0.797988772392273, |
| "learning_rate": 7.525431299287737e-06, |
| "loss": 0.3041, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.304, |
| "grad_norm": 0.7145325541496277, |
| "learning_rate": 7.512726520710429e-06, |
| "loss": 0.2963, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.312, |
| "grad_norm": 0.7283927202224731, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.235, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 0.7725811004638672, |
| "learning_rate": 7.4872518472765594e-06, |
| "loss": 0.2348, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.328, |
| "grad_norm": 0.8668394088745117, |
| "learning_rate": 7.474482172847391e-06, |
| "loss": 0.26, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.336, |
| "grad_norm": 0.87476646900177, |
| "learning_rate": 7.461691087205993e-06, |
| "loss": 0.2704, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.344, |
| "grad_norm": 0.7493270039558411, |
| "learning_rate": 7.4488787010311425e-06, |
| "loss": 0.3159, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.352, |
| "grad_norm": 0.8767033815383911, |
| "learning_rate": 7.436045125185923e-06, |
| "loss": 0.3183, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 0.805707573890686, |
| "learning_rate": 7.423190470716761e-06, |
| "loss": 0.289, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.368, |
| "grad_norm": 0.859490692615509, |
| "learning_rate": 7.4103148488524824e-06, |
| "loss": 0.2513, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.376, |
| "grad_norm": 0.7154383659362793, |
| "learning_rate": 7.3974183710033334e-06, |
| "loss": 0.2479, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.384, |
| "grad_norm": 0.8182111978530884, |
| "learning_rate": 7.384501148760024e-06, |
| "loss": 0.2879, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.392, |
| "grad_norm": 0.7623195648193359, |
| "learning_rate": 7.371563293892761e-06, |
| "loss": 0.2687, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.8968961238861084, |
| "learning_rate": 7.3586049183502875e-06, |
| "loss": 0.2893, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.408, |
| "grad_norm": 0.81312495470047, |
| "learning_rate": 7.345626134258897e-06, |
| "loss": 0.2856, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.416, |
| "grad_norm": 0.7374573945999146, |
| "learning_rate": 7.3326270539214826e-06, |
| "loss": 0.2818, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.424, |
| "grad_norm": 0.6881022453308105, |
| "learning_rate": 7.319607789816555e-06, |
| "loss": 0.2353, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.432, |
| "grad_norm": 0.8280784487724304, |
| "learning_rate": 7.306568454597269e-06, |
| "loss": 0.3473, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 0.7189634442329407, |
| "learning_rate": 7.293509161090453e-06, |
| "loss": 0.2186, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.448, |
| "grad_norm": 0.85979163646698, |
| "learning_rate": 7.28043002229563e-06, |
| "loss": 0.2869, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.456, |
| "grad_norm": 0.846011757850647, |
| "learning_rate": 7.2673311513840395e-06, |
| "loss": 0.2669, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.464, |
| "grad_norm": 0.7689185738563538, |
| "learning_rate": 7.2542126616976596e-06, |
| "loss": 0.2488, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.472, |
| "grad_norm": 0.9952272176742554, |
| "learning_rate": 7.241074666748228e-06, |
| "loss": 0.2867, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.48, |
| "grad_norm": 0.9386110901832581, |
| "learning_rate": 7.227917280216254e-06, |
| "loss": 0.3069, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.488, |
| "grad_norm": 0.9380288124084473, |
| "learning_rate": 7.214740615950041e-06, |
| "loss": 0.2969, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.496, |
| "grad_norm": 0.944202184677124, |
| "learning_rate": 7.201544787964698e-06, |
| "loss": 0.2469, |
| "step": 437 |
| }, |
| { |
| "epoch": 3.504, |
| "grad_norm": 0.898861289024353, |
| "learning_rate": 7.188329910441154e-06, |
| "loss": 0.2336, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.512, |
| "grad_norm": 1.0536229610443115, |
| "learning_rate": 7.175096097725169e-06, |
| "loss": 0.3452, |
| "step": 439 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 0.9392815232276917, |
| "learning_rate": 7.161843464326349e-06, |
| "loss": 0.294, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.528, |
| "grad_norm": 0.7755203247070312, |
| "learning_rate": 7.148572124917148e-06, |
| "loss": 0.2786, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.536, |
| "grad_norm": 0.7829739451408386, |
| "learning_rate": 7.135282194331881e-06, |
| "loss": 0.2856, |
| "step": 442 |
| }, |
| { |
| "epoch": 3.544, |
| "grad_norm": 1.1139544248580933, |
| "learning_rate": 7.121973787565727e-06, |
| "loss": 0.3355, |
| "step": 443 |
| }, |
| { |
| "epoch": 3.552, |
| "grad_norm": 0.8333198428153992, |
| "learning_rate": 7.1086470197737405e-06, |
| "loss": 0.256, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 0.7802929282188416, |
| "learning_rate": 7.095302006269842e-06, |
| "loss": 0.2372, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.568, |
| "grad_norm": 0.8746179342269897, |
| "learning_rate": 7.0819388625258385e-06, |
| "loss": 0.3049, |
| "step": 446 |
| }, |
| { |
| "epoch": 3.576, |
| "grad_norm": 0.7507904767990112, |
| "learning_rate": 7.06855770417041e-06, |
| "loss": 0.2701, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.584, |
| "grad_norm": 0.8713604211807251, |
| "learning_rate": 7.05515864698811e-06, |
| "loss": 0.2763, |
| "step": 448 |
| }, |
| { |
| "epoch": 3.592, |
| "grad_norm": 0.9194297790527344, |
| "learning_rate": 7.041741806918372e-06, |
| "loss": 0.3081, |
| "step": 449 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.769226610660553, |
| "learning_rate": 7.028307300054499e-06, |
| "loss": 0.3234, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.608, |
| "grad_norm": 0.763100802898407, |
| "learning_rate": 7.014855242642662e-06, |
| "loss": 0.2972, |
| "step": 451 |
| }, |
| { |
| "epoch": 3.616, |
| "grad_norm": 0.7915220260620117, |
| "learning_rate": 7.0013857510808934e-06, |
| "loss": 0.247, |
| "step": 452 |
| }, |
| { |
| "epoch": 3.624, |
| "grad_norm": 0.8212165236473083, |
| "learning_rate": 6.987898941918082e-06, |
| "loss": 0.2414, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.632, |
| "grad_norm": 0.658478856086731, |
| "learning_rate": 6.974394931852957e-06, |
| "loss": 0.2388, |
| "step": 454 |
| }, |
| { |
| "epoch": 3.64, |
| "grad_norm": 0.8130829334259033, |
| "learning_rate": 6.960873837733089e-06, |
| "loss": 0.2811, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.648, |
| "grad_norm": 0.7456867098808289, |
| "learning_rate": 6.94733577655387e-06, |
| "loss": 0.2794, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.656, |
| "grad_norm": 0.8971588611602783, |
| "learning_rate": 6.933780865457508e-06, |
| "loss": 0.3021, |
| "step": 457 |
| }, |
| { |
| "epoch": 3.664, |
| "grad_norm": 0.987177312374115, |
| "learning_rate": 6.920209221732007e-06, |
| "loss": 0.314, |
| "step": 458 |
| }, |
| { |
| "epoch": 3.672, |
| "grad_norm": 0.8147737383842468, |
| "learning_rate": 6.90662096281016e-06, |
| "loss": 0.3202, |
| "step": 459 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 0.668489396572113, |
| "learning_rate": 6.893016206268518e-06, |
| "loss": 0.2737, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.6879999999999997, |
| "grad_norm": 0.7580770254135132, |
| "learning_rate": 6.879395069826394e-06, |
| "loss": 0.2473, |
| "step": 461 |
| }, |
| { |
| "epoch": 3.6959999999999997, |
| "grad_norm": 1.1495063304901123, |
| "learning_rate": 6.865757671344827e-06, |
| "loss": 0.2972, |
| "step": 462 |
| }, |
| { |
| "epoch": 3.7039999999999997, |
| "grad_norm": 1.5628399848937988, |
| "learning_rate": 6.85210412882557e-06, |
| "loss": 0.2253, |
| "step": 463 |
| }, |
| { |
| "epoch": 3.7119999999999997, |
| "grad_norm": 0.7946319580078125, |
| "learning_rate": 6.838434560410064e-06, |
| "loss": 0.2689, |
| "step": 464 |
| }, |
| { |
| "epoch": 3.7199999999999998, |
| "grad_norm": 0.9710001349449158, |
| "learning_rate": 6.824749084378428e-06, |
| "loss": 0.4637, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.7279999999999998, |
| "grad_norm": 0.9095158576965332, |
| "learning_rate": 6.811047819148413e-06, |
| "loss": 0.4004, |
| "step": 466 |
| }, |
| { |
| "epoch": 3.7359999999999998, |
| "grad_norm": 0.848308265209198, |
| "learning_rate": 6.7973308832744035e-06, |
| "loss": 0.2459, |
| "step": 467 |
| }, |
| { |
| "epoch": 3.7439999999999998, |
| "grad_norm": 0.9440385103225708, |
| "learning_rate": 6.783598395446371e-06, |
| "loss": 0.2605, |
| "step": 468 |
| }, |
| { |
| "epoch": 3.752, |
| "grad_norm": 0.7493250370025635, |
| "learning_rate": 6.769850474488859e-06, |
| "loss": 0.266, |
| "step": 469 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 0.9245112538337708, |
| "learning_rate": 6.756087239359948e-06, |
| "loss": 0.3054, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.768, |
| "grad_norm": 0.7723512649536133, |
| "learning_rate": 6.742308809150232e-06, |
| "loss": 0.2728, |
| "step": 471 |
| }, |
| { |
| "epoch": 3.776, |
| "grad_norm": 0.8170971870422363, |
| "learning_rate": 6.728515303081782e-06, |
| "loss": 0.3104, |
| "step": 472 |
| }, |
| { |
| "epoch": 3.784, |
| "grad_norm": 0.7314550280570984, |
| "learning_rate": 6.714706840507122e-06, |
| "loss": 0.2466, |
| "step": 473 |
| }, |
| { |
| "epoch": 3.792, |
| "grad_norm": 0.8251045942306519, |
| "learning_rate": 6.700883540908185e-06, |
| "loss": 0.22, |
| "step": 474 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.8969084024429321, |
| "learning_rate": 6.687045523895292e-06, |
| "loss": 0.2554, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.808, |
| "grad_norm": 0.9797028303146362, |
| "learning_rate": 6.673192909206109e-06, |
| "loss": 0.2945, |
| "step": 476 |
| }, |
| { |
| "epoch": 3.816, |
| "grad_norm": 0.6120610237121582, |
| "learning_rate": 6.6593258167046115e-06, |
| "loss": 0.2175, |
| "step": 477 |
| }, |
| { |
| "epoch": 3.824, |
| "grad_norm": 0.7815307974815369, |
| "learning_rate": 6.64544436638005e-06, |
| "loss": 0.2863, |
| "step": 478 |
| }, |
| { |
| "epoch": 3.832, |
| "grad_norm": 0.8799474835395813, |
| "learning_rate": 6.63154867834591e-06, |
| "loss": 0.2541, |
| "step": 479 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 0.7827337980270386, |
| "learning_rate": 6.617638872838874e-06, |
| "loss": 0.2887, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.848, |
| "grad_norm": 0.7949901223182678, |
| "learning_rate": 6.603715070217779e-06, |
| "loss": 0.2471, |
| "step": 481 |
| }, |
| { |
| "epoch": 3.856, |
| "grad_norm": 0.9216195940971375, |
| "learning_rate": 6.589777390962575e-06, |
| "loss": 0.2361, |
| "step": 482 |
| }, |
| { |
| "epoch": 3.864, |
| "grad_norm": 0.9386386275291443, |
| "learning_rate": 6.5758259556732896e-06, |
| "loss": 0.3444, |
| "step": 483 |
| }, |
| { |
| "epoch": 3.872, |
| "grad_norm": 0.9753653407096863, |
| "learning_rate": 6.561860885068972e-06, |
| "loss": 0.2515, |
| "step": 484 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 0.8993600010871887, |
| "learning_rate": 6.547882299986658e-06, |
| "loss": 0.2978, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.888, |
| "grad_norm": 1.270437479019165, |
| "learning_rate": 6.53389032138032e-06, |
| "loss": 0.2523, |
| "step": 486 |
| }, |
| { |
| "epoch": 3.896, |
| "grad_norm": 0.9538208842277527, |
| "learning_rate": 6.519885070319827e-06, |
| "loss": 0.2749, |
| "step": 487 |
| }, |
| { |
| "epoch": 3.904, |
| "grad_norm": 0.7965839505195618, |
| "learning_rate": 6.505866667989884e-06, |
| "loss": 0.316, |
| "step": 488 |
| }, |
| { |
| "epoch": 3.912, |
| "grad_norm": 0.8446824550628662, |
| "learning_rate": 6.491835235688999e-06, |
| "loss": 0.3438, |
| "step": 489 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 0.7848514914512634, |
| "learning_rate": 6.477790894828422e-06, |
| "loss": 0.2165, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.928, |
| "grad_norm": 0.722306489944458, |
| "learning_rate": 6.463733766931096e-06, |
| "loss": 0.2449, |
| "step": 491 |
| }, |
| { |
| "epoch": 3.936, |
| "grad_norm": 0.9112846255302429, |
| "learning_rate": 6.449663973630613e-06, |
| "loss": 0.2707, |
| "step": 492 |
| }, |
| { |
| "epoch": 3.944, |
| "grad_norm": 0.8071252703666687, |
| "learning_rate": 6.435581636670154e-06, |
| "loss": 0.2387, |
| "step": 493 |
| }, |
| { |
| "epoch": 3.952, |
| "grad_norm": 0.8816282749176025, |
| "learning_rate": 6.421486877901436e-06, |
| "loss": 0.2018, |
| "step": 494 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 0.7326090335845947, |
| "learning_rate": 6.407379819283661e-06, |
| "loss": 0.2245, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.968, |
| "grad_norm": 0.862907886505127, |
| "learning_rate": 6.393260582882462e-06, |
| "loss": 0.2872, |
| "step": 496 |
| }, |
| { |
| "epoch": 3.976, |
| "grad_norm": 0.7406136989593506, |
| "learning_rate": 6.379129290868837e-06, |
| "loss": 0.2779, |
| "step": 497 |
| }, |
| { |
| "epoch": 3.984, |
| "grad_norm": 1.0465444326400757, |
| "learning_rate": 6.364986065518106e-06, |
| "loss": 0.3433, |
| "step": 498 |
| }, |
| { |
| "epoch": 3.992, |
| "grad_norm": 0.7991510629653931, |
| "learning_rate": 6.350831029208844e-06, |
| "loss": 0.2742, |
| "step": 499 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9392898678779602, |
| "learning_rate": 6.336664304421818e-06, |
| "loss": 0.2667, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.008, |
| "grad_norm": 1.1515122652053833, |
| "learning_rate": 6.322486013738942e-06, |
| "loss": 0.1675, |
| "step": 501 |
| }, |
| { |
| "epoch": 4.016, |
| "grad_norm": 1.152869462966919, |
| "learning_rate": 6.308296279842204e-06, |
| "loss": 0.1561, |
| "step": 502 |
| }, |
| { |
| "epoch": 4.024, |
| "grad_norm": 0.8425310850143433, |
| "learning_rate": 6.294095225512604e-06, |
| "loss": 0.1293, |
| "step": 503 |
| }, |
| { |
| "epoch": 4.032, |
| "grad_norm": 0.8141390681266785, |
| "learning_rate": 6.279882973629101e-06, |
| "loss": 0.1929, |
| "step": 504 |
| }, |
| { |
| "epoch": 4.04, |
| "grad_norm": 0.9013286828994751, |
| "learning_rate": 6.265659647167542e-06, |
| "loss": 0.1551, |
| "step": 505 |
| }, |
| { |
| "epoch": 4.048, |
| "grad_norm": 0.8535316586494446, |
| "learning_rate": 6.2514253691996e-06, |
| "loss": 0.1687, |
| "step": 506 |
| }, |
| { |
| "epoch": 4.056, |
| "grad_norm": 1.3743019104003906, |
| "learning_rate": 6.237180262891709e-06, |
| "loss": 0.212, |
| "step": 507 |
| }, |
| { |
| "epoch": 4.064, |
| "grad_norm": 1.4581520557403564, |
| "learning_rate": 6.222924451504001e-06, |
| "loss": 0.198, |
| "step": 508 |
| }, |
| { |
| "epoch": 4.072, |
| "grad_norm": 1.3407318592071533, |
| "learning_rate": 6.208658058389232e-06, |
| "loss": 0.1358, |
| "step": 509 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 0.8731305003166199, |
| "learning_rate": 6.194381206991723e-06, |
| "loss": 0.1078, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.088, |
| "grad_norm": 0.7974117398262024, |
| "learning_rate": 6.180094020846291e-06, |
| "loss": 0.1521, |
| "step": 511 |
| }, |
| { |
| "epoch": 4.096, |
| "grad_norm": 0.7402255535125732, |
| "learning_rate": 6.165796623577171e-06, |
| "loss": 0.105, |
| "step": 512 |
| }, |
| { |
| "epoch": 4.104, |
| "grad_norm": 0.7679521441459656, |
| "learning_rate": 6.15148913889696e-06, |
| "loss": 0.1433, |
| "step": 513 |
| }, |
| { |
| "epoch": 4.112, |
| "grad_norm": 0.8533539175987244, |
| "learning_rate": 6.1371716906055336e-06, |
| "loss": 0.203, |
| "step": 514 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 0.8484000563621521, |
| "learning_rate": 6.122844402588982e-06, |
| "loss": 0.112, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.128, |
| "grad_norm": 0.7334287762641907, |
| "learning_rate": 6.10850739881854e-06, |
| "loss": 0.1203, |
| "step": 516 |
| }, |
| { |
| "epoch": 4.136, |
| "grad_norm": 0.8749109506607056, |
| "learning_rate": 6.094160803349508e-06, |
| "loss": 0.1452, |
| "step": 517 |
| }, |
| { |
| "epoch": 4.144, |
| "grad_norm": 1.0502787828445435, |
| "learning_rate": 6.079804740320181e-06, |
| "loss": 0.1226, |
| "step": 518 |
| }, |
| { |
| "epoch": 4.152, |
| "grad_norm": 0.9388486742973328, |
| "learning_rate": 6.065439333950776e-06, |
| "loss": 0.1565, |
| "step": 519 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 0.7821274995803833, |
| "learning_rate": 6.051064708542357e-06, |
| "loss": 0.0648, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.168, |
| "grad_norm": 0.8100799322128296, |
| "learning_rate": 6.036680988475756e-06, |
| "loss": 0.1198, |
| "step": 521 |
| }, |
| { |
| "epoch": 4.176, |
| "grad_norm": 0.8949313759803772, |
| "learning_rate": 6.022288298210502e-06, |
| "loss": 0.18, |
| "step": 522 |
| }, |
| { |
| "epoch": 4.184, |
| "grad_norm": 0.770513117313385, |
| "learning_rate": 6.00788676228374e-06, |
| "loss": 0.1465, |
| "step": 523 |
| }, |
| { |
| "epoch": 4.192, |
| "grad_norm": 0.7669388055801392, |
| "learning_rate": 5.993476505309154e-06, |
| "loss": 0.1009, |
| "step": 524 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.7349802851676941, |
| "learning_rate": 5.979057651975893e-06, |
| "loss": 0.097, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.208, |
| "grad_norm": 0.788063108921051, |
| "learning_rate": 5.964630327047485e-06, |
| "loss": 0.1451, |
| "step": 526 |
| }, |
| { |
| "epoch": 4.216, |
| "grad_norm": 0.9150925278663635, |
| "learning_rate": 5.9501946553607615e-06, |
| "loss": 0.1764, |
| "step": 527 |
| }, |
| { |
| "epoch": 4.224, |
| "grad_norm": 0.8285714387893677, |
| "learning_rate": 5.935750761824777e-06, |
| "loss": 0.0965, |
| "step": 528 |
| }, |
| { |
| "epoch": 4.232, |
| "grad_norm": 0.9090161919593811, |
| "learning_rate": 5.921298771419731e-06, |
| "loss": 0.1792, |
| "step": 529 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 0.7813971638679504, |
| "learning_rate": 5.906838809195879e-06, |
| "loss": 0.1492, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.248, |
| "grad_norm": 0.7030081748962402, |
| "learning_rate": 5.8923710002724595e-06, |
| "loss": 0.0911, |
| "step": 531 |
| }, |
| { |
| "epoch": 4.256, |
| "grad_norm": 0.8696473240852356, |
| "learning_rate": 5.877895469836604e-06, |
| "loss": 0.1808, |
| "step": 532 |
| }, |
| { |
| "epoch": 4.264, |
| "grad_norm": 0.8898045420646667, |
| "learning_rate": 5.863412343142258e-06, |
| "loss": 0.2043, |
| "step": 533 |
| }, |
| { |
| "epoch": 4.272, |
| "grad_norm": 0.9002187848091125, |
| "learning_rate": 5.848921745509094e-06, |
| "loss": 0.1582, |
| "step": 534 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 1.1970871686935425, |
| "learning_rate": 5.8344238023214305e-06, |
| "loss": 0.1485, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.288, |
| "grad_norm": 0.8672990798950195, |
| "learning_rate": 5.819918639027149e-06, |
| "loss": 0.1837, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.296, |
| "grad_norm": 0.8390870094299316, |
| "learning_rate": 5.805406381136598e-06, |
| "loss": 0.1557, |
| "step": 537 |
| }, |
| { |
| "epoch": 4.304, |
| "grad_norm": 1.0075196027755737, |
| "learning_rate": 5.790887154221521e-06, |
| "loss": 0.2385, |
| "step": 538 |
| }, |
| { |
| "epoch": 4.312, |
| "grad_norm": 0.9522439241409302, |
| "learning_rate": 5.776361083913959e-06, |
| "loss": 0.1226, |
| "step": 539 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 0.9824661612510681, |
| "learning_rate": 5.7618282959051685e-06, |
| "loss": 0.1157, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.328, |
| "grad_norm": 0.8090677261352539, |
| "learning_rate": 5.747288915944533e-06, |
| "loss": 0.1193, |
| "step": 541 |
| }, |
| { |
| "epoch": 4.336, |
| "grad_norm": 0.7453141212463379, |
| "learning_rate": 5.7327430698384775e-06, |
| "loss": 0.1051, |
| "step": 542 |
| }, |
| { |
| "epoch": 4.344, |
| "grad_norm": 1.0975877046585083, |
| "learning_rate": 5.718190883449373e-06, |
| "loss": 0.1913, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.352, |
| "grad_norm": 0.7594937682151794, |
| "learning_rate": 5.703632482694453e-06, |
| "loss": 0.1369, |
| "step": 544 |
| }, |
| { |
| "epoch": 4.36, |
| "grad_norm": 0.7948636412620544, |
| "learning_rate": 5.689067993544726e-06, |
| "loss": 0.1818, |
| "step": 545 |
| }, |
| { |
| "epoch": 4.368, |
| "grad_norm": 0.896981954574585, |
| "learning_rate": 5.674497542023875e-06, |
| "loss": 0.1261, |
| "step": 546 |
| }, |
| { |
| "epoch": 4.376, |
| "grad_norm": 0.7178036570549011, |
| "learning_rate": 5.659921254207183e-06, |
| "loss": 0.12, |
| "step": 547 |
| }, |
| { |
| "epoch": 4.384, |
| "grad_norm": 0.673762857913971, |
| "learning_rate": 5.645339256220427e-06, |
| "loss": 0.1432, |
| "step": 548 |
| }, |
| { |
| "epoch": 4.392, |
| "grad_norm": 0.8115494847297668, |
| "learning_rate": 5.630751674238796e-06, |
| "loss": 0.115, |
| "step": 549 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.7743411064147949, |
| "learning_rate": 5.616158634485793e-06, |
| "loss": 0.112, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.408, |
| "grad_norm": 0.6997777223587036, |
| "learning_rate": 5.601560263232153e-06, |
| "loss": 0.1006, |
| "step": 551 |
| }, |
| { |
| "epoch": 4.416, |
| "grad_norm": 0.8114786148071289, |
| "learning_rate": 5.5869566867947344e-06, |
| "loss": 0.1308, |
| "step": 552 |
| }, |
| { |
| "epoch": 4.424, |
| "grad_norm": 0.7540757060050964, |
| "learning_rate": 5.572348031535442e-06, |
| "loss": 0.1558, |
| "step": 553 |
| }, |
| { |
| "epoch": 4.432, |
| "grad_norm": 0.8168392777442932, |
| "learning_rate": 5.557734423860122e-06, |
| "loss": 0.1635, |
| "step": 554 |
| }, |
| { |
| "epoch": 4.44, |
| "grad_norm": 0.998570442199707, |
| "learning_rate": 5.543115990217478e-06, |
| "loss": 0.1332, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.448, |
| "grad_norm": 0.8248946070671082, |
| "learning_rate": 5.528492857097966e-06, |
| "loss": 0.1711, |
| "step": 556 |
| }, |
| { |
| "epoch": 4.456, |
| "grad_norm": 0.7837976813316345, |
| "learning_rate": 5.513865151032709e-06, |
| "loss": 0.1547, |
| "step": 557 |
| }, |
| { |
| "epoch": 4.464, |
| "grad_norm": 0.7378290295600891, |
| "learning_rate": 5.499232998592399e-06, |
| "loss": 0.0789, |
| "step": 558 |
| }, |
| { |
| "epoch": 4.4719999999999995, |
| "grad_norm": 0.6844831109046936, |
| "learning_rate": 5.484596526386198e-06, |
| "loss": 0.114, |
| "step": 559 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 0.7904562950134277, |
| "learning_rate": 5.469955861060653e-06, |
| "loss": 0.1676, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.4879999999999995, |
| "grad_norm": 0.7541800141334534, |
| "learning_rate": 5.455311129298586e-06, |
| "loss": 0.1196, |
| "step": 561 |
| }, |
| { |
| "epoch": 4.496, |
| "grad_norm": 0.7022758722305298, |
| "learning_rate": 5.44066245781801e-06, |
| "loss": 0.127, |
| "step": 562 |
| }, |
| { |
| "epoch": 4.504, |
| "grad_norm": 0.7572545409202576, |
| "learning_rate": 5.426009973371026e-06, |
| "loss": 0.1458, |
| "step": 563 |
| }, |
| { |
| "epoch": 4.5120000000000005, |
| "grad_norm": 0.7217088341712952, |
| "learning_rate": 5.4113538027427245e-06, |
| "loss": 0.1052, |
| "step": 564 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 0.6230894923210144, |
| "learning_rate": 5.396694072750099e-06, |
| "loss": 0.103, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.5280000000000005, |
| "grad_norm": 0.7158833146095276, |
| "learning_rate": 5.382030910240936e-06, |
| "loss": 0.0882, |
| "step": 566 |
| }, |
| { |
| "epoch": 4.536, |
| "grad_norm": 0.8185319900512695, |
| "learning_rate": 5.367364442092724e-06, |
| "loss": 0.125, |
| "step": 567 |
| }, |
| { |
| "epoch": 4.5440000000000005, |
| "grad_norm": 0.8623816967010498, |
| "learning_rate": 5.352694795211555e-06, |
| "loss": 0.1166, |
| "step": 568 |
| }, |
| { |
| "epoch": 4.552, |
| "grad_norm": 0.8295913338661194, |
| "learning_rate": 5.338022096531028e-06, |
| "loss": 0.1315, |
| "step": 569 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 1.077094316482544, |
| "learning_rate": 5.3233464730111426e-06, |
| "loss": 0.2167, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.568, |
| "grad_norm": 0.7022574543952942, |
| "learning_rate": 5.308668051637213e-06, |
| "loss": 0.1212, |
| "step": 571 |
| }, |
| { |
| "epoch": 4.576, |
| "grad_norm": 1.0412487983703613, |
| "learning_rate": 5.29398695941876e-06, |
| "loss": 0.1534, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.584, |
| "grad_norm": 1.1887091398239136, |
| "learning_rate": 5.279303323388413e-06, |
| "loss": 0.1918, |
| "step": 573 |
| }, |
| { |
| "epoch": 4.592, |
| "grad_norm": 0.7496406435966492, |
| "learning_rate": 5.2646172706008154e-06, |
| "loss": 0.1203, |
| "step": 574 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.7063726186752319, |
| "learning_rate": 5.249928928131523e-06, |
| "loss": 0.121, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.608, |
| "grad_norm": 0.8660956621170044, |
| "learning_rate": 5.235238423075899e-06, |
| "loss": 0.1118, |
| "step": 576 |
| }, |
| { |
| "epoch": 4.616, |
| "grad_norm": 0.8066213130950928, |
| "learning_rate": 5.220545882548024e-06, |
| "loss": 0.1414, |
| "step": 577 |
| }, |
| { |
| "epoch": 4.624, |
| "grad_norm": 0.7297463417053223, |
| "learning_rate": 5.20585143367959e-06, |
| "loss": 0.1391, |
| "step": 578 |
| }, |
| { |
| "epoch": 4.632, |
| "grad_norm": 0.7243335247039795, |
| "learning_rate": 5.191155203618796e-06, |
| "loss": 0.0997, |
| "step": 579 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 0.8564410209655762, |
| "learning_rate": 5.176457319529264e-06, |
| "loss": 0.1378, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.648, |
| "grad_norm": 0.8925532102584839, |
| "learning_rate": 5.161757908588917e-06, |
| "loss": 0.1611, |
| "step": 581 |
| }, |
| { |
| "epoch": 4.656, |
| "grad_norm": 0.7033802270889282, |
| "learning_rate": 5.147057097988898e-06, |
| "loss": 0.1161, |
| "step": 582 |
| }, |
| { |
| "epoch": 4.664, |
| "grad_norm": 0.7617799639701843, |
| "learning_rate": 5.132355014932455e-06, |
| "loss": 0.0811, |
| "step": 583 |
| }, |
| { |
| "epoch": 4.672, |
| "grad_norm": 0.7035624384880066, |
| "learning_rate": 5.1176517866338495e-06, |
| "loss": 0.1392, |
| "step": 584 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 0.7635079026222229, |
| "learning_rate": 5.102947540317254e-06, |
| "loss": 0.116, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.688, |
| "grad_norm": 0.6591924428939819, |
| "learning_rate": 5.088242403215644e-06, |
| "loss": 0.1264, |
| "step": 586 |
| }, |
| { |
| "epoch": 4.696, |
| "grad_norm": 0.7575790882110596, |
| "learning_rate": 5.073536502569708e-06, |
| "loss": 0.138, |
| "step": 587 |
| }, |
| { |
| "epoch": 4.704, |
| "grad_norm": 0.802493691444397, |
| "learning_rate": 5.058829965626742e-06, |
| "loss": 0.15, |
| "step": 588 |
| }, |
| { |
| "epoch": 4.712, |
| "grad_norm": 0.7997198104858398, |
| "learning_rate": 5.0441229196395416e-06, |
| "loss": 0.1249, |
| "step": 589 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 0.8240690231323242, |
| "learning_rate": 5.029415491865311e-06, |
| "loss": 0.136, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.728, |
| "grad_norm": 0.7805035710334778, |
| "learning_rate": 5.014707809564562e-06, |
| "loss": 0.135, |
| "step": 591 |
| }, |
| { |
| "epoch": 4.736, |
| "grad_norm": 0.7590795755386353, |
| "learning_rate": 5e-06, |
| "loss": 0.1646, |
| "step": 592 |
| }, |
| { |
| "epoch": 4.744, |
| "grad_norm": 0.738740086555481, |
| "learning_rate": 4.98529219043544e-06, |
| "loss": 0.1616, |
| "step": 593 |
| }, |
| { |
| "epoch": 4.752, |
| "grad_norm": 0.7487245798110962, |
| "learning_rate": 4.97058450813469e-06, |
| "loss": 0.1933, |
| "step": 594 |
| }, |
| { |
| "epoch": 4.76, |
| "grad_norm": 0.6358115673065186, |
| "learning_rate": 4.955877080360462e-06, |
| "loss": 0.1079, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.768, |
| "grad_norm": 0.8972571492195129, |
| "learning_rate": 4.94117003437326e-06, |
| "loss": 0.2013, |
| "step": 596 |
| }, |
| { |
| "epoch": 4.776, |
| "grad_norm": 0.7692276835441589, |
| "learning_rate": 4.926463497430293e-06, |
| "loss": 0.1722, |
| "step": 597 |
| }, |
| { |
| "epoch": 4.784, |
| "grad_norm": 0.8051016926765442, |
| "learning_rate": 4.911757596784358e-06, |
| "loss": 0.1459, |
| "step": 598 |
| }, |
| { |
| "epoch": 4.792, |
| "grad_norm": 0.7161281108856201, |
| "learning_rate": 4.897052459682749e-06, |
| "loss": 0.1477, |
| "step": 599 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.7656087279319763, |
| "learning_rate": 4.882348213366152e-06, |
| "loss": 0.1256, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.808, |
| "grad_norm": 0.8391464948654175, |
| "learning_rate": 4.867644985067548e-06, |
| "loss": 0.1232, |
| "step": 601 |
| }, |
| { |
| "epoch": 4.816, |
| "grad_norm": 0.8359267115592957, |
| "learning_rate": 4.8529429020111035e-06, |
| "loss": 0.1453, |
| "step": 602 |
| }, |
| { |
| "epoch": 4.824, |
| "grad_norm": 1.7344919443130493, |
| "learning_rate": 4.838242091411085e-06, |
| "loss": 0.1262, |
| "step": 603 |
| }, |
| { |
| "epoch": 4.832, |
| "grad_norm": 0.8207628130912781, |
| "learning_rate": 4.823542680470738e-06, |
| "loss": 0.0926, |
| "step": 604 |
| }, |
| { |
| "epoch": 4.84, |
| "grad_norm": 0.7868751883506775, |
| "learning_rate": 4.808844796381205e-06, |
| "loss": 0.2016, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.848, |
| "grad_norm": 0.700920820236206, |
| "learning_rate": 4.794148566320412e-06, |
| "loss": 0.1125, |
| "step": 606 |
| }, |
| { |
| "epoch": 4.856, |
| "grad_norm": 0.8076983094215393, |
| "learning_rate": 4.779454117451978e-06, |
| "loss": 0.1505, |
| "step": 607 |
| }, |
| { |
| "epoch": 4.864, |
| "grad_norm": 0.8895502686500549, |
| "learning_rate": 4.7647615769241e-06, |
| "loss": 0.1841, |
| "step": 608 |
| }, |
| { |
| "epoch": 4.872, |
| "grad_norm": 0.8726681470870972, |
| "learning_rate": 4.750071071868478e-06, |
| "loss": 0.1005, |
| "step": 609 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 0.8028600215911865, |
| "learning_rate": 4.7353827293991845e-06, |
| "loss": 0.1587, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.888, |
| "grad_norm": 0.8120298981666565, |
| "learning_rate": 4.720696676611589e-06, |
| "loss": 0.198, |
| "step": 611 |
| }, |
| { |
| "epoch": 4.896, |
| "grad_norm": 0.9092877507209778, |
| "learning_rate": 4.706013040581242e-06, |
| "loss": 0.1812, |
| "step": 612 |
| }, |
| { |
| "epoch": 4.904, |
| "grad_norm": 0.9110473394393921, |
| "learning_rate": 4.691331948362789e-06, |
| "loss": 0.1525, |
| "step": 613 |
| }, |
| { |
| "epoch": 4.912, |
| "grad_norm": 0.9524548053741455, |
| "learning_rate": 4.676653526988858e-06, |
| "loss": 0.1463, |
| "step": 614 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 0.8919450044631958, |
| "learning_rate": 4.661977903468974e-06, |
| "loss": 0.1775, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.928, |
| "grad_norm": 0.900175929069519, |
| "learning_rate": 4.647305204788445e-06, |
| "loss": 0.1803, |
| "step": 616 |
| }, |
| { |
| "epoch": 4.936, |
| "grad_norm": 1.5099406242370605, |
| "learning_rate": 4.632635557907277e-06, |
| "loss": 0.0989, |
| "step": 617 |
| }, |
| { |
| "epoch": 4.944, |
| "grad_norm": 0.7202231884002686, |
| "learning_rate": 4.617969089759066e-06, |
| "loss": 0.1328, |
| "step": 618 |
| }, |
| { |
| "epoch": 4.952, |
| "grad_norm": 0.7937277555465698, |
| "learning_rate": 4.603305927249902e-06, |
| "loss": 0.1041, |
| "step": 619 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 0.8269131183624268, |
| "learning_rate": 4.588646197257278e-06, |
| "loss": 0.1296, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.968, |
| "grad_norm": 0.7114303708076477, |
| "learning_rate": 4.573990026628976e-06, |
| "loss": 0.1493, |
| "step": 621 |
| }, |
| { |
| "epoch": 4.976, |
| "grad_norm": 0.7398365139961243, |
| "learning_rate": 4.559337542181993e-06, |
| "loss": 0.0922, |
| "step": 622 |
| }, |
| { |
| "epoch": 4.984, |
| "grad_norm": 0.8082178235054016, |
| "learning_rate": 4.544688870701416e-06, |
| "loss": 0.177, |
| "step": 623 |
| }, |
| { |
| "epoch": 4.992, |
| "grad_norm": 0.8239241242408752, |
| "learning_rate": 4.53004413893935e-06, |
| "loss": 0.1691, |
| "step": 624 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.8424403071403503, |
| "learning_rate": 4.5154034736138035e-06, |
| "loss": 0.2033, |
| "step": 625 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.0327304331801395e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|