| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9968, |
| "eval_steps": 500, |
| "global_step": 780, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00256, |
| "grad_norm": 9.365875273875238, |
| "learning_rate": 1.25e-06, |
| "loss": 1.3849, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 10.543859791920642, |
| "learning_rate": 2.5e-06, |
| "loss": 1.4809, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 8.02060315144952, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3488, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 6.20091121600503, |
| "learning_rate": 5e-06, |
| "loss": 1.1901, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 5.164630784639751, |
| "learning_rate": 6.25e-06, |
| "loss": 1.1539, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01536, |
| "grad_norm": 4.321523771840541, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.0821, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01792, |
| "grad_norm": 3.931935241507329, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 0.9753, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02048, |
| "grad_norm": 3.5793289410771827, |
| "learning_rate": 1e-05, |
| "loss": 0.9717, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02304, |
| "grad_norm": 4.256516218362947, |
| "learning_rate": 1.125e-05, |
| "loss": 0.9383, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 4.063010311058102, |
| "learning_rate": 1.25e-05, |
| "loss": 1.0224, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02816, |
| "grad_norm": 4.272978299264522, |
| "learning_rate": 1.375e-05, |
| "loss": 0.8981, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03072, |
| "grad_norm": 3.75622481926478, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.9254, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03328, |
| "grad_norm": 3.103766157100673, |
| "learning_rate": 1.6250000000000002e-05, |
| "loss": 0.839, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03584, |
| "grad_norm": 3.0001871471139943, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 0.8211, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 2.9538431919045105, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 0.7587, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04096, |
| "grad_norm": 3.1375029063705995, |
| "learning_rate": 2e-05, |
| "loss": 0.8149, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04352, |
| "grad_norm": 3.086989542887256, |
| "learning_rate": 1.9999915456072218e-05, |
| "loss": 0.7836, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04608, |
| "grad_norm": 3.0607437954707812, |
| "learning_rate": 1.9999661825718408e-05, |
| "loss": 0.8128, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04864, |
| "grad_norm": 3.0400418762467196, |
| "learning_rate": 1.9999239113227146e-05, |
| "loss": 0.7667, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 2.82664678787635, |
| "learning_rate": 1.9998647325745995e-05, |
| "loss": 0.7523, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05376, |
| "grad_norm": 2.8593372729121036, |
| "learning_rate": 1.9997886473281355e-05, |
| "loss": 0.7988, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05632, |
| "grad_norm": 2.923977789288882, |
| "learning_rate": 1.9996956568698325e-05, |
| "loss": 0.7527, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05888, |
| "grad_norm": 2.6178941828631337, |
| "learning_rate": 1.9995857627720456e-05, |
| "loss": 0.7438, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06144, |
| "grad_norm": 2.5820149137237345, |
| "learning_rate": 1.99945896689295e-05, |
| "loss": 0.7248, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 2.7301648737950224, |
| "learning_rate": 1.9993152713765116e-05, |
| "loss": 0.6776, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06656, |
| "grad_norm": 2.5608433077601775, |
| "learning_rate": 1.999154678652446e-05, |
| "loss": 0.8218, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06912, |
| "grad_norm": 3.0122091067319623, |
| "learning_rate": 1.998977191436181e-05, |
| "loss": 0.7429, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07168, |
| "grad_norm": 2.5954170063820654, |
| "learning_rate": 1.9987828127288105e-05, |
| "loss": 0.6964, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07424, |
| "grad_norm": 2.7446609447785004, |
| "learning_rate": 1.998571545817042e-05, |
| "loss": 0.7901, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 2.6357015498254794, |
| "learning_rate": 1.9983433942731427e-05, |
| "loss": 0.6802, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07936, |
| "grad_norm": 3.0253927729854966, |
| "learning_rate": 1.998098361954878e-05, |
| "loss": 0.7918, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.08192, |
| "grad_norm": 2.729017093894504, |
| "learning_rate": 1.9978364530054465e-05, |
| "loss": 0.7565, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08448, |
| "grad_norm": 2.551676529089632, |
| "learning_rate": 1.9975576718534105e-05, |
| "loss": 0.6984, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08704, |
| "grad_norm": 2.375670501782899, |
| "learning_rate": 1.9972620232126215e-05, |
| "loss": 0.711, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 2.532366615420919, |
| "learning_rate": 1.996949512082138e-05, |
| "loss": 0.7398, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09216, |
| "grad_norm": 2.531683530333962, |
| "learning_rate": 1.996620143746144e-05, |
| "loss": 0.7466, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.09472, |
| "grad_norm": 2.4816273494459598, |
| "learning_rate": 1.9962739237738585e-05, |
| "loss": 0.7701, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09728, |
| "grad_norm": 2.5153024518723006, |
| "learning_rate": 1.9959108580194403e-05, |
| "loss": 0.765, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09984, |
| "grad_norm": 2.2940292462059735, |
| "learning_rate": 1.9955309526218903e-05, |
| "loss": 0.7029, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 2.438121400214285, |
| "learning_rate": 1.9951342140049483e-05, |
| "loss": 0.7248, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10496, |
| "grad_norm": 2.645240477077927, |
| "learning_rate": 1.9947206488769812e-05, |
| "loss": 0.7326, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.10752, |
| "grad_norm": 2.4038499208315445, |
| "learning_rate": 1.9942902642308737e-05, |
| "loss": 0.6941, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.11008, |
| "grad_norm": 2.5452147223708486, |
| "learning_rate": 1.9938430673439075e-05, |
| "loss": 0.7328, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11264, |
| "grad_norm": 2.1971161649405815, |
| "learning_rate": 1.993379065777639e-05, |
| "loss": 0.6695, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 2.3455095751305945, |
| "learning_rate": 1.9928982673777707e-05, |
| "loss": 0.6808, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.11776, |
| "grad_norm": 2.205715323577842, |
| "learning_rate": 1.9924006802740203e-05, |
| "loss": 0.692, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.12032, |
| "grad_norm": 2.879359922240759, |
| "learning_rate": 1.9918863128799805e-05, |
| "loss": 0.7251, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.12288, |
| "grad_norm": 2.306572227959017, |
| "learning_rate": 1.9913551738929803e-05, |
| "loss": 0.734, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.12544, |
| "grad_norm": 3.010751503600904, |
| "learning_rate": 1.9908072722939344e-05, |
| "loss": 0.6667, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 2.500047773905711, |
| "learning_rate": 1.9902426173471933e-05, |
| "loss": 0.7766, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13056, |
| "grad_norm": 2.3211836211522945, |
| "learning_rate": 1.9896612186003866e-05, |
| "loss": 0.7355, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.13312, |
| "grad_norm": 2.1700803300583553, |
| "learning_rate": 1.9890630858842614e-05, |
| "loss": 0.6743, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.13568, |
| "grad_norm": 2.4572092973317416, |
| "learning_rate": 1.988448229312515e-05, |
| "loss": 0.6261, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.13824, |
| "grad_norm": 2.6489718279675167, |
| "learning_rate": 1.9878166592816255e-05, |
| "loss": 0.7783, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 1.9656310646192854, |
| "learning_rate": 1.9871683864706752e-05, |
| "loss": 0.6444, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14336, |
| "grad_norm": 2.8772548921197374, |
| "learning_rate": 1.9865034218411698e-05, |
| "loss": 0.6984, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.14592, |
| "grad_norm": 2.2991328693562183, |
| "learning_rate": 1.9858217766368538e-05, |
| "loss": 0.7365, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.14848, |
| "grad_norm": 2.744151628806484, |
| "learning_rate": 1.98512346238352e-05, |
| "loss": 0.7668, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.15104, |
| "grad_norm": 2.126168797905977, |
| "learning_rate": 1.984408490888814e-05, |
| "loss": 0.6806, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 2.179164947113899, |
| "learning_rate": 1.9836768742420355e-05, |
| "loss": 0.7316, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15616, |
| "grad_norm": 2.471943900346479, |
| "learning_rate": 1.9829286248139334e-05, |
| "loss": 0.8413, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.15872, |
| "grad_norm": 2.1155043013923898, |
| "learning_rate": 1.9821637552564973e-05, |
| "loss": 0.7118, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.16128, |
| "grad_norm": 2.511969558064074, |
| "learning_rate": 1.9813822785027422e-05, |
| "loss": 0.8131, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.16384, |
| "grad_norm": 2.967819167847234, |
| "learning_rate": 1.9805842077664913e-05, |
| "loss": 0.7301, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 2.8259470035066485, |
| "learning_rate": 1.9797695565421507e-05, |
| "loss": 0.7323, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.16896, |
| "grad_norm": 2.302591195035351, |
| "learning_rate": 1.978938338604484e-05, |
| "loss": 0.6715, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.17152, |
| "grad_norm": 2.3196544383121958, |
| "learning_rate": 1.978090568008377e-05, |
| "loss": 0.685, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.17408, |
| "grad_norm": 2.624524610568192, |
| "learning_rate": 1.9772262590886006e-05, |
| "loss": 0.6869, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.17664, |
| "grad_norm": 2.6543764757775006, |
| "learning_rate": 1.9763454264595694e-05, |
| "loss": 0.8062, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 2.434462493719978, |
| "learning_rate": 1.975448085015093e-05, |
| "loss": 0.727, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18176, |
| "grad_norm": 4.222411149653797, |
| "learning_rate": 1.9745342499281254e-05, |
| "loss": 0.7087, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.18432, |
| "grad_norm": 2.1852577874422963, |
| "learning_rate": 1.9736039366505087e-05, |
| "loss": 0.6602, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.18688, |
| "grad_norm": 2.366328418247487, |
| "learning_rate": 1.9726571609127097e-05, |
| "loss": 0.7275, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.18944, |
| "grad_norm": 2.4876758296788193, |
| "learning_rate": 1.9716939387235573e-05, |
| "loss": 0.6976, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 2.4805839086487382, |
| "learning_rate": 1.9707142863699687e-05, |
| "loss": 0.6665, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.19456, |
| "grad_norm": 2.427841997203471, |
| "learning_rate": 1.969718220416675e-05, |
| "loss": 0.6817, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.19712, |
| "grad_norm": 2.386419228571022, |
| "learning_rate": 1.9687057577059422e-05, |
| "loss": 0.7231, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.19968, |
| "grad_norm": 2.1832161777994394, |
| "learning_rate": 1.9676769153572853e-05, |
| "loss": 0.6697, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.20224, |
| "grad_norm": 1.913933923954127, |
| "learning_rate": 1.966631710767178e-05, |
| "loss": 0.6854, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 2.2129580021231066, |
| "learning_rate": 1.965570161608762e-05, |
| "loss": 0.6902, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.20736, |
| "grad_norm": 2.3913638942796283, |
| "learning_rate": 1.9644922858315432e-05, |
| "loss": 0.6949, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.20992, |
| "grad_norm": 2.523715832051759, |
| "learning_rate": 1.9633981016610926e-05, |
| "loss": 0.6668, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.21248, |
| "grad_norm": 2.6639369089204794, |
| "learning_rate": 1.9622876275987355e-05, |
| "loss": 0.7303, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.21504, |
| "grad_norm": 2.3755204875190827, |
| "learning_rate": 1.9611608824212395e-05, |
| "loss": 0.7113, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 2.252602926327337, |
| "learning_rate": 1.9600178851804977e-05, |
| "loss": 0.7382, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22016, |
| "grad_norm": 2.2611177708517363, |
| "learning_rate": 1.958858655203205e-05, |
| "loss": 0.6784, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.22272, |
| "grad_norm": 2.3668773245432537, |
| "learning_rate": 1.9576832120905323e-05, |
| "loss": 0.7523, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.22528, |
| "grad_norm": 2.0990171366400414, |
| "learning_rate": 1.9564915757177955e-05, |
| "loss": 0.6191, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.22784, |
| "grad_norm": 2.1035732112703722, |
| "learning_rate": 1.9552837662341182e-05, |
| "loss": 0.6708, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 2.333412145423922, |
| "learning_rate": 1.954059804062092e-05, |
| "loss": 0.677, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23296, |
| "grad_norm": 2.1825211355833405, |
| "learning_rate": 1.952819709897431e-05, |
| "loss": 0.6866, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.23552, |
| "grad_norm": 2.2533551325921866, |
| "learning_rate": 1.951563504708622e-05, |
| "loss": 0.5733, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.23808, |
| "grad_norm": 2.4138760679849174, |
| "learning_rate": 1.9502912097365677e-05, |
| "loss": 0.6838, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.24064, |
| "grad_norm": 2.7111028570055598, |
| "learning_rate": 1.9490028464942322e-05, |
| "loss": 0.6759, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 2.303650092378439, |
| "learning_rate": 1.9476984367662724e-05, |
| "loss": 0.6187, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.24576, |
| "grad_norm": 2.36652208711437, |
| "learning_rate": 1.9463780026086735e-05, |
| "loss": 0.7569, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.24832, |
| "grad_norm": 1.957291438435352, |
| "learning_rate": 1.9450415663483734e-05, |
| "loss": 0.6306, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.25088, |
| "grad_norm": 2.5876692343060443, |
| "learning_rate": 1.9436891505828854e-05, |
| "loss": 0.743, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.25344, |
| "grad_norm": 2.4337909184554642, |
| "learning_rate": 1.9423207781799186e-05, |
| "loss": 0.6156, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 2.1443313808235698, |
| "learning_rate": 1.9409364722769882e-05, |
| "loss": 0.6749, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25856, |
| "grad_norm": 2.542769952057182, |
| "learning_rate": 1.939536256281026e-05, |
| "loss": 0.7428, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.26112, |
| "grad_norm": 2.3379731947006865, |
| "learning_rate": 1.938120153867983e-05, |
| "loss": 0.7159, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.26368, |
| "grad_norm": 2.1808691357013954, |
| "learning_rate": 1.936688188982433e-05, |
| "loss": 0.7103, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.26624, |
| "grad_norm": 1.916729367685334, |
| "learning_rate": 1.9352403858371618e-05, |
| "loss": 0.6595, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 2.014231671417234, |
| "learning_rate": 1.9337767689127628e-05, |
| "loss": 0.6411, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.27136, |
| "grad_norm": 1.9129910147877291, |
| "learning_rate": 1.9322973629572207e-05, |
| "loss": 0.6819, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.27392, |
| "grad_norm": 2.077225498483606, |
| "learning_rate": 1.9308021929854934e-05, |
| "loss": 0.6587, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.27648, |
| "grad_norm": 2.0796212070981865, |
| "learning_rate": 1.9292912842790893e-05, |
| "loss": 0.7157, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.27904, |
| "grad_norm": 2.2161612611915147, |
| "learning_rate": 1.92776466238564e-05, |
| "loss": 0.6943, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 2.1549077684507396, |
| "learning_rate": 1.9262223531184678e-05, |
| "loss": 0.6259, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.28416, |
| "grad_norm": 2.0654699290261647, |
| "learning_rate": 1.924664382556149e-05, |
| "loss": 0.6057, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.28672, |
| "grad_norm": 2.142110152136122, |
| "learning_rate": 1.9230907770420737e-05, |
| "loss": 0.6625, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.28928, |
| "grad_norm": 1.9897544774269862, |
| "learning_rate": 1.9215015631840005e-05, |
| "loss": 0.6164, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.29184, |
| "grad_norm": 2.2587245433808594, |
| "learning_rate": 1.9198967678536054e-05, |
| "loss": 0.6616, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 2.342586021827016, |
| "learning_rate": 1.918276418186028e-05, |
| "loss": 0.6494, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.29696, |
| "grad_norm": 1.9302093197641312, |
| "learning_rate": 1.916640541579415e-05, |
| "loss": 0.65, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.29952, |
| "grad_norm": 2.115112025916886, |
| "learning_rate": 1.9149891656944513e-05, |
| "loss": 0.6374, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.30208, |
| "grad_norm": 2.1565182972269032, |
| "learning_rate": 1.913322318453899e-05, |
| "loss": 0.699, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.30464, |
| "grad_norm": 2.11350736146981, |
| "learning_rate": 1.9116400280421196e-05, |
| "loss": 0.6088, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 2.344994034519172, |
| "learning_rate": 1.9099423229046015e-05, |
| "loss": 0.752, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.30976, |
| "grad_norm": 2.093068646948046, |
| "learning_rate": 1.9082292317474766e-05, |
| "loss": 0.6185, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.31232, |
| "grad_norm": 2.316319871123848, |
| "learning_rate": 1.9065007835370358e-05, |
| "loss": 0.7374, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.31488, |
| "grad_norm": 2.096344829293569, |
| "learning_rate": 1.90475700749924e-05, |
| "loss": 0.672, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.31744, |
| "grad_norm": 2.5526545007602235, |
| "learning_rate": 1.902997933119223e-05, |
| "loss": 0.7168, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.234512228398607, |
| "learning_rate": 1.9012235901407976e-05, |
| "loss": 0.6724, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.32256, |
| "grad_norm": 2.221990498534791, |
| "learning_rate": 1.8994340085659474e-05, |
| "loss": 0.6165, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.32512, |
| "grad_norm": 2.1511357630477526, |
| "learning_rate": 1.897629218654325e-05, |
| "loss": 0.6733, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.32768, |
| "grad_norm": 2.122487155014188, |
| "learning_rate": 1.8958092509227347e-05, |
| "loss": 0.6036, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.33024, |
| "grad_norm": 2.0229651456552924, |
| "learning_rate": 1.8939741361446207e-05, |
| "loss": 0.581, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 2.1911603701017226, |
| "learning_rate": 1.8921239053495465e-05, |
| "loss": 0.7016, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.33536, |
| "grad_norm": 2.0676857284142804, |
| "learning_rate": 1.8902585898226663e-05, |
| "loss": 0.6983, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.33792, |
| "grad_norm": 2.264571405087662, |
| "learning_rate": 1.888378221104201e-05, |
| "loss": 0.724, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.34048, |
| "grad_norm": 2.051720164060425, |
| "learning_rate": 1.8864828309889022e-05, |
| "loss": 0.6245, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.34304, |
| "grad_norm": 2.2288895604426116, |
| "learning_rate": 1.8845724515255147e-05, |
| "loss": 0.6611, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 2.008766119781776, |
| "learning_rate": 1.8826471150162354e-05, |
| "loss": 0.6202, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.34816, |
| "grad_norm": 2.4451017728151045, |
| "learning_rate": 1.880706854016166e-05, |
| "loss": 0.714, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.35072, |
| "grad_norm": 2.279883321646425, |
| "learning_rate": 1.8787517013327642e-05, |
| "loss": 0.6921, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.35328, |
| "grad_norm": 1.8819123586138218, |
| "learning_rate": 1.876781690025287e-05, |
| "loss": 0.6242, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.35584, |
| "grad_norm": 2.209480600689614, |
| "learning_rate": 1.8747968534042333e-05, |
| "loss": 0.685, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 2.25151039966994, |
| "learning_rate": 1.87279722503078e-05, |
| "loss": 0.6191, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36096, |
| "grad_norm": 1.9964258482072739, |
| "learning_rate": 1.8707828387162145e-05, |
| "loss": 0.6209, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.36352, |
| "grad_norm": 2.06019337674458, |
| "learning_rate": 1.8687537285213627e-05, |
| "loss": 0.6007, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.36608, |
| "grad_norm": 2.2157618609318264, |
| "learning_rate": 1.866709928756014e-05, |
| "loss": 0.6939, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.36864, |
| "grad_norm": 2.0119018007709553, |
| "learning_rate": 1.8646514739783404e-05, |
| "loss": 0.6719, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 2.2377675426901287, |
| "learning_rate": 1.8625783989943124e-05, |
| "loss": 0.6735, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.37376, |
| "grad_norm": 1.8794121110318662, |
| "learning_rate": 1.8604907388571097e-05, |
| "loss": 0.6737, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.37632, |
| "grad_norm": 2.0370150523104704, |
| "learning_rate": 1.8583885288665307e-05, |
| "loss": 0.7013, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.37888, |
| "grad_norm": 2.0600475834352445, |
| "learning_rate": 1.8562718045683933e-05, |
| "loss": 0.6418, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.38144, |
| "grad_norm": 2.0885287172547096, |
| "learning_rate": 1.854140601753934e-05, |
| "loss": 0.6588, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 2.1901572667391362, |
| "learning_rate": 1.8519949564592047e-05, |
| "loss": 0.6395, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.38656, |
| "grad_norm": 2.223773214792353, |
| "learning_rate": 1.8498349049644614e-05, |
| "loss": 0.7024, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.38912, |
| "grad_norm": 1.9602921943490395, |
| "learning_rate": 1.8476604837935515e-05, |
| "loss": 0.6104, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.39168, |
| "grad_norm": 1.7818478560580842, |
| "learning_rate": 1.8454717297132966e-05, |
| "loss": 0.6368, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.39424, |
| "grad_norm": 2.0816014287167466, |
| "learning_rate": 1.8432686797328697e-05, |
| "loss": 0.7267, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 2.0299850911079, |
| "learning_rate": 1.8410513711031713e-05, |
| "loss": 0.6158, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.39936, |
| "grad_norm": 1.7411644336765735, |
| "learning_rate": 1.8388198413161962e-05, |
| "loss": 0.5699, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.40192, |
| "grad_norm": 1.925361120184857, |
| "learning_rate": 1.8365741281044046e-05, |
| "loss": 0.6232, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.40448, |
| "grad_norm": 2.083962310552385, |
| "learning_rate": 1.8343142694400784e-05, |
| "loss": 0.6156, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.40704, |
| "grad_norm": 2.3609597131509945, |
| "learning_rate": 1.8320403035346834e-05, |
| "loss": 0.7054, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 2.060537565443389, |
| "learning_rate": 1.829752268838222e-05, |
| "loss": 0.6445, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.41216, |
| "grad_norm": 2.022152022387066, |
| "learning_rate": 1.8274502040385814e-05, |
| "loss": 0.5156, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.41472, |
| "grad_norm": 2.1533000714293022, |
| "learning_rate": 1.8251341480608823e-05, |
| "loss": 0.6786, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.41728, |
| "grad_norm": 2.221504339814079, |
| "learning_rate": 1.8228041400668185e-05, |
| "loss": 0.6856, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.41984, |
| "grad_norm": 2.123873387795693, |
| "learning_rate": 1.8204602194539948e-05, |
| "loss": 0.6883, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 1.9816040211257193, |
| "learning_rate": 1.8181024258552633e-05, |
| "loss": 0.651, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.42496, |
| "grad_norm": 1.9951719680446292, |
| "learning_rate": 1.8157307991380496e-05, |
| "loss": 0.63, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.42752, |
| "grad_norm": 1.8866496489390194, |
| "learning_rate": 1.8133453794036816e-05, |
| "loss": 0.5996, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.43008, |
| "grad_norm": 2.0782535399638924, |
| "learning_rate": 1.81094620698671e-05, |
| "loss": 0.68, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.43264, |
| "grad_norm": 2.015837845490803, |
| "learning_rate": 1.8085333224542263e-05, |
| "loss": 0.6587, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 1.9606661811078803, |
| "learning_rate": 1.806106766605178e-05, |
| "loss": 0.6543, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.43776, |
| "grad_norm": 2.0108460061760463, |
| "learning_rate": 1.8036665804696777e-05, |
| "loss": 0.7058, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.44032, |
| "grad_norm": 2.3760532048781773, |
| "learning_rate": 1.8012128053083097e-05, |
| "loss": 0.6896, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.44288, |
| "grad_norm": 2.068909616846273, |
| "learning_rate": 1.798745482611431e-05, |
| "loss": 0.6617, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.44544, |
| "grad_norm": 2.174520441881339, |
| "learning_rate": 1.7962646540984733e-05, |
| "loss": 0.6053, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.8665524812908805, |
| "learning_rate": 1.7937703617172326e-05, |
| "loss": 0.5854, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.45056, |
| "grad_norm": 2.0307815923129144, |
| "learning_rate": 1.7912626476431648e-05, |
| "loss": 0.7031, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.45312, |
| "grad_norm": 2.3551765798842754, |
| "learning_rate": 1.7887415542786694e-05, |
| "loss": 0.6616, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.45568, |
| "grad_norm": 2.316754252058375, |
| "learning_rate": 1.786207124252373e-05, |
| "loss": 0.6539, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.45824, |
| "grad_norm": 1.8999783082061619, |
| "learning_rate": 1.7836594004184097e-05, |
| "loss": 0.6462, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 2.1452149051911142, |
| "learning_rate": 1.7810984258556955e-05, |
| "loss": 0.6678, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.46336, |
| "grad_norm": 1.8647430589477332, |
| "learning_rate": 1.7785242438672002e-05, |
| "loss": 0.654, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.46592, |
| "grad_norm": 2.1243120883953615, |
| "learning_rate": 1.7759368979792145e-05, |
| "loss": 0.6092, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.46848, |
| "grad_norm": 2.2473766249395015, |
| "learning_rate": 1.773336431940616e-05, |
| "loss": 0.6942, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.47104, |
| "grad_norm": 1.9868382907140005, |
| "learning_rate": 1.770722889722126e-05, |
| "loss": 0.6385, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 1.8495835605726167, |
| "learning_rate": 1.7680963155155712e-05, |
| "loss": 0.5967, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.47616, |
| "grad_norm": 2.012001328207229, |
| "learning_rate": 1.76545675373313e-05, |
| "loss": 0.5712, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.47872, |
| "grad_norm": 2.028400500138602, |
| "learning_rate": 1.7628042490065877e-05, |
| "loss": 0.6759, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.48128, |
| "grad_norm": 2.1686891556942474, |
| "learning_rate": 1.760138846186577e-05, |
| "loss": 0.6193, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.48384, |
| "grad_norm": 1.8958996376109047, |
| "learning_rate": 1.7574605903418226e-05, |
| "loss": 0.5843, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 1.8940614758684058, |
| "learning_rate": 1.7547695267583794e-05, |
| "loss": 0.5822, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.48896, |
| "grad_norm": 2.024651976413743, |
| "learning_rate": 1.7520657009388634e-05, |
| "loss": 0.6215, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.49152, |
| "grad_norm": 1.9418524552652976, |
| "learning_rate": 1.749349158601686e-05, |
| "loss": 0.5839, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.49408, |
| "grad_norm": 1.8295776930635907, |
| "learning_rate": 1.7466199456802784e-05, |
| "loss": 0.5904, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.49664, |
| "grad_norm": 1.797136193223393, |
| "learning_rate": 1.743878108322318e-05, |
| "loss": 0.526, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 2.2173879719468297, |
| "learning_rate": 1.741123692888943e-05, |
| "loss": 0.6384, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.50176, |
| "grad_norm": 1.9844216586253793, |
| "learning_rate": 1.738356745953975e-05, |
| "loss": 0.6999, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.50432, |
| "grad_norm": 2.0317553296200987, |
| "learning_rate": 1.7355773143031247e-05, |
| "loss": 0.6412, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.50688, |
| "grad_norm": 1.8020878757675494, |
| "learning_rate": 1.7327854449332067e-05, |
| "loss": 0.5577, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.50944, |
| "grad_norm": 2.1986751893306664, |
| "learning_rate": 1.729981185051342e-05, |
| "loss": 0.6713, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 2.083058419584056, |
| "learning_rate": 1.7271645820741586e-05, |
| "loss": 0.6577, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.51456, |
| "grad_norm": 2.124603731918396, |
| "learning_rate": 1.7243356836269928e-05, |
| "loss": 0.6368, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.51712, |
| "grad_norm": 1.9258241573803725, |
| "learning_rate": 1.7214945375430816e-05, |
| "loss": 0.6209, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.51968, |
| "grad_norm": 2.005751012165097, |
| "learning_rate": 1.718641191862755e-05, |
| "loss": 0.6142, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.52224, |
| "grad_norm": 1.9212135746911165, |
| "learning_rate": 1.715775694832623e-05, |
| "loss": 0.5725, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 1.830132532496078, |
| "learning_rate": 1.7128980949047607e-05, |
| "loss": 0.5957, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.52736, |
| "grad_norm": 1.9564593565799755, |
| "learning_rate": 1.7100084407358882e-05, |
| "loss": 0.673, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.52992, |
| "grad_norm": 1.9307648613118853, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 0.5983, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.53248, |
| "grad_norm": 2.2224928140736084, |
| "learning_rate": 1.7041931653202788e-05, |
| "loss": 0.6069, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.53504, |
| "grad_norm": 2.251661406658001, |
| "learning_rate": 1.7012676424027873e-05, |
| "loss": 0.6489, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 1.9038603600791324, |
| "learning_rate": 1.6983302619011125e-05, |
| "loss": 0.5801, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.54016, |
| "grad_norm": 1.8456987114605605, |
| "learning_rate": 1.6953810734827917e-05, |
| "loss": 0.5935, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.54272, |
| "grad_norm": 1.922915287325509, |
| "learning_rate": 1.6924201270150194e-05, |
| "loss": 0.5463, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.54528, |
| "grad_norm": 1.9097147283983678, |
| "learning_rate": 1.6894474725638043e-05, |
| "loss": 0.6044, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.54784, |
| "grad_norm": 2.102752688389468, |
| "learning_rate": 1.686463160393123e-05, |
| "loss": 0.67, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 1.9917213811536882, |
| "learning_rate": 1.6834672409640705e-05, |
| "loss": 0.6429, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.55296, |
| "grad_norm": 1.974969043991725, |
| "learning_rate": 1.680459764934006e-05, |
| "loss": 0.6335, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.55552, |
| "grad_norm": 2.196550392557438, |
| "learning_rate": 1.677440783155696e-05, |
| "loss": 0.6468, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.55808, |
| "grad_norm": 1.8154736707811576, |
| "learning_rate": 1.6744103466764566e-05, |
| "loss": 0.576, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.56064, |
| "grad_norm": 2.1050477732929633, |
| "learning_rate": 1.671368506737288e-05, |
| "loss": 0.6891, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 1.7992925292913355, |
| "learning_rate": 1.6683153147720098e-05, |
| "loss": 0.5747, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.56576, |
| "grad_norm": 1.8783520446755098, |
| "learning_rate": 1.66525082240639e-05, |
| "loss": 0.5986, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.56832, |
| "grad_norm": 1.9949767049473588, |
| "learning_rate": 1.6621750814572728e-05, |
| "loss": 0.5943, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.57088, |
| "grad_norm": 2.1034251822329537, |
| "learning_rate": 1.6590881439317025e-05, |
| "loss": 0.6973, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.57344, |
| "grad_norm": 1.9209578446003501, |
| "learning_rate": 1.6559900620260435e-05, |
| "loss": 0.6694, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.9220786921566595, |
| "learning_rate": 1.6528808881250986e-05, |
| "loss": 0.6559, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.57856, |
| "grad_norm": 2.0292167831172225, |
| "learning_rate": 1.6497606748012227e-05, |
| "loss": 0.5997, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.58112, |
| "grad_norm": 2.113629211379267, |
| "learning_rate": 1.646629474813433e-05, |
| "loss": 0.6631, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.58368, |
| "grad_norm": 1.8495528915876476, |
| "learning_rate": 1.64348734110652e-05, |
| "loss": 0.6076, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.58624, |
| "grad_norm": 2.2273973566204304, |
| "learning_rate": 1.6403343268101476e-05, |
| "loss": 0.6892, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 2.18163885450235, |
| "learning_rate": 1.6371704852379587e-05, |
| "loss": 0.6187, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.59136, |
| "grad_norm": 1.7637267715802756, |
| "learning_rate": 1.6339958698866716e-05, |
| "loss": 0.5672, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.59392, |
| "grad_norm": 2.0590984564497234, |
| "learning_rate": 1.6308105344351776e-05, |
| "loss": 0.606, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.59648, |
| "grad_norm": 2.1250870681801692, |
| "learning_rate": 1.6276145327436298e-05, |
| "loss": 0.5996, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.59904, |
| "grad_norm": 1.8078047249763116, |
| "learning_rate": 1.6244079188525358e-05, |
| "loss": 0.5606, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 1.881044538945948, |
| "learning_rate": 1.621190746981842e-05, |
| "loss": 0.5877, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.60416, |
| "grad_norm": 1.92455732840866, |
| "learning_rate": 1.617963071530018e-05, |
| "loss": 0.5617, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.60672, |
| "grad_norm": 1.8832244455059177, |
| "learning_rate": 1.6147249470731355e-05, |
| "loss": 0.5666, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.60928, |
| "grad_norm": 2.082069857382887, |
| "learning_rate": 1.6114764283639467e-05, |
| "loss": 0.5915, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.61184, |
| "grad_norm": 2.001850994059788, |
| "learning_rate": 1.608217570330958e-05, |
| "loss": 0.6164, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 2.022238936162546, |
| "learning_rate": 1.6049484280775012e-05, |
| "loss": 0.5671, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.61696, |
| "grad_norm": 1.9972354860485817, |
| "learning_rate": 1.601669056880801e-05, |
| "loss": 0.5998, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.61952, |
| "grad_norm": 1.9148720082205921, |
| "learning_rate": 1.598379512191042e-05, |
| "loss": 0.5544, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.62208, |
| "grad_norm": 2.1123296116387644, |
| "learning_rate": 1.5950798496304303e-05, |
| "loss": 0.6324, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.62464, |
| "grad_norm": 2.093610037072655, |
| "learning_rate": 1.591770124992252e-05, |
| "loss": 0.6248, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 2.009063600874344, |
| "learning_rate": 1.5884503942399314e-05, |
| "loss": 0.537, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.62976, |
| "grad_norm": 2.0328862721575938, |
| "learning_rate": 1.585120713506084e-05, |
| "loss": 0.6311, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.63232, |
| "grad_norm": 1.797706266437, |
| "learning_rate": 1.5817811390915676e-05, |
| "loss": 0.6263, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.63488, |
| "grad_norm": 1.8701784871242504, |
| "learning_rate": 1.5784317274645294e-05, |
| "loss": 0.5819, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.63744, |
| "grad_norm": 1.8585542262348, |
| "learning_rate": 1.575072535259452e-05, |
| "loss": 0.5841, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.9058528200620377, |
| "learning_rate": 1.571703619276197e-05, |
| "loss": 0.5926, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.64256, |
| "grad_norm": 1.8283834697734105, |
| "learning_rate": 1.5683250364790415e-05, |
| "loss": 0.6193, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.64512, |
| "grad_norm": 2.0581163017398225, |
| "learning_rate": 1.5649368439957182e-05, |
| "loss": 0.6108, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.64768, |
| "grad_norm": 1.8346847863840565, |
| "learning_rate": 1.5615390991164465e-05, |
| "loss": 0.5812, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.65024, |
| "grad_norm": 1.9374483799503257, |
| "learning_rate": 1.5581318592929665e-05, |
| "loss": 0.5771, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 1.8804501132111215, |
| "learning_rate": 1.5547151821375654e-05, |
| "loss": 0.5901, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.65536, |
| "grad_norm": 1.6759367916217955, |
| "learning_rate": 1.5512891254221046e-05, |
| "loss": 0.5727, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.65792, |
| "grad_norm": 1.877781648410285, |
| "learning_rate": 1.5478537470770425e-05, |
| "loss": 0.5859, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.66048, |
| "grad_norm": 2.0006799639933384, |
| "learning_rate": 1.5444091051904545e-05, |
| "loss": 0.6468, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.66304, |
| "grad_norm": 1.8416628315812464, |
| "learning_rate": 1.540955258007052e-05, |
| "loss": 0.5418, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 1.8634187238978763, |
| "learning_rate": 1.537492263927196e-05, |
| "loss": 0.5494, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.66816, |
| "grad_norm": 1.9439207615090717, |
| "learning_rate": 1.5340201815059116e-05, |
| "loss": 0.5393, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.67072, |
| "grad_norm": 1.8477228376076562, |
| "learning_rate": 1.5305390694518953e-05, |
| "loss": 0.5406, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.67328, |
| "grad_norm": 2.071111010740906, |
| "learning_rate": 1.5270489866265236e-05, |
| "loss": 0.612, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.67584, |
| "grad_norm": 1.9934090221262504, |
| "learning_rate": 1.52354999204286e-05, |
| "loss": 0.5988, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 2.028728814629187, |
| "learning_rate": 1.5200421448646525e-05, |
| "loss": 0.5714, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.68096, |
| "grad_norm": 2.030984918895053, |
| "learning_rate": 1.5165255044053373e-05, |
| "loss": 0.5977, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.68352, |
| "grad_norm": 1.8548332091193185, |
| "learning_rate": 1.5130001301270334e-05, |
| "loss": 0.5617, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.68608, |
| "grad_norm": 1.8656760202026166, |
| "learning_rate": 1.509466081639539e-05, |
| "loss": 0.578, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.68864, |
| "grad_norm": 2.065217551069724, |
| "learning_rate": 1.5059234186993217e-05, |
| "loss": 0.6552, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 1.9936665359457852, |
| "learning_rate": 1.5023722012085098e-05, |
| "loss": 0.6026, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.69376, |
| "grad_norm": 1.7581904286308514, |
| "learning_rate": 1.4988124892138782e-05, |
| "loss": 0.5201, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.69632, |
| "grad_norm": 2.0162120027330284, |
| "learning_rate": 1.4952443429058334e-05, |
| "loss": 0.5915, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.69888, |
| "grad_norm": 2.0537671862981695, |
| "learning_rate": 1.4916678226173966e-05, |
| "loss": 0.5461, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.70144, |
| "grad_norm": 2.0260603366887295, |
| "learning_rate": 1.4880829888231818e-05, |
| "loss": 0.6289, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.840168296016021, |
| "learning_rate": 1.4844899021383756e-05, |
| "loss": 0.5839, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.70656, |
| "grad_norm": 1.9912121443060273, |
| "learning_rate": 1.4808886233177096e-05, |
| "loss": 0.608, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.70912, |
| "grad_norm": 1.8192068126971754, |
| "learning_rate": 1.4772792132544354e-05, |
| "loss": 0.5602, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.71168, |
| "grad_norm": 1.8029322129897425, |
| "learning_rate": 1.4736617329792942e-05, |
| "loss": 0.604, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.71424, |
| "grad_norm": 1.9524417007781032, |
| "learning_rate": 1.4700362436594834e-05, |
| "loss": 0.5981, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 1.8084386702168436, |
| "learning_rate": 1.4664028065976245e-05, |
| "loss": 0.5817, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.71936, |
| "grad_norm": 2.1802877987514604, |
| "learning_rate": 1.4627614832307261e-05, |
| "loss": 0.6396, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.72192, |
| "grad_norm": 2.01749908189055, |
| "learning_rate": 1.459112335129144e-05, |
| "loss": 0.6185, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.72448, |
| "grad_norm": 2.038378021785889, |
| "learning_rate": 1.4554554239955412e-05, |
| "loss": 0.5979, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.72704, |
| "grad_norm": 1.9263728755541718, |
| "learning_rate": 1.4517908116638433e-05, |
| "loss": 0.5926, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 1.9574220263738664, |
| "learning_rate": 1.4481185600981945e-05, |
| "loss": 0.5807, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.73216, |
| "grad_norm": 1.9996955352260337, |
| "learning_rate": 1.4444387313919092e-05, |
| "loss": 0.603, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.73472, |
| "grad_norm": 1.9024141327391195, |
| "learning_rate": 1.440751387766422e-05, |
| "loss": 0.5523, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.73728, |
| "grad_norm": 1.793371820272698, |
| "learning_rate": 1.437056591570235e-05, |
| "loss": 0.5853, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.73984, |
| "grad_norm": 1.9581066893720518, |
| "learning_rate": 1.4333544052778655e-05, |
| "loss": 0.6131, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 1.8602904498846513, |
| "learning_rate": 1.4296448914887866e-05, |
| "loss": 0.5976, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.74496, |
| "grad_norm": 1.75407998573317, |
| "learning_rate": 1.4259281129263727e-05, |
| "loss": 0.527, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.74752, |
| "grad_norm": 1.9323066549675147, |
| "learning_rate": 1.4222041324368347e-05, |
| "loss": 0.6473, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.75008, |
| "grad_norm": 1.6313629921698742, |
| "learning_rate": 1.4184730129881601e-05, |
| "loss": 0.4679, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.75264, |
| "grad_norm": 1.8821039452506023, |
| "learning_rate": 1.4147348176690479e-05, |
| "loss": 0.596, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 2.0756598406983855, |
| "learning_rate": 1.4109896096878408e-05, |
| "loss": 0.6384, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.75776, |
| "grad_norm": 1.945088077779729, |
| "learning_rate": 1.4072374523714577e-05, |
| "loss": 0.5608, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.76032, |
| "grad_norm": 1.935435206680739, |
| "learning_rate": 1.4034784091643218e-05, |
| "loss": 0.5793, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.76288, |
| "grad_norm": 2.0773604638736485, |
| "learning_rate": 1.399712543627289e-05, |
| "loss": 0.6529, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.76544, |
| "grad_norm": 1.9163069194223226, |
| "learning_rate": 1.3959399194365712e-05, |
| "loss": 0.6056, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.837037316584707, |
| "learning_rate": 1.392160600382663e-05, |
| "loss": 0.5853, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.77056, |
| "grad_norm": 2.0141541605155395, |
| "learning_rate": 1.3883746503692587e-05, |
| "loss": 0.5898, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.77312, |
| "grad_norm": 1.8576928552652707, |
| "learning_rate": 1.3845821334121763e-05, |
| "loss": 0.5624, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.77568, |
| "grad_norm": 1.9366964632674861, |
| "learning_rate": 1.3807831136382706e-05, |
| "loss": 0.6462, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.77824, |
| "grad_norm": 1.9953305945547755, |
| "learning_rate": 1.3769776552843532e-05, |
| "loss": 0.6181, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 1.9990027115873188, |
| "learning_rate": 1.3731658226961031e-05, |
| "loss": 0.6303, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.78336, |
| "grad_norm": 1.9858501691321515, |
| "learning_rate": 1.3693476803269799e-05, |
| "loss": 0.5916, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.78592, |
| "grad_norm": 1.9457879785437557, |
| "learning_rate": 1.3655232927371342e-05, |
| "loss": 0.5691, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.78848, |
| "grad_norm": 1.9025068512726264, |
| "learning_rate": 1.3616927245923157e-05, |
| "loss": 0.5378, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.79104, |
| "grad_norm": 1.9938161594081456, |
| "learning_rate": 1.3578560406627798e-05, |
| "loss": 0.6176, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 1.959851063997509, |
| "learning_rate": 1.3540133058221927e-05, |
| "loss": 0.6209, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.79616, |
| "grad_norm": 1.8935432838789927, |
| "learning_rate": 1.3501645850465327e-05, |
| "loss": 0.632, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.79872, |
| "grad_norm": 1.7611365587951862, |
| "learning_rate": 1.346309943412995e-05, |
| "loss": 0.5552, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.80128, |
| "grad_norm": 1.8949244476360045, |
| "learning_rate": 1.342449446098888e-05, |
| "loss": 0.6063, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.80384, |
| "grad_norm": 2.0195750845328395, |
| "learning_rate": 1.3385831583805329e-05, |
| "loss": 0.5886, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 1.8175701233229387, |
| "learning_rate": 1.33471114563216e-05, |
| "loss": 0.5937, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.80896, |
| "grad_norm": 2.1857335001832303, |
| "learning_rate": 1.3308334733248019e-05, |
| "loss": 0.6594, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.81152, |
| "grad_norm": 1.842277672202166, |
| "learning_rate": 1.3269502070251885e-05, |
| "loss": 0.5555, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.81408, |
| "grad_norm": 1.8495246423693503, |
| "learning_rate": 1.323061412394637e-05, |
| "loss": 0.6004, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.81664, |
| "grad_norm": 1.899465186771449, |
| "learning_rate": 1.3191671551879418e-05, |
| "loss": 0.5188, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 2.0205565271224635, |
| "learning_rate": 1.3152675012522629e-05, |
| "loss": 0.6318, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.82176, |
| "grad_norm": 1.9988122791104623, |
| "learning_rate": 1.311362516526012e-05, |
| "loss": 0.6078, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.82432, |
| "grad_norm": 1.7875368384493597, |
| "learning_rate": 1.3074522670377392e-05, |
| "loss": 0.5636, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.82688, |
| "grad_norm": 2.1123854748425894, |
| "learning_rate": 1.3035368189050142e-05, |
| "loss": 0.6282, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.82944, |
| "grad_norm": 2.0127085167163066, |
| "learning_rate": 1.2996162383333097e-05, |
| "loss": 0.5353, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.9207019986925034, |
| "learning_rate": 1.2956905916148821e-05, |
| "loss": 0.5553, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.83456, |
| "grad_norm": 1.8938216896130815, |
| "learning_rate": 1.2917599451276498e-05, |
| "loss": 0.5619, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.83712, |
| "grad_norm": 1.8339307990942622, |
| "learning_rate": 1.2878243653340714e-05, |
| "loss": 0.5301, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.83968, |
| "grad_norm": 2.020673528812742, |
| "learning_rate": 1.2838839187800218e-05, |
| "loss": 0.5634, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.84224, |
| "grad_norm": 2.0042232597725422, |
| "learning_rate": 1.2799386720936663e-05, |
| "loss": 0.565, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 1.6440410966587669, |
| "learning_rate": 1.2759886919843354e-05, |
| "loss": 0.5487, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.84736, |
| "grad_norm": 2.033498045933394, |
| "learning_rate": 1.2720340452413962e-05, |
| "loss": 0.5313, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.84992, |
| "grad_norm": 1.9130851998572171, |
| "learning_rate": 1.2680747987331215e-05, |
| "loss": 0.5445, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.85248, |
| "grad_norm": 2.025171351176087, |
| "learning_rate": 1.264111019405562e-05, |
| "loss": 0.5699, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.85504, |
| "grad_norm": 1.9582047276900996, |
| "learning_rate": 1.2601427742814123e-05, |
| "loss": 0.5473, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 1.924176492111395, |
| "learning_rate": 1.2561701304588782e-05, |
| "loss": 0.5896, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.86016, |
| "grad_norm": 2.0844112285904823, |
| "learning_rate": 1.2521931551105427e-05, |
| "loss": 0.5678, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.86272, |
| "grad_norm": 1.9210828492706074, |
| "learning_rate": 1.248211915482228e-05, |
| "loss": 0.5465, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.86528, |
| "grad_norm": 1.9083499459646247, |
| "learning_rate": 1.244226478891862e-05, |
| "loss": 0.5568, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.86784, |
| "grad_norm": 1.763077087198924, |
| "learning_rate": 1.2402369127283374e-05, |
| "loss": 0.5632, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 1.838786657423739, |
| "learning_rate": 1.2362432844503725e-05, |
| "loss": 0.5387, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.87296, |
| "grad_norm": 1.8884385940297985, |
| "learning_rate": 1.2322456615853718e-05, |
| "loss": 0.6314, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.87552, |
| "grad_norm": 1.8950854441907627, |
| "learning_rate": 1.2282441117282831e-05, |
| "loss": 0.54, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.87808, |
| "grad_norm": 1.9183342023884988, |
| "learning_rate": 1.224238702540454e-05, |
| "loss": 0.5748, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.88064, |
| "grad_norm": 1.9998947291027693, |
| "learning_rate": 1.2202295017484911e-05, |
| "loss": 0.595, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 1.7690361873293163, |
| "learning_rate": 1.2162165771431094e-05, |
| "loss": 0.4816, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.88576, |
| "grad_norm": 1.8887292208556585, |
| "learning_rate": 1.212199996577991e-05, |
| "loss": 0.5548, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.88832, |
| "grad_norm": 1.9015638197509928, |
| "learning_rate": 1.2081798279686354e-05, |
| "loss": 0.5399, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.89088, |
| "grad_norm": 1.8797520886634111, |
| "learning_rate": 1.2041561392912118e-05, |
| "loss": 0.5652, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.89344, |
| "grad_norm": 1.692840000131321, |
| "learning_rate": 1.2001289985814088e-05, |
| "loss": 0.5431, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.9518924556661963, |
| "learning_rate": 1.1960984739332851e-05, |
| "loss": 0.5328, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.89856, |
| "grad_norm": 1.9176530907572014, |
| "learning_rate": 1.1920646334981176e-05, |
| "loss": 0.5948, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.90112, |
| "grad_norm": 1.8600950416768505, |
| "learning_rate": 1.1880275454832493e-05, |
| "loss": 0.5214, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.90368, |
| "grad_norm": 2.0987333730497033, |
| "learning_rate": 1.1839872781509358e-05, |
| "loss": 0.6008, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.90624, |
| "grad_norm": 2.0052047615781157, |
| "learning_rate": 1.1799438998171909e-05, |
| "loss": 0.5804, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 1.9543922891858452, |
| "learning_rate": 1.175897478850632e-05, |
| "loss": 0.5801, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.91136, |
| "grad_norm": 1.9278210545847223, |
| "learning_rate": 1.1718480836713228e-05, |
| "loss": 0.5786, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.91392, |
| "grad_norm": 1.9169280603449947, |
| "learning_rate": 1.1677957827496191e-05, |
| "loss": 0.5683, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.91648, |
| "grad_norm": 1.9206761554005032, |
| "learning_rate": 1.1637406446050072e-05, |
| "loss": 0.5628, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.91904, |
| "grad_norm": 2.0320383330546714, |
| "learning_rate": 1.1596827378049491e-05, |
| "loss": 0.5568, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 2.011012460142978, |
| "learning_rate": 1.1556221309637204e-05, |
| "loss": 0.5911, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.92416, |
| "grad_norm": 1.6733867449703013, |
| "learning_rate": 1.1515588927412509e-05, |
| "loss": 0.4909, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.92672, |
| "grad_norm": 1.7160108869926338, |
| "learning_rate": 1.147493091841965e-05, |
| "loss": 0.4918, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.92928, |
| "grad_norm": 1.8248678379826555, |
| "learning_rate": 1.1434247970136188e-05, |
| "loss": 0.5299, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.93184, |
| "grad_norm": 1.8911031735554016, |
| "learning_rate": 1.1393540770461358e-05, |
| "loss": 0.622, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 1.6917252673616938, |
| "learning_rate": 1.1352810007704476e-05, |
| "loss": 0.5672, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.93696, |
| "grad_norm": 1.7652120249882262, |
| "learning_rate": 1.1312056370573277e-05, |
| "loss": 0.4876, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.93952, |
| "grad_norm": 1.912188345028055, |
| "learning_rate": 1.127128054816227e-05, |
| "loss": 0.5229, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.94208, |
| "grad_norm": 1.9268235049476894, |
| "learning_rate": 1.1230483229941092e-05, |
| "loss": 0.4969, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.94464, |
| "grad_norm": 1.7124716981838979, |
| "learning_rate": 1.1189665105742846e-05, |
| "loss": 0.4973, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 1.7639128606217356, |
| "learning_rate": 1.1148826865752445e-05, |
| "loss": 0.55, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.94976, |
| "grad_norm": 1.9598599864278292, |
| "learning_rate": 1.1107969200494928e-05, |
| "loss": 0.5607, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.95232, |
| "grad_norm": 1.8988749717432336, |
| "learning_rate": 1.1067092800823798e-05, |
| "loss": 0.5147, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.95488, |
| "grad_norm": 1.8154562117513298, |
| "learning_rate": 1.1026198357909327e-05, |
| "loss": 0.5039, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.95744, |
| "grad_norm": 1.7241766515805417, |
| "learning_rate": 1.0985286563226887e-05, |
| "loss": 0.5053, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.886751738203789, |
| "learning_rate": 1.0944358108545236e-05, |
| "loss": 0.5563, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.96256, |
| "grad_norm": 1.8887450413746818, |
| "learning_rate": 1.0903413685914843e-05, |
| "loss": 0.5866, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.96512, |
| "grad_norm": 1.8801523776322404, |
| "learning_rate": 1.0862453987656162e-05, |
| "loss": 0.573, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.96768, |
| "grad_norm": 1.8773940578406214, |
| "learning_rate": 1.0821479706347953e-05, |
| "loss": 0.4809, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.97024, |
| "grad_norm": 1.8407925735220598, |
| "learning_rate": 1.0780491534815549e-05, |
| "loss": 0.5471, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 2.144021305027399, |
| "learning_rate": 1.0739490166119155e-05, |
| "loss": 0.5732, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.97536, |
| "grad_norm": 1.8454174363471443, |
| "learning_rate": 1.0698476293542124e-05, |
| "loss": 0.5603, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.97792, |
| "grad_norm": 1.8272486716574172, |
| "learning_rate": 1.0657450610579225e-05, |
| "loss": 0.5493, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.98048, |
| "grad_norm": 1.8920692278670657, |
| "learning_rate": 1.0616413810924937e-05, |
| "loss": 0.5611, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.98304, |
| "grad_norm": 2.0131793545946626, |
| "learning_rate": 1.057536658846171e-05, |
| "loss": 0.5706, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 1.8744182336273665, |
| "learning_rate": 1.053430963724822e-05, |
| "loss": 0.5511, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.98816, |
| "grad_norm": 1.705985214182383, |
| "learning_rate": 1.0493243651507654e-05, |
| "loss": 0.4967, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.99072, |
| "grad_norm": 1.855409465319251, |
| "learning_rate": 1.0452169325615956e-05, |
| "loss": 0.5375, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.99328, |
| "grad_norm": 1.7606864394836728, |
| "learning_rate": 1.04110873540901e-05, |
| "loss": 0.52, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.99584, |
| "grad_norm": 1.8631155705703495, |
| "learning_rate": 1.0369998431576328e-05, |
| "loss": 0.6018, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 1.8541664181232975, |
| "learning_rate": 1.0328903252838415e-05, |
| "loss": 0.5396, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.00096, |
| "grad_norm": 1.5672351465494485, |
| "learning_rate": 1.0287802512745935e-05, |
| "loss": 0.3953, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.00352, |
| "grad_norm": 1.558195855755232, |
| "learning_rate": 1.0246696906262484e-05, |
| "loss": 0.3791, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.00608, |
| "grad_norm": 1.5431663246597491, |
| "learning_rate": 1.0205587128433944e-05, |
| "loss": 0.3558, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.00864, |
| "grad_norm": 1.445015635657179, |
| "learning_rate": 1.016447387437674e-05, |
| "loss": 0.3351, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0112, |
| "grad_norm": 1.481573837422665, |
| "learning_rate": 1.0123357839266066e-05, |
| "loss": 0.3327, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.01376, |
| "grad_norm": 1.5392257435904235, |
| "learning_rate": 1.0082239718324136e-05, |
| "loss": 0.2687, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.01632, |
| "grad_norm": 1.4663853851401927, |
| "learning_rate": 1.004112020680845e-05, |
| "loss": 0.3138, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.01888, |
| "grad_norm": 1.650830842194171, |
| "learning_rate": 1e-05, |
| "loss": 0.3256, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.02144, |
| "grad_norm": 1.5814294192961373, |
| "learning_rate": 9.958879793191553e-06, |
| "loss": 0.3138, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 1.6865522137000346, |
| "learning_rate": 9.917760281675867e-06, |
| "loss": 0.3153, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.02656, |
| "grad_norm": 1.7826957665836876, |
| "learning_rate": 9.876642160733937e-06, |
| "loss": 0.3152, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.02912, |
| "grad_norm": 1.7725840767006589, |
| "learning_rate": 9.835526125623262e-06, |
| "loss": 0.2928, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.03168, |
| "grad_norm": 1.8174709729145797, |
| "learning_rate": 9.794412871566057e-06, |
| "loss": 0.3079, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.03424, |
| "grad_norm": 2.005333790551581, |
| "learning_rate": 9.753303093737518e-06, |
| "loss": 0.3421, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.0368, |
| "grad_norm": 1.873929148570637, |
| "learning_rate": 9.71219748725407e-06, |
| "loss": 0.3364, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.03936, |
| "grad_norm": 1.5777934082692342, |
| "learning_rate": 9.671096747161587e-06, |
| "loss": 0.3168, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.04192, |
| "grad_norm": 1.6309016044222864, |
| "learning_rate": 9.630001568423677e-06, |
| "loss": 0.2704, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.04448, |
| "grad_norm": 1.7365095962411672, |
| "learning_rate": 9.588912645909905e-06, |
| "loss": 0.3153, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.04704, |
| "grad_norm": 1.7258342184967879, |
| "learning_rate": 9.547830674384043e-06, |
| "loss": 0.3018, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0496, |
| "grad_norm": 1.6833772470314767, |
| "learning_rate": 9.506756348492348e-06, |
| "loss": 0.3026, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.05216, |
| "grad_norm": 1.813776064675088, |
| "learning_rate": 9.465690362751781e-06, |
| "loss": 0.3179, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.05472, |
| "grad_norm": 1.7975291601015548, |
| "learning_rate": 9.424633411538289e-06, |
| "loss": 0.3629, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.05728, |
| "grad_norm": 1.4794908417040535, |
| "learning_rate": 9.383586189075065e-06, |
| "loss": 0.3126, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.05984, |
| "grad_norm": 1.594883685807337, |
| "learning_rate": 9.342549389420777e-06, |
| "loss": 0.2997, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.0624, |
| "grad_norm": 1.4379937930102515, |
| "learning_rate": 9.30152370645788e-06, |
| "loss": 0.2796, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.06496, |
| "grad_norm": 1.772458848105549, |
| "learning_rate": 9.260509833880848e-06, |
| "loss": 0.3676, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.06752, |
| "grad_norm": 1.7331530871521117, |
| "learning_rate": 9.21950846518445e-06, |
| "loss": 0.2902, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.07008, |
| "grad_norm": 1.7622988238409705, |
| "learning_rate": 9.17852029365205e-06, |
| "loss": 0.2951, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.07264, |
| "grad_norm": 1.7162234337691245, |
| "learning_rate": 9.13754601234384e-06, |
| "loss": 0.3163, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.0752, |
| "grad_norm": 1.7337491899665072, |
| "learning_rate": 9.096586314085162e-06, |
| "loss": 0.3362, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.07776, |
| "grad_norm": 1.574010878864483, |
| "learning_rate": 9.055641891454766e-06, |
| "loss": 0.284, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.08032, |
| "grad_norm": 2.0248969916179123, |
| "learning_rate": 9.014713436773114e-06, |
| "loss": 0.3209, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.08288, |
| "grad_norm": 1.6901116986132412, |
| "learning_rate": 8.973801642090674e-06, |
| "loss": 0.3283, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.08544, |
| "grad_norm": 1.8279240135161061, |
| "learning_rate": 8.932907199176206e-06, |
| "loss": 0.2894, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 1.6523113744718456, |
| "learning_rate": 8.892030799505072e-06, |
| "loss": 0.3071, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.09056, |
| "grad_norm": 1.8531686326526295, |
| "learning_rate": 8.85117313424756e-06, |
| "loss": 0.326, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.09312, |
| "grad_norm": 1.70280493987216, |
| "learning_rate": 8.810334894257156e-06, |
| "loss": 0.3218, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.09568, |
| "grad_norm": 1.5975578961167665, |
| "learning_rate": 8.769516770058915e-06, |
| "loss": 0.3014, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.09824, |
| "grad_norm": 1.5963594131551222, |
| "learning_rate": 8.728719451837735e-06, |
| "loss": 0.2813, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.1008, |
| "grad_norm": 1.5356693796819318, |
| "learning_rate": 8.687943629426725e-06, |
| "loss": 0.2765, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.10336, |
| "grad_norm": 1.9059233869160093, |
| "learning_rate": 8.647189992295526e-06, |
| "loss": 0.2862, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.10592, |
| "grad_norm": 1.915468592863442, |
| "learning_rate": 8.606459229538645e-06, |
| "loss": 0.3143, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.10848, |
| "grad_norm": 1.74246639812338, |
| "learning_rate": 8.56575202986382e-06, |
| "loss": 0.3091, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.11104, |
| "grad_norm": 1.8647458734292492, |
| "learning_rate": 8.525069081580351e-06, |
| "loss": 0.3317, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1136, |
| "grad_norm": 1.715483977869925, |
| "learning_rate": 8.484411072587491e-06, |
| "loss": 0.3065, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.11616, |
| "grad_norm": 1.6696415990436007, |
| "learning_rate": 8.443778690362801e-06, |
| "loss": 0.2741, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.11872, |
| "grad_norm": 1.7084096219447564, |
| "learning_rate": 8.403172621950512e-06, |
| "loss": 0.3058, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.12128, |
| "grad_norm": 1.7929139885486827, |
| "learning_rate": 8.362593553949926e-06, |
| "loss": 0.2868, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.12384, |
| "grad_norm": 1.757789981994357, |
| "learning_rate": 8.322042172503812e-06, |
| "loss": 0.3129, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1264, |
| "grad_norm": 1.6625503348814892, |
| "learning_rate": 8.281519163286772e-06, |
| "loss": 0.287, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.12896, |
| "grad_norm": 1.9281991436107038, |
| "learning_rate": 8.241025211493684e-06, |
| "loss": 0.3375, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.13152, |
| "grad_norm": 1.6723278237853747, |
| "learning_rate": 8.200561001828093e-06, |
| "loss": 0.2843, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.13408, |
| "grad_norm": 1.6487601783747716, |
| "learning_rate": 8.160127218490643e-06, |
| "loss": 0.3173, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.13664, |
| "grad_norm": 1.8497858729306897, |
| "learning_rate": 8.11972454516751e-06, |
| "loss": 0.3022, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1392, |
| "grad_norm": 1.6639405806591592, |
| "learning_rate": 8.079353665018827e-06, |
| "loss": 0.3453, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.14176, |
| "grad_norm": 1.560079035489574, |
| "learning_rate": 8.039015260667154e-06, |
| "loss": 0.33, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.14432, |
| "grad_norm": 1.7303374905946536, |
| "learning_rate": 7.998710014185916e-06, |
| "loss": 0.311, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.14688, |
| "grad_norm": 1.7019483722377131, |
| "learning_rate": 7.958438607087884e-06, |
| "loss": 0.3124, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.14944, |
| "grad_norm": 1.7461543792169232, |
| "learning_rate": 7.918201720313648e-06, |
| "loss": 0.3132, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 1.65701293316665, |
| "learning_rate": 7.878000034220092e-06, |
| "loss": 0.2898, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.15456, |
| "grad_norm": 1.793230509633861, |
| "learning_rate": 7.837834228568911e-06, |
| "loss": 0.3116, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.15712, |
| "grad_norm": 1.9106756293670617, |
| "learning_rate": 7.797704982515094e-06, |
| "loss": 0.3451, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.15968, |
| "grad_norm": 1.8497679963390545, |
| "learning_rate": 7.75761297459546e-06, |
| "loss": 0.2923, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.16224, |
| "grad_norm": 1.7480611256420666, |
| "learning_rate": 7.717558882717175e-06, |
| "loss": 0.3156, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.1648, |
| "grad_norm": 1.667882679284623, |
| "learning_rate": 7.677543384146287e-06, |
| "loss": 0.2967, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.16736, |
| "grad_norm": 1.6327780686406852, |
| "learning_rate": 7.637567155496277e-06, |
| "loss": 0.3298, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.16992, |
| "grad_norm": 1.9718387276246228, |
| "learning_rate": 7.597630872716631e-06, |
| "loss": 0.3067, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.17248, |
| "grad_norm": 1.8631792827164149, |
| "learning_rate": 7.5577352110813825e-06, |
| "loss": 0.3188, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.17504, |
| "grad_norm": 1.7515381932583938, |
| "learning_rate": 7.517880845177725e-06, |
| "loss": 0.3103, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.1776, |
| "grad_norm": 1.6929585603147987, |
| "learning_rate": 7.478068448894577e-06, |
| "loss": 0.2854, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1801599999999999, |
| "grad_norm": 1.6653769100031355, |
| "learning_rate": 7.438298695411218e-06, |
| "loss": 0.2605, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.18272, |
| "grad_norm": 1.660565063408245, |
| "learning_rate": 7.398572257185879e-06, |
| "loss": 0.2627, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.1852800000000001, |
| "grad_norm": 1.6879487748419324, |
| "learning_rate": 7.358889805944383e-06, |
| "loss": 0.2577, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.18784, |
| "grad_norm": 1.7460066493392816, |
| "learning_rate": 7.31925201266879e-06, |
| "loss": 0.3328, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.1904, |
| "grad_norm": 1.6332719487911835, |
| "learning_rate": 7.2796595475860425e-06, |
| "loss": 0.2843, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.19296, |
| "grad_norm": 1.9280765743425918, |
| "learning_rate": 7.240113080156646e-06, |
| "loss": 0.3132, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.19552, |
| "grad_norm": 1.8041999203900638, |
| "learning_rate": 7.200613279063341e-06, |
| "loss": 0.284, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.19808, |
| "grad_norm": 1.927491301377088, |
| "learning_rate": 7.161160812199785e-06, |
| "loss": 0.3379, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.20064, |
| "grad_norm": 1.7700179930703523, |
| "learning_rate": 7.121756346659292e-06, |
| "loss": 0.3304, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.2032, |
| "grad_norm": 1.8239759132867097, |
| "learning_rate": 7.082400548723505e-06, |
| "loss": 0.2773, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.20576, |
| "grad_norm": 1.8558590715535617, |
| "learning_rate": 7.043094083851181e-06, |
| "loss": 0.3101, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.20832, |
| "grad_norm": 1.9466031042965117, |
| "learning_rate": 7.003837616666906e-06, |
| "loss": 0.3011, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.21088, |
| "grad_norm": 1.718281958362647, |
| "learning_rate": 6.96463181094986e-06, |
| "loss": 0.2914, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.21344, |
| "grad_norm": 1.6920570061933058, |
| "learning_rate": 6.925477329622609e-06, |
| "loss": 0.29, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 1.5582172395472835, |
| "learning_rate": 6.886374834739883e-06, |
| "loss": 0.2565, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.21856, |
| "grad_norm": 1.9619111558727853, |
| "learning_rate": 6.847324987477375e-06, |
| "loss": 0.2811, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.22112, |
| "grad_norm": 1.7660903136072212, |
| "learning_rate": 6.808328448120588e-06, |
| "loss": 0.3277, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2236799999999999, |
| "grad_norm": 1.6879368816014204, |
| "learning_rate": 6.769385876053632e-06, |
| "loss": 0.2918, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.22624, |
| "grad_norm": 1.8466731384715298, |
| "learning_rate": 6.730497929748116e-06, |
| "loss": 0.3103, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.2288000000000001, |
| "grad_norm": 1.8338896405236385, |
| "learning_rate": 6.6916652667519855e-06, |
| "loss": 0.3211, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.23136, |
| "grad_norm": 1.7548167253010734, |
| "learning_rate": 6.652888543678404e-06, |
| "loss": 0.3331, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.23392, |
| "grad_norm": 1.7191841522008924, |
| "learning_rate": 6.614168416194674e-06, |
| "loss": 0.2684, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.23648, |
| "grad_norm": 1.6620234347821567, |
| "learning_rate": 6.575505539011123e-06, |
| "loss": 0.273, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.23904, |
| "grad_norm": 1.8705146664514543, |
| "learning_rate": 6.536900565870052e-06, |
| "loss": 0.3324, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2416, |
| "grad_norm": 1.6318180235961661, |
| "learning_rate": 6.498354149534677e-06, |
| "loss": 0.2891, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.24416, |
| "grad_norm": 1.7493455359816799, |
| "learning_rate": 6.459866941778077e-06, |
| "loss": 0.2847, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.24672, |
| "grad_norm": 1.6377143720858458, |
| "learning_rate": 6.421439593372201e-06, |
| "loss": 0.2841, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.24928, |
| "grad_norm": 1.7580656850520682, |
| "learning_rate": 6.3830727540768445e-06, |
| "loss": 0.3086, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.25184, |
| "grad_norm": 1.7537075156428699, |
| "learning_rate": 6.344767072628659e-06, |
| "loss": 0.3166, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.2544, |
| "grad_norm": 1.727275836520608, |
| "learning_rate": 6.3065231967302055e-06, |
| "loss": 0.279, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.25696, |
| "grad_norm": 1.8430842401562408, |
| "learning_rate": 6.268341773038973e-06, |
| "loss": 0.2752, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.25952, |
| "grad_norm": 1.7578451456561774, |
| "learning_rate": 6.230223447156469e-06, |
| "loss": 0.2958, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.26208, |
| "grad_norm": 1.6454134556997662, |
| "learning_rate": 6.1921688636172964e-06, |
| "loss": 0.274, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.26464, |
| "grad_norm": 1.6214753367677326, |
| "learning_rate": 6.154178665878241e-06, |
| "loss": 0.2761, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.2671999999999999, |
| "grad_norm": 1.8385311514429392, |
| "learning_rate": 6.116253496307415e-06, |
| "loss": 0.2883, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.26976, |
| "grad_norm": 1.7575162142152143, |
| "learning_rate": 6.078393996173375e-06, |
| "loss": 0.2789, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.2723200000000001, |
| "grad_norm": 1.7863119640113554, |
| "learning_rate": 6.040600805634287e-06, |
| "loss": 0.3174, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.27488, |
| "grad_norm": 1.802911376776798, |
| "learning_rate": 6.002874563727116e-06, |
| "loss": 0.2831, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.27744, |
| "grad_norm": 1.7950928461300708, |
| "learning_rate": 5.965215908356783e-06, |
| "loss": 0.3047, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 1.8067717158566738, |
| "learning_rate": 5.927625476285426e-06, |
| "loss": 0.2981, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.28256, |
| "grad_norm": 1.759342634285478, |
| "learning_rate": 5.890103903121593e-06, |
| "loss": 0.3129, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.28512, |
| "grad_norm": 2.0897739052494293, |
| "learning_rate": 5.852651823309521e-06, |
| "loss": 0.2928, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.28768, |
| "grad_norm": 1.8220220157801934, |
| "learning_rate": 5.815269870118403e-06, |
| "loss": 0.2899, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.29024, |
| "grad_norm": 1.688333219221233, |
| "learning_rate": 5.777958675631657e-06, |
| "loss": 0.3004, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.2928, |
| "grad_norm": 1.6140942084232062, |
| "learning_rate": 5.740718870736272e-06, |
| "loss": 0.2953, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.29536, |
| "grad_norm": 1.8293432699580725, |
| "learning_rate": 5.703551085112133e-06, |
| "loss": 0.3021, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.29792, |
| "grad_norm": 1.8202872346713284, |
| "learning_rate": 5.6664559472213495e-06, |
| "loss": 0.2781, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.30048, |
| "grad_norm": 1.8156854105803961, |
| "learning_rate": 5.629434084297654e-06, |
| "loss": 0.3122, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.30304, |
| "grad_norm": 2.0300686868487556, |
| "learning_rate": 5.592486122335784e-06, |
| "loss": 0.3498, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3056, |
| "grad_norm": 1.7909337703685713, |
| "learning_rate": 5.555612686080909e-06, |
| "loss": 0.3079, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.30816, |
| "grad_norm": 1.5882480407199409, |
| "learning_rate": 5.518814399018058e-06, |
| "loss": 0.2685, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.3107199999999999, |
| "grad_norm": 1.6893045166962126, |
| "learning_rate": 5.482091883361571e-06, |
| "loss": 0.2897, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.31328, |
| "grad_norm": 1.9623021679379937, |
| "learning_rate": 5.445445760044594e-06, |
| "loss": 0.2751, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3158400000000001, |
| "grad_norm": 1.8542025764374261, |
| "learning_rate": 5.408876648708561e-06, |
| "loss": 0.2808, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.3184, |
| "grad_norm": 1.625965052985563, |
| "learning_rate": 5.372385167692739e-06, |
| "loss": 0.2751, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.32096, |
| "grad_norm": 1.8190194776544601, |
| "learning_rate": 5.335971934023757e-06, |
| "loss": 0.2917, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.32352, |
| "grad_norm": 1.92774093649498, |
| "learning_rate": 5.299637563405169e-06, |
| "loss": 0.2997, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.32608, |
| "grad_norm": 1.7214112637921481, |
| "learning_rate": 5.263382670207063e-06, |
| "loss": 0.2627, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.32864, |
| "grad_norm": 1.6131621215448115, |
| "learning_rate": 5.227207867455648e-06, |
| "loss": 0.2679, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3312, |
| "grad_norm": 1.68659474905301, |
| "learning_rate": 5.191113766822905e-06, |
| "loss": 0.2732, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.33376, |
| "grad_norm": 1.8809611133528827, |
| "learning_rate": 5.155100978616248e-06, |
| "loss": 0.3065, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.33632, |
| "grad_norm": 1.8386742630648891, |
| "learning_rate": 5.1191701117681815e-06, |
| "loss": 0.3092, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.33888, |
| "grad_norm": 1.780053544517966, |
| "learning_rate": 5.083321773826038e-06, |
| "loss": 0.2825, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.34144, |
| "grad_norm": 1.6847775520894839, |
| "learning_rate": 5.04755657094167e-06, |
| "loss": 0.2928, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 1.5698053305448867, |
| "learning_rate": 5.011875107861221e-06, |
| "loss": 0.2725, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.34656, |
| "grad_norm": 1.796815658001955, |
| "learning_rate": 4.976277987914905e-06, |
| "loss": 0.3287, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.34912, |
| "grad_norm": 1.728558689136845, |
| "learning_rate": 4.940765813006784e-06, |
| "loss": 0.2839, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.35168, |
| "grad_norm": 1.8233040819705273, |
| "learning_rate": 4.905339183604614e-06, |
| "loss": 0.3033, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.3542399999999999, |
| "grad_norm": 1.772177612527984, |
| "learning_rate": 4.86999869872967e-06, |
| "loss": 0.301, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.3568, |
| "grad_norm": 1.6377253150219735, |
| "learning_rate": 4.834744955946631e-06, |
| "loss": 0.2911, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3593600000000001, |
| "grad_norm": 1.7737384781484877, |
| "learning_rate": 4.79957855135348e-06, |
| "loss": 0.3027, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.36192, |
| "grad_norm": 1.9035288129722436, |
| "learning_rate": 4.764500079571403e-06, |
| "loss": 0.3231, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.36448, |
| "grad_norm": 1.837775666581152, |
| "learning_rate": 4.729510133734766e-06, |
| "loss": 0.2855, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.36704, |
| "grad_norm": 1.7295857177394576, |
| "learning_rate": 4.694609305481055e-06, |
| "loss": 0.2804, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.3696, |
| "grad_norm": 1.806102463932245, |
| "learning_rate": 4.659798184940887e-06, |
| "loss": 0.3093, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.37216, |
| "grad_norm": 1.5986303108578905, |
| "learning_rate": 4.6250773607280375e-06, |
| "loss": 0.2445, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.37472, |
| "grad_norm": 1.9007193691045698, |
| "learning_rate": 4.590447419929481e-06, |
| "loss": 0.28, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.37728, |
| "grad_norm": 1.5654130383489473, |
| "learning_rate": 4.555908948095455e-06, |
| "loss": 0.2487, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.37984, |
| "grad_norm": 1.807208592204752, |
| "learning_rate": 4.521462529229579e-06, |
| "loss": 0.2874, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.3824, |
| "grad_norm": 1.9059242633131657, |
| "learning_rate": 4.487108745778958e-06, |
| "loss": 0.2994, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.38496, |
| "grad_norm": 1.93530044589512, |
| "learning_rate": 4.452848178624348e-06, |
| "loss": 0.3276, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.3875199999999999, |
| "grad_norm": 1.6211554099082521, |
| "learning_rate": 4.418681407070339e-06, |
| "loss": 0.2484, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.39008, |
| "grad_norm": 1.8235255434109632, |
| "learning_rate": 4.384609008835535e-06, |
| "loss": 0.284, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.39264, |
| "grad_norm": 1.8642953553369541, |
| "learning_rate": 4.350631560042821e-06, |
| "loss": 0.3073, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.3952, |
| "grad_norm": 1.8052071453907574, |
| "learning_rate": 4.3167496352095876e-06, |
| "loss": 0.3176, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.39776, |
| "grad_norm": 2.05400626300769, |
| "learning_rate": 4.282963807238032e-06, |
| "loss": 0.2941, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.40032, |
| "grad_norm": 1.8143960130300665, |
| "learning_rate": 4.2492746474054825e-06, |
| "loss": 0.3092, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.4028800000000001, |
| "grad_norm": 1.8062220799473758, |
| "learning_rate": 4.2156827253547095e-06, |
| "loss": 0.2854, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.40544, |
| "grad_norm": 1.8654013394561701, |
| "learning_rate": 4.182188609084328e-06, |
| "loss": 0.327, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 1.6776732336572873, |
| "learning_rate": 4.148792864939164e-06, |
| "loss": 0.2519, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.41056, |
| "grad_norm": 1.6871367738087453, |
| "learning_rate": 4.115496057600689e-06, |
| "loss": 0.2978, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.41312, |
| "grad_norm": 1.9401515888224816, |
| "learning_rate": 4.082298750077485e-06, |
| "loss": 0.3189, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.41568, |
| "grad_norm": 1.7127683807523446, |
| "learning_rate": 4.0492015036957e-06, |
| "loss": 0.2839, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.41824, |
| "grad_norm": 1.7006614590161078, |
| "learning_rate": 4.016204878089579e-06, |
| "loss": 0.265, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.4208, |
| "grad_norm": 1.6926095736078215, |
| "learning_rate": 3.983309431191995e-06, |
| "loss": 0.2566, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.42336, |
| "grad_norm": 1.7495489591645166, |
| "learning_rate": 3.950515719224991e-06, |
| "loss": 0.3097, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.42592, |
| "grad_norm": 1.685311274806112, |
| "learning_rate": 3.9178242966904225e-06, |
| "loss": 0.2629, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.42848, |
| "grad_norm": 1.868117738329845, |
| "learning_rate": 3.885235716360534e-06, |
| "loss": 0.3089, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4310399999999999, |
| "grad_norm": 1.6105881067710597, |
| "learning_rate": 3.852750529268645e-06, |
| "loss": 0.2841, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4336, |
| "grad_norm": 1.7563576736879873, |
| "learning_rate": 3.820369284699823e-06, |
| "loss": 0.3019, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.43616, |
| "grad_norm": 1.6236719779141782, |
| "learning_rate": 3.788092530181583e-06, |
| "loss": 0.2747, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.43872, |
| "grad_norm": 1.625891715697152, |
| "learning_rate": 3.755920811474647e-06, |
| "loss": 0.2652, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.44128, |
| "grad_norm": 1.6938038174994694, |
| "learning_rate": 3.7238546725637046e-06, |
| "loss": 0.2763, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.44384, |
| "grad_norm": 1.872174965035592, |
| "learning_rate": 3.691894655648225e-06, |
| "loss": 0.2877, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.4464000000000001, |
| "grad_norm": 1.714528697164392, |
| "learning_rate": 3.6600413011332835e-06, |
| "loss": 0.2975, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.44896, |
| "grad_norm": 1.7847360697911003, |
| "learning_rate": 3.6282951476204177e-06, |
| "loss": 0.3059, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.45152, |
| "grad_norm": 1.7410992194167252, |
| "learning_rate": 3.5966567318985267e-06, |
| "loss": 0.3158, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.45408, |
| "grad_norm": 1.6864063612802922, |
| "learning_rate": 3.565126588934803e-06, |
| "loss": 0.2836, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.45664, |
| "grad_norm": 1.714438302514508, |
| "learning_rate": 3.533705251865668e-06, |
| "loss": 0.2957, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4592, |
| "grad_norm": 1.8173113954750335, |
| "learning_rate": 3.502393251987776e-06, |
| "loss": 0.3121, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.46176, |
| "grad_norm": 1.6936277347451338, |
| "learning_rate": 3.4711911187490165e-06, |
| "loss": 0.2687, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.46432, |
| "grad_norm": 1.9870635360197202, |
| "learning_rate": 3.4400993797395664e-06, |
| "loss": 0.3278, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.46688, |
| "grad_norm": 1.7897731687332026, |
| "learning_rate": 3.4091185606829793e-06, |
| "loss": 0.2655, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.46944, |
| "grad_norm": 1.70048947778315, |
| "learning_rate": 3.3782491854272736e-06, |
| "loss": 0.3024, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 1.619029376588391, |
| "learning_rate": 3.3474917759361036e-06, |
| "loss": 0.2755, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.4745599999999999, |
| "grad_norm": 1.8451956935598997, |
| "learning_rate": 3.316846852279907e-06, |
| "loss": 0.2863, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.47712, |
| "grad_norm": 1.8145774126755378, |
| "learning_rate": 3.2863149326271226e-06, |
| "loss": 0.281, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.47968, |
| "grad_norm": 1.6672300035397345, |
| "learning_rate": 3.255896533235439e-06, |
| "loss": 0.27, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.48224, |
| "grad_norm": 1.668688823060323, |
| "learning_rate": 3.2255921684430423e-06, |
| "loss": 0.2756, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.4848, |
| "grad_norm": 1.7784555478973214, |
| "learning_rate": 3.195402350659945e-06, |
| "loss": 0.321, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.48736, |
| "grad_norm": 1.6455590944323666, |
| "learning_rate": 3.165327590359295e-06, |
| "loss": 0.2877, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.4899200000000001, |
| "grad_norm": 2.0743380157623124, |
| "learning_rate": 3.135368396068771e-06, |
| "loss": 0.3027, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.49248, |
| "grad_norm": 1.5664539608516517, |
| "learning_rate": 3.1055252743619623e-06, |
| "loss": 0.2573, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.49504, |
| "grad_norm": 1.7550490290565095, |
| "learning_rate": 3.0757987298498106e-06, |
| "loss": 0.2703, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.4976, |
| "grad_norm": 1.7076285620278457, |
| "learning_rate": 3.046189265172085e-06, |
| "loss": 0.2836, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5001600000000002, |
| "grad_norm": 1.5213003659889548, |
| "learning_rate": 3.0166973809888776e-06, |
| "loss": 0.2958, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.50272, |
| "grad_norm": 1.6842118874383583, |
| "learning_rate": 2.987323575972132e-06, |
| "loss": 0.2819, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.50528, |
| "grad_norm": 1.7336219161047688, |
| "learning_rate": 2.958068346797217e-06, |
| "loss": 0.2939, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.5078399999999998, |
| "grad_norm": 1.8365763102322976, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.2822, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.5104, |
| "grad_norm": 2.0201724941232273, |
| "learning_rate": 2.8999155926411203e-06, |
| "loss": 0.3133, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.51296, |
| "grad_norm": 1.7737234557135833, |
| "learning_rate": 2.871019050952395e-06, |
| "loss": 0.2718, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.51552, |
| "grad_norm": 1.7105337375961225, |
| "learning_rate": 2.8422430516737733e-06, |
| "loss": 0.2287, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.5180799999999999, |
| "grad_norm": 1.5532376533528256, |
| "learning_rate": 2.813588081372456e-06, |
| "loss": 0.2805, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.52064, |
| "grad_norm": 1.6488135407698572, |
| "learning_rate": 2.7850546245691866e-06, |
| "loss": 0.2783, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5232, |
| "grad_norm": 1.8138759575713275, |
| "learning_rate": 2.7566431637300738e-06, |
| "loss": 0.2936, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.52576, |
| "grad_norm": 1.9339574210123396, |
| "learning_rate": 2.7283541792584165e-06, |
| "loss": 0.2858, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.52832, |
| "grad_norm": 1.6414720524358055, |
| "learning_rate": 2.7001881494865845e-06, |
| "loss": 0.2717, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.53088, |
| "grad_norm": 1.670022901559193, |
| "learning_rate": 2.672145550667933e-06, |
| "loss": 0.2761, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5334400000000001, |
| "grad_norm": 1.651543474445551, |
| "learning_rate": 2.6442268569687567e-06, |
| "loss": 0.266, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 1.7579445968272946, |
| "learning_rate": 2.616432540460255e-06, |
| "loss": 0.2839, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.53856, |
| "grad_norm": 1.7310903919014502, |
| "learning_rate": 2.5887630711105705e-06, |
| "loss": 0.2996, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.54112, |
| "grad_norm": 1.7899987929588956, |
| "learning_rate": 2.561218916776823e-06, |
| "loss": 0.2827, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.5436800000000002, |
| "grad_norm": 1.793887084940259, |
| "learning_rate": 2.5338005431972144e-06, |
| "loss": 0.2962, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.54624, |
| "grad_norm": 1.8405222153934413, |
| "learning_rate": 2.5065084139831443e-06, |
| "loss": 0.2769, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.5488, |
| "grad_norm": 1.6538653809897454, |
| "learning_rate": 2.4793429906113676e-06, |
| "loss": 0.2798, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.5513599999999999, |
| "grad_norm": 1.7853505122465314, |
| "learning_rate": 2.4523047324162087e-06, |
| "loss": 0.2837, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.55392, |
| "grad_norm": 1.705880111795306, |
| "learning_rate": 2.4253940965817726e-06, |
| "loss": 0.3107, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.55648, |
| "grad_norm": 2.053508927696654, |
| "learning_rate": 2.3986115381342347e-06, |
| "loss": 0.3172, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.55904, |
| "grad_norm": 1.6153752928179927, |
| "learning_rate": 2.3719575099341298e-06, |
| "loss": 0.2837, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.5615999999999999, |
| "grad_norm": 1.6914755812024604, |
| "learning_rate": 2.345432462668702e-06, |
| "loss": 0.2551, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.56416, |
| "grad_norm": 1.770027987354017, |
| "learning_rate": 2.3190368448442936e-06, |
| "loss": 0.2574, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.5667200000000001, |
| "grad_norm": 1.859972298306848, |
| "learning_rate": 2.292771102778739e-06, |
| "loss": 0.3037, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.56928, |
| "grad_norm": 1.897188441833873, |
| "learning_rate": 2.266635680593845e-06, |
| "loss": 0.3067, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.57184, |
| "grad_norm": 1.7143004820729382, |
| "learning_rate": 2.2406310202078586e-06, |
| "loss": 0.2807, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.5744, |
| "grad_norm": 1.8448535483722395, |
| "learning_rate": 2.2147575613280013e-06, |
| "loss": 0.2997, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5769600000000001, |
| "grad_norm": 1.669565045060629, |
| "learning_rate": 2.1890157414430448e-06, |
| "loss": 0.2528, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.57952, |
| "grad_norm": 1.842829735431479, |
| "learning_rate": 2.163405995815904e-06, |
| "loss": 0.2771, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.58208, |
| "grad_norm": 1.8727513182856619, |
| "learning_rate": 2.1379287574762717e-06, |
| "loss": 0.3045, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.58464, |
| "grad_norm": 1.85842601413902, |
| "learning_rate": 2.11258445721331e-06, |
| "loss": 0.319, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.5872000000000002, |
| "grad_norm": 1.8680204100053788, |
| "learning_rate": 2.0873735235683535e-06, |
| "loss": 0.2799, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.58976, |
| "grad_norm": 1.730211436527152, |
| "learning_rate": 2.0622963828276744e-06, |
| "loss": 0.2626, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.59232, |
| "grad_norm": 1.6435312018877513, |
| "learning_rate": 2.037353459015272e-06, |
| "loss": 0.2744, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.5948799999999999, |
| "grad_norm": 1.6224922861504871, |
| "learning_rate": 2.0125451738856903e-06, |
| "loss": 0.2296, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.59744, |
| "grad_norm": 1.861965804154878, |
| "learning_rate": 1.9878719469169104e-06, |
| "loss": 0.2595, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.6943488611068847, |
| "learning_rate": 1.9633341953032246e-06, |
| "loss": 0.2906, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.60256, |
| "grad_norm": 1.6320130474511332, |
| "learning_rate": 1.9389323339482204e-06, |
| "loss": 0.2756, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6051199999999999, |
| "grad_norm": 1.6698686833486305, |
| "learning_rate": 1.9146667754577408e-06, |
| "loss": 0.3068, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.60768, |
| "grad_norm": 1.6880177914613448, |
| "learning_rate": 1.890537930132903e-06, |
| "loss": 0.2839, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.6102400000000001, |
| "grad_norm": 1.6773268308513856, |
| "learning_rate": 1.8665462059631866e-06, |
| "loss": 0.2491, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.6128, |
| "grad_norm": 1.8474070458904108, |
| "learning_rate": 1.8426920086195065e-06, |
| "loss": 0.2904, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.61536, |
| "grad_norm": 1.6218761795055971, |
| "learning_rate": 1.8189757414473686e-06, |
| "loss": 0.2441, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.61792, |
| "grad_norm": 1.8666720513899506, |
| "learning_rate": 1.795397805460053e-06, |
| "loss": 0.3003, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.6204800000000001, |
| "grad_norm": 1.685788691547833, |
| "learning_rate": 1.7719585993318177e-06, |
| "loss": 0.2896, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.62304, |
| "grad_norm": 1.7449552299776978, |
| "learning_rate": 1.7486585193911787e-06, |
| "loss": 0.2794, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6256, |
| "grad_norm": 1.704848209821893, |
| "learning_rate": 1.7254979596141886e-06, |
| "loss": 0.2616, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.62816, |
| "grad_norm": 1.8450853254842057, |
| "learning_rate": 1.7024773116177839e-06, |
| "loss": 0.2912, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.63072, |
| "grad_norm": 1.7926242028598987, |
| "learning_rate": 1.6795969646531685e-06, |
| "loss": 0.2617, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.63328, |
| "grad_norm": 1.7925132560586257, |
| "learning_rate": 1.6568573055992188e-06, |
| "loss": 0.2784, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.63584, |
| "grad_norm": 1.7663875310273034, |
| "learning_rate": 1.6342587189559577e-06, |
| "loss": 0.2696, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.6383999999999999, |
| "grad_norm": 1.6485491658987015, |
| "learning_rate": 1.6118015868380387e-06, |
| "loss": 0.2386, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.64096, |
| "grad_norm": 1.7988576600138655, |
| "learning_rate": 1.5894862889682906e-06, |
| "loss": 0.2955, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.64352, |
| "grad_norm": 1.714936041943462, |
| "learning_rate": 1.5673132026713046e-06, |
| "loss": 0.2843, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.64608, |
| "grad_norm": 1.967807455369453, |
| "learning_rate": 1.5452827028670358e-06, |
| "loss": 0.3071, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.6486399999999999, |
| "grad_norm": 1.767360909454626, |
| "learning_rate": 1.523395162064486e-06, |
| "loss": 0.251, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.6512, |
| "grad_norm": 1.6381296026383634, |
| "learning_rate": 1.50165095035539e-06, |
| "loss": 0.2755, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.6537600000000001, |
| "grad_norm": 1.5619584246682827, |
| "learning_rate": 1.480050435407957e-06, |
| "loss": 0.2614, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.65632, |
| "grad_norm": 1.6327509469165924, |
| "learning_rate": 1.4585939824606621e-06, |
| "loss": 0.255, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.65888, |
| "grad_norm": 1.563354867722546, |
| "learning_rate": 1.437281954316071e-06, |
| "loss": 0.2807, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.66144, |
| "grad_norm": 1.7550766801341493, |
| "learning_rate": 1.4161147113346917e-06, |
| "loss": 0.2702, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 1.6865725533510028, |
| "learning_rate": 1.395092611428902e-06, |
| "loss": 0.251, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.66656, |
| "grad_norm": 1.8531854972293114, |
| "learning_rate": 1.374216010056879e-06, |
| "loss": 0.2985, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.66912, |
| "grad_norm": 1.9209594828542413, |
| "learning_rate": 1.353485260216596e-06, |
| "loss": 0.3067, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.67168, |
| "grad_norm": 1.616385407100715, |
| "learning_rate": 1.3329007124398608e-06, |
| "loss": 0.2331, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.67424, |
| "grad_norm": 1.8003991478619372, |
| "learning_rate": 1.3124627147863733e-06, |
| "loss": 0.2878, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.6768, |
| "grad_norm": 1.5521426456650567, |
| "learning_rate": 1.2921716128378581e-06, |
| "loss": 0.2522, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.67936, |
| "grad_norm": 1.7611518891884141, |
| "learning_rate": 1.272027749692203e-06, |
| "loss": 0.261, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.6819199999999999, |
| "grad_norm": 1.7446813440682067, |
| "learning_rate": 1.2520314659576683e-06, |
| "loss": 0.2708, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.68448, |
| "grad_norm": 1.8976906912021962, |
| "learning_rate": 1.2321830997471329e-06, |
| "loss": 0.3082, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.68704, |
| "grad_norm": 1.5745862289974457, |
| "learning_rate": 1.212482986672361e-06, |
| "loss": 0.2438, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.6896, |
| "grad_norm": 1.7666742654864107, |
| "learning_rate": 1.1929314598383423e-06, |
| "loss": 0.2664, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6921599999999999, |
| "grad_norm": 1.7635144087752495, |
| "learning_rate": 1.1735288498376495e-06, |
| "loss": 0.2784, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.69472, |
| "grad_norm": 1.6871885012565122, |
| "learning_rate": 1.1542754847448544e-06, |
| "loss": 0.2585, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.6972800000000001, |
| "grad_norm": 1.763546085268813, |
| "learning_rate": 1.13517169011098e-06, |
| "loss": 0.2675, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.69984, |
| "grad_norm": 1.6833283054672803, |
| "learning_rate": 1.1162177889579906e-06, |
| "loss": 0.2456, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7024, |
| "grad_norm": 1.4995582297938723, |
| "learning_rate": 1.0974141017733386e-06, |
| "loss": 0.2219, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.70496, |
| "grad_norm": 1.7935988415350803, |
| "learning_rate": 1.078760946504539e-06, |
| "loss": 0.2878, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.7075200000000001, |
| "grad_norm": 1.8128478947806876, |
| "learning_rate": 1.0602586385537928e-06, |
| "loss": 0.2581, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.71008, |
| "grad_norm": 1.63791230975168, |
| "learning_rate": 1.041907490772658e-06, |
| "loss": 0.2498, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.71264, |
| "grad_norm": 1.590925354056578, |
| "learning_rate": 1.0237078134567535e-06, |
| "loss": 0.2505, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7151999999999998, |
| "grad_norm": 1.7695283124365373, |
| "learning_rate": 1.0056599143405244e-06, |
| "loss": 0.2754, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.71776, |
| "grad_norm": 1.7812963095403938, |
| "learning_rate": 9.877640985920268e-07, |
| "loss": 0.2798, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.72032, |
| "grad_norm": 1.6627323998398744, |
| "learning_rate": 9.700206688077707e-07, |
| "loss": 0.2298, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.72288, |
| "grad_norm": 1.9098592625989281, |
| "learning_rate": 9.524299250076052e-07, |
| "loss": 0.2805, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7254399999999999, |
| "grad_norm": 1.6937003995052815, |
| "learning_rate": 9.349921646296423e-07, |
| "loss": 0.2548, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 1.8012637933088234, |
| "learning_rate": 9.177076825252351e-07, |
| "loss": 0.2343, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.73056, |
| "grad_norm": 1.9146685623071344, |
| "learning_rate": 9.00576770953987e-07, |
| "loss": 0.2783, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.73312, |
| "grad_norm": 1.6706056064712593, |
| "learning_rate": 8.835997195788071e-07, |
| "loss": 0.2596, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.73568, |
| "grad_norm": 1.7350656477174768, |
| "learning_rate": 8.667768154610124e-07, |
| "loss": 0.2754, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.73824, |
| "grad_norm": 1.765131272482126, |
| "learning_rate": 8.501083430554868e-07, |
| "loss": 0.2655, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.7408000000000001, |
| "grad_norm": 1.7079209480392799, |
| "learning_rate": 8.335945842058524e-07, |
| "loss": 0.2853, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.74336, |
| "grad_norm": 1.659320265734026, |
| "learning_rate": 8.172358181397178e-07, |
| "loss": 0.2669, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.74592, |
| "grad_norm": 1.7216123667879766, |
| "learning_rate": 8.010323214639492e-07, |
| "loss": 0.2939, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.74848, |
| "grad_norm": 1.8167275931675924, |
| "learning_rate": 7.849843681599978e-07, |
| "loss": 0.2919, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.7510400000000002, |
| "grad_norm": 1.7516661490079315, |
| "learning_rate": 7.690922295792647e-07, |
| "loss": 0.2405, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.7536, |
| "grad_norm": 1.7608726427403516, |
| "learning_rate": 7.53356174438512e-07, |
| "loss": 0.2692, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.75616, |
| "grad_norm": 1.7716445711738182, |
| "learning_rate": 7.377764688153244e-07, |
| "loss": 0.2444, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.7587199999999998, |
| "grad_norm": 1.6553834628764956, |
| "learning_rate": 7.223533761435986e-07, |
| "loss": 0.2283, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.76128, |
| "grad_norm": 1.6208484626275992, |
| "learning_rate": 7.070871572091076e-07, |
| "loss": 0.2447, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.76384, |
| "grad_norm": 1.9469256466871052, |
| "learning_rate": 6.919780701450684e-07, |
| "loss": 0.3117, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.7664, |
| "grad_norm": 1.807807654678934, |
| "learning_rate": 6.770263704277958e-07, |
| "loss": 0.2489, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7689599999999999, |
| "grad_norm": 1.7754566354998111, |
| "learning_rate": 6.62232310872375e-07, |
| "loss": 0.2933, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.77152, |
| "grad_norm": 2.142560102394874, |
| "learning_rate": 6.475961416283838e-07, |
| "loss": 0.2976, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.77408, |
| "grad_norm": 1.8425369199032686, |
| "learning_rate": 6.331181101756733e-07, |
| "loss": 0.2525, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.77664, |
| "grad_norm": 1.8067216131513493, |
| "learning_rate": 6.187984613201703e-07, |
| "loss": 0.2724, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.7792, |
| "grad_norm": 1.819627845132494, |
| "learning_rate": 6.046374371897446e-07, |
| "loss": 0.2676, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.78176, |
| "grad_norm": 1.6926155476047056, |
| "learning_rate": 5.906352772301193e-07, |
| "loss": 0.2734, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.7843200000000001, |
| "grad_norm": 1.7608510485979083, |
| "learning_rate": 5.767922182008145e-07, |
| "loss": 0.2753, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.78688, |
| "grad_norm": 1.715763266425071, |
| "learning_rate": 5.631084941711473e-07, |
| "loss": 0.2673, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.78944, |
| "grad_norm": 1.6687789738165386, |
| "learning_rate": 5.495843365162701e-07, |
| "loss": 0.2901, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 1.8537817214382708, |
| "learning_rate": 5.362199739132656e-07, |
| "loss": 0.2747, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7945600000000002, |
| "grad_norm": 1.6704242281507662, |
| "learning_rate": 5.230156323372759e-07, |
| "loss": 0.2524, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.79712, |
| "grad_norm": 1.825247698595894, |
| "learning_rate": 5.099715350576817e-07, |
| "loss": 0.2676, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.79968, |
| "grad_norm": 1.7300015240890014, |
| "learning_rate": 4.970879026343256e-07, |
| "loss": 0.2747, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.8022399999999998, |
| "grad_norm": 1.707816251456467, |
| "learning_rate": 4.843649529137861e-07, |
| "loss": 0.2708, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.8048, |
| "grad_norm": 1.8785886608496822, |
| "learning_rate": 4.7180290102568973e-07, |
| "loss": 0.3164, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.80736, |
| "grad_norm": 1.8644648318252912, |
| "learning_rate": 4.594019593790799e-07, |
| "loss": 0.2927, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.80992, |
| "grad_norm": 1.8493889520810634, |
| "learning_rate": 4.471623376588197e-07, |
| "loss": 0.2628, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8124799999999999, |
| "grad_norm": 1.7845383273554698, |
| "learning_rate": 4.35084242822047e-07, |
| "loss": 0.2582, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.81504, |
| "grad_norm": 1.8293493217735093, |
| "learning_rate": 4.2316787909467915e-07, |
| "loss": 0.2753, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.8176, |
| "grad_norm": 1.6894890585749698, |
| "learning_rate": 4.114134479679543e-07, |
| "loss": 0.2678, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.82016, |
| "grad_norm": 1.7542138094486555, |
| "learning_rate": 3.998211481950254e-07, |
| "loss": 0.2892, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.82272, |
| "grad_norm": 1.8793095620901379, |
| "learning_rate": 3.883911757876058e-07, |
| "loss": 0.2728, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.82528, |
| "grad_norm": 1.7267463710184283, |
| "learning_rate": 3.771237240126469e-07, |
| "loss": 0.2735, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.8278400000000001, |
| "grad_norm": 1.7219666375218627, |
| "learning_rate": 3.66018983389077e-07, |
| "loss": 0.2597, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.8304, |
| "grad_norm": 1.7636190461227865, |
| "learning_rate": 3.5507714168457e-07, |
| "loss": 0.2665, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.83296, |
| "grad_norm": 1.7666528327974367, |
| "learning_rate": 3.442983839123826e-07, |
| "loss": 0.2805, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.83552, |
| "grad_norm": 1.6781083614930392, |
| "learning_rate": 3.3368289232822094e-07, |
| "loss": 0.246, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.8380800000000002, |
| "grad_norm": 1.7603755414192976, |
| "learning_rate": 3.232308464271505e-07, |
| "loss": 0.2947, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.84064, |
| "grad_norm": 1.7521780009843657, |
| "learning_rate": 3.1294242294057974e-07, |
| "loss": 0.3191, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.8432, |
| "grad_norm": 1.6098707300379458, |
| "learning_rate": 3.028177958332512e-07, |
| "loss": 0.2548, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.8457599999999998, |
| "grad_norm": 1.6267142406397461, |
| "learning_rate": 2.928571363003152e-07, |
| "loss": 0.3028, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.84832, |
| "grad_norm": 1.6960630177797111, |
| "learning_rate": 2.8306061276442753e-07, |
| "loss": 0.2631, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.85088, |
| "grad_norm": 1.7493636450152343, |
| "learning_rate": 2.7342839087290183e-07, |
| "loss": 0.2518, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.85344, |
| "grad_norm": 1.6385553511066044, |
| "learning_rate": 2.639606334949163e-07, |
| "loss": 0.2908, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 1.7931640851082686, |
| "learning_rate": 2.5465750071874797e-07, |
| "loss": 0.2649, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.85856, |
| "grad_norm": 1.5912117214737356, |
| "learning_rate": 2.455191498490739e-07, |
| "loss": 0.2664, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.86112, |
| "grad_norm": 1.6678147462368311, |
| "learning_rate": 2.365457354043088e-07, |
| "loss": 0.2172, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.86368, |
| "grad_norm": 1.6035873970448906, |
| "learning_rate": 2.27737409113995e-07, |
| "loss": 0.2504, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.86624, |
| "grad_norm": 1.7528209162894965, |
| "learning_rate": 2.1909431991623097e-07, |
| "loss": 0.2615, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.8688, |
| "grad_norm": 1.8639972671950014, |
| "learning_rate": 2.106166139551602e-07, |
| "loss": 0.2668, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.8713600000000001, |
| "grad_norm": 1.6865465188399893, |
| "learning_rate": 2.0230443457849414e-07, |
| "loss": 0.2797, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.87392, |
| "grad_norm": 1.5210672653088169, |
| "learning_rate": 1.941579223350898e-07, |
| "loss": 0.2304, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.87648, |
| "grad_norm": 1.7085604230659135, |
| "learning_rate": 1.8617721497257823e-07, |
| "loss": 0.2505, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.87904, |
| "grad_norm": 1.8288714006316353, |
| "learning_rate": 1.7836244743502762e-07, |
| "loss": 0.2364, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.8816000000000002, |
| "grad_norm": 1.7484101329116195, |
| "learning_rate": 1.7071375186066607e-07, |
| "loss": 0.2449, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.88416, |
| "grad_norm": 1.6200555865387618, |
| "learning_rate": 1.6323125757964799e-07, |
| "loss": 0.2692, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.88672, |
| "grad_norm": 1.9011939560507523, |
| "learning_rate": 1.5591509111186342e-07, |
| "loss": 0.2652, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.8892799999999998, |
| "grad_norm": 1.7822199769462572, |
| "learning_rate": 1.4876537616480335e-07, |
| "loss": 0.2881, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.89184, |
| "grad_norm": 1.750191048312678, |
| "learning_rate": 1.4178223363146226e-07, |
| "loss": 0.2622, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.8944, |
| "grad_norm": 1.7516915828032618, |
| "learning_rate": 1.349657815883032e-07, |
| "loss": 0.2961, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.89696, |
| "grad_norm": 1.645641455994093, |
| "learning_rate": 1.283161352932505e-07, |
| "loss": 0.2736, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.8995199999999999, |
| "grad_norm": 1.705553244598403, |
| "learning_rate": 1.218334071837468e-07, |
| "loss": 0.2583, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.90208, |
| "grad_norm": 1.7315966835392997, |
| "learning_rate": 1.1551770687485142e-07, |
| "loss": 0.2758, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.90464, |
| "grad_norm": 1.727806848265733, |
| "learning_rate": 1.0936914115738717e-07, |
| "loss": 0.2657, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.9072, |
| "grad_norm": 1.676827461659673, |
| "learning_rate": 1.0338781399613307e-07, |
| "loss": 0.2642, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.90976, |
| "grad_norm": 1.7115198632740858, |
| "learning_rate": 9.757382652806791e-08, |
| "loss": 0.2545, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.91232, |
| "grad_norm": 1.7354830976605933, |
| "learning_rate": 9.192727706065829e-08, |
| "loss": 0.2583, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9148800000000001, |
| "grad_norm": 1.7582838528396394, |
| "learning_rate": 8.644826107019888e-08, |
| "loss": 0.2814, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.91744, |
| "grad_norm": 1.681917990191324, |
| "learning_rate": 8.113687120019587e-08, |
| "loss": 0.2601, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 1.7668971882563782, |
| "learning_rate": 7.599319725980047e-08, |
| "loss": 0.2621, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.92256, |
| "grad_norm": 1.7599942208335475, |
| "learning_rate": 7.101732622229462e-08, |
| "loss": 0.2881, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.9251200000000002, |
| "grad_norm": 1.7230238393967752, |
| "learning_rate": 6.62093422236132e-08, |
| "loss": 0.3086, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.92768, |
| "grad_norm": 1.7803538404725028, |
| "learning_rate": 6.15693265609274e-08, |
| "loss": 0.2647, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.93024, |
| "grad_norm": 1.8054504698595564, |
| "learning_rate": 5.709735769126479e-08, |
| "loss": 0.2815, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.9327999999999999, |
| "grad_norm": 1.6120878259133444, |
| "learning_rate": 5.279351123019028e-08, |
| "loss": 0.2307, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.93536, |
| "grad_norm": 1.7147176143131828, |
| "learning_rate": 4.8657859950520524e-08, |
| "loss": 0.2741, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.93792, |
| "grad_norm": 1.826014851535064, |
| "learning_rate": 4.469047378109603e-08, |
| "loss": 0.2949, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.94048, |
| "grad_norm": 1.8033974744681112, |
| "learning_rate": 4.0891419805597634e-08, |
| "loss": 0.2564, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.9430399999999999, |
| "grad_norm": 1.7228110817797906, |
| "learning_rate": 3.7260762261416287e-08, |
| "loss": 0.2654, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.9456, |
| "grad_norm": 1.7114724793916518, |
| "learning_rate": 3.379856253855951e-08, |
| "loss": 0.2441, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9481600000000001, |
| "grad_norm": 1.736060820260858, |
| "learning_rate": 3.0504879178622214e-08, |
| "loss": 0.2748, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.95072, |
| "grad_norm": 1.7505236918858846, |
| "learning_rate": 2.73797678737886e-08, |
| "loss": 0.2456, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.95328, |
| "grad_norm": 1.5859654595806796, |
| "learning_rate": 2.442328146589512e-08, |
| "loss": 0.2799, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.95584, |
| "grad_norm": 1.7039588055868866, |
| "learning_rate": 2.163546994553789e-08, |
| "loss": 0.2402, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.9584000000000001, |
| "grad_norm": 1.8531697022252631, |
| "learning_rate": 1.9016380451223337e-08, |
| "loss": 0.2944, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.96096, |
| "grad_norm": 1.7102421117876718, |
| "learning_rate": 1.656605726857441e-08, |
| "loss": 0.2704, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.96352, |
| "grad_norm": 2.0294788740008176, |
| "learning_rate": 1.4284541829580056e-08, |
| "loss": 0.3189, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.96608, |
| "grad_norm": 1.8662967648141273, |
| "learning_rate": 1.2171872711895794e-08, |
| "loss": 0.2729, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.96864, |
| "grad_norm": 1.6195984132177164, |
| "learning_rate": 1.0228085638190887e-08, |
| "loss": 0.2613, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.9712, |
| "grad_norm": 1.5450708746300255, |
| "learning_rate": 8.453213475543287e-09, |
| "loss": 0.2625, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.97376, |
| "grad_norm": 1.69409357688793, |
| "learning_rate": 6.84728623488562e-09, |
| "loss": 0.2557, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.9763199999999999, |
| "grad_norm": 1.812223784293616, |
| "learning_rate": 5.410331070498931e-09, |
| "loss": 0.2712, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.97888, |
| "grad_norm": 1.6091781900571478, |
| "learning_rate": 4.142372279548612e-09, |
| "loss": 0.2593, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.98144, |
| "grad_norm": 1.6908756673392302, |
| "learning_rate": 3.043431301678057e-09, |
| "loss": 0.2226, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 1.7859820632732437, |
| "learning_rate": 2.11352671864562e-09, |
| "loss": 0.2807, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9865599999999999, |
| "grad_norm": 1.7470027221404476, |
| "learning_rate": 1.3526742540070913e-09, |
| "loss": 0.2718, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.98912, |
| "grad_norm": 1.8210620903336188, |
| "learning_rate": 7.608867728536862e-10, |
| "loss": 0.2749, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.9916800000000001, |
| "grad_norm": 1.9294520890967608, |
| "learning_rate": 3.381742815944389e-10, |
| "loss": 0.2902, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.99424, |
| "grad_norm": 1.7057897433651361, |
| "learning_rate": 8.454392778189935e-11, |
| "loss": 0.2478, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.9968, |
| "grad_norm": 1.7268558478851974, |
| "learning_rate": 0.0, |
| "loss": 0.2642, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9968, |
| "step": 780, |
| "total_flos": 440534549233664.0, |
| "train_loss": 0.46617485760496213, |
| "train_runtime": 27454.835, |
| "train_samples_per_second": 1.821, |
| "train_steps_per_second": 0.028 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 440534549233664.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
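The file above follows the layout of a Hugging Face `Trainer` state file (`trainer_state.json`): a `log_history` array with one record per optimizer step, followed by a run summary and the trainer's bookkeeping fields. Below is a minimal Python sketch for inspecting such a file; the filename `trainer_state.json`, the use of matplotlib, and the assumed learning-rate schedule (16 linear warmup steps to a 2e-05 peak, then cosine decay to zero at `max_steps`) are inferences from the logged values, not facts stated in the file.

import json
import math

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed filename
    state = json.load(f)

# Per-step records carry a "loss" key; the trailing run summary does not.
records = [r for r in state["log_history"] if "loss" in r]
steps = [r["step"] for r in records]
loss = [r["loss"] for r in records]
lr = [r["learning_rate"] for r in records]

# Plot training loss and learning rate against the optimizer step.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, loss)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lr)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
fig.savefig("trainer_state_curves.png")

# Spot-check the inferred schedule: linear warmup over 16 steps to a 2e-05
# peak, then cosine decay to zero at max_steps (780). This is
# reverse-engineered from the logged values, not a field in the file.
peak, warmup, total = 2e-05, 16, state["max_steps"]

def expected_lr(step: int) -> float:
    if step <= warmup:
        return peak * step / warmup
    progress = (step - warmup) / (total - warmup)
    return peak * 0.5 * (1.0 + math.cos(math.pi * progress))

# Both values should be ~8.45e-11 (step 779 is the second-to-last record).
print(expected_lr(779), records[-2]["learning_rate"])

As a quick sanity check on that reading, the last nonzero learning rate in the log (8.454392778189935e-11 at step 779) matches 1e-05 * (1 + cos(pi * 763/764)), which is what a cosine schedule with 16 warmup steps and 780 total steps would produce at that point.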