| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.999288256227758, |
| "eval_steps": 500, |
| "global_step": 936, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010676156583629894, |
| "grad_norm": 6.073639869689941, |
| "learning_rate": 1.0638297872340426e-07, |
| "loss": 0.8861, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002135231316725979, |
| "grad_norm": 5.964370250701904, |
| "learning_rate": 2.1276595744680852e-07, |
| "loss": 0.8784, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.003202846975088968, |
| "grad_norm": 5.9699530601501465, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 0.9081, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004270462633451958, |
| "grad_norm": 6.123816013336182, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 0.923, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005338078291814947, |
| "grad_norm": 6.031068801879883, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 0.8834, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006405693950177936, |
| "grad_norm": 5.705842018127441, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 0.8708, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007473309608540925, |
| "grad_norm": 5.794719696044922, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 0.8466, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.008540925266903915, |
| "grad_norm": 5.5866618156433105, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 0.8647, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.009608540925266904, |
| "grad_norm": 5.529083251953125, |
| "learning_rate": 9.574468085106384e-07, |
| "loss": 0.8451, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.010676156583629894, |
| "grad_norm": 5.221846580505371, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 0.8676, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011743772241992882, |
| "grad_norm": 4.504139423370361, |
| "learning_rate": 1.170212765957447e-06, |
| "loss": 0.8504, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.012811387900355872, |
| "grad_norm": 4.460880756378174, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 0.85, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.013879003558718862, |
| "grad_norm": 4.31349515914917, |
| "learning_rate": 1.3829787234042555e-06, |
| "loss": 0.8676, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01494661921708185, |
| "grad_norm": 2.5565595626831055, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 0.8101, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01601423487544484, |
| "grad_norm": 2.412811040878296, |
| "learning_rate": 1.595744680851064e-06, |
| "loss": 0.7941, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01708185053380783, |
| "grad_norm": 2.3634886741638184, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 0.7672, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.018149466192170817, |
| "grad_norm": 2.1130712032318115, |
| "learning_rate": 1.8085106382978727e-06, |
| "loss": 0.7964, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.019217081850533807, |
| "grad_norm": 1.9730169773101807, |
| "learning_rate": 1.9148936170212767e-06, |
| "loss": 0.7533, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.020284697508896797, |
| "grad_norm": 2.553852081298828, |
| "learning_rate": 2.021276595744681e-06, |
| "loss": 0.7736, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.021352313167259787, |
| "grad_norm": 3.0640649795532227, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.7367, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022419928825622777, |
| "grad_norm": 3.216430902481079, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 0.7172, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.023487544483985764, |
| "grad_norm": 3.115963935852051, |
| "learning_rate": 2.340425531914894e-06, |
| "loss": 0.722, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.024555160142348754, |
| "grad_norm": 2.9504928588867188, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 0.7265, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.025622775800711744, |
| "grad_norm": 2.530184030532837, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 0.7168, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.026690391459074734, |
| "grad_norm": 2.288259506225586, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 0.7317, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.027758007117437724, |
| "grad_norm": 1.7040042877197266, |
| "learning_rate": 2.765957446808511e-06, |
| "loss": 0.7177, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.02882562277580071, |
| "grad_norm": 1.2607332468032837, |
| "learning_rate": 2.8723404255319155e-06, |
| "loss": 0.6854, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0298932384341637, |
| "grad_norm": 1.2629083395004272, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.6923, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03096085409252669, |
| "grad_norm": 1.2417811155319214, |
| "learning_rate": 3.0851063829787237e-06, |
| "loss": 0.6781, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03202846975088968, |
| "grad_norm": 1.1789079904556274, |
| "learning_rate": 3.191489361702128e-06, |
| "loss": 0.6828, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03309608540925267, |
| "grad_norm": 1.0594401359558105, |
| "learning_rate": 3.297872340425532e-06, |
| "loss": 0.6351, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03416370106761566, |
| "grad_norm": 1.0672959089279175, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 0.6465, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03523131672597865, |
| "grad_norm": 0.9413697123527527, |
| "learning_rate": 3.510638297872341e-06, |
| "loss": 0.6452, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.036298932384341634, |
| "grad_norm": 0.8563829064369202, |
| "learning_rate": 3.6170212765957453e-06, |
| "loss": 0.6293, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.037366548042704624, |
| "grad_norm": 0.7698128819465637, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.6168, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.038434163701067614, |
| "grad_norm": 0.9001553654670715, |
| "learning_rate": 3.8297872340425535e-06, |
| "loss": 0.6169, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.039501779359430604, |
| "grad_norm": 1.0053948163986206, |
| "learning_rate": 3.936170212765958e-06, |
| "loss": 0.6171, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.040569395017793594, |
| "grad_norm": 1.0858631134033203, |
| "learning_rate": 4.042553191489362e-06, |
| "loss": 0.651, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.041637010676156584, |
| "grad_norm": 0.7921259999275208, |
| "learning_rate": 4.148936170212766e-06, |
| "loss": 0.6447, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.042704626334519574, |
| "grad_norm": 0.6242907643318176, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.5966, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.043772241992882564, |
| "grad_norm": 0.6965751051902771, |
| "learning_rate": 4.361702127659575e-06, |
| "loss": 0.5909, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.044839857651245554, |
| "grad_norm": 0.7059489488601685, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 0.607, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.045907473309608544, |
| "grad_norm": 0.6813053488731384, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 0.5946, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04697508896797153, |
| "grad_norm": 0.6372105479240417, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 0.5817, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04804270462633452, |
| "grad_norm": 0.5932055711746216, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 0.5916, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04911032028469751, |
| "grad_norm": 0.5361504554748535, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 0.6004, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0501779359430605, |
| "grad_norm": 0.5340394973754883, |
| "learning_rate": 5e-06, |
| "loss": 0.5828, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05124555160142349, |
| "grad_norm": 0.7338045835494995, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 0.622, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05231316725978648, |
| "grad_norm": 0.6164150834083557, |
| "learning_rate": 5.212765957446809e-06, |
| "loss": 0.5792, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05338078291814947, |
| "grad_norm": 0.5131736397743225, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.5769, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05444839857651246, |
| "grad_norm": 0.5779881477355957, |
| "learning_rate": 5.425531914893617e-06, |
| "loss": 0.6085, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05551601423487545, |
| "grad_norm": 0.652091383934021, |
| "learning_rate": 5.531914893617022e-06, |
| "loss": 0.6063, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05658362989323843, |
| "grad_norm": 0.5679817199707031, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 0.5642, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05765124555160142, |
| "grad_norm": 0.564539909362793, |
| "learning_rate": 5.744680851063831e-06, |
| "loss": 0.5881, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.05871886120996441, |
| "grad_norm": 0.5641509294509888, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 0.575, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0597864768683274, |
| "grad_norm": 0.5606682300567627, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.5476, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06085409252669039, |
| "grad_norm": 0.573742151260376, |
| "learning_rate": 6.063829787234044e-06, |
| "loss": 0.5857, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06192170818505338, |
| "grad_norm": 0.574886679649353, |
| "learning_rate": 6.170212765957447e-06, |
| "loss": 0.5638, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06298932384341636, |
| "grad_norm": 0.5414725542068481, |
| "learning_rate": 6.276595744680851e-06, |
| "loss": 0.5927, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06405693950177936, |
| "grad_norm": 0.5933969020843506, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.55, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06512455516014234, |
| "grad_norm": 0.4871617257595062, |
| "learning_rate": 6.48936170212766e-06, |
| "loss": 0.5433, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06619217081850534, |
| "grad_norm": 0.55656498670578, |
| "learning_rate": 6.595744680851064e-06, |
| "loss": 0.5599, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06725978647686832, |
| "grad_norm": 0.5835067629814148, |
| "learning_rate": 6.702127659574469e-06, |
| "loss": 0.5549, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.06832740213523132, |
| "grad_norm": 0.4503428637981415, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 0.573, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0693950177935943, |
| "grad_norm": 0.5671048164367676, |
| "learning_rate": 6.914893617021278e-06, |
| "loss": 0.5565, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0704626334519573, |
| "grad_norm": 0.4813767075538635, |
| "learning_rate": 7.021276595744682e-06, |
| "loss": 0.575, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07153024911032028, |
| "grad_norm": 0.5659592151641846, |
| "learning_rate": 7.127659574468085e-06, |
| "loss": 0.5181, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07259786476868327, |
| "grad_norm": 0.5178795456886292, |
| "learning_rate": 7.234042553191491e-06, |
| "loss": 0.5566, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07366548042704626, |
| "grad_norm": 0.5484414100646973, |
| "learning_rate": 7.340425531914894e-06, |
| "loss": 0.5409, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07473309608540925, |
| "grad_norm": 0.5428405404090881, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.5962, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07580071174377224, |
| "grad_norm": 0.4844944179058075, |
| "learning_rate": 7.553191489361703e-06, |
| "loss": 0.571, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.07686832740213523, |
| "grad_norm": 0.590509831905365, |
| "learning_rate": 7.659574468085107e-06, |
| "loss": 0.5348, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.07793594306049823, |
| "grad_norm": 0.4901210367679596, |
| "learning_rate": 7.765957446808511e-06, |
| "loss": 0.5472, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.07900355871886121, |
| "grad_norm": 0.4863327443599701, |
| "learning_rate": 7.872340425531916e-06, |
| "loss": 0.5249, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0800711743772242, |
| "grad_norm": 0.5700340270996094, |
| "learning_rate": 7.97872340425532e-06, |
| "loss": 0.5875, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08113879003558719, |
| "grad_norm": 0.4684799015522003, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 0.5293, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08220640569395017, |
| "grad_norm": 0.6248029470443726, |
| "learning_rate": 8.191489361702128e-06, |
| "loss": 0.5562, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08327402135231317, |
| "grad_norm": 0.5804619789123535, |
| "learning_rate": 8.297872340425532e-06, |
| "loss": 0.544, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08434163701067615, |
| "grad_norm": 0.4794471561908722, |
| "learning_rate": 8.404255319148937e-06, |
| "loss": 0.5629, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.08540925266903915, |
| "grad_norm": 0.5686808824539185, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.5605, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08647686832740213, |
| "grad_norm": 0.6154677867889404, |
| "learning_rate": 8.617021276595746e-06, |
| "loss": 0.5632, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.08754448398576513, |
| "grad_norm": 0.599371612071991, |
| "learning_rate": 8.72340425531915e-06, |
| "loss": 0.5118, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.08861209964412811, |
| "grad_norm": 0.6383739709854126, |
| "learning_rate": 8.829787234042555e-06, |
| "loss": 0.5584, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.08967971530249111, |
| "grad_norm": 0.6114341616630554, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 0.5475, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09074733096085409, |
| "grad_norm": 0.5422399044036865, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 0.5356, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09181494661921709, |
| "grad_norm": 0.5314775109291077, |
| "learning_rate": 9.148936170212767e-06, |
| "loss": 0.5241, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09288256227758007, |
| "grad_norm": 0.592779278755188, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 0.5339, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09395017793594305, |
| "grad_norm": 0.6122350096702576, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 0.5434, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09501779359430605, |
| "grad_norm": 0.5250897407531738, |
| "learning_rate": 9.46808510638298e-06, |
| "loss": 0.5459, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.09608540925266904, |
| "grad_norm": 0.592778205871582, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 0.5578, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09715302491103203, |
| "grad_norm": 0.47788286209106445, |
| "learning_rate": 9.680851063829787e-06, |
| "loss": 0.5471, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.09822064056939502, |
| "grad_norm": 0.5285771489143372, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 0.5281, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.09928825622775801, |
| "grad_norm": 0.47819527983665466, |
| "learning_rate": 9.893617021276596e-06, |
| "loss": 0.5269, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.100355871886121, |
| "grad_norm": 0.48683878779411316, |
| "learning_rate": 1e-05, |
| "loss": 0.5286, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10142348754448399, |
| "grad_norm": 0.4826238751411438, |
| "learning_rate": 9.999965197129365e-06, |
| "loss": 0.5338, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.10249110320284698, |
| "grad_norm": 0.5617088675498962, |
| "learning_rate": 9.999860789001947e-06, |
| "loss": 0.5396, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.10355871886120996, |
| "grad_norm": 0.46838635206222534, |
| "learning_rate": 9.999686777071233e-06, |
| "loss": 0.5162, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.10462633451957296, |
| "grad_norm": 0.5251678228378296, |
| "learning_rate": 9.999443163759669e-06, |
| "loss": 0.5158, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.10569395017793594, |
| "grad_norm": 0.4968458116054535, |
| "learning_rate": 9.999129952458628e-06, |
| "loss": 0.5348, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.10676156583629894, |
| "grad_norm": 0.5481524467468262, |
| "learning_rate": 9.998747147528375e-06, |
| "loss": 0.5245, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10782918149466192, |
| "grad_norm": 0.495601624250412, |
| "learning_rate": 9.998294754297992e-06, |
| "loss": 0.4902, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.10889679715302492, |
| "grad_norm": 0.5450451374053955, |
| "learning_rate": 9.997772779065312e-06, |
| "loss": 0.5487, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1099644128113879, |
| "grad_norm": 0.5144025087356567, |
| "learning_rate": 9.997181229096831e-06, |
| "loss": 0.5088, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1110320284697509, |
| "grad_norm": 0.5595222115516663, |
| "learning_rate": 9.996520112627602e-06, |
| "loss": 0.5327, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11209964412811388, |
| "grad_norm": 0.6185320615768433, |
| "learning_rate": 9.995789438861128e-06, |
| "loss": 0.529, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11316725978647686, |
| "grad_norm": 0.5363855957984924, |
| "learning_rate": 9.994989217969224e-06, |
| "loss": 0.5566, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.11423487544483986, |
| "grad_norm": 0.598929762840271, |
| "learning_rate": 9.994119461091885e-06, |
| "loss": 0.5024, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.11530249110320284, |
| "grad_norm": 0.49212321639060974, |
| "learning_rate": 9.993180180337126e-06, |
| "loss": 0.5165, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.11637010676156584, |
| "grad_norm": 0.5540521740913391, |
| "learning_rate": 9.992171388780814e-06, |
| "loss": 0.545, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.11743772241992882, |
| "grad_norm": 0.45905211567878723, |
| "learning_rate": 9.991093100466482e-06, |
| "loss": 0.4976, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11850533807829182, |
| "grad_norm": 0.6659161448478699, |
| "learning_rate": 9.989945330405146e-06, |
| "loss": 0.5297, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1195729537366548, |
| "grad_norm": 0.5286558866500854, |
| "learning_rate": 9.988728094575082e-06, |
| "loss": 0.5029, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1206405693950178, |
| "grad_norm": 0.615146279335022, |
| "learning_rate": 9.98744140992161e-06, |
| "loss": 0.4897, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12170818505338078, |
| "grad_norm": 0.5887618064880371, |
| "learning_rate": 9.986085294356858e-06, |
| "loss": 0.5802, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12277580071174377, |
| "grad_norm": 0.5688331723213196, |
| "learning_rate": 9.98465976675951e-06, |
| "loss": 0.5015, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12384341637010676, |
| "grad_norm": 0.548701822757721, |
| "learning_rate": 9.983164846974549e-06, |
| "loss": 0.5154, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.12491103202846975, |
| "grad_norm": 0.5815207362174988, |
| "learning_rate": 9.981600555812975e-06, |
| "loss": 0.5134, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.12597864768683273, |
| "grad_norm": 0.5351188778877258, |
| "learning_rate": 9.979966915051517e-06, |
| "loss": 0.4912, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.12704626334519573, |
| "grad_norm": 0.5330350399017334, |
| "learning_rate": 9.978263947432331e-06, |
| "loss": 0.5283, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.12811387900355872, |
| "grad_norm": 0.5946553349494934, |
| "learning_rate": 9.976491676662679e-06, |
| "loss": 0.5365, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12918149466192172, |
| "grad_norm": 0.4562559723854065, |
| "learning_rate": 9.974650127414609e-06, |
| "loss": 0.5257, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1302491103202847, |
| "grad_norm": 0.6122528910636902, |
| "learning_rate": 9.972739325324596e-06, |
| "loss": 0.5068, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13131672597864769, |
| "grad_norm": 0.5213277339935303, |
| "learning_rate": 9.970759296993205e-06, |
| "loss": 0.4931, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.13238434163701068, |
| "grad_norm": 0.5232741236686707, |
| "learning_rate": 9.968710069984699e-06, |
| "loss": 0.5208, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.13345195729537365, |
| "grad_norm": 0.5642791390419006, |
| "learning_rate": 9.966591672826674e-06, |
| "loss": 0.5277, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13451957295373665, |
| "grad_norm": 0.5566534996032715, |
| "learning_rate": 9.964404135009649e-06, |
| "loss": 0.5485, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.13558718861209965, |
| "grad_norm": 0.512524425983429, |
| "learning_rate": 9.962147486986664e-06, |
| "loss": 0.5566, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.13665480427046264, |
| "grad_norm": 0.5945698618888855, |
| "learning_rate": 9.959821760172849e-06, |
| "loss": 0.5323, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1377224199288256, |
| "grad_norm": 0.519640326499939, |
| "learning_rate": 9.957426986944994e-06, |
| "loss": 0.5002, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1387900355871886, |
| "grad_norm": 0.5728665590286255, |
| "learning_rate": 9.95496320064109e-06, |
| "loss": 0.4939, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1398576512455516, |
| "grad_norm": 0.5529624819755554, |
| "learning_rate": 9.952430435559873e-06, |
| "loss": 0.4971, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.1409252669039146, |
| "grad_norm": 0.5441509485244751, |
| "learning_rate": 9.94982872696034e-06, |
| "loss": 0.5287, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14199288256227757, |
| "grad_norm": 0.5497064590454102, |
| "learning_rate": 9.947158111061263e-06, |
| "loss": 0.5063, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.14306049822064057, |
| "grad_norm": 0.5112823843955994, |
| "learning_rate": 9.94441862504068e-06, |
| "loss": 0.5122, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.14412811387900357, |
| "grad_norm": 0.588614284992218, |
| "learning_rate": 9.941610307035385e-06, |
| "loss": 0.5098, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.14519572953736654, |
| "grad_norm": 0.4941340386867523, |
| "learning_rate": 9.938733196140386e-06, |
| "loss": 0.5082, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.14626334519572953, |
| "grad_norm": 0.566385805606842, |
| "learning_rate": 9.935787332408375e-06, |
| "loss": 0.4837, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.14733096085409253, |
| "grad_norm": 0.5272175073623657, |
| "learning_rate": 9.932772756849152e-06, |
| "loss": 0.5014, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.14839857651245553, |
| "grad_norm": 0.5560716986656189, |
| "learning_rate": 9.929689511429075e-06, |
| "loss": 0.5203, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1494661921708185, |
| "grad_norm": 0.6580199003219604, |
| "learning_rate": 9.926537639070457e-06, |
| "loss": 0.5088, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1505338078291815, |
| "grad_norm": 0.5059327483177185, |
| "learning_rate": 9.923317183650985e-06, |
| "loss": 0.5139, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1516014234875445, |
| "grad_norm": 0.54743891954422, |
| "learning_rate": 9.92002819000309e-06, |
| "loss": 0.5079, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1526690391459075, |
| "grad_norm": 0.5422698259353638, |
| "learning_rate": 9.916670703913345e-06, |
| "loss": 0.5176, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.15373665480427046, |
| "grad_norm": 0.5230839848518372, |
| "learning_rate": 9.913244772121811e-06, |
| "loss": 0.5, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.15480427046263345, |
| "grad_norm": 0.4665907025337219, |
| "learning_rate": 9.90975044232139e-06, |
| "loss": 0.5053, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.15587188612099645, |
| "grad_norm": 0.6438184380531311, |
| "learning_rate": 9.90618776315717e-06, |
| "loss": 0.5266, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.15693950177935942, |
| "grad_norm": 0.4731660485267639, |
| "learning_rate": 9.902556784225729e-06, |
| "loss": 0.4834, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.15800711743772242, |
| "grad_norm": 0.5352628827095032, |
| "learning_rate": 9.898857556074469e-06, |
| "loss": 0.5099, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1590747330960854, |
| "grad_norm": 0.5462168455123901, |
| "learning_rate": 9.895090130200889e-06, |
| "loss": 0.5195, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.1601423487544484, |
| "grad_norm": 0.4784468710422516, |
| "learning_rate": 9.891254559051886e-06, |
| "loss": 0.5173, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16120996441281138, |
| "grad_norm": 0.498097687959671, |
| "learning_rate": 9.887350896023015e-06, |
| "loss": 0.5065, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.16227758007117438, |
| "grad_norm": 0.4685448706150055, |
| "learning_rate": 9.883379195457747e-06, |
| "loss": 0.4687, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.16334519572953737, |
| "grad_norm": 0.5289403200149536, |
| "learning_rate": 9.879339512646714e-06, |
| "loss": 0.5154, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.16441281138790034, |
| "grad_norm": 0.5611624121665955, |
| "learning_rate": 9.875231903826936e-06, |
| "loss": 0.5305, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.16548042704626334, |
| "grad_norm": 0.5301553010940552, |
| "learning_rate": 9.871056426181052e-06, |
| "loss": 0.5316, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.16654804270462634, |
| "grad_norm": 0.6103717684745789, |
| "learning_rate": 9.8668131378365e-06, |
| "loss": 0.5185, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.16761565836298933, |
| "grad_norm": 0.5421136021614075, |
| "learning_rate": 9.862502097864726e-06, |
| "loss": 0.518, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1686832740213523, |
| "grad_norm": 0.5964006185531616, |
| "learning_rate": 9.858123366280358e-06, |
| "loss": 0.5073, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1697508896797153, |
| "grad_norm": 0.5336704254150391, |
| "learning_rate": 9.853677004040368e-06, |
| "loss": 0.5178, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.1708185053380783, |
| "grad_norm": 0.5406745076179504, |
| "learning_rate": 9.849163073043223e-06, |
| "loss": 0.5046, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1718861209964413, |
| "grad_norm": 0.5165396928787231, |
| "learning_rate": 9.844581636128025e-06, |
| "loss": 0.497, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.17295373665480426, |
| "grad_norm": 0.5873040556907654, |
| "learning_rate": 9.83993275707364e-06, |
| "loss": 0.5206, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.17402135231316726, |
| "grad_norm": 0.5253546237945557, |
| "learning_rate": 9.835216500597797e-06, |
| "loss": 0.515, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.17508896797153026, |
| "grad_norm": 0.5812190175056458, |
| "learning_rate": 9.830432932356207e-06, |
| "loss": 0.5192, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.17615658362989323, |
| "grad_norm": 0.4591503143310547, |
| "learning_rate": 9.82558211894163e-06, |
| "loss": 0.5025, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.17722419928825622, |
| "grad_norm": 0.6000644564628601, |
| "learning_rate": 9.820664127882958e-06, |
| "loss": 0.4817, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.17829181494661922, |
| "grad_norm": 0.5603543519973755, |
| "learning_rate": 9.815679027644273e-06, |
| "loss": 0.4793, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.17935943060498222, |
| "grad_norm": 0.5255252122879028, |
| "learning_rate": 9.8106268876239e-06, |
| "loss": 0.494, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1804270462633452, |
| "grad_norm": 0.6187337040901184, |
| "learning_rate": 9.805507778153423e-06, |
| "loss": 0.5069, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.18149466192170818, |
| "grad_norm": 0.5259950160980225, |
| "learning_rate": 9.800321770496726e-06, |
| "loss": 0.5192, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18256227758007118, |
| "grad_norm": 0.5141558051109314, |
| "learning_rate": 9.79506893684899e-06, |
| "loss": 0.488, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.18362989323843418, |
| "grad_norm": 0.5964564681053162, |
| "learning_rate": 9.789749350335693e-06, |
| "loss": 0.501, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.18469750889679715, |
| "grad_norm": 0.5745047330856323, |
| "learning_rate": 9.784363085011587e-06, |
| "loss": 0.5174, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.18576512455516014, |
| "grad_norm": 0.44916895031929016, |
| "learning_rate": 9.778910215859666e-06, |
| "loss": 0.4964, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.18683274021352314, |
| "grad_norm": 0.5034676790237427, |
| "learning_rate": 9.773390818790136e-06, |
| "loss": 0.4729, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.1879003558718861, |
| "grad_norm": 0.5329164266586304, |
| "learning_rate": 9.767804970639338e-06, |
| "loss": 0.4945, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1889679715302491, |
| "grad_norm": 0.4742647409439087, |
| "learning_rate": 9.762152749168693e-06, |
| "loss": 0.5445, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.1900355871886121, |
| "grad_norm": 0.4610464572906494, |
| "learning_rate": 9.756434233063616e-06, |
| "loss": 0.4924, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1911032028469751, |
| "grad_norm": 0.5255376696586609, |
| "learning_rate": 9.750649501932414e-06, |
| "loss": 0.5241, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.19217081850533807, |
| "grad_norm": 0.5016917586326599, |
| "learning_rate": 9.744798636305189e-06, |
| "loss": 0.5058, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.19323843416370107, |
| "grad_norm": 0.6196140646934509, |
| "learning_rate": 9.738881717632709e-06, |
| "loss": 0.5042, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.19430604982206406, |
| "grad_norm": 0.5428318977355957, |
| "learning_rate": 9.732898828285273e-06, |
| "loss": 0.5129, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.19537366548042703, |
| "grad_norm": 0.5006230473518372, |
| "learning_rate": 9.726850051551575e-06, |
| "loss": 0.4631, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.19644128113879003, |
| "grad_norm": 0.5109187960624695, |
| "learning_rate": 9.72073547163753e-06, |
| "loss": 0.4773, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.19750889679715303, |
| "grad_norm": 0.5989903807640076, |
| "learning_rate": 9.714555173665112e-06, |
| "loss": 0.5078, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.19857651245551602, |
| "grad_norm": 0.5101140737533569, |
| "learning_rate": 9.708309243671167e-06, |
| "loss": 0.5248, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.199644128113879, |
| "grad_norm": 0.4500106871128082, |
| "learning_rate": 9.701997768606209e-06, |
| "loss": 0.4814, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.200711743772242, |
| "grad_norm": 0.5334274172782898, |
| "learning_rate": 9.695620836333219e-06, |
| "loss": 0.4939, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.201779359430605, |
| "grad_norm": 0.5067172050476074, |
| "learning_rate": 9.68917853562642e-06, |
| "loss": 0.5177, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.20284697508896798, |
| "grad_norm": 0.5605948567390442, |
| "learning_rate": 9.68267095617003e-06, |
| "loss": 0.5021, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.20391459074733095, |
| "grad_norm": 0.536536455154419, |
| "learning_rate": 9.676098188557032e-06, |
| "loss": 0.4814, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.20498220640569395, |
| "grad_norm": 0.5245672464370728, |
| "learning_rate": 9.669460324287899e-06, |
| "loss": 0.4853, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.20604982206405695, |
| "grad_norm": 0.6165151596069336, |
| "learning_rate": 9.662757455769317e-06, |
| "loss": 0.4744, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.20711743772241992, |
| "grad_norm": 0.5017523169517517, |
| "learning_rate": 9.655989676312918e-06, |
| "loss": 0.5089, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.20818505338078291, |
| "grad_norm": 0.6126395463943481, |
| "learning_rate": 9.649157080133962e-06, |
| "loss": 0.5089, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2092526690391459, |
| "grad_norm": 0.520261824131012, |
| "learning_rate": 9.642259762350034e-06, |
| "loss": 0.4986, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2103202846975089, |
| "grad_norm": 0.5865549445152283, |
| "learning_rate": 9.635297818979715e-06, |
| "loss": 0.5336, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.21138790035587188, |
| "grad_norm": 0.5497699975967407, |
| "learning_rate": 9.628271346941252e-06, |
| "loss": 0.5195, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.21245551601423487, |
| "grad_norm": 0.5673022866249084, |
| "learning_rate": 9.621180444051206e-06, |
| "loss": 0.5036, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.21352313167259787, |
| "grad_norm": 0.5429431796073914, |
| "learning_rate": 9.614025209023084e-06, |
| "loss": 0.5244, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21459074733096084, |
| "grad_norm": 0.5560723543167114, |
| "learning_rate": 9.606805741465977e-06, |
| "loss": 0.5, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.21565836298932384, |
| "grad_norm": 0.5545246005058289, |
| "learning_rate": 9.59952214188316e-06, |
| "loss": 0.4939, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.21672597864768683, |
| "grad_norm": 0.6207299828529358, |
| "learning_rate": 9.592174511670704e-06, |
| "loss": 0.5191, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.21779359430604983, |
| "grad_norm": 0.5119560360908508, |
| "learning_rate": 9.58476295311606e-06, |
| "loss": 0.4974, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2188612099644128, |
| "grad_norm": 0.5543833374977112, |
| "learning_rate": 9.577287569396632e-06, |
| "loss": 0.4777, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2199288256227758, |
| "grad_norm": 0.5279098153114319, |
| "learning_rate": 9.569748464578343e-06, |
| "loss": 0.5012, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2209964412811388, |
| "grad_norm": 0.5337633490562439, |
| "learning_rate": 9.562145743614193e-06, |
| "loss": 0.4872, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.2220640569395018, |
| "grad_norm": 0.534850001335144, |
| "learning_rate": 9.554479512342785e-06, |
| "loss": 0.4928, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.22313167259786476, |
| "grad_norm": 0.48084014654159546, |
| "learning_rate": 9.54674987748686e-06, |
| "loss": 0.4863, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.22419928825622776, |
| "grad_norm": 0.6603854298591614, |
| "learning_rate": 9.538956946651816e-06, |
| "loss": 0.5256, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.22526690391459075, |
| "grad_norm": 0.5027628540992737, |
| "learning_rate": 9.531100828324191e-06, |
| "loss": 0.5022, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.22633451957295372, |
| "grad_norm": 0.5168050527572632, |
| "learning_rate": 9.52318163187018e-06, |
| "loss": 0.4878, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.22740213523131672, |
| "grad_norm": 0.5213115215301514, |
| "learning_rate": 9.515199467534086e-06, |
| "loss": 0.527, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.22846975088967972, |
| "grad_norm": 0.49242091178894043, |
| "learning_rate": 9.507154446436806e-06, |
| "loss": 0.4916, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.22953736654804271, |
| "grad_norm": 0.5438655018806458, |
| "learning_rate": 9.499046680574267e-06, |
| "loss": 0.4751, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.23060498220640568, |
| "grad_norm": 0.5265784859657288, |
| "learning_rate": 9.490876282815884e-06, |
| "loss": 0.4805, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.23167259786476868, |
| "grad_norm": 0.5454720258712769, |
| "learning_rate": 9.482643366902972e-06, |
| "loss": 0.5312, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.23274021352313168, |
| "grad_norm": 0.6158825159072876, |
| "learning_rate": 9.474348047447177e-06, |
| "loss": 0.5209, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.23380782918149468, |
| "grad_norm": 0.49415621161460876, |
| "learning_rate": 9.465990439928868e-06, |
| "loss": 0.4835, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.23487544483985764, |
| "grad_norm": 0.5915224552154541, |
| "learning_rate": 9.457570660695542e-06, |
| "loss": 0.486, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.23594306049822064, |
| "grad_norm": 0.5715787410736084, |
| "learning_rate": 9.449088826960187e-06, |
| "loss": 0.4949, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.23701067615658364, |
| "grad_norm": 0.6309436559677124, |
| "learning_rate": 9.440545056799677e-06, |
| "loss": 0.5237, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2380782918149466, |
| "grad_norm": 0.6128714084625244, |
| "learning_rate": 9.431939469153096e-06, |
| "loss": 0.4709, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.2391459074733096, |
| "grad_norm": 0.5757558345794678, |
| "learning_rate": 9.423272183820109e-06, |
| "loss": 0.5063, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2402135231316726, |
| "grad_norm": 0.5617343187332153, |
| "learning_rate": 9.41454332145928e-06, |
| "loss": 0.4868, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2412811387900356, |
| "grad_norm": 0.5010789036750793, |
| "learning_rate": 9.405753003586396e-06, |
| "loss": 0.5037, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.24234875444839857, |
| "grad_norm": 0.49613580107688904, |
| "learning_rate": 9.396901352572771e-06, |
| "loss": 0.4892, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.24341637010676156, |
| "grad_norm": 0.6001424789428711, |
| "learning_rate": 9.387988491643558e-06, |
| "loss": 0.5054, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.24448398576512456, |
| "grad_norm": 0.5321950316429138, |
| "learning_rate": 9.379014544876011e-06, |
| "loss": 0.5082, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.24555160142348753, |
| "grad_norm": 0.5619071125984192, |
| "learning_rate": 9.369979637197774e-06, |
| "loss": 0.5071, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.24661921708185053, |
| "grad_norm": 0.4910016357898712, |
| "learning_rate": 9.360883894385137e-06, |
| "loss": 0.4774, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.24768683274021353, |
| "grad_norm": 0.5721420645713806, |
| "learning_rate": 9.351727443061284e-06, |
| "loss": 0.4978, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.24875444839857652, |
| "grad_norm": 0.5795683264732361, |
| "learning_rate": 9.342510410694529e-06, |
| "loss": 0.5085, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.2498220640569395, |
| "grad_norm": 0.5288822054862976, |
| "learning_rate": 9.33323292559655e-06, |
| "loss": 0.4864, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2508896797153025, |
| "grad_norm": 0.5554943680763245, |
| "learning_rate": 9.323895116920591e-06, |
| "loss": 0.4998, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.25195729537366546, |
| "grad_norm": 0.5668061971664429, |
| "learning_rate": 9.31449711465967e-06, |
| "loss": 0.477, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.25302491103202845, |
| "grad_norm": 0.5568402409553528, |
| "learning_rate": 9.305039049644772e-06, |
| "loss": 0.5175, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.25409252669039145, |
| "grad_norm": 0.5518472790718079, |
| "learning_rate": 9.29552105354302e-06, |
| "loss": 0.4745, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.25516014234875445, |
| "grad_norm": 0.6117028594017029, |
| "learning_rate": 9.28594325885585e-06, |
| "loss": 0.5148, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.25622775800711745, |
| "grad_norm": 0.5180391073226929, |
| "learning_rate": 9.27630579891716e-06, |
| "loss": 0.4837, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.25729537366548044, |
| "grad_norm": 0.5480329990386963, |
| "learning_rate": 9.266608807891459e-06, |
| "loss": 0.501, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.25836298932384344, |
| "grad_norm": 0.4984034597873688, |
| "learning_rate": 9.256852420771999e-06, |
| "loss": 0.4946, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2594306049822064, |
| "grad_norm": 0.5542665123939514, |
| "learning_rate": 9.24703677337889e-06, |
| "loss": 0.4815, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.2604982206405694, |
| "grad_norm": 0.4799808859825134, |
| "learning_rate": 9.237162002357214e-06, |
| "loss": 0.4838, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.2615658362989324, |
| "grad_norm": 0.49428969621658325, |
| "learning_rate": 9.227228245175127e-06, |
| "loss": 0.4865, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.26263345195729537, |
| "grad_norm": 0.4643561840057373, |
| "learning_rate": 9.217235640121927e-06, |
| "loss": 0.4722, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.26370106761565837, |
| "grad_norm": 0.48922228813171387, |
| "learning_rate": 9.207184326306155e-06, |
| "loss": 0.5024, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.26476868327402137, |
| "grad_norm": 0.5658605098724365, |
| "learning_rate": 9.197074443653643e-06, |
| "loss": 0.4878, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.26583629893238436, |
| "grad_norm": 0.4450552761554718, |
| "learning_rate": 9.186906132905563e-06, |
| "loss": 0.4621, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.2669039145907473, |
| "grad_norm": 0.5502617955207825, |
| "learning_rate": 9.176679535616477e-06, |
| "loss": 0.4836, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2679715302491103, |
| "grad_norm": 0.4923563599586487, |
| "learning_rate": 9.166394794152363e-06, |
| "loss": 0.5166, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2690391459074733, |
| "grad_norm": 0.5220004916191101, |
| "learning_rate": 9.156052051688633e-06, |
| "loss": 0.464, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2701067615658363, |
| "grad_norm": 0.5749658942222595, |
| "learning_rate": 9.145651452208133e-06, |
| "loss": 0.493, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.2711743772241993, |
| "grad_norm": 0.4788929522037506, |
| "learning_rate": 9.135193140499155e-06, |
| "loss": 0.506, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2722419928825623, |
| "grad_norm": 0.5826008915901184, |
| "learning_rate": 9.124677262153405e-06, |
| "loss": 0.481, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2733096085409253, |
| "grad_norm": 0.5467514395713806, |
| "learning_rate": 9.114103963563986e-06, |
| "loss": 0.4821, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2743772241992883, |
| "grad_norm": 0.5301008224487305, |
| "learning_rate": 9.103473391923354e-06, |
| "loss": 0.4727, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.2754448398576512, |
| "grad_norm": 0.5102054476737976, |
| "learning_rate": 9.092785695221271e-06, |
| "loss": 0.4828, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2765124555160142, |
| "grad_norm": 0.6430336236953735, |
| "learning_rate": 9.08204102224275e-06, |
| "loss": 0.4909, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.2775800711743772, |
| "grad_norm": 0.5367814898490906, |
| "learning_rate": 9.071239522565978e-06, |
| "loss": 0.4805, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2786476868327402, |
| "grad_norm": 0.561622142791748, |
| "learning_rate": 9.06038134656023e-06, |
| "loss": 0.4643, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.2797153024911032, |
| "grad_norm": 0.5907300710678101, |
| "learning_rate": 9.049466645383785e-06, |
| "loss": 0.5223, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2807829181494662, |
| "grad_norm": 0.5875605940818787, |
| "learning_rate": 9.038495570981814e-06, |
| "loss": 0.4932, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2818505338078292, |
| "grad_norm": 0.5611529350280762, |
| "learning_rate": 9.027468276084274e-06, |
| "loss": 0.4901, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.28291814946619215, |
| "grad_norm": 0.6816518902778625, |
| "learning_rate": 9.016384914203771e-06, |
| "loss": 0.5165, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.28398576512455515, |
| "grad_norm": 0.68822181224823, |
| "learning_rate": 9.00524563963343e-06, |
| "loss": 0.4756, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.28505338078291814, |
| "grad_norm": 0.5975049138069153, |
| "learning_rate": 8.99405060744474e-06, |
| "loss": 0.4945, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.28612099644128114, |
| "grad_norm": 0.7125190496444702, |
| "learning_rate": 8.982799973485407e-06, |
| "loss": 0.4962, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.28718861209964414, |
| "grad_norm": 0.6332557201385498, |
| "learning_rate": 8.971493894377174e-06, |
| "loss": 0.4869, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.28825622775800713, |
| "grad_norm": 0.5689089894294739, |
| "learning_rate": 8.960132527513642e-06, |
| "loss": 0.5099, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.28932384341637013, |
| "grad_norm": 0.5326068997383118, |
| "learning_rate": 8.94871603105809e-06, |
| "loss": 0.4912, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.29039145907473307, |
| "grad_norm": 0.5300759077072144, |
| "learning_rate": 8.937244563941248e-06, |
| "loss": 0.5066, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.29145907473309607, |
| "grad_norm": 0.5240178108215332, |
| "learning_rate": 8.925718285859118e-06, |
| "loss": 0.5005, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.29252669039145907, |
| "grad_norm": 0.47631746530532837, |
| "learning_rate": 8.914137357270723e-06, |
| "loss": 0.5008, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.29359430604982206, |
| "grad_norm": 0.49288827180862427, |
| "learning_rate": 8.902501939395887e-06, |
| "loss": 0.4866, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.29466192170818506, |
| "grad_norm": 0.5662288069725037, |
| "learning_rate": 8.890812194212987e-06, |
| "loss": 0.5421, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.29572953736654806, |
| "grad_norm": 0.4656676650047302, |
| "learning_rate": 8.879068284456702e-06, |
| "loss": 0.4997, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.29679715302491105, |
| "grad_norm": 0.5733962059020996, |
| "learning_rate": 8.867270373615735e-06, |
| "loss": 0.501, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.297864768683274, |
| "grad_norm": 0.5234590172767639, |
| "learning_rate": 8.855418625930556e-06, |
| "loss": 0.4848, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.298932384341637, |
| "grad_norm": 0.5430875420570374, |
| "learning_rate": 8.8435132063911e-06, |
| "loss": 0.5157, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5232681035995483, |
| "learning_rate": 8.83155428073448e-06, |
| "loss": 0.4854, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.301067615658363, |
| "grad_norm": 0.5697162747383118, |
| "learning_rate": 8.81954201544267e-06, |
| "loss": 0.4928, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.302135231316726, |
| "grad_norm": 0.462223619222641, |
| "learning_rate": 8.8074765777402e-06, |
| "loss": 0.4856, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.303202846975089, |
| "grad_norm": 0.518064022064209, |
| "learning_rate": 8.79535813559181e-06, |
| "loss": 0.5049, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.304270462633452, |
| "grad_norm": 0.46611088514328003, |
| "learning_rate": 8.783186857700137e-06, |
| "loss": 0.4837, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.305338078291815, |
| "grad_norm": 0.519318699836731, |
| "learning_rate": 8.77096291350334e-06, |
| "loss": 0.4947, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3064056939501779, |
| "grad_norm": 0.46297067403793335, |
| "learning_rate": 8.75868647317276e-06, |
| "loss": 0.4985, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.3074733096085409, |
| "grad_norm": 0.4645700752735138, |
| "learning_rate": 8.746357707610544e-06, |
| "loss": 0.4659, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.3085409252669039, |
| "grad_norm": 0.463349848985672, |
| "learning_rate": 8.733976788447265e-06, |
| "loss": 0.5017, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3096085409252669, |
| "grad_norm": 0.5248959064483643, |
| "learning_rate": 8.721543888039534e-06, |
| "loss": 0.5002, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3106761565836299, |
| "grad_norm": 0.514178991317749, |
| "learning_rate": 8.709059179467598e-06, |
| "loss": 0.4693, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.3117437722419929, |
| "grad_norm": 0.49520182609558105, |
| "learning_rate": 8.69652283653294e-06, |
| "loss": 0.4899, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3128113879003559, |
| "grad_norm": 0.4822703003883362, |
| "learning_rate": 8.683935033755848e-06, |
| "loss": 0.4944, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.31387900355871884, |
| "grad_norm": 0.5043975710868835, |
| "learning_rate": 8.671295946372989e-06, |
| "loss": 0.4725, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.31494661921708184, |
| "grad_norm": 0.4966917932033539, |
| "learning_rate": 8.658605750334972e-06, |
| "loss": 0.4764, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.31601423487544483, |
| "grad_norm": 0.48992806673049927, |
| "learning_rate": 8.6458646223039e-06, |
| "loss": 0.4899, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.31708185053380783, |
| "grad_norm": 0.5476608872413635, |
| "learning_rate": 8.6330727396509e-06, |
| "loss": 0.488, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3181494661921708, |
| "grad_norm": 0.6118818521499634, |
| "learning_rate": 8.620230280453672e-06, |
| "loss": 0.5071, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3192170818505338, |
| "grad_norm": 0.45298174023628235, |
| "learning_rate": 8.607337423493996e-06, |
| "loss": 0.471, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3202846975088968, |
| "grad_norm": 0.5458585023880005, |
| "learning_rate": 8.594394348255239e-06, |
| "loss": 0.5012, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.32135231316725976, |
| "grad_norm": 0.5509236454963684, |
| "learning_rate": 8.581401234919873e-06, |
| "loss": 0.497, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.32241992882562276, |
| "grad_norm": 0.520375669002533, |
| "learning_rate": 8.568358264366958e-06, |
| "loss": 0.4948, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.32348754448398576, |
| "grad_norm": 0.46900251507759094, |
| "learning_rate": 8.555265618169615e-06, |
| "loss": 0.4987, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.32455516014234875, |
| "grad_norm": 0.44442543387413025, |
| "learning_rate": 8.542123478592518e-06, |
| "loss": 0.4898, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.32562277580071175, |
| "grad_norm": 0.43564245104789734, |
| "learning_rate": 8.528932028589337e-06, |
| "loss": 0.4587, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.32669039145907475, |
| "grad_norm": 0.6464988589286804, |
| "learning_rate": 8.515691451800206e-06, |
| "loss": 0.511, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.32775800711743774, |
| "grad_norm": 0.485740602016449, |
| "learning_rate": 8.502401932549154e-06, |
| "loss": 0.4917, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3288256227758007, |
| "grad_norm": 0.5098385214805603, |
| "learning_rate": 8.489063655841552e-06, |
| "loss": 0.4796, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3298932384341637, |
| "grad_norm": 0.5981292724609375, |
| "learning_rate": 8.475676807361526e-06, |
| "loss": 0.5112, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3309608540925267, |
| "grad_norm": 0.499467670917511, |
| "learning_rate": 8.462241573469378e-06, |
| "loss": 0.4924, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3320284697508897, |
| "grad_norm": 0.5141733884811401, |
| "learning_rate": 8.448758141198991e-06, |
| "loss": 0.4856, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3330960854092527, |
| "grad_norm": 0.49083369970321655, |
| "learning_rate": 8.435226698255228e-06, |
| "loss": 0.4927, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.33416370106761567, |
| "grad_norm": 0.5083484053611755, |
| "learning_rate": 8.421647433011306e-06, |
| "loss": 0.4963, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.33523131672597867, |
| "grad_norm": 0.5530070066452026, |
| "learning_rate": 8.408020534506195e-06, |
| "loss": 0.5088, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.33629893238434166, |
| "grad_norm": 0.5097641944885254, |
| "learning_rate": 8.394346192441967e-06, |
| "loss": 0.4999, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3373665480427046, |
| "grad_norm": 0.5912004709243774, |
| "learning_rate": 8.380624597181165e-06, |
| "loss": 0.5071, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3384341637010676, |
| "grad_norm": 0.5386204123497009, |
| "learning_rate": 8.366855939744152e-06, |
| "loss": 0.5018, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3395017793594306, |
| "grad_norm": 0.6744493246078491, |
| "learning_rate": 8.353040411806449e-06, |
| "loss": 0.5036, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3405693950177936, |
| "grad_norm": 0.5442379117012024, |
| "learning_rate": 8.339178205696067e-06, |
| "loss": 0.5192, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3416370106761566, |
| "grad_norm": 0.4711393117904663, |
| "learning_rate": 8.325269514390835e-06, |
| "loss": 0.4805, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3427046263345196, |
| "grad_norm": 0.5519885420799255, |
| "learning_rate": 8.311314531515707e-06, |
| "loss": 0.4606, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.3437722419928826, |
| "grad_norm": 0.48979809880256653, |
| "learning_rate": 8.297313451340064e-06, |
| "loss": 0.4683, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.34483985765124553, |
| "grad_norm": 0.45639723539352417, |
| "learning_rate": 8.283266468775024e-06, |
| "loss": 0.4899, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3459074733096085, |
| "grad_norm": 0.559330940246582, |
| "learning_rate": 8.269173779370712e-06, |
| "loss": 0.4993, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3469750889679715, |
| "grad_norm": 0.5538395047187805, |
| "learning_rate": 8.255035579313545e-06, |
| "loss": 0.4826, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3480427046263345, |
| "grad_norm": 0.5136542320251465, |
| "learning_rate": 8.240852065423507e-06, |
| "loss": 0.4979, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3491103202846975, |
| "grad_norm": 0.5399389863014221, |
| "learning_rate": 8.226623435151389e-06, |
| "loss": 0.4782, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3501779359430605, |
| "grad_norm": 0.535988450050354, |
| "learning_rate": 8.21234988657607e-06, |
| "loss": 0.507, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3512455516014235, |
| "grad_norm": 0.4826440215110779, |
| "learning_rate": 8.198031618401733e-06, |
| "loss": 0.4858, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.35231316725978645, |
| "grad_norm": 0.541845440864563, |
| "learning_rate": 8.183668829955111e-06, |
| "loss": 0.4436, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35338078291814945, |
| "grad_norm": 0.5265049338340759, |
| "learning_rate": 8.169261721182715e-06, |
| "loss": 0.4608, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.35444839857651245, |
| "grad_norm": 0.5588465332984924, |
| "learning_rate": 8.154810492648038e-06, |
| "loss": 0.5055, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.35551601423487544, |
| "grad_norm": 0.4561479091644287, |
| "learning_rate": 8.140315345528778e-06, |
| "loss": 0.4939, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.35658362989323844, |
| "grad_norm": 0.4961983263492584, |
| "learning_rate": 8.125776481614025e-06, |
| "loss": 0.5079, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.35765124555160144, |
| "grad_norm": 0.4646869897842407, |
| "learning_rate": 8.111194103301461e-06, |
| "loss": 0.4641, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.35871886120996443, |
| "grad_norm": 0.5100634694099426, |
| "learning_rate": 8.096568413594533e-06, |
| "loss": 0.5032, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3597864768683274, |
| "grad_norm": 0.5835485458374023, |
| "learning_rate": 8.081899616099638e-06, |
| "loss": 0.4585, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.3608540925266904, |
| "grad_norm": 0.39481019973754883, |
| "learning_rate": 8.067187915023283e-06, |
| "loss": 0.5012, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.36192170818505337, |
| "grad_norm": 0.55184006690979, |
| "learning_rate": 8.052433515169235e-06, |
| "loss": 0.4703, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.36298932384341637, |
| "grad_norm": 0.471427321434021, |
| "learning_rate": 8.037636621935686e-06, |
| "loss": 0.478, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.36405693950177936, |
| "grad_norm": 0.47815489768981934, |
| "learning_rate": 8.022797441312376e-06, |
| "loss": 0.4687, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.36512455516014236, |
| "grad_norm": 0.48546668887138367, |
| "learning_rate": 8.007916179877742e-06, |
| "loss": 0.5058, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.36619217081850536, |
| "grad_norm": 0.48870334029197693, |
| "learning_rate": 7.99299304479603e-06, |
| "loss": 0.4874, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.36725978647686836, |
| "grad_norm": 0.4691154658794403, |
| "learning_rate": 7.978028243814416e-06, |
| "loss": 0.4834, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3683274021352313, |
| "grad_norm": 0.49752214550971985, |
| "learning_rate": 7.96302198526011e-06, |
| "loss": 0.4959, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.3693950177935943, |
| "grad_norm": 0.5052193403244019, |
| "learning_rate": 7.947974478037468e-06, |
| "loss": 0.4817, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3704626334519573, |
| "grad_norm": 0.5181514620780945, |
| "learning_rate": 7.932885931625063e-06, |
| "loss": 0.4578, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3715302491103203, |
| "grad_norm": 0.482715368270874, |
| "learning_rate": 7.917756556072792e-06, |
| "loss": 0.4587, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3725978647686833, |
| "grad_norm": 0.4123336672782898, |
| "learning_rate": 7.902586561998928e-06, |
| "loss": 0.454, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3736654804270463, |
| "grad_norm": 0.5441368222236633, |
| "learning_rate": 7.887376160587214e-06, |
| "loss": 0.4759, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3747330960854093, |
| "grad_norm": 0.48946669697761536, |
| "learning_rate": 7.8721255635839e-06, |
| "loss": 0.4942, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3758007117437722, |
| "grad_norm": 0.4664275348186493, |
| "learning_rate": 7.85683498329481e-06, |
| "loss": 0.4775, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3768683274021352, |
| "grad_norm": 0.5947299599647522, |
| "learning_rate": 7.841504632582378e-06, |
| "loss": 0.4781, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3779359430604982, |
| "grad_norm": 0.43907010555267334, |
| "learning_rate": 7.826134724862687e-06, |
| "loss": 0.4785, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3790035587188612, |
| "grad_norm": 0.49153631925582886, |
| "learning_rate": 7.810725474102504e-06, |
| "loss": 0.4623, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.3800711743772242, |
| "grad_norm": 0.5009203553199768, |
| "learning_rate": 7.795277094816292e-06, |
| "loss": 0.4878, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3811387900355872, |
| "grad_norm": 0.5319011211395264, |
| "learning_rate": 7.779789802063229e-06, |
| "loss": 0.4535, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3822064056939502, |
| "grad_norm": 0.5173964500427246, |
| "learning_rate": 7.764263811444214e-06, |
| "loss": 0.4956, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.38327402135231314, |
| "grad_norm": 0.4726311266422272, |
| "learning_rate": 7.748699339098864e-06, |
| "loss": 0.4771, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.38434163701067614, |
| "grad_norm": 0.5030087232589722, |
| "learning_rate": 7.733096601702508e-06, |
| "loss": 0.4995, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.38540925266903914, |
| "grad_norm": 0.4362412989139557, |
| "learning_rate": 7.717455816463161e-06, |
| "loss": 0.483, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.38647686832740213, |
| "grad_norm": 0.45854416489601135, |
| "learning_rate": 7.70177720111852e-06, |
| "loss": 0.4828, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.38754448398576513, |
| "grad_norm": 0.4099372327327728, |
| "learning_rate": 7.68606097393291e-06, |
| "loss": 0.4601, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.38861209964412813, |
| "grad_norm": 0.5316334962844849, |
| "learning_rate": 7.67030735369426e-06, |
| "loss": 0.5109, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.3896797153024911, |
| "grad_norm": 0.5196130871772766, |
| "learning_rate": 7.654516559711053e-06, |
| "loss": 0.4849, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.39074733096085407, |
| "grad_norm": 0.4411613941192627, |
| "learning_rate": 7.638688811809274e-06, |
| "loss": 0.4807, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.39181494661921706, |
| "grad_norm": 0.508170485496521, |
| "learning_rate": 7.622824330329345e-06, |
| "loss": 0.4694, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.39288256227758006, |
| "grad_norm": 0.42211753129959106, |
| "learning_rate": 7.6069233361230696e-06, |
| "loss": 0.4573, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.39395017793594306, |
| "grad_norm": 0.4601055085659027, |
| "learning_rate": 7.590986050550542e-06, |
| "loss": 0.4752, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.39501779359430605, |
| "grad_norm": 0.4858173131942749, |
| "learning_rate": 7.575012695477076e-06, |
| "loss": 0.4706, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.39608540925266905, |
| "grad_norm": 0.42238175868988037, |
| "learning_rate": 7.55900349327012e-06, |
| "loss": 0.4828, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.39715302491103205, |
| "grad_norm": 0.4975998103618622, |
| "learning_rate": 7.542958666796149e-06, |
| "loss": 0.4884, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.398220640569395, |
| "grad_norm": 0.48582613468170166, |
| "learning_rate": 7.526878439417572e-06, |
| "loss": 0.4961, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.399288256227758, |
| "grad_norm": 0.4576529562473297, |
| "learning_rate": 7.510763034989616e-06, |
| "loss": 0.4311, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.400355871886121, |
| "grad_norm": 0.48702099919319153, |
| "learning_rate": 7.494612677857218e-06, |
| "loss": 0.4955, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.401423487544484, |
| "grad_norm": 0.4474165737628937, |
| "learning_rate": 7.478427592851894e-06, |
| "loss": 0.4615, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.402491103202847, |
| "grad_norm": 0.4888235032558441, |
| "learning_rate": 7.462208005288609e-06, |
| "loss": 0.4711, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.40355871886121, |
| "grad_norm": 0.5036333799362183, |
| "learning_rate": 7.44595414096265e-06, |
| "loss": 0.4885, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.40462633451957297, |
| "grad_norm": 0.4840095639228821, |
| "learning_rate": 7.429666226146468e-06, |
| "loss": 0.4932, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.40569395017793597, |
| "grad_norm": 0.4943961203098297, |
| "learning_rate": 7.413344487586542e-06, |
| "loss": 0.4874, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4067615658362989, |
| "grad_norm": 0.535376250743866, |
| "learning_rate": 7.396989152500215e-06, |
| "loss": 0.4982, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.4078291814946619, |
| "grad_norm": 0.4504840672016144, |
| "learning_rate": 7.380600448572532e-06, |
| "loss": 0.436, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.4088967971530249, |
| "grad_norm": 0.5047032833099365, |
| "learning_rate": 7.364178603953066e-06, |
| "loss": 0.4702, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.4099644128113879, |
| "grad_norm": 0.4717814028263092, |
| "learning_rate": 7.347723847252756e-06, |
| "loss": 0.4783, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4110320284697509, |
| "grad_norm": 0.4659929871559143, |
| "learning_rate": 7.331236407540704e-06, |
| "loss": 0.4612, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4120996441281139, |
| "grad_norm": 0.47856637835502625, |
| "learning_rate": 7.314716514341007e-06, |
| "loss": 0.4766, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4131672597864769, |
| "grad_norm": 0.4641667902469635, |
| "learning_rate": 7.298164397629545e-06, |
| "loss": 0.4708, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.41423487544483983, |
| "grad_norm": 0.5396067500114441, |
| "learning_rate": 7.28158028783079e-06, |
| "loss": 0.4809, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.41530249110320283, |
| "grad_norm": 0.5329163670539856, |
| "learning_rate": 7.2649644158145925e-06, |
| "loss": 0.4829, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.41637010676156583, |
| "grad_norm": 0.450914204120636, |
| "learning_rate": 7.248317012892969e-06, |
| "loss": 0.4527, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4174377224199288, |
| "grad_norm": 0.5790780782699585, |
| "learning_rate": 7.231638310816888e-06, |
| "loss": 0.4893, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.4185053380782918, |
| "grad_norm": 0.5594152212142944, |
| "learning_rate": 7.214928541773027e-06, |
| "loss": 0.4794, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4195729537366548, |
| "grad_norm": 0.46533674001693726, |
| "learning_rate": 7.198187938380565e-06, |
| "loss": 0.466, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4206405693950178, |
| "grad_norm": 0.5824273228645325, |
| "learning_rate": 7.1814167336879195e-06, |
| "loss": 0.4833, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.42170818505338076, |
| "grad_norm": 0.4478416442871094, |
| "learning_rate": 7.164615161169518e-06, |
| "loss": 0.5013, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.42277580071174375, |
| "grad_norm": 0.6205080151557922, |
| "learning_rate": 7.147783454722545e-06, |
| "loss": 0.4905, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.42384341637010675, |
| "grad_norm": 0.4739533066749573, |
| "learning_rate": 7.130921848663678e-06, |
| "loss": 0.4834, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.42491103202846975, |
| "grad_norm": 0.4931207597255707, |
| "learning_rate": 7.1140305777258355e-06, |
| "loss": 0.5142, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.42597864768683275, |
| "grad_norm": 0.5040392279624939, |
| "learning_rate": 7.097109877054906e-06, |
| "loss": 0.4679, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.42704626334519574, |
| "grad_norm": 0.4795084595680237, |
| "learning_rate": 7.080159982206471e-06, |
| "loss": 0.4869, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.42811387900355874, |
| "grad_norm": 0.5009298920631409, |
| "learning_rate": 7.06318112914253e-06, |
| "loss": 0.4786, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.4291814946619217, |
| "grad_norm": 0.5377593040466309, |
| "learning_rate": 7.046173554228213e-06, |
| "loss": 0.4968, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4302491103202847, |
| "grad_norm": 0.4396429657936096, |
| "learning_rate": 7.029137494228491e-06, |
| "loss": 0.5166, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4313167259786477, |
| "grad_norm": 0.4758850336074829, |
| "learning_rate": 7.012073186304885e-06, |
| "loss": 0.4896, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.43238434163701067, |
| "grad_norm": 0.4826003611087799, |
| "learning_rate": 6.994980868012151e-06, |
| "loss": 0.5043, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.43345195729537367, |
| "grad_norm": 0.4461214244365692, |
| "learning_rate": 6.9778607772949894e-06, |
| "loss": 0.4657, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.43451957295373667, |
| "grad_norm": 0.4717596769332886, |
| "learning_rate": 6.9607131524847175e-06, |
| "loss": 0.4889, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.43558718861209966, |
| "grad_norm": 0.41522154211997986, |
| "learning_rate": 6.943538232295965e-06, |
| "loss": 0.4716, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.43665480427046266, |
| "grad_norm": 0.5176120400428772, |
| "learning_rate": 6.926336255823341e-06, |
| "loss": 0.4855, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.4377224199288256, |
| "grad_norm": 0.4708162248134613, |
| "learning_rate": 6.909107462538113e-06, |
| "loss": 0.4839, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4387900355871886, |
| "grad_norm": 0.39738133549690247, |
| "learning_rate": 6.891852092284863e-06, |
| "loss": 0.4911, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4398576512455516, |
| "grad_norm": 0.4732625186443329, |
| "learning_rate": 6.874570385278161e-06, |
| "loss": 0.4938, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4409252669039146, |
| "grad_norm": 0.5151704549789429, |
| "learning_rate": 6.857262582099209e-06, |
| "loss": 0.504, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4419928825622776, |
| "grad_norm": 0.49842819571495056, |
| "learning_rate": 6.839928923692505e-06, |
| "loss": 0.5116, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4430604982206406, |
| "grad_norm": 0.4782036244869232, |
| "learning_rate": 6.822569651362475e-06, |
| "loss": 0.4888, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4441281138790036, |
| "grad_norm": 0.4534831941127777, |
| "learning_rate": 6.805185006770125e-06, |
| "loss": 0.4548, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4451957295373665, |
| "grad_norm": 0.5043431520462036, |
| "learning_rate": 6.787775231929666e-06, |
| "loss": 0.5011, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4462633451957295, |
| "grad_norm": 0.47425511479377747, |
| "learning_rate": 6.7703405692051585e-06, |
| "loss": 0.4861, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4473309608540925, |
| "grad_norm": 0.4268990159034729, |
| "learning_rate": 6.752881261307125e-06, |
| "loss": 0.4773, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4483985765124555, |
| "grad_norm": 0.459902822971344, |
| "learning_rate": 6.735397551289179e-06, |
| "loss": 0.4815, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4494661921708185, |
| "grad_norm": 0.5495928525924683, |
| "learning_rate": 6.717889682544641e-06, |
| "loss": 0.5039, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.4505338078291815, |
| "grad_norm": 0.4123859703540802, |
| "learning_rate": 6.700357898803146e-06, |
| "loss": 0.487, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4516014234875445, |
| "grad_norm": 0.44671013951301575, |
| "learning_rate": 6.6828024441272554e-06, |
| "loss": 0.4913, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.45266903914590745, |
| "grad_norm": 0.4648853838443756, |
| "learning_rate": 6.665223562909058e-06, |
| "loss": 0.4852, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.45373665480427045, |
| "grad_norm": 0.494157075881958, |
| "learning_rate": 6.647621499866762e-06, |
| "loss": 0.4851, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.45480427046263344, |
| "grad_norm": 0.5244255661964417, |
| "learning_rate": 6.629996500041299e-06, |
| "loss": 0.4945, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.45587188612099644, |
| "grad_norm": 0.48558488488197327, |
| "learning_rate": 6.612348808792904e-06, |
| "loss": 0.4829, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.45693950177935944, |
| "grad_norm": 0.46548742055892944, |
| "learning_rate": 6.5946786717977026e-06, |
| "loss": 0.5057, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.45800711743772243, |
| "grad_norm": 0.49716660380363464, |
| "learning_rate": 6.576986335044292e-06, |
| "loss": 0.4682, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.45907473309608543, |
| "grad_norm": 0.427898108959198, |
| "learning_rate": 6.5592720448303174e-06, |
| "loss": 0.4922, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.46014234875444837, |
| "grad_norm": 0.46051132678985596, |
| "learning_rate": 6.541536047759034e-06, |
| "loss": 0.4756, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.46120996441281137, |
| "grad_norm": 0.4844045639038086, |
| "learning_rate": 6.523778590735892e-06, |
| "loss": 0.5199, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.46227758007117437, |
| "grad_norm": 0.4431370496749878, |
| "learning_rate": 6.5059999209650795e-06, |
| "loss": 0.4744, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.46334519572953736, |
| "grad_norm": 0.4615848958492279, |
| "learning_rate": 6.488200285946094e-06, |
| "loss": 0.4459, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.46441281138790036, |
| "grad_norm": 0.5034524202346802, |
| "learning_rate": 6.470379933470296e-06, |
| "loss": 0.4859, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.46548042704626336, |
| "grad_norm": 0.42077118158340454, |
| "learning_rate": 6.452539111617454e-06, |
| "loss": 0.4703, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.46654804270462635, |
| "grad_norm": 0.5283306241035461, |
| "learning_rate": 6.434678068752293e-06, |
| "loss": 0.4733, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.46761565836298935, |
| "grad_norm": 0.48218491673469543, |
| "learning_rate": 6.416797053521039e-06, |
| "loss": 0.4779, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4686832740213523, |
| "grad_norm": 0.4461103677749634, |
| "learning_rate": 6.398896314847954e-06, |
| "loss": 0.4851, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.4697508896797153, |
| "grad_norm": 0.47475722432136536, |
| "learning_rate": 6.380976101931879e-06, |
| "loss": 0.4747, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4708185053380783, |
| "grad_norm": 0.4456132650375366, |
| "learning_rate": 6.363036664242751e-06, |
| "loss": 0.4364, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4718861209964413, |
| "grad_norm": 0.4457268714904785, |
| "learning_rate": 6.345078251518144e-06, |
| "loss": 0.4487, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4729537366548043, |
| "grad_norm": 0.4818935990333557, |
| "learning_rate": 6.327101113759783e-06, |
| "loss": 0.5008, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.4740213523131673, |
| "grad_norm": 0.44397759437561035, |
| "learning_rate": 6.3091055012300675e-06, |
| "loss": 0.4546, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.4750889679715303, |
| "grad_norm": 0.4248422086238861, |
| "learning_rate": 6.291091664448589e-06, |
| "loss": 0.4797, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4761565836298932, |
| "grad_norm": 0.48325735330581665, |
| "learning_rate": 6.273059854188636e-06, |
| "loss": 0.4949, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4772241992882562, |
| "grad_norm": 0.44900190830230713, |
| "learning_rate": 6.25501032147372e-06, |
| "loss": 0.4731, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.4782918149466192, |
| "grad_norm": 0.4795812964439392, |
| "learning_rate": 6.236943317574054e-06, |
| "loss": 0.466, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4793594306049822, |
| "grad_norm": 0.49573490023612976, |
| "learning_rate": 6.218859094003082e-06, |
| "loss": 0.4884, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.4804270462633452, |
| "grad_norm": 0.40788835287094116, |
| "learning_rate": 6.200757902513962e-06, |
| "loss": 0.4572, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4814946619217082, |
| "grad_norm": 0.44407787919044495, |
| "learning_rate": 6.182639995096061e-06, |
| "loss": 0.5016, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.4825622775800712, |
| "grad_norm": 0.43770918250083923, |
| "learning_rate": 6.164505623971458e-06, |
| "loss": 0.4699, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.48362989323843414, |
| "grad_norm": 0.41643866896629333, |
| "learning_rate": 6.146355041591419e-06, |
| "loss": 0.4783, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.48469750889679714, |
| "grad_norm": 0.44599294662475586, |
| "learning_rate": 6.128188500632892e-06, |
| "loss": 0.4764, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.48576512455516013, |
| "grad_norm": 0.4716036319732666, |
| "learning_rate": 6.11000625399499e-06, |
| "loss": 0.4683, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.48683274021352313, |
| "grad_norm": 0.49038171768188477, |
| "learning_rate": 6.091808554795462e-06, |
| "loss": 0.4716, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4879003558718861, |
| "grad_norm": 0.43345335125923157, |
| "learning_rate": 6.073595656367175e-06, |
| "loss": 0.4742, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.4889679715302491, |
| "grad_norm": 0.4429580569267273, |
| "learning_rate": 6.055367812254592e-06, |
| "loss": 0.4951, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.4900355871886121, |
| "grad_norm": 0.510330319404602, |
| "learning_rate": 6.037125276210229e-06, |
| "loss": 0.4771, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.49110320284697506, |
| "grad_norm": 0.42020678520202637, |
| "learning_rate": 6.0188683021911394e-06, |
| "loss": 0.4939, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.49217081850533806, |
| "grad_norm": 0.45770880579948425, |
| "learning_rate": 6.000597144355361e-06, |
| "loss": 0.4931, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.49323843416370106, |
| "grad_norm": 0.3960902690887451, |
| "learning_rate": 5.982312057058392e-06, |
| "loss": 0.4706, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.49430604982206405, |
| "grad_norm": 0.5214159488677979, |
| "learning_rate": 5.964013294849646e-06, |
| "loss": 0.4777, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.49537366548042705, |
| "grad_norm": 0.43403932452201843, |
| "learning_rate": 5.9457011124689025e-06, |
| "loss": 0.4688, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.49644128113879005, |
| "grad_norm": 0.4650368392467499, |
| "learning_rate": 5.927375764842766e-06, |
| "loss": 0.467, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.49750889679715304, |
| "grad_norm": 0.4884885847568512, |
| "learning_rate": 5.9090375070811215e-06, |
| "loss": 0.4872, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.49857651245551604, |
| "grad_norm": 0.4051380157470703, |
| "learning_rate": 5.890686594473571e-06, |
| "loss": 0.4685, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.499644128113879, |
| "grad_norm": 0.443988561630249, |
| "learning_rate": 5.872323282485889e-06, |
| "loss": 0.4981, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.500711743772242, |
| "grad_norm": 0.444369375705719, |
| "learning_rate": 5.853947826756465e-06, |
| "loss": 0.465, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.501779359430605, |
| "grad_norm": 0.42520400881767273, |
| "learning_rate": 5.835560483092743e-06, |
| "loss": 0.484, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5028469750889679, |
| "grad_norm": 0.45270073413848877, |
| "learning_rate": 5.8171615074676615e-06, |
| "loss": 0.4886, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5039145907473309, |
| "grad_norm": 0.47045156359672546, |
| "learning_rate": 5.798751156016085e-06, |
| "loss": 0.4733, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5049822064056939, |
| "grad_norm": 0.49486202001571655, |
| "learning_rate": 5.780329685031247e-06, |
| "loss": 0.4799, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5060498220640569, |
| "grad_norm": 0.495645135641098, |
| "learning_rate": 5.7618973509611755e-06, |
| "loss": 0.483, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5071174377224199, |
| "grad_norm": 0.4936763048171997, |
| "learning_rate": 5.743454410405126e-06, |
| "loss": 0.495, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5081850533807829, |
| "grad_norm": 0.3758457899093628, |
| "learning_rate": 5.72500112011001e-06, |
| "loss": 0.4616, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5092526690391459, |
| "grad_norm": 0.45885196328163147, |
| "learning_rate": 5.706537736966814e-06, |
| "loss": 0.4808, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5103202846975089, |
| "grad_norm": 0.46448948979377747, |
| "learning_rate": 5.688064518007036e-06, |
| "loss": 0.495, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5113879003558719, |
| "grad_norm": 0.43365931510925293, |
| "learning_rate": 5.669581720399094e-06, |
| "loss": 0.4811, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5124555160142349, |
| "grad_norm": 0.4774491786956787, |
| "learning_rate": 5.651089601444752e-06, |
| "loss": 0.4794, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5135231316725979, |
| "grad_norm": 0.48784658312797546, |
| "learning_rate": 5.632588418575542e-06, |
| "loss": 0.4799, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5145907473309609, |
| "grad_norm": 0.49221184849739075, |
| "learning_rate": 5.614078429349172e-06, |
| "loss": 0.4921, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5156583629893239, |
| "grad_norm": 0.45859670639038086, |
| "learning_rate": 5.5955598914459465e-06, |
| "loss": 0.481, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5167259786476869, |
| "grad_norm": 0.5400739908218384, |
| "learning_rate": 5.577033062665179e-06, |
| "loss": 0.4904, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5177935943060499, |
| "grad_norm": 0.4636092483997345, |
| "learning_rate": 5.558498200921597e-06, |
| "loss": 0.4493, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5188612099644128, |
| "grad_norm": 0.4300142228603363, |
| "learning_rate": 5.53995556424176e-06, |
| "loss": 0.4679, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5199288256227758, |
| "grad_norm": 0.4838177561759949, |
| "learning_rate": 5.521405410760462e-06, |
| "loss": 0.4625, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5209964412811388, |
| "grad_norm": 0.47749972343444824, |
| "learning_rate": 5.50284799871714e-06, |
| "loss": 0.4488, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5220640569395018, |
| "grad_norm": 0.4752497673034668, |
| "learning_rate": 5.484283586452279e-06, |
| "loss": 0.5103, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5231316725978647, |
| "grad_norm": 0.463785856962204, |
| "learning_rate": 5.465712432403812e-06, |
| "loss": 0.4624, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5241992882562277, |
| "grad_norm": 0.4545430839061737, |
| "learning_rate": 5.447134795103531e-06, |
| "loss": 0.4719, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5252669039145907, |
| "grad_norm": 0.41529300808906555, |
| "learning_rate": 5.428550933173476e-06, |
| "loss": 0.4708, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5263345195729537, |
| "grad_norm": 0.4432843327522278, |
| "learning_rate": 5.409961105322347e-06, |
| "loss": 0.4675, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5274021352313167, |
| "grad_norm": 0.48375219106674194, |
| "learning_rate": 5.391365570341893e-06, |
| "loss": 0.4847, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5284697508896797, |
| "grad_norm": 0.40725329518318176, |
| "learning_rate": 5.372764587103309e-06, |
| "loss": 0.4477, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5295373665480427, |
| "grad_norm": 0.4455367624759674, |
| "learning_rate": 5.3541584145536475e-06, |
| "loss": 0.4819, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5306049822064057, |
| "grad_norm": 0.45164966583251953, |
| "learning_rate": 5.335547311712188e-06, |
| "loss": 0.4642, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5316725978647687, |
| "grad_norm": 0.49471500515937805, |
| "learning_rate": 5.3169315376668566e-06, |
| "loss": 0.4823, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5327402135231317, |
| "grad_norm": 0.4399643838405609, |
| "learning_rate": 5.2983113515706045e-06, |
| "loss": 0.4819, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5338078291814946, |
| "grad_norm": 0.5442211627960205, |
| "learning_rate": 5.279687012637798e-06, |
| "loss": 0.4677, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5348754448398576, |
| "grad_norm": 0.45155906677246094, |
| "learning_rate": 5.2610587801406256e-06, |
| "loss": 0.4878, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5359430604982206, |
| "grad_norm": 0.5040996670722961, |
| "learning_rate": 5.242426913405471e-06, |
| "loss": 0.482, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5370106761565836, |
| "grad_norm": 0.4809477925300598, |
| "learning_rate": 5.223791671809314e-06, |
| "loss": 0.4697, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5380782918149466, |
| "grad_norm": 0.5347772240638733, |
| "learning_rate": 5.2051533147761155e-06, |
| "loss": 0.4574, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5391459074733096, |
| "grad_norm": 0.48782646656036377, |
| "learning_rate": 5.186512101773206e-06, |
| "loss": 0.4747, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5402135231316726, |
| "grad_norm": 0.46589890122413635, |
| "learning_rate": 5.167868292307679e-06, |
| "loss": 0.4814, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5412811387900356, |
| "grad_norm": 0.5483913421630859, |
| "learning_rate": 5.149222145922765e-06, |
| "loss": 0.4807, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5423487544483986, |
| "grad_norm": 0.4894302487373352, |
| "learning_rate": 5.130573922194236e-06, |
| "loss": 0.4881, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5434163701067616, |
| "grad_norm": 0.41597887873649597, |
| "learning_rate": 5.111923880726779e-06, |
| "loss": 0.4766, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5444839857651246, |
| "grad_norm": 0.5004387497901917, |
| "learning_rate": 5.093272281150383e-06, |
| "loss": 0.4656, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5455516014234876, |
| "grad_norm": 0.4926692843437195, |
| "learning_rate": 5.074619383116733e-06, |
| "loss": 0.4579, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5466192170818506, |
| "grad_norm": 0.4545387029647827, |
| "learning_rate": 5.05596544629559e-06, |
| "loss": 0.4748, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5476868327402136, |
| "grad_norm": 0.4115523397922516, |
| "learning_rate": 5.03731073037117e-06, |
| "loss": 0.4856, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5487544483985766, |
| "grad_norm": 0.4478975236415863, |
| "learning_rate": 5.018655495038542e-06, |
| "loss": 0.4728, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5498220640569395, |
| "grad_norm": 0.4211094081401825, |
| "learning_rate": 5e-06, |
| "loss": 0.4614, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5508896797153024, |
| "grad_norm": 0.45692694187164307, |
| "learning_rate": 4.981344504961459e-06, |
| "loss": 0.4711, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5519572953736654, |
| "grad_norm": 0.41460829973220825, |
| "learning_rate": 4.962689269628832e-06, |
| "loss": 0.467, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5530249110320284, |
| "grad_norm": 0.4291308522224426, |
| "learning_rate": 4.944034553704412e-06, |
| "loss": 0.4901, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5540925266903914, |
| "grad_norm": 0.4243936538696289, |
| "learning_rate": 4.9253806168832685e-06, |
| "loss": 0.4966, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5551601423487544, |
| "grad_norm": 0.42817196249961853, |
| "learning_rate": 4.906727718849619e-06, |
| "loss": 0.46, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5562277580071174, |
| "grad_norm": 0.419493168592453, |
| "learning_rate": 4.888076119273223e-06, |
| "loss": 0.4788, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5572953736654804, |
| "grad_norm": 0.4330461919307709, |
| "learning_rate": 4.8694260778057655e-06, |
| "loss": 0.4875, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5583629893238434, |
| "grad_norm": 0.44664815068244934, |
| "learning_rate": 4.850777854077235e-06, |
| "loss": 0.4645, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5594306049822064, |
| "grad_norm": 0.4051723778247833, |
| "learning_rate": 4.832131707692322e-06, |
| "loss": 0.4596, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5604982206405694, |
| "grad_norm": 0.4329952895641327, |
| "learning_rate": 4.813487898226794e-06, |
| "loss": 0.4652, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5615658362989324, |
| "grad_norm": 0.4694920480251312, |
| "learning_rate": 4.7948466852238844e-06, |
| "loss": 0.4751, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5626334519572954, |
| "grad_norm": 0.42388251423835754, |
| "learning_rate": 4.7762083281906864e-06, |
| "loss": 0.457, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5637010676156584, |
| "grad_norm": 0.4349200129508972, |
| "learning_rate": 4.757573086594529e-06, |
| "loss": 0.4655, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5647686832740213, |
| "grad_norm": 0.42134931683540344, |
| "learning_rate": 4.738941219859375e-06, |
| "loss": 0.4806, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5658362989323843, |
| "grad_norm": 0.4428733289241791, |
| "learning_rate": 4.720312987362204e-06, |
| "loss": 0.4581, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5669039145907473, |
| "grad_norm": 0.4444166123867035, |
| "learning_rate": 4.701688648429399e-06, |
| "loss": 0.4592, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5679715302491103, |
| "grad_norm": 0.37343311309814453, |
| "learning_rate": 4.683068462333144e-06, |
| "loss": 0.4742, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5690391459074733, |
| "grad_norm": 0.4210268557071686, |
| "learning_rate": 4.6644526882878145e-06, |
| "loss": 0.4853, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5701067615658363, |
| "grad_norm": 0.46207836270332336, |
| "learning_rate": 4.645841585446356e-06, |
| "loss": 0.4698, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5711743772241993, |
| "grad_norm": 0.469249963760376, |
| "learning_rate": 4.6272354128966924e-06, |
| "loss": 0.4578, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5722419928825623, |
| "grad_norm": 0.45976918935775757, |
| "learning_rate": 4.6086344296581095e-06, |
| "loss": 0.4904, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5733096085409253, |
| "grad_norm": 0.4256848096847534, |
| "learning_rate": 4.590038894677653e-06, |
| "loss": 0.4615, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5743772241992883, |
| "grad_norm": 0.4688819348812103, |
| "learning_rate": 4.5714490668265245e-06, |
| "loss": 0.4806, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5754448398576513, |
| "grad_norm": 0.44800180196762085, |
| "learning_rate": 4.55286520489647e-06, |
| "loss": 0.4964, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5765124555160143, |
| "grad_norm": 0.42129072546958923, |
| "learning_rate": 4.534287567596189e-06, |
| "loss": 0.4693, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5775800711743773, |
| "grad_norm": 0.41702598333358765, |
| "learning_rate": 4.515716413547722e-06, |
| "loss": 0.4818, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5786476868327403, |
| "grad_norm": 0.46011829376220703, |
| "learning_rate": 4.497152001282861e-06, |
| "loss": 0.4997, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5797153024911033, |
| "grad_norm": 0.4624707102775574, |
| "learning_rate": 4.478594589239539e-06, |
| "loss": 0.5302, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5807829181494661, |
| "grad_norm": 0.4008091688156128, |
| "learning_rate": 4.460044435758241e-06, |
| "loss": 0.4739, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5818505338078291, |
| "grad_norm": 0.41396379470825195, |
| "learning_rate": 4.441501799078405e-06, |
| "loss": 0.4919, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5829181494661921, |
| "grad_norm": 0.4313451945781708, |
| "learning_rate": 4.4229669373348225e-06, |
| "loss": 0.4872, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5839857651245551, |
| "grad_norm": 0.41983485221862793, |
| "learning_rate": 4.404440108554055e-06, |
| "loss": 0.492, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5850533807829181, |
| "grad_norm": 0.4576341211795807, |
| "learning_rate": 4.3859215706508295e-06, |
| "loss": 0.4676, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5861209964412811, |
| "grad_norm": 0.4995148181915283, |
| "learning_rate": 4.3674115814244595e-06, |
| "loss": 0.5213, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.5871886120996441, |
| "grad_norm": 0.45019280910491943, |
| "learning_rate": 4.348910398555249e-06, |
| "loss": 0.4792, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5882562277580071, |
| "grad_norm": 0.4817792475223541, |
| "learning_rate": 4.330418279600907e-06, |
| "loss": 0.4968, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5893238434163701, |
| "grad_norm": 0.4851461946964264, |
| "learning_rate": 4.311935481992965e-06, |
| "loss": 0.4855, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5903914590747331, |
| "grad_norm": 0.3923802673816681, |
| "learning_rate": 4.2934622630331855e-06, |
| "loss": 0.4775, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5914590747330961, |
| "grad_norm": 0.46750932931900024, |
| "learning_rate": 4.274998879889991e-06, |
| "loss": 0.4677, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5925266903914591, |
| "grad_norm": 0.456074982881546, |
| "learning_rate": 4.2565455895948745e-06, |
| "loss": 0.4664, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5935943060498221, |
| "grad_norm": 0.5120862126350403, |
| "learning_rate": 4.238102649038825e-06, |
| "loss": 0.4831, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5946619217081851, |
| "grad_norm": 0.3992975950241089, |
| "learning_rate": 4.219670314968754e-06, |
| "loss": 0.4801, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.595729537366548, |
| "grad_norm": 0.4378175437450409, |
| "learning_rate": 4.2012488439839185e-06, |
| "loss": 0.4724, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.596797153024911, |
| "grad_norm": 0.4528578221797943, |
| "learning_rate": 4.182838492532342e-06, |
| "loss": 0.449, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.597864768683274, |
| "grad_norm": 0.46270951628685, |
| "learning_rate": 4.164439516907258e-06, |
| "loss": 0.483, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.598932384341637, |
| "grad_norm": 0.4630880057811737, |
| "learning_rate": 4.146052173243538e-06, |
| "loss": 0.4694, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.44219690561294556, |
| "learning_rate": 4.127676717514114e-06, |
| "loss": 0.5014, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.601067615658363, |
| "grad_norm": 0.41033241152763367, |
| "learning_rate": 4.109313405526433e-06, |
| "loss": 0.4957, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.602135231316726, |
| "grad_norm": 0.4031945765018463, |
| "learning_rate": 4.090962492918881e-06, |
| "loss": 0.4555, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.603202846975089, |
| "grad_norm": 0.5318504571914673, |
| "learning_rate": 4.072624235157234e-06, |
| "loss": 0.4834, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.604270462633452, |
| "grad_norm": 0.39881038665771484, |
| "learning_rate": 4.054298887531099e-06, |
| "loss": 0.4665, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.605338078291815, |
| "grad_norm": 0.4450599253177643, |
| "learning_rate": 4.035986705150355e-06, |
| "loss": 0.4724, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.606405693950178, |
| "grad_norm": 0.43651196360588074, |
| "learning_rate": 4.017687942941609e-06, |
| "loss": 0.5019, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.607473309608541, |
| "grad_norm": 0.41378405690193176, |
| "learning_rate": 3.9994028556446404e-06, |
| "loss": 0.4942, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.608540925266904, |
| "grad_norm": 0.45917779207229614, |
| "learning_rate": 3.981131697808862e-06, |
| "loss": 0.476, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.609608540925267, |
| "grad_norm": 0.3615592420101166, |
| "learning_rate": 3.9628747237897715e-06, |
| "loss": 0.4678, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.61067615658363, |
| "grad_norm": 0.4037294387817383, |
| "learning_rate": 3.94463218774541e-06, |
| "loss": 0.4717, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6117437722419928, |
| "grad_norm": 0.4695199429988861, |
| "learning_rate": 3.926404343632826e-06, |
| "loss": 0.4758, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6128113879003558, |
| "grad_norm": 0.45771774649620056, |
| "learning_rate": 3.90819144520454e-06, |
| "loss": 0.4931, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6138790035587188, |
| "grad_norm": 0.36958596110343933, |
| "learning_rate": 3.889993746005011e-06, |
| "loss": 0.4487, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6149466192170818, |
| "grad_norm": 0.4408724904060364, |
| "learning_rate": 3.8718114993671086e-06, |
| "loss": 0.4563, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6160142348754448, |
| "grad_norm": 0.4657142162322998, |
| "learning_rate": 3.853644958408582e-06, |
| "loss": 0.4743, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6170818505338078, |
| "grad_norm": 0.42271625995635986, |
| "learning_rate": 3.835494376028544e-06, |
| "loss": 0.494, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6181494661921708, |
| "grad_norm": 0.4289335608482361, |
| "learning_rate": 3.817360004903939e-06, |
| "loss": 0.4617, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6192170818505338, |
| "grad_norm": 0.4913620352745056, |
| "learning_rate": 3.799242097486038e-06, |
| "loss": 0.4606, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6202846975088968, |
| "grad_norm": 0.4116392731666565, |
| "learning_rate": 3.7811409059969177e-06, |
| "loss": 0.4623, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6213523131672598, |
| "grad_norm": 0.4178345799446106, |
| "learning_rate": 3.7630566824259456e-06, |
| "loss": 0.5072, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6224199288256228, |
| "grad_norm": 0.4855571687221527, |
| "learning_rate": 3.7449896785262817e-06, |
| "loss": 0.4737, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6234875444839858, |
| "grad_norm": 0.46843597292900085, |
| "learning_rate": 3.726940145811363e-06, |
| "loss": 0.4703, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6245551601423488, |
| "grad_norm": 0.38505470752716064, |
| "learning_rate": 3.708908335551412e-06, |
| "loss": 0.4872, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6256227758007118, |
| "grad_norm": 0.42972132563591003, |
| "learning_rate": 3.6908944987699346e-06, |
| "loss": 0.4792, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6266903914590747, |
| "grad_norm": 0.5449157357215881, |
| "learning_rate": 3.67289888624022e-06, |
| "loss": 0.5233, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6277580071174377, |
| "grad_norm": 0.4144046902656555, |
| "learning_rate": 3.6549217484818576e-06, |
| "loss": 0.4798, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6288256227758007, |
| "grad_norm": 0.42087435722351074, |
| "learning_rate": 3.6369633357572514e-06, |
| "loss": 0.4573, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6298932384341637, |
| "grad_norm": 0.42363405227661133, |
| "learning_rate": 3.6190238980681235e-06, |
| "loss": 0.4652, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6309608540925267, |
| "grad_norm": 0.4286684989929199, |
| "learning_rate": 3.6011036851520465e-06, |
| "loss": 0.4637, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6320284697508897, |
| "grad_norm": 0.4206468462944031, |
| "learning_rate": 3.583202946478963e-06, |
| "loss": 0.4761, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6330960854092527, |
| "grad_norm": 0.3857564926147461, |
| "learning_rate": 3.5653219312477085e-06, |
| "loss": 0.4771, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6341637010676157, |
| "grad_norm": 0.42064541578292847, |
| "learning_rate": 3.5474608883825475e-06, |
| "loss": 0.499, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6352313167259787, |
| "grad_norm": 0.35660263895988464, |
| "learning_rate": 3.529620066529704e-06, |
| "loss": 0.4626, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6362989323843417, |
| "grad_norm": 0.4862718880176544, |
| "learning_rate": 3.5117997140539073e-06, |
| "loss": 0.5183, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6373665480427047, |
| "grad_norm": 0.44003114104270935, |
| "learning_rate": 3.4940000790349226e-06, |
| "loss": 0.4649, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6384341637010676, |
| "grad_norm": 0.38733163475990295, |
| "learning_rate": 3.47622140926411e-06, |
| "loss": 0.4378, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6395017793594306, |
| "grad_norm": 0.41804930567741394, |
| "learning_rate": 3.458463952240967e-06, |
| "loss": 0.4664, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6405693950177936, |
| "grad_norm": 0.44392499327659607, |
| "learning_rate": 3.4407279551696846e-06, |
| "loss": 0.4655, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6416370106761566, |
| "grad_norm": 0.39837706089019775, |
| "learning_rate": 3.4230136649557087e-06, |
| "loss": 0.4701, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6427046263345195, |
| "grad_norm": 0.41694167256355286, |
| "learning_rate": 3.4053213282022983e-06, |
| "loss": 0.4716, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6437722419928825, |
| "grad_norm": 0.41177675127983093, |
| "learning_rate": 3.387651191207097e-06, |
| "loss": 0.4802, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6448398576512455, |
| "grad_norm": 0.4301503300666809, |
| "learning_rate": 3.370003499958703e-06, |
| "loss": 0.4546, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6459074733096085, |
| "grad_norm": 0.37474584579467773, |
| "learning_rate": 3.352378500133239e-06, |
| "loss": 0.477, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6469750889679715, |
| "grad_norm": 0.4323018491268158, |
| "learning_rate": 3.334776437090944e-06, |
| "loss": 0.4656, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6480427046263345, |
| "grad_norm": 0.41514450311660767, |
| "learning_rate": 3.317197555872745e-06, |
| "loss": 0.4198, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6491103202846975, |
| "grad_norm": 0.3888489007949829, |
| "learning_rate": 3.2996421011968546e-06, |
| "loss": 0.4535, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6501779359430605, |
| "grad_norm": 0.4327705204486847, |
| "learning_rate": 3.28211031745536e-06, |
| "loss": 0.4983, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6512455516014235, |
| "grad_norm": 0.4344913065433502, |
| "learning_rate": 3.264602448710822e-06, |
| "loss": 0.4947, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6523131672597865, |
| "grad_norm": 0.3991352915763855, |
| "learning_rate": 3.2471187386928766e-06, |
| "loss": 0.4805, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6533807829181495, |
| "grad_norm": 0.39237743616104126, |
| "learning_rate": 3.2296594307948428e-06, |
| "loss": 0.4891, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6544483985765125, |
| "grad_norm": 0.40971338748931885, |
| "learning_rate": 3.212224768070334e-06, |
| "loss": 0.453, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6555160142348755, |
| "grad_norm": 0.41694802045822144, |
| "learning_rate": 3.194814993229878e-06, |
| "loss": 0.4718, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6565836298932385, |
| "grad_norm": 0.429420530796051, |
| "learning_rate": 3.177430348637527e-06, |
| "loss": 0.4929, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6576512455516014, |
| "grad_norm": 0.39137008786201477, |
| "learning_rate": 3.1600710763074972e-06, |
| "loss": 0.4672, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6587188612099644, |
| "grad_norm": 0.40734052658081055, |
| "learning_rate": 3.142737417900793e-06, |
| "loss": 0.4999, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6597864768683274, |
| "grad_norm": 0.36672934889793396, |
| "learning_rate": 3.125429614721842e-06, |
| "loss": 0.466, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6608540925266904, |
| "grad_norm": 0.36196407675743103, |
| "learning_rate": 3.1081479077151387e-06, |
| "loss": 0.4425, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6619217081850534, |
| "grad_norm": 0.3950616717338562, |
| "learning_rate": 3.090892537461889e-06, |
| "loss": 0.4726, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6629893238434164, |
| "grad_norm": 0.42815542221069336, |
| "learning_rate": 3.0736637441766594e-06, |
| "loss": 0.4753, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6640569395017794, |
| "grad_norm": 0.3979141116142273, |
| "learning_rate": 3.056461767704037e-06, |
| "loss": 0.4799, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6651245551601424, |
| "grad_norm": 0.37764808535575867, |
| "learning_rate": 3.039286847515284e-06, |
| "loss": 0.4752, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6661921708185053, |
| "grad_norm": 0.38070034980773926, |
| "learning_rate": 3.0221392227050126e-06, |
| "loss": 0.4782, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6672597864768683, |
| "grad_norm": 0.40708160400390625, |
| "learning_rate": 3.00501913198785e-06, |
| "loss": 0.4533, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6683274021352313, |
| "grad_norm": 0.41723665595054626, |
| "learning_rate": 2.9879268136951163e-06, |
| "loss": 0.4827, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6693950177935943, |
| "grad_norm": 0.41290441155433655, |
| "learning_rate": 2.970862505771509e-06, |
| "loss": 0.4443, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6704626334519573, |
| "grad_norm": 0.4340071678161621, |
| "learning_rate": 2.953826445771788e-06, |
| "loss": 0.48, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6715302491103203, |
| "grad_norm": 0.35983264446258545, |
| "learning_rate": 2.9368188708574706e-06, |
| "loss": 0.4415, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6725978647686833, |
| "grad_norm": 0.3808664381504059, |
| "learning_rate": 2.9198400177935303e-06, |
| "loss": 0.4683, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6736654804270462, |
| "grad_norm": 0.3902174234390259, |
| "learning_rate": 2.902890122945096e-06, |
| "loss": 0.4984, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6747330960854092, |
| "grad_norm": 0.38310402631759644, |
| "learning_rate": 2.8859694222741653e-06, |
| "loss": 0.5024, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6758007117437722, |
| "grad_norm": 0.407287061214447, |
| "learning_rate": 2.869078151336323e-06, |
| "loss": 0.474, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6768683274021352, |
| "grad_norm": 0.38502153754234314, |
| "learning_rate": 2.852216545277456e-06, |
| "loss": 0.4786, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6779359430604982, |
| "grad_norm": 0.3770993649959564, |
| "learning_rate": 2.835384838830481e-06, |
| "loss": 0.4876, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.6790035587188612, |
| "grad_norm": 0.3542179763317108, |
| "learning_rate": 2.8185832663120817e-06, |
| "loss": 0.4748, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6800711743772242, |
| "grad_norm": 0.3789761960506439, |
| "learning_rate": 2.8018120616194356e-06, |
| "loss": 0.4936, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6811387900355872, |
| "grad_norm": 0.3685765564441681, |
| "learning_rate": 2.785071458226972e-06, |
| "loss": 0.4749, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6822064056939502, |
| "grad_norm": 0.35032930970191956, |
| "learning_rate": 2.768361689183113e-06, |
| "loss": 0.4439, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6832740213523132, |
| "grad_norm": 0.3704805374145508, |
| "learning_rate": 2.7516829871070295e-06, |
| "loss": 0.4622, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6843416370106762, |
| "grad_norm": 0.3895471692085266, |
| "learning_rate": 2.735035584185409e-06, |
| "loss": 0.4826, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6854092526690392, |
| "grad_norm": 0.38154760003089905, |
| "learning_rate": 2.718419712169213e-06, |
| "loss": 0.4544, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6864768683274022, |
| "grad_norm": 0.3842725157737732, |
| "learning_rate": 2.7018356023704574e-06, |
| "loss": 0.4961, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6875444839857652, |
| "grad_norm": 0.3748033940792084, |
| "learning_rate": 2.685283485658995e-06, |
| "loss": 0.4642, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6886120996441281, |
| "grad_norm": 0.36127620935440063, |
| "learning_rate": 2.668763592459297e-06, |
| "loss": 0.4591, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.6896797153024911, |
| "grad_norm": 0.3470078110694885, |
| "learning_rate": 2.6522761527472464e-06, |
| "loss": 0.4559, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6907473309608541, |
| "grad_norm": 0.38986238837242126, |
| "learning_rate": 2.6358213960469357e-06, |
| "loss": 0.4881, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.691814946619217, |
| "grad_norm": 0.3686830699443817, |
| "learning_rate": 2.6193995514274705e-06, |
| "loss": 0.4754, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.69288256227758, |
| "grad_norm": 0.36502450704574585, |
| "learning_rate": 2.6030108474997854e-06, |
| "loss": 0.4739, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.693950177935943, |
| "grad_norm": 0.3817600905895233, |
| "learning_rate": 2.586655512413458e-06, |
| "loss": 0.4406, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.695017793594306, |
| "grad_norm": 0.3689401149749756, |
| "learning_rate": 2.5703337738535324e-06, |
| "loss": 0.4461, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.696085409252669, |
| "grad_norm": 0.39199331402778625, |
| "learning_rate": 2.554045859037353e-06, |
| "loss": 0.4631, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.697153024911032, |
| "grad_norm": 0.35440245270729065, |
| "learning_rate": 2.5377919947113917e-06, |
| "loss": 0.4523, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.698220640569395, |
| "grad_norm": 0.3502133786678314, |
| "learning_rate": 2.521572407148107e-06, |
| "loss": 0.4592, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.699288256227758, |
| "grad_norm": 0.37463781237602234, |
| "learning_rate": 2.505387322142782e-06, |
| "loss": 0.4719, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.700355871886121, |
| "grad_norm": 0.391875296831131, |
| "learning_rate": 2.4892369650103837e-06, |
| "loss": 0.4656, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.701423487544484, |
| "grad_norm": 0.3908476233482361, |
| "learning_rate": 2.4731215605824304e-06, |
| "loss": 0.4962, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.702491103202847, |
| "grad_norm": 0.3876582086086273, |
| "learning_rate": 2.4570413332038523e-06, |
| "loss": 0.4776, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.70355871886121, |
| "grad_norm": 0.36631351709365845, |
| "learning_rate": 2.440996506729881e-06, |
| "loss": 0.4603, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7046263345195729, |
| "grad_norm": 0.40206146240234375, |
| "learning_rate": 2.4249873045229244e-06, |
| "loss": 0.456, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7056939501779359, |
| "grad_norm": 0.3983338475227356, |
| "learning_rate": 2.4090139494494596e-06, |
| "loss": 0.4664, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7067615658362989, |
| "grad_norm": 0.37411966919898987, |
| "learning_rate": 2.3930766638769325e-06, |
| "loss": 0.4738, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7078291814946619, |
| "grad_norm": 0.42127808928489685, |
| "learning_rate": 2.3771756696706553e-06, |
| "loss": 0.4782, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7088967971530249, |
| "grad_norm": 0.4144476354122162, |
| "learning_rate": 2.3613111881907273e-06, |
| "loss": 0.4737, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7099644128113879, |
| "grad_norm": 0.40858951210975647, |
| "learning_rate": 2.345483440288947e-06, |
| "loss": 0.4516, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7110320284697509, |
| "grad_norm": 0.3829437792301178, |
| "learning_rate": 2.3296926463057396e-06, |
| "loss": 0.4509, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7120996441281139, |
| "grad_norm": 0.37926656007766724, |
| "learning_rate": 2.313939026067091e-06, |
| "loss": 0.4628, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7131672597864769, |
| "grad_norm": 0.36293280124664307, |
| "learning_rate": 2.29822279888148e-06, |
| "loss": 0.4454, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7142348754448399, |
| "grad_norm": 0.40881264209747314, |
| "learning_rate": 2.2825441835368377e-06, |
| "loss": 0.4754, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7153024911032029, |
| "grad_norm": 0.3915267884731293, |
| "learning_rate": 2.2669033982974946e-06, |
| "loss": 0.4869, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7163701067615659, |
| "grad_norm": 0.34906652569770813, |
| "learning_rate": 2.2513006609011365e-06, |
| "loss": 0.4686, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7174377224199289, |
| "grad_norm": 0.4089764952659607, |
| "learning_rate": 2.235736188555787e-06, |
| "loss": 0.4766, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7185053380782919, |
| "grad_norm": 0.35783180594444275, |
| "learning_rate": 2.2202101979367735e-06, |
| "loss": 0.4816, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7195729537366548, |
| "grad_norm": 0.3813284635543823, |
| "learning_rate": 2.2047229051837107e-06, |
| "loss": 0.5012, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7206405693950177, |
| "grad_norm": 0.34306350350379944, |
| "learning_rate": 2.189274525897498e-06, |
| "loss": 0.5031, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7217081850533807, |
| "grad_norm": 0.3745080530643463, |
| "learning_rate": 2.173865275137314e-06, |
| "loss": 0.4705, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7227758007117437, |
| "grad_norm": 0.3763768672943115, |
| "learning_rate": 2.158495367417625e-06, |
| "loss": 0.4748, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7238434163701067, |
| "grad_norm": 0.3732641935348511, |
| "learning_rate": 2.143165016705192e-06, |
| "loss": 0.485, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7249110320284697, |
| "grad_norm": 0.41331830620765686, |
| "learning_rate": 2.1278744364161007e-06, |
| "loss": 0.5154, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7259786476868327, |
| "grad_norm": 0.3479762077331543, |
| "learning_rate": 2.1126238394127868e-06, |
| "loss": 0.4668, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7270462633451957, |
| "grad_norm": 0.3638448417186737, |
| "learning_rate": 2.0974134380010726e-06, |
| "loss": 0.479, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7281138790035587, |
| "grad_norm": 0.3845721185207367, |
| "learning_rate": 2.082243443927212e-06, |
| "loss": 0.4757, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7291814946619217, |
| "grad_norm": 0.3670172095298767, |
| "learning_rate": 2.0671140683749386e-06, |
| "loss": 0.4841, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7302491103202847, |
| "grad_norm": 0.4244895279407501, |
| "learning_rate": 2.052025521962534e-06, |
| "loss": 0.4964, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7313167259786477, |
| "grad_norm": 0.37311217188835144, |
| "learning_rate": 2.03697801473989e-06, |
| "loss": 0.4675, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7323843416370107, |
| "grad_norm": 0.40401390194892883, |
| "learning_rate": 2.0219717561855857e-06, |
| "loss": 0.4787, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7334519572953737, |
| "grad_norm": 0.4272782802581787, |
| "learning_rate": 2.0070069552039722e-06, |
| "loss": 0.4704, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7345195729537367, |
| "grad_norm": 0.41608813405036926, |
| "learning_rate": 1.992083820122259e-06, |
| "loss": 0.4982, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7355871886120996, |
| "grad_norm": 0.38170090317726135, |
| "learning_rate": 1.9772025586876252e-06, |
| "loss": 0.468, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7366548042704626, |
| "grad_norm": 0.40004512667655945, |
| "learning_rate": 1.962363378064316e-06, |
| "loss": 0.4606, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7377224199288256, |
| "grad_norm": 0.3620181679725647, |
| "learning_rate": 1.947566484830765e-06, |
| "loss": 0.4608, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7387900355871886, |
| "grad_norm": 0.378568559885025, |
| "learning_rate": 1.9328120849767198e-06, |
| "loss": 0.4974, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7398576512455516, |
| "grad_norm": 0.4036838412284851, |
| "learning_rate": 1.9181003839003627e-06, |
| "loss": 0.4859, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7409252669039146, |
| "grad_norm": 0.3742115795612335, |
| "learning_rate": 1.9034315864054682e-06, |
| "loss": 0.445, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7419928825622776, |
| "grad_norm": 0.45754826068878174, |
| "learning_rate": 1.8888058966985407e-06, |
| "loss": 0.4882, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7430604982206406, |
| "grad_norm": 0.3731890320777893, |
| "learning_rate": 1.8742235183859747e-06, |
| "loss": 0.4656, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7441281138790036, |
| "grad_norm": 0.35599714517593384, |
| "learning_rate": 1.8596846544712233e-06, |
| "loss": 0.4508, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7451957295373666, |
| "grad_norm": 0.3616451621055603, |
| "learning_rate": 1.8451895073519643e-06, |
| "loss": 0.4636, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7462633451957296, |
| "grad_norm": 0.36844977736473083, |
| "learning_rate": 1.8307382788172877e-06, |
| "loss": 0.4858, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7473309608540926, |
| "grad_norm": 0.37101319432258606, |
| "learning_rate": 1.8163311700448899e-06, |
| "loss": 0.4542, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7483985765124556, |
| "grad_norm": 0.34689757227897644, |
| "learning_rate": 1.8019683815982691e-06, |
| "loss": 0.4336, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.7494661921708186, |
| "grad_norm": 0.36886388063430786, |
| "learning_rate": 1.7876501134239316e-06, |
| "loss": 0.4688, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7505338078291814, |
| "grad_norm": 0.40008699893951416, |
| "learning_rate": 1.7733765648486134e-06, |
| "loss": 0.4842, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7516014234875444, |
| "grad_norm": 0.3825279772281647, |
| "learning_rate": 1.7591479345764972e-06, |
| "loss": 0.4843, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7526690391459074, |
| "grad_norm": 0.33588531613349915, |
| "learning_rate": 1.7449644206864564e-06, |
| "loss": 0.4673, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7537366548042704, |
| "grad_norm": 0.3410935699939728, |
| "learning_rate": 1.7308262206292898e-06, |
| "loss": 0.4593, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7548042704626334, |
| "grad_norm": 0.3688999116420746, |
| "learning_rate": 1.7167335312249766e-06, |
| "loss": 0.4669, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7558718861209964, |
| "grad_norm": 0.3429146409034729, |
| "learning_rate": 1.7026865486599375e-06, |
| "loss": 0.4686, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7569395017793594, |
| "grad_norm": 0.3735763728618622, |
| "learning_rate": 1.6886854684842962e-06, |
| "loss": 0.4414, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7580071174377224, |
| "grad_norm": 0.3942524790763855, |
| "learning_rate": 1.6747304856091662e-06, |
| "loss": 0.4921, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7590747330960854, |
| "grad_norm": 0.3756312131881714, |
| "learning_rate": 1.660821794303934e-06, |
| "loss": 0.4729, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.7601423487544484, |
| "grad_norm": 0.3681127727031708, |
| "learning_rate": 1.6469595881935523e-06, |
| "loss": 0.4657, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7612099644128114, |
| "grad_norm": 0.3375697135925293, |
| "learning_rate": 1.6331440602558501e-06, |
| "loss": 0.46, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7622775800711744, |
| "grad_norm": 0.3568233549594879, |
| "learning_rate": 1.6193754028188363e-06, |
| "loss": 0.4758, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7633451957295374, |
| "grad_norm": 0.3790285587310791, |
| "learning_rate": 1.6056538075580342e-06, |
| "loss": 0.4669, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7644128113879004, |
| "grad_norm": 0.3637920022010803, |
| "learning_rate": 1.591979465493806e-06, |
| "loss": 0.4688, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7654804270462633, |
| "grad_norm": 0.36419907212257385, |
| "learning_rate": 1.5783525669886934e-06, |
| "loss": 0.4705, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7665480427046263, |
| "grad_norm": 0.40734171867370605, |
| "learning_rate": 1.5647733017447741e-06, |
| "loss": 0.4984, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7676156583629893, |
| "grad_norm": 0.3663610816001892, |
| "learning_rate": 1.5512418588010086e-06, |
| "loss": 0.4833, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.7686832740213523, |
| "grad_norm": 0.3341020345687866, |
| "learning_rate": 1.5377584265306222e-06, |
| "loss": 0.4512, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7697508896797153, |
| "grad_norm": 0.3661962151527405, |
| "learning_rate": 1.5243231926384744e-06, |
| "loss": 0.4722, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.7708185053380783, |
| "grad_norm": 0.36284494400024414, |
| "learning_rate": 1.510936344158448e-06, |
| "loss": 0.4475, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7718861209964413, |
| "grad_norm": 0.3552328646183014, |
| "learning_rate": 1.4975980674508472e-06, |
| "loss": 0.4568, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7729537366548043, |
| "grad_norm": 0.3626512885093689, |
| "learning_rate": 1.484308548199796e-06, |
| "loss": 0.4832, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7740213523131673, |
| "grad_norm": 0.35424965620040894, |
| "learning_rate": 1.4710679714106635e-06, |
| "loss": 0.4741, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7750889679715303, |
| "grad_norm": 0.3459206223487854, |
| "learning_rate": 1.4578765214074842e-06, |
| "loss": 0.4401, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7761565836298933, |
| "grad_norm": 0.38151949644088745, |
| "learning_rate": 1.444734381830386e-06, |
| "loss": 0.459, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7772241992882563, |
| "grad_norm": 0.3755812346935272, |
| "learning_rate": 1.4316417356330441e-06, |
| "loss": 0.4612, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.7782918149466193, |
| "grad_norm": 0.3835029900074005, |
| "learning_rate": 1.4185987650801286e-06, |
| "loss": 0.4556, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.7793594306049823, |
| "grad_norm": 0.3661644756793976, |
| "learning_rate": 1.4056056517447637e-06, |
| "loss": 0.491, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7804270462633452, |
| "grad_norm": 0.3670632541179657, |
| "learning_rate": 1.392662576506007e-06, |
| "loss": 0.4821, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.7814946619217081, |
| "grad_norm": 0.3170434832572937, |
| "learning_rate": 1.3797697195463278e-06, |
| "loss": 0.4571, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7825622775800711, |
| "grad_norm": 0.37059327960014343, |
| "learning_rate": 1.3669272603491002e-06, |
| "loss": 0.472, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.7836298932384341, |
| "grad_norm": 0.3722604513168335, |
| "learning_rate": 1.3541353776961035e-06, |
| "loss": 0.4716, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7846975088967971, |
| "grad_norm": 0.3722414970397949, |
| "learning_rate": 1.3413942496650301e-06, |
| "loss": 0.4824, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.7857651245551601, |
| "grad_norm": 0.3409653306007385, |
| "learning_rate": 1.3287040536270135e-06, |
| "loss": 0.4605, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7868327402135231, |
| "grad_norm": 0.3402983546257019, |
| "learning_rate": 1.3160649662441532e-06, |
| "loss": 0.4756, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7879003558718861, |
| "grad_norm": 0.34389257431030273, |
| "learning_rate": 1.30347716346706e-06, |
| "loss": 0.491, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7889679715302491, |
| "grad_norm": 0.35902342200279236, |
| "learning_rate": 1.290940820532403e-06, |
| "loss": 0.4962, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7900355871886121, |
| "grad_norm": 0.3977390229701996, |
| "learning_rate": 1.2784561119604683e-06, |
| "loss": 0.4772, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7911032028469751, |
| "grad_norm": 0.3474990427494049, |
| "learning_rate": 1.266023211552736e-06, |
| "loss": 0.4722, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.7921708185053381, |
| "grad_norm": 0.3343373239040375, |
| "learning_rate": 1.2536422923894565e-06, |
| "loss": 0.4693, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7932384341637011, |
| "grad_norm": 0.3417350649833679, |
| "learning_rate": 1.2413135268272403e-06, |
| "loss": 0.4557, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7943060498220641, |
| "grad_norm": 0.35946568846702576, |
| "learning_rate": 1.2290370864966623e-06, |
| "loss": 0.4719, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.7953736654804271, |
| "grad_norm": 0.3475436866283417, |
| "learning_rate": 1.2168131422998653e-06, |
| "loss": 0.4822, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.79644128113879, |
| "grad_norm": 0.34873461723327637, |
| "learning_rate": 1.2046418644081904e-06, |
| "loss": 0.469, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.797508896797153, |
| "grad_norm": 0.37052375078201294, |
| "learning_rate": 1.192523422259802e-06, |
| "loss": 0.4926, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.798576512455516, |
| "grad_norm": 0.40255382657051086, |
| "learning_rate": 1.1804579845573288e-06, |
| "loss": 0.4759, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.799644128113879, |
| "grad_norm": 0.35330265760421753, |
| "learning_rate": 1.1684457192655207e-06, |
| "loss": 0.4904, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.800711743772242, |
| "grad_norm": 0.34803614020347595, |
| "learning_rate": 1.156486793608899e-06, |
| "loss": 0.4786, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.801779359430605, |
| "grad_norm": 0.3456575572490692, |
| "learning_rate": 1.144581374069444e-06, |
| "loss": 0.4493, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.802846975088968, |
| "grad_norm": 0.3717256188392639, |
| "learning_rate": 1.1327296263842653e-06, |
| "loss": 0.4414, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.803914590747331, |
| "grad_norm": 0.35381019115448, |
| "learning_rate": 1.120931715543299e-06, |
| "loss": 0.4235, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.804982206405694, |
| "grad_norm": 0.37933510541915894, |
| "learning_rate": 1.1091878057870137e-06, |
| "loss": 0.4721, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.806049822064057, |
| "grad_norm": 0.3646122217178345, |
| "learning_rate": 1.0974980606041152e-06, |
| "loss": 0.4799, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.80711743772242, |
| "grad_norm": 0.34057337045669556, |
| "learning_rate": 1.0858626427292796e-06, |
| "loss": 0.4549, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.808185053380783, |
| "grad_norm": 0.35293883085250854, |
| "learning_rate": 1.074281714140884e-06, |
| "loss": 0.4939, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8092526690391459, |
| "grad_norm": 0.35031718015670776, |
| "learning_rate": 1.0627554360587533e-06, |
| "loss": 0.4707, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8103202846975089, |
| "grad_norm": 0.36572709679603577, |
| "learning_rate": 1.0512839689419124e-06, |
| "loss": 0.473, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8113879003558719, |
| "grad_norm": 0.3748714327812195, |
| "learning_rate": 1.0398674724863584e-06, |
| "loss": 0.4637, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8124555160142348, |
| "grad_norm": 0.35171017050743103, |
| "learning_rate": 1.0285061056228273e-06, |
| "loss": 0.4651, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8135231316725978, |
| "grad_norm": 0.3429271876811981, |
| "learning_rate": 1.0172000265145938e-06, |
| "loss": 0.484, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8145907473309608, |
| "grad_norm": 0.33533966541290283, |
| "learning_rate": 1.0059493925552604e-06, |
| "loss": 0.4695, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8156583629893238, |
| "grad_norm": 0.40435880422592163, |
| "learning_rate": 9.947543603665711e-07, |
| "loss": 0.4899, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8167259786476868, |
| "grad_norm": 0.35124266147613525, |
| "learning_rate": 9.836150857962296e-07, |
| "loss": 0.4515, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8177935943060498, |
| "grad_norm": 0.3399527370929718, |
| "learning_rate": 9.72531723915726e-07, |
| "loss": 0.4715, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8188612099644128, |
| "grad_norm": 0.35184887051582336, |
| "learning_rate": 9.615044290181863e-07, |
| "loss": 0.4596, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8199288256227758, |
| "grad_norm": 0.39761510491371155, |
| "learning_rate": 9.505333546162171e-07, |
| "loss": 0.4723, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8209964412811388, |
| "grad_norm": 0.39027488231658936, |
| "learning_rate": 9.396186534397711e-07, |
| "loss": 0.4922, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8220640569395018, |
| "grad_norm": 0.3433700203895569, |
| "learning_rate": 9.287604774340236e-07, |
| "loss": 0.5038, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8231316725978648, |
| "grad_norm": 0.37167125940322876, |
| "learning_rate": 9.179589777572496e-07, |
| "loss": 0.4837, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8241992882562278, |
| "grad_norm": 0.3502262532711029, |
| "learning_rate": 9.07214304778729e-07, |
| "loss": 0.4589, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8252669039145908, |
| "grad_norm": 0.3304504454135895, |
| "learning_rate": 8.965266080766471e-07, |
| "loss": 0.4718, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8263345195729538, |
| "grad_norm": 0.3667429983615875, |
| "learning_rate": 8.858960364360142e-07, |
| "loss": 0.4946, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8274021352313167, |
| "grad_norm": 0.38245144486427307, |
| "learning_rate": 8.753227378465956e-07, |
| "loss": 0.4551, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8284697508896797, |
| "grad_norm": 0.3940775394439697, |
| "learning_rate": 8.648068595008458e-07, |
| "loss": 0.4492, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8295373665480427, |
| "grad_norm": 0.3834594488143921, |
| "learning_rate": 8.543485477918672e-07, |
| "loss": 0.4642, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8306049822064057, |
| "grad_norm": 0.35629889369010925, |
| "learning_rate": 8.439479483113683e-07, |
| "loss": 0.4479, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8316725978647687, |
| "grad_norm": 0.38858264684677124, |
| "learning_rate": 8.336052058476374e-07, |
| "loss": 0.4774, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8327402135231317, |
| "grad_norm": 0.3532935380935669, |
| "learning_rate": 8.233204643835235e-07, |
| "loss": 0.4941, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8338078291814947, |
| "grad_norm": 0.34240975975990295, |
| "learning_rate": 8.130938670944377e-07, |
| "loss": 0.4695, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8348754448398576, |
| "grad_norm": 0.3459632396697998, |
| "learning_rate": 8.029255563463589e-07, |
| "loss": 0.4913, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8359430604982206, |
| "grad_norm": 0.3634418547153473, |
| "learning_rate": 7.928156736938458e-07, |
| "loss": 0.4515, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8370106761565836, |
| "grad_norm": 0.3613983690738678, |
| "learning_rate": 7.827643598780748e-07, |
| "loss": 0.4654, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8380782918149466, |
| "grad_norm": 0.35897570848464966, |
| "learning_rate": 7.72771754824877e-07, |
| "loss": 0.4687, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8391459074733096, |
| "grad_norm": 0.3587648570537567, |
| "learning_rate": 7.628379976427868e-07, |
| "loss": 0.4862, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8402135231316726, |
| "grad_norm": 0.3191976249217987, |
| "learning_rate": 7.529632266211112e-07, |
| "loss": 0.4501, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8412811387900356, |
| "grad_norm": 0.36679914593696594, |
| "learning_rate": 7.431475792280018e-07, |
| "loss": 0.4668, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8423487544483986, |
| "grad_norm": 0.34455785155296326, |
| "learning_rate": 7.333911921085418e-07, |
| "loss": 0.4621, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8434163701067615, |
| "grad_norm": 0.326860249042511, |
| "learning_rate": 7.23694201082843e-07, |
| "loss": 0.4336, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8444839857651245, |
| "grad_norm": 0.34012243151664734, |
| "learning_rate": 7.140567411441529e-07, |
| "loss": 0.4742, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8455516014234875, |
| "grad_norm": 0.3265022933483124, |
| "learning_rate": 7.044789464569817e-07, |
| "loss": 0.4561, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8466192170818505, |
| "grad_norm": 0.36320456862449646, |
| "learning_rate": 6.94960950355229e-07, |
| "loss": 0.4812, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8476868327402135, |
| "grad_norm": 0.3391510546207428, |
| "learning_rate": 6.855028853403295e-07, |
| "loss": 0.4705, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8487544483985765, |
| "grad_norm": 0.34259894490242004, |
| "learning_rate": 6.761048830794098e-07, |
| "loss": 0.4889, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8498220640569395, |
| "grad_norm": 0.37212345004081726, |
| "learning_rate": 6.667670744034498e-07, |
| "loss": 0.4869, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8508896797153025, |
| "grad_norm": 0.34556707739830017, |
| "learning_rate": 6.574895893054711e-07, |
| "loss": 0.507, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8519572953736655, |
| "grad_norm": 0.3274592459201813, |
| "learning_rate": 6.482725569387171e-07, |
| "loss": 0.4678, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8530249110320285, |
| "grad_norm": 0.3611302971839905, |
| "learning_rate": 6.391161056148637e-07, |
| "loss": 0.4792, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8540925266903915, |
| "grad_norm": 0.36246782541275024, |
| "learning_rate": 6.300203628022272e-07, |
| "loss": 0.4867, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8551601423487545, |
| "grad_norm": 0.37306517362594604, |
| "learning_rate": 6.209854551239902e-07, |
| "loss": 0.4795, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8562277580071175, |
| "grad_norm": 0.3382475972175598, |
| "learning_rate": 6.120115083564432e-07, |
| "loss": 0.437, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8572953736654805, |
| "grad_norm": 0.33135363459587097, |
| "learning_rate": 6.030986474272288e-07, |
| "loss": 0.4715, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.8583629893238434, |
| "grad_norm": 0.3751276135444641, |
| "learning_rate": 5.942469964136055e-07, |
| "loss": 0.4808, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8594306049822064, |
| "grad_norm": 0.3444526493549347, |
| "learning_rate": 5.854566785407212e-07, |
| "loss": 0.4636, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8604982206405694, |
| "grad_norm": 0.3411964178085327, |
| "learning_rate": 5.767278161798912e-07, |
| "loss": 0.4396, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8615658362989324, |
| "grad_norm": 0.33897465467453003, |
| "learning_rate": 5.680605308469045e-07, |
| "loss": 0.4747, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.8626334519572953, |
| "grad_norm": 0.3335667550563812, |
| "learning_rate": 5.594549432003244e-07, |
| "loss": 0.4449, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8637010676156583, |
| "grad_norm": 0.35737693309783936, |
| "learning_rate": 5.509111730398125e-07, |
| "loss": 0.4713, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.8647686832740213, |
| "grad_norm": 0.3388114869594574, |
| "learning_rate": 5.42429339304461e-07, |
| "loss": 0.4657, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8658362989323843, |
| "grad_norm": 0.36129823327064514, |
| "learning_rate": 5.340095600711343e-07, |
| "loss": 0.4817, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.8669039145907473, |
| "grad_norm": 0.32177579402923584, |
| "learning_rate": 5.256519525528254e-07, |
| "loss": 0.486, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8679715302491103, |
| "grad_norm": 0.351857453584671, |
| "learning_rate": 5.173566330970286e-07, |
| "loss": 0.4574, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.8690391459074733, |
| "grad_norm": 0.34854841232299805, |
| "learning_rate": 5.091237171841173e-07, |
| "loss": 0.4603, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8701067615658363, |
| "grad_norm": 0.36344021558761597, |
| "learning_rate": 5.009533194257332e-07, |
| "loss": 0.4655, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8711743772241993, |
| "grad_norm": 0.3396829068660736, |
| "learning_rate": 4.92845553563196e-07, |
| "loss": 0.4776, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8722419928825623, |
| "grad_norm": 0.35948312282562256, |
| "learning_rate": 4.848005324659144e-07, |
| "loss": 0.4879, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.8733096085409253, |
| "grad_norm": 0.3061416447162628, |
| "learning_rate": 4.768183681298211e-07, |
| "loss": 0.4335, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.8743772241992882, |
| "grad_norm": 0.35700371861457825, |
| "learning_rate": 4.6889917167580903e-07, |
| "loss": 0.4757, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.8754448398576512, |
| "grad_norm": 0.3622047007083893, |
| "learning_rate": 4.6104305334818577e-07, |
| "loss": 0.4792, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8765124555160142, |
| "grad_norm": 0.3496834337711334, |
| "learning_rate": 4.532501225131408e-07, |
| "loss": 0.4821, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.8775800711743772, |
| "grad_norm": 0.3690001666545868, |
| "learning_rate": 4.455204876572172e-07, |
| "loss": 0.4789, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8786476868327402, |
| "grad_norm": 0.320921391248703, |
| "learning_rate": 4.3785425638580847e-07, |
| "loss": 0.4792, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.8797153024911032, |
| "grad_norm": 0.320486843585968, |
| "learning_rate": 4.3025153542165744e-07, |
| "loss": 0.4707, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8807829181494662, |
| "grad_norm": 0.34054213762283325, |
| "learning_rate": 4.2271243060336976e-07, |
| "loss": 0.466, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.8818505338078292, |
| "grad_norm": 0.33979448676109314, |
| "learning_rate": 4.1523704688394176e-07, |
| "loss": 0.4562, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.8829181494661922, |
| "grad_norm": 0.3499307632446289, |
| "learning_rate": 4.0782548832929646e-07, |
| "loss": 0.4653, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8839857651245552, |
| "grad_norm": 0.3363668620586395, |
| "learning_rate": 4.0047785811684116e-07, |
| "loss": 0.451, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8850533807829182, |
| "grad_norm": 0.3545955717563629, |
| "learning_rate": 3.931942585340243e-07, |
| "loss": 0.4769, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.8861209964412812, |
| "grad_norm": 0.38059499859809875, |
| "learning_rate": 3.8597479097691626e-07, |
| "loss": 0.4946, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8871886120996442, |
| "grad_norm": 0.36763712763786316, |
| "learning_rate": 3.788195559487956e-07, |
| "loss": 0.4554, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8882562277580072, |
| "grad_norm": 0.3406812250614166, |
| "learning_rate": 3.717286530587483e-07, |
| "loss": 0.4469, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.88932384341637, |
| "grad_norm": 0.3243533670902252, |
| "learning_rate": 3.6470218102028607e-07, |
| "loss": 0.4283, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.890391459074733, |
| "grad_norm": 0.34269580245018005, |
| "learning_rate": 3.577402376499672e-07, |
| "loss": 0.4439, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.891459074733096, |
| "grad_norm": 0.3207905888557434, |
| "learning_rate": 3.508429198660379e-07, |
| "loss": 0.4562, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.892526690391459, |
| "grad_norm": 0.34439972043037415, |
| "learning_rate": 3.440103236870823e-07, |
| "loss": 0.4705, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.893594306049822, |
| "grad_norm": 0.34473180770874023, |
| "learning_rate": 3.372425442306837e-07, |
| "loss": 0.4747, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.894661921708185, |
| "grad_norm": 0.3256348669528961, |
| "learning_rate": 3.3053967571210375e-07, |
| "loss": 0.4461, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.895729537366548, |
| "grad_norm": 0.3985244929790497, |
| "learning_rate": 3.2390181144296815e-07, |
| "loss": 0.5082, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.896797153024911, |
| "grad_norm": 0.3613927364349365, |
| "learning_rate": 3.1732904382996975e-07, |
| "loss": 0.4701, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.897864768683274, |
| "grad_norm": 0.34537020325660706, |
| "learning_rate": 3.108214643735813e-07, |
| "loss": 0.4683, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.898932384341637, |
| "grad_norm": 0.3321053385734558, |
| "learning_rate": 3.04379163666782e-07, |
| "loss": 0.4609, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.3575867712497711, |
| "learning_rate": 2.98002231393793e-07, |
| "loss": 0.4571, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.901067615658363, |
| "grad_norm": 0.34244054555892944, |
| "learning_rate": 2.916907563288357e-07, |
| "loss": 0.4798, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.902135231316726, |
| "grad_norm": 0.3529646694660187, |
| "learning_rate": 2.854448263348891e-07, |
| "loss": 0.4861, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.903202846975089, |
| "grad_norm": 0.3687219023704529, |
| "learning_rate": 2.792645283624712e-07, |
| "loss": 0.4688, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.904270462633452, |
| "grad_norm": 0.3753871023654938, |
| "learning_rate": 2.7314994844842623e-07, |
| "loss": 0.4744, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9053380782918149, |
| "grad_norm": 0.31958386301994324, |
| "learning_rate": 2.671011717147276e-07, |
| "loss": 0.4667, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9064056939501779, |
| "grad_norm": 0.34946852922439575, |
| "learning_rate": 2.611182823672931e-07, |
| "loss": 0.4763, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9074733096085409, |
| "grad_norm": 0.32950615882873535, |
| "learning_rate": 2.5520136369481194e-07, |
| "loss": 0.4814, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9085409252669039, |
| "grad_norm": 0.35894763469696045, |
| "learning_rate": 2.493504980675865e-07, |
| "loss": 0.4643, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9096085409252669, |
| "grad_norm": 0.3291400671005249, |
| "learning_rate": 2.4356576693638555e-07, |
| "loss": 0.4618, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9106761565836299, |
| "grad_norm": 0.37263375520706177, |
| "learning_rate": 2.3784725083130678e-07, |
| "loss": 0.4765, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9117437722419929, |
| "grad_norm": 0.346204936504364, |
| "learning_rate": 2.3219502936066228e-07, |
| "loss": 0.4684, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9128113879003559, |
| "grad_norm": 0.3411816656589508, |
| "learning_rate": 2.266091812098642e-07, |
| "loss": 0.4665, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9138790035587189, |
| "grad_norm": 0.39055925607681274, |
| "learning_rate": 2.210897841403331e-07, |
| "loss": 0.4752, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9149466192170819, |
| "grad_norm": 0.3565220236778259, |
| "learning_rate": 2.1563691498841465e-07, |
| "loss": 0.4269, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9160142348754449, |
| "grad_norm": 0.3413132131099701, |
| "learning_rate": 2.1025064966430697e-07, |
| "loss": 0.4569, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9170818505338079, |
| "grad_norm": 0.3309057354927063, |
| "learning_rate": 2.0493106315100987e-07, |
| "loss": 0.458, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9181494661921709, |
| "grad_norm": 0.34652769565582275, |
| "learning_rate": 1.9967822950327453e-07, |
| "loss": 0.4765, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9192170818505339, |
| "grad_norm": 0.34112629294395447, |
| "learning_rate": 1.944922218465778e-07, |
| "loss": 0.4573, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9202846975088967, |
| "grad_norm": 0.35271042585372925, |
| "learning_rate": 1.8937311237610168e-07, |
| "loss": 0.4485, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9213523131672597, |
| "grad_norm": 0.3256247639656067, |
| "learning_rate": 1.8432097235572655e-07, |
| "loss": 0.4762, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9224199288256227, |
| "grad_norm": 0.33354514837265015, |
| "learning_rate": 1.793358721170435e-07, |
| "loss": 0.4764, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9234875444839857, |
| "grad_norm": 0.36107322573661804, |
| "learning_rate": 1.7441788105837133e-07, |
| "loss": 0.4675, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9245551601423487, |
| "grad_norm": 0.36529216170310974, |
| "learning_rate": 1.6956706764379438e-07, |
| "loss": 0.4663, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9256227758007117, |
| "grad_norm": 0.387478232383728, |
| "learning_rate": 1.6478349940220294e-07, |
| "loss": 0.4626, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9266903914590747, |
| "grad_norm": 0.33364489674568176, |
| "learning_rate": 1.6006724292636166e-07, |
| "loss": 0.4636, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9277580071174377, |
| "grad_norm": 0.3466110825538635, |
| "learning_rate": 1.5541836387197528e-07, |
| "loss": 0.4599, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9288256227758007, |
| "grad_norm": 0.3230234384536743, |
| "learning_rate": 1.508369269567783e-07, |
| "loss": 0.4628, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9298932384341637, |
| "grad_norm": 0.34074848890304565, |
| "learning_rate": 1.4632299595963294e-07, |
| "loss": 0.5069, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9309608540925267, |
| "grad_norm": 0.3386795222759247, |
| "learning_rate": 1.418766337196431e-07, |
| "loss": 0.4608, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9320284697508897, |
| "grad_norm": 0.35128504037857056, |
| "learning_rate": 1.374979021352757e-07, |
| "loss": 0.4636, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9330960854092527, |
| "grad_norm": 0.3676503598690033, |
| "learning_rate": 1.3318686216350241e-07, |
| "loss": 0.5208, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9341637010676157, |
| "grad_norm": 0.3126872479915619, |
| "learning_rate": 1.2894357381894984e-07, |
| "loss": 0.4391, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9352313167259787, |
| "grad_norm": 0.32956287264823914, |
| "learning_rate": 1.2476809617306408e-07, |
| "loss": 0.4585, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9362989323843416, |
| "grad_norm": 0.3517782688140869, |
| "learning_rate": 1.206604873532885e-07, |
| "loss": 0.4854, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9373665480427046, |
| "grad_norm": 0.37964025139808655, |
| "learning_rate": 1.166208045422551e-07, |
| "loss": 0.4637, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9384341637010676, |
| "grad_norm": 0.3676295876502991, |
| "learning_rate": 1.1264910397698614e-07, |
| "loss": 0.4779, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9395017793594306, |
| "grad_norm": 0.3368426561355591, |
| "learning_rate": 1.0874544094811424e-07, |
| "loss": 0.4594, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9405693950177936, |
| "grad_norm": 0.35099488496780396, |
| "learning_rate": 1.0490986979911189e-07, |
| "loss": 0.4465, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9416370106761566, |
| "grad_norm": 0.3372381925582886, |
| "learning_rate": 1.0114244392553318e-07, |
| "loss": 0.4676, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9427046263345196, |
| "grad_norm": 0.33478647470474243, |
| "learning_rate": 9.744321577427218e-08, |
| "loss": 0.4646, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9437722419928826, |
| "grad_norm": 0.359651654958725, |
| "learning_rate": 9.381223684283291e-08, |
| "loss": 0.4623, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9448398576512456, |
| "grad_norm": 0.3485049605369568, |
| "learning_rate": 9.024955767861054e-08, |
| "loss": 0.4644, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9459074733096086, |
| "grad_norm": 0.3376001715660095, |
| "learning_rate": 8.675522787819023e-08, |
| "loss": 0.4353, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.9469750889679716, |
| "grad_norm": 0.3420683741569519, |
| "learning_rate": 8.332929608665553e-08, |
| "loss": 0.4268, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9480427046263346, |
| "grad_norm": 0.33343154191970825, |
| "learning_rate": 7.997180999691101e-08, |
| "loss": 0.4584, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9491103202846976, |
| "grad_norm": 0.32583436369895935, |
| "learning_rate": 7.668281634901686e-08, |
| "loss": 0.4625, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.9501779359430605, |
| "grad_norm": 0.33751362562179565, |
| "learning_rate": 7.346236092954318e-08, |
| "loss": 0.46, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9512455516014234, |
| "grad_norm": 0.329089879989624, |
| "learning_rate": 7.031048857092604e-08, |
| "loss": 0.4536, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.9523131672597864, |
| "grad_norm": 0.3587329685688019, |
| "learning_rate": 6.722724315084805e-08, |
| "loss": 0.4767, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9533807829181494, |
| "grad_norm": 0.3284720480442047, |
| "learning_rate": 6.421266759162659e-08, |
| "loss": 0.4491, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.9544483985765124, |
| "grad_norm": 0.3474853038787842, |
| "learning_rate": 6.12668038596137e-08, |
| "loss": 0.461, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.9555160142348754, |
| "grad_norm": 0.31584909558296204, |
| "learning_rate": 5.838969296461605e-08, |
| "loss": 0.4404, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.9565836298932384, |
| "grad_norm": 0.3233558237552643, |
| "learning_rate": 5.5581374959320366e-08, |
| "loss": 0.453, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9576512455516014, |
| "grad_norm": 0.3326091468334198, |
| "learning_rate": 5.2841888938738314e-08, |
| "loss": 0.4507, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9587188612099644, |
| "grad_norm": 0.35787636041641235, |
| "learning_rate": 5.017127303966085e-08, |
| "loss": 0.4835, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9597864768683274, |
| "grad_norm": 0.33811962604522705, |
| "learning_rate": 4.7569564440128055e-08, |
| "loss": 0.4424, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9608540925266904, |
| "grad_norm": 0.3393824100494385, |
| "learning_rate": 4.50367993589107e-08, |
| "loss": 0.4733, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9619217081850534, |
| "grad_norm": 0.3107040822505951, |
| "learning_rate": 4.257301305500672e-08, |
| "loss": 0.4453, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9629893238434164, |
| "grad_norm": 0.336866557598114, |
| "learning_rate": 4.0178239827151077e-08, |
| "loss": 0.4726, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.9640569395017794, |
| "grad_norm": 0.33734455704689026, |
| "learning_rate": 3.785251301333726e-08, |
| "loss": 0.4609, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.9651245551601424, |
| "grad_norm": 0.3539280593395233, |
| "learning_rate": 3.559586499035206e-08, |
| "loss": 0.4769, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.9661921708185054, |
| "grad_norm": 0.3523450791835785, |
| "learning_rate": 3.340832717332765e-08, |
| "loss": 0.4701, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9672597864768683, |
| "grad_norm": 0.33282899856567383, |
| "learning_rate": 3.128993001530245e-08, |
| "loss": 0.4708, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.9683274021352313, |
| "grad_norm": 0.32930752635002136, |
| "learning_rate": 2.9240703006797044e-08, |
| "loss": 0.479, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.9693950177935943, |
| "grad_norm": 0.3469620943069458, |
| "learning_rate": 2.7260674675404498e-08, |
| "loss": 0.4634, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9704626334519573, |
| "grad_norm": 0.3198079466819763, |
| "learning_rate": 2.5349872585392898e-08, |
| "loss": 0.4599, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.9715302491103203, |
| "grad_norm": 0.353118896484375, |
| "learning_rate": 2.3508323337321225e-08, |
| "loss": 0.4737, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9725978647686833, |
| "grad_norm": 0.3712465763092041, |
| "learning_rate": 2.1736052567670195e-08, |
| "loss": 0.4837, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.9736654804270463, |
| "grad_norm": 0.32326361536979675, |
| "learning_rate": 2.0033084948483104e-08, |
| "loss": 0.4392, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.9747330960854093, |
| "grad_norm": 0.3160242736339569, |
| "learning_rate": 1.8399444187024995e-08, |
| "loss": 0.4778, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.9758007117437723, |
| "grad_norm": 0.3436198830604553, |
| "learning_rate": 1.6835153025451246e-08, |
| "loss": 0.4531, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.9768683274021353, |
| "grad_norm": 0.3504072427749634, |
| "learning_rate": 1.534023324049061e-08, |
| "loss": 0.458, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.9779359430604982, |
| "grad_norm": 0.3485338091850281, |
| "learning_rate": 1.3914705643143788e-08, |
| "loss": 0.4405, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.9790035587188612, |
| "grad_norm": 0.3303447961807251, |
| "learning_rate": 1.2558590078390886e-08, |
| "loss": 0.4714, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.9800711743772242, |
| "grad_norm": 0.31479117274284363, |
| "learning_rate": 1.1271905424918294e-08, |
| "loss": 0.4798, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.9811387900355872, |
| "grad_norm": 0.31873440742492676, |
| "learning_rate": 1.0054669594853905e-08, |
| "loss": 0.4448, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.9822064056939501, |
| "grad_norm": 0.35837072134017944, |
| "learning_rate": 8.906899533517866e-09, |
| "loss": 0.4583, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9832740213523131, |
| "grad_norm": 0.33332061767578125, |
| "learning_rate": 7.828611219187765e-09, |
| "loss": 0.4802, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.9843416370106761, |
| "grad_norm": 0.3198853135108948, |
| "learning_rate": 6.819819662874372e-09, |
| "loss": 0.448, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9854092526690391, |
| "grad_norm": 0.35830771923065186, |
| "learning_rate": 5.88053890811513e-09, |
| "loss": 0.494, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.9864768683274021, |
| "grad_norm": 0.3175657093524933, |
| "learning_rate": 5.0107820307770945e-09, |
| "loss": 0.4736, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9875444839857651, |
| "grad_norm": 0.3394733965396881, |
| "learning_rate": 4.210561138873193e-09, |
| "loss": 0.4817, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.9886120996441281, |
| "grad_norm": 0.33911213278770447, |
| "learning_rate": 3.4798873723984604e-09, |
| "loss": 0.4372, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9896797153024911, |
| "grad_norm": 0.34297052025794983, |
| "learning_rate": 2.818770903170176e-09, |
| "loss": 0.4613, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.9907473309608541, |
| "grad_norm": 0.3289053738117218, |
| "learning_rate": 2.2272209346885233e-09, |
| "loss": 0.4486, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.9918149466192171, |
| "grad_norm": 0.3399280905723572, |
| "learning_rate": 1.7052457020089175e-09, |
| "loss": 0.4625, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.9928825622775801, |
| "grad_norm": 0.3539047837257385, |
| "learning_rate": 1.2528524716259872e-09, |
| "loss": 0.4833, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9939501779359431, |
| "grad_norm": 0.34324121475219727, |
| "learning_rate": 8.700475413719877e-10, |
| "loss": 0.4721, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.9950177935943061, |
| "grad_norm": 0.34528717398643494, |
| "learning_rate": 5.568362403318706e-10, |
| "loss": 0.4494, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.9960854092526691, |
| "grad_norm": 0.313473105430603, |
| "learning_rate": 3.132229287666766e-10, |
| "loss": 0.4741, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.9971530249110321, |
| "grad_norm": 0.35655054450035095, |
| "learning_rate": 1.3921099805302985e-10, |
| "loss": 0.4796, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.998220640569395, |
| "grad_norm": 0.3347904086112976, |
| "learning_rate": 3.480287063706289e-11, |
| "loss": 0.4614, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.999288256227758, |
| "grad_norm": 0.3562867343425751, |
| "learning_rate": 0.0, |
| "loss": 0.4649, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.999288256227758, |
| "step": 936, |
| "total_flos": 1106993868636160.0, |
| "train_loss": 0.49742555408141553, |
| "train_runtime": 30683.4136, |
| "train_samples_per_second": 2.93, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 936, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1106993868636160.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|