{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1056,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002840909090909091,
      "grad_norm": 9.802746734876441,
      "learning_rate": 0.0,
      "loss": 0.6182,
      "step": 1
    },
    {
      "epoch": 0.005681818181818182,
      "grad_norm": 12.388093403552265,
      "learning_rate": 1.1627906976744187e-07,
      "loss": 0.6649,
      "step": 2
    },
    {
      "epoch": 0.008522727272727272,
      "grad_norm": 12.643095212909474,
      "learning_rate": 2.3255813953488374e-07,
      "loss": 0.6794,
      "step": 3
    },
    {
      "epoch": 0.011363636363636364,
      "grad_norm": 9.628453582962425,
      "learning_rate": 3.488372093023256e-07,
      "loss": 0.5426,
      "step": 4
    },
    {
      "epoch": 0.014204545454545454,
      "grad_norm": 12.114285873199693,
      "learning_rate": 4.651162790697675e-07,
      "loss": 0.6628,
      "step": 5
    },
    {
      "epoch": 0.017045454545454544,
      "grad_norm": 12.590069642332757,
      "learning_rate": 5.813953488372094e-07,
      "loss": 0.6635,
      "step": 6
    },
    {
      "epoch": 0.019886363636363636,
      "grad_norm": 11.894881604292143,
      "learning_rate": 6.976744186046513e-07,
      "loss": 0.6478,
      "step": 7
    },
    {
      "epoch": 0.022727272727272728,
      "grad_norm": 10.523659604864859,
      "learning_rate": 8.139534883720931e-07,
      "loss": 0.6382,
      "step": 8
    },
    {
      "epoch": 0.02556818181818182,
      "grad_norm": 9.260520595400251,
      "learning_rate": 9.30232558139535e-07,
      "loss": 0.5683,
      "step": 9
    },
    {
      "epoch": 0.028409090909090908,
      "grad_norm": 8.701673712634479,
      "learning_rate": 1.0465116279069768e-06,
      "loss": 0.5677,
      "step": 10
    },
    {
      "epoch": 0.03125,
      "grad_norm": 7.754246744436588,
      "learning_rate": 1.1627906976744188e-06,
      "loss": 0.5026,
      "step": 11
    },
    {
      "epoch": 0.03409090909090909,
      "grad_norm": 8.663705476348797,
      "learning_rate": 1.2790697674418605e-06,
      "loss": 0.6104,
      "step": 12
    },
    {
      "epoch": 0.036931818181818184,
      "grad_norm": 5.045315784322545,
      "learning_rate": 1.3953488372093025e-06,
      "loss": 0.4227,
      "step": 13
    },
    {
      "epoch": 0.03977272727272727,
      "grad_norm": 4.926402953478099,
      "learning_rate": 1.5116279069767443e-06,
      "loss": 0.4896,
      "step": 14
    },
    {
      "epoch": 0.04261363636363636,
      "grad_norm": 4.591926718398226,
      "learning_rate": 1.6279069767441862e-06,
      "loss": 0.4869,
      "step": 15
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 4.197025239911461,
      "learning_rate": 1.7441860465116282e-06,
      "loss": 0.4637,
      "step": 16
    },
    {
      "epoch": 0.048295454545454544,
      "grad_norm": 3.8588657903560684,
      "learning_rate": 1.86046511627907e-06,
      "loss": 0.4426,
      "step": 17
    },
    {
      "epoch": 0.05113636363636364,
      "grad_norm": 1.8811670709600292,
      "learning_rate": 1.976744186046512e-06,
      "loss": 0.4305,
      "step": 18
    },
    {
      "epoch": 0.05397727272727273,
      "grad_norm": 1.6752451580220031,
      "learning_rate": 2.0930232558139536e-06,
      "loss": 0.4529,
      "step": 19
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 1.2090823975791671,
      "learning_rate": 2.2093023255813954e-06,
      "loss": 0.3613,
      "step": 20
    },
    {
      "epoch": 0.05965909090909091,
      "grad_norm": 1.1814336772386804,
      "learning_rate": 2.3255813953488376e-06,
      "loss": 0.4037,
      "step": 21
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.8954725283144086,
      "learning_rate": 2.4418604651162793e-06,
      "loss": 0.3702,
      "step": 22
    },
    {
      "epoch": 0.06534090909090909,
      "grad_norm": 0.8798870296631145,
      "learning_rate": 2.558139534883721e-06,
      "loss": 0.3973,
      "step": 23
    },
    {
      "epoch": 0.06818181818181818,
      "grad_norm": 0.5832983194953867,
      "learning_rate": 2.674418604651163e-06,
      "loss": 0.3262,
      "step": 24
    },
    {
      "epoch": 0.07102272727272728,
      "grad_norm": 0.8732475291899245,
      "learning_rate": 2.790697674418605e-06,
      "loss": 0.3909,
      "step": 25
    },
    {
      "epoch": 0.07386363636363637,
      "grad_norm": 1.100897285846476,
      "learning_rate": 2.9069767441860468e-06,
      "loss": 0.3817,
      "step": 26
    },
    {
      "epoch": 0.07670454545454546,
      "grad_norm": 1.0608377951702355,
      "learning_rate": 3.0232558139534885e-06,
      "loss": 0.3583,
      "step": 27
    },
    {
      "epoch": 0.07954545454545454,
      "grad_norm": 1.0224952192594947,
      "learning_rate": 3.1395348837209307e-06,
      "loss": 0.4162,
      "step": 28
    },
    {
      "epoch": 0.08238636363636363,
      "grad_norm": 0.8097165887156961,
      "learning_rate": 3.2558139534883724e-06,
      "loss": 0.3477,
      "step": 29
    },
    {
      "epoch": 0.08522727272727272,
      "grad_norm": 0.7315228867679278,
      "learning_rate": 3.372093023255814e-06,
      "loss": 0.3951,
      "step": 30
    },
    {
      "epoch": 0.08806818181818182,
      "grad_norm": 0.6032121177421607,
      "learning_rate": 3.4883720930232564e-06,
      "loss": 0.3414,
      "step": 31
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 0.5651833216962348,
      "learning_rate": 3.6046511627906977e-06,
      "loss": 0.3635,
      "step": 32
    },
    {
      "epoch": 0.09375,
      "grad_norm": 0.5192255380315864,
      "learning_rate": 3.72093023255814e-06,
      "loss": 0.3888,
      "step": 33
    },
    {
      "epoch": 0.09659090909090909,
      "grad_norm": 0.49173473741498314,
      "learning_rate": 3.837209302325582e-06,
      "loss": 0.3749,
      "step": 34
    },
    {
      "epoch": 0.09943181818181818,
      "grad_norm": 0.48300590116190206,
      "learning_rate": 3.953488372093024e-06,
      "loss": 0.3719,
      "step": 35
    },
    {
      "epoch": 0.10227272727272728,
      "grad_norm": 0.47568795818970555,
      "learning_rate": 4.0697674418604655e-06,
      "loss": 0.3502,
      "step": 36
    },
    {
      "epoch": 0.10511363636363637,
      "grad_norm": 0.5738976486828545,
      "learning_rate": 4.186046511627907e-06,
      "loss": 0.3553,
      "step": 37
    },
    {
      "epoch": 0.10795454545454546,
      "grad_norm": 0.48281438241706864,
      "learning_rate": 4.302325581395349e-06,
      "loss": 0.3194,
      "step": 38
    },
    {
      "epoch": 0.11079545454545454,
      "grad_norm": 0.6040813728082152,
      "learning_rate": 4.418604651162791e-06,
      "loss": 0.3753,
      "step": 39
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.5510018703021852,
      "learning_rate": 4.5348837209302326e-06,
      "loss": 0.3497,
      "step": 40
    },
    {
      "epoch": 0.11647727272727272,
      "grad_norm": 0.4265614122633672,
      "learning_rate": 4.651162790697675e-06,
      "loss": 0.3067,
      "step": 41
    },
    {
      "epoch": 0.11931818181818182,
      "grad_norm": 0.3982552723726358,
      "learning_rate": 4.767441860465117e-06,
      "loss": 0.3166,
      "step": 42
    },
    {
      "epoch": 0.12215909090909091,
      "grad_norm": 0.42319934937905634,
      "learning_rate": 4.883720930232559e-06,
      "loss": 0.3406,
      "step": 43
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.48844669962812265,
      "learning_rate": 5e-06,
      "loss": 0.3809,
      "step": 44
    },
    {
      "epoch": 0.1278409090909091,
      "grad_norm": 0.4283299903892573,
      "learning_rate": 4.999987977618099e-06,
      "loss": 0.3487,
      "step": 45
    },
    {
      "epoch": 0.13068181818181818,
      "grad_norm": 0.45165901843941525,
      "learning_rate": 4.999951910588025e-06,
      "loss": 0.3261,
      "step": 46
    },
    {
      "epoch": 0.13352272727272727,
      "grad_norm": 0.3309060296669714,
      "learning_rate": 4.999891799256668e-06,
      "loss": 0.3122,
      "step": 47
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 0.3836084760514636,
      "learning_rate": 4.9998076442021725e-06,
      "loss": 0.3001,
      "step": 48
    },
    {
      "epoch": 0.13920454545454544,
      "grad_norm": 0.425230874245839,
      "learning_rate": 4.999699446233934e-06,
      "loss": 0.3341,
      "step": 49
    },
    {
      "epoch": 0.14204545454545456,
      "grad_norm": 0.4444798732501407,
      "learning_rate": 4.999567206392591e-06,
      "loss": 0.3373,
      "step": 50
    },
    {
      "epoch": 0.14488636363636365,
      "grad_norm": 0.381536539310927,
      "learning_rate": 4.999410925950012e-06,
      "loss": 0.3267,
      "step": 51
    },
    {
      "epoch": 0.14772727272727273,
      "grad_norm": 0.3767650025962174,
      "learning_rate": 4.99923060640929e-06,
      "loss": 0.328,
      "step": 52
    },
    {
      "epoch": 0.15056818181818182,
      "grad_norm": 0.3903203005773619,
      "learning_rate": 4.99902624950472e-06,
      "loss": 0.3367,
      "step": 53
    },
    {
      "epoch": 0.1534090909090909,
      "grad_norm": 0.47731540090520985,
      "learning_rate": 4.9987978572017875e-06,
      "loss": 0.3749,
      "step": 54
    },
    {
      "epoch": 0.15625,
      "grad_norm": 0.36341294567474813,
      "learning_rate": 4.998545431697149e-06,
      "loss": 0.2952,
      "step": 55
    },
    {
      "epoch": 0.1590909090909091,
      "grad_norm": 0.4160548663852485,
      "learning_rate": 4.998268975418606e-06,
      "loss": 0.3779,
      "step": 56
    },
    {
      "epoch": 0.16193181818181818,
      "grad_norm": 0.3664734921308225,
      "learning_rate": 4.997968491025093e-06,
      "loss": 0.3105,
      "step": 57
    },
    {
      "epoch": 0.16477272727272727,
      "grad_norm": 0.35755496009312704,
      "learning_rate": 4.997643981406638e-06,
      "loss": 0.3508,
      "step": 58
    },
    {
      "epoch": 0.16761363636363635,
      "grad_norm": 0.3738253178296096,
      "learning_rate": 4.997295449684345e-06,
      "loss": 0.349,
      "step": 59
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 0.3175005755892801,
      "learning_rate": 4.996922899210358e-06,
      "loss": 0.2984,
      "step": 60
    },
    {
      "epoch": 0.17329545454545456,
      "grad_norm": 0.39931619691125575,
      "learning_rate": 4.996526333567833e-06,
      "loss": 0.3627,
      "step": 61
    },
    {
      "epoch": 0.17613636363636365,
      "grad_norm": 0.3726199489633269,
      "learning_rate": 4.9961057565709015e-06,
      "loss": 0.3274,
      "step": 62
    },
    {
      "epoch": 0.17897727272727273,
      "grad_norm": 0.3954308613768431,
      "learning_rate": 4.995661172264632e-06,
      "loss": 0.34,
      "step": 63
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.3814105011898473,
      "learning_rate": 4.995192584924995e-06,
      "loss": 0.3122,
      "step": 64
    },
    {
      "epoch": 0.1846590909090909,
      "grad_norm": 0.3655156038716592,
      "learning_rate": 4.99469999905882e-06,
      "loss": 0.35,
      "step": 65
    },
    {
      "epoch": 0.1875,
      "grad_norm": 0.39224970009402493,
      "learning_rate": 4.99418341940375e-06,
      "loss": 0.3057,
      "step": 66
    },
    {
      "epoch": 0.1903409090909091,
      "grad_norm": 0.33083770067354695,
      "learning_rate": 4.9936428509282e-06,
      "loss": 0.3144,
      "step": 67
    },
    {
      "epoch": 0.19318181818181818,
      "grad_norm": 0.31786460924484966,
      "learning_rate": 4.9930782988313065e-06,
      "loss": 0.3156,
      "step": 68
    },
    {
      "epoch": 0.19602272727272727,
      "grad_norm": 0.3515587615165226,
      "learning_rate": 4.992489768542877e-06,
      "loss": 0.318,
      "step": 69
    },
    {
      "epoch": 0.19886363636363635,
      "grad_norm": 0.39680326873271354,
      "learning_rate": 4.991877265723343e-06,
      "loss": 0.3319,
      "step": 70
    },
    {
      "epoch": 0.20170454545454544,
      "grad_norm": 0.3532333123348208,
      "learning_rate": 4.9912407962636965e-06,
      "loss": 0.3343,
      "step": 71
    },
    {
      "epoch": 0.20454545454545456,
      "grad_norm": 0.3684851475562903,
      "learning_rate": 4.990580366285441e-06,
      "loss": 0.3214,
      "step": 72
    },
    {
      "epoch": 0.20738636363636365,
      "grad_norm": 0.34703126004025847,
      "learning_rate": 4.98989598214053e-06,
      "loss": 0.3497,
      "step": 73
    },
    {
      "epoch": 0.21022727272727273,
      "grad_norm": 0.331786659705209,
      "learning_rate": 4.989187650411306e-06,
      "loss": 0.3119,
      "step": 74
    },
    {
      "epoch": 0.21306818181818182,
      "grad_norm": 0.3514432926351399,
      "learning_rate": 4.988455377910436e-06,
      "loss": 0.3276,
      "step": 75
    },
    {
      "epoch": 0.2159090909090909,
      "grad_norm": 0.45669134699095365,
      "learning_rate": 4.987699171680846e-06,
      "loss": 0.3502,
      "step": 76
    },
    {
      "epoch": 0.21875,
      "grad_norm": 0.3799997391446089,
      "learning_rate": 4.98691903899566e-06,
      "loss": 0.3389,
      "step": 77
    },
    {
      "epoch": 0.2215909090909091,
      "grad_norm": 0.32987905423731806,
      "learning_rate": 4.986114987358118e-06,
      "loss": 0.3154,
      "step": 78
    },
    {
      "epoch": 0.22443181818181818,
      "grad_norm": 0.37320907794023317,
      "learning_rate": 4.985287024501512e-06,
      "loss": 0.2865,
      "step": 79
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.3606727238448836,
      "learning_rate": 4.9844351583891125e-06,
      "loss": 0.3352,
      "step": 80
    },
    {
      "epoch": 0.23011363636363635,
      "grad_norm": 0.28704484493903537,
      "learning_rate": 4.983559397214086e-06,
      "loss": 0.2761,
      "step": 81
    },
    {
      "epoch": 0.23295454545454544,
      "grad_norm": 0.3395805127723043,
      "learning_rate": 4.982659749399421e-06,
      "loss": 0.3013,
      "step": 82
    },
    {
      "epoch": 0.23579545454545456,
      "grad_norm": 0.32754503212231606,
      "learning_rate": 4.981736223597845e-06,
      "loss": 0.3291,
      "step": 83
    },
    {
      "epoch": 0.23863636363636365,
      "grad_norm": 0.3278411182469415,
      "learning_rate": 4.9807888286917425e-06,
      "loss": 0.281,
      "step": 84
    },
    {
      "epoch": 0.24147727272727273,
      "grad_norm": 0.3312034883074764,
      "learning_rate": 4.979817573793068e-06,
      "loss": 0.3484,
      "step": 85
    },
    {
      "epoch": 0.24431818181818182,
      "grad_norm": 0.3001329867151946,
      "learning_rate": 4.978822468243259e-06,
      "loss": 0.2842,
      "step": 86
    },
    {
      "epoch": 0.2471590909090909,
      "grad_norm": 0.3516159032278349,
      "learning_rate": 4.977803521613147e-06,
      "loss": 0.3084,
      "step": 87
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.3782753735314241,
      "learning_rate": 4.9767607437028645e-06,
      "loss": 0.3381,
      "step": 88
    },
    {
      "epoch": 0.2528409090909091,
      "grad_norm": 0.3170089268559784,
      "learning_rate": 4.97569414454175e-06,
      "loss": 0.3215,
      "step": 89
    },
    {
      "epoch": 0.2556818181818182,
      "grad_norm": 0.29420316873312097,
      "learning_rate": 4.9746037343882545e-06,
      "loss": 0.2998,
      "step": 90
    },
    {
      "epoch": 0.2585227272727273,
      "grad_norm": 0.45657642279690197,
      "learning_rate": 4.97348952372984e-06,
      "loss": 0.3354,
      "step": 91
    },
    {
      "epoch": 0.26136363636363635,
      "grad_norm": 0.32675165284478025,
      "learning_rate": 4.972351523282878e-06,
      "loss": 0.2715,
      "step": 92
    },
    {
      "epoch": 0.26420454545454547,
      "grad_norm": 0.37411987401338476,
      "learning_rate": 4.97118974399255e-06,
      "loss": 0.331,
      "step": 93
    },
    {
      "epoch": 0.26704545454545453,
      "grad_norm": 0.2906231907319114,
      "learning_rate": 4.970004197032741e-06,
      "loss": 0.2635,
      "step": 94
    },
    {
      "epoch": 0.26988636363636365,
      "grad_norm": 0.42609899782651967,
      "learning_rate": 4.968794893805927e-06,
      "loss": 0.3662,
      "step": 95
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 0.35277264498485456,
      "learning_rate": 4.967561845943074e-06,
      "loss": 0.3656,
      "step": 96
    },
    {
      "epoch": 0.2755681818181818,
      "grad_norm": 0.33825537104063047,
      "learning_rate": 4.966305065303519e-06,
      "loss": 0.2949,
      "step": 97
    },
    {
      "epoch": 0.2784090909090909,
      "grad_norm": 0.36200881129772927,
      "learning_rate": 4.96502456397486e-06,
      "loss": 0.3457,
      "step": 98
    },
    {
      "epoch": 0.28125,
      "grad_norm": 0.31133758943801504,
      "learning_rate": 4.963720354272837e-06,
      "loss": 0.2831,
      "step": 99
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 0.3398462998770164,
      "learning_rate": 4.962392448741216e-06,
      "loss": 0.308,
      "step": 100
    },
    {
      "epoch": 0.2869318181818182,
      "grad_norm": 0.2825796948908475,
      "learning_rate": 4.961040860151669e-06,
      "loss": 0.2634,
      "step": 101
    },
    {
      "epoch": 0.2897727272727273,
      "grad_norm": 0.38927704510942096,
      "learning_rate": 4.9596656015036434e-06,
      "loss": 0.2942,
      "step": 102
    },
    {
      "epoch": 0.29261363636363635,
      "grad_norm": 0.35680520232446933,
      "learning_rate": 4.95826668602425e-06,
      "loss": 0.3148,
      "step": 103
    },
    {
      "epoch": 0.29545454545454547,
      "grad_norm": 0.40848691247631896,
      "learning_rate": 4.956844127168124e-06,
      "loss": 0.3475,
      "step": 104
    },
    {
      "epoch": 0.29829545454545453,
      "grad_norm": 0.3675982469780909,
      "learning_rate": 4.955397938617304e-06,
      "loss": 0.3223,
      "step": 105
    },
    {
      "epoch": 0.30113636363636365,
      "grad_norm": 0.32048567892217283,
      "learning_rate": 4.953928134281093e-06,
      "loss": 0.316,
      "step": 106
    },
    {
      "epoch": 0.3039772727272727,
      "grad_norm": 0.3107707861319827,
      "learning_rate": 4.952434728295931e-06,
      "loss": 0.3031,
      "step": 107
    },
    {
      "epoch": 0.3068181818181818,
      "grad_norm": 0.38878643961644715,
      "learning_rate": 4.950917735025256e-06,
      "loss": 0.3355,
      "step": 108
    },
    {
      "epoch": 0.3096590909090909,
      "grad_norm": 0.3735768679081344,
      "learning_rate": 4.949377169059365e-06,
      "loss": 0.3008,
      "step": 109
    },
    {
      "epoch": 0.3125,
      "grad_norm": 0.3808439931809935,
      "learning_rate": 4.947813045215277e-06,
      "loss": 0.3002,
      "step": 110
    },
    {
      "epoch": 0.3153409090909091,
      "grad_norm": 0.3256292929675435,
      "learning_rate": 4.946225378536587e-06,
      "loss": 0.2988,
      "step": 111
    },
    {
      "epoch": 0.3181818181818182,
      "grad_norm": 0.35150877205189135,
      "learning_rate": 4.944614184293321e-06,
      "loss": 0.2993,
      "step": 112
    },
    {
      "epoch": 0.3210227272727273,
      "grad_norm": 0.37494589367664166,
      "learning_rate": 4.942979477981797e-06,
      "loss": 0.3129,
      "step": 113
    },
    {
      "epoch": 0.32386363636363635,
      "grad_norm": 0.3506621432286222,
      "learning_rate": 4.941321275324463e-06,
      "loss": 0.3015,
      "step": 114
    },
    {
      "epoch": 0.32670454545454547,
      "grad_norm": 0.30804865814837706,
      "learning_rate": 4.939639592269757e-06,
      "loss": 0.2709,
      "step": 115
    },
    {
      "epoch": 0.32954545454545453,
      "grad_norm": 0.4334401140811609,
      "learning_rate": 4.9379344449919465e-06,
      "loss": 0.3211,
      "step": 116
    },
    {
      "epoch": 0.33238636363636365,
      "grad_norm": 0.4113976286859321,
      "learning_rate": 4.936205849890977e-06,
      "loss": 0.3486,
      "step": 117
    },
    {
      "epoch": 0.3352272727272727,
      "grad_norm": 0.38143204868428404,
      "learning_rate": 4.934453823592313e-06,
      "loss": 0.3248,
      "step": 118
    },
    {
      "epoch": 0.3380681818181818,
      "grad_norm": 0.3935231496732602,
      "learning_rate": 4.9326783829467795e-06,
      "loss": 0.3369,
      "step": 119
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 0.3715854335519974,
      "learning_rate": 4.930879545030395e-06,
      "loss": 0.3162,
      "step": 120
    },
    {
      "epoch": 0.34375,
      "grad_norm": 0.2987173708346766,
      "learning_rate": 4.929057327144213e-06,
      "loss": 0.2704,
      "step": 121
    },
    {
      "epoch": 0.3465909090909091,
      "grad_norm": 0.3505876441509565,
      "learning_rate": 4.927211746814155e-06,
      "loss": 0.2897,
      "step": 122
    },
    {
      "epoch": 0.3494318181818182,
      "grad_norm": 0.3808807666150658,
      "learning_rate": 4.925342821790834e-06,
      "loss": 0.298,
      "step": 123
    },
    {
      "epoch": 0.3522727272727273,
      "grad_norm": 0.40265933198110954,
      "learning_rate": 4.923450570049398e-06,
      "loss": 0.3063,
      "step": 124
    },
    {
      "epoch": 0.35511363636363635,
      "grad_norm": 0.329984359578131,
      "learning_rate": 4.921535009789344e-06,
      "loss": 0.281,
      "step": 125
    },
    {
      "epoch": 0.35795454545454547,
      "grad_norm": 0.3327810259029677,
      "learning_rate": 4.91959615943435e-06,
      "loss": 0.3035,
      "step": 126
    },
    {
      "epoch": 0.36079545454545453,
      "grad_norm": 0.33832701513333335,
      "learning_rate": 4.917634037632095e-06,
      "loss": 0.2817,
      "step": 127
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.3446767418817894,
      "learning_rate": 4.915648663254081e-06,
      "loss": 0.3275,
      "step": 128
    },
    {
      "epoch": 0.3664772727272727,
      "grad_norm": 0.4067285176470478,
      "learning_rate": 4.9136400553954526e-06,
      "loss": 0.2644,
      "step": 129
    },
    {
      "epoch": 0.3693181818181818,
      "grad_norm": 0.32647438056937467,
      "learning_rate": 4.91160823337481e-06,
      "loss": 0.3012,
      "step": 130
    },
    {
      "epoch": 0.3721590909090909,
      "grad_norm": 0.2641653305047082,
      "learning_rate": 4.909553216734024e-06,
      "loss": 0.2551,
      "step": 131
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.3587439503975781,
      "learning_rate": 4.907475025238051e-06,
      "loss": 0.3429,
      "step": 132
    },
    {
      "epoch": 0.3778409090909091,
      "grad_norm": 0.39094595293189244,
      "learning_rate": 4.905373678874741e-06,
      "loss": 0.3428,
      "step": 133
    },
    {
      "epoch": 0.3806818181818182,
      "grad_norm": 0.33295666810345625,
      "learning_rate": 4.903249197854645e-06,
      "loss": 0.3024,
      "step": 134
    },
    {
      "epoch": 0.3835227272727273,
      "grad_norm": 0.4067834961803898,
      "learning_rate": 4.90110160261082e-06,
      "loss": 0.388,
      "step": 135
    },
    {
      "epoch": 0.38636363636363635,
      "grad_norm": 0.3041105753158812,
      "learning_rate": 4.898930913798635e-06,
      "loss": 0.2791,
      "step": 136
    },
    {
      "epoch": 0.38920454545454547,
      "grad_norm": 0.3854716077313248,
      "learning_rate": 4.89673715229557e-06,
      "loss": 0.3516,
      "step": 137
    },
    {
      "epoch": 0.39204545454545453,
      "grad_norm": 0.41029172649451373,
      "learning_rate": 4.894520339201014e-06,
      "loss": 0.3221,
      "step": 138
    },
    {
      "epoch": 0.39488636363636365,
      "grad_norm": 0.31953693308642406,
      "learning_rate": 4.892280495836068e-06,
      "loss": 0.3268,
      "step": 139
    },
    {
      "epoch": 0.3977272727272727,
      "grad_norm": 0.4798811586379984,
      "learning_rate": 4.890017643743334e-06,
      "loss": 0.3115,
      "step": 140
    },
    {
      "epoch": 0.4005681818181818,
      "grad_norm": 0.3603031050892597,
      "learning_rate": 4.887731804686707e-06,
      "loss": 0.2844,
      "step": 141
    },
    {
      "epoch": 0.4034090909090909,
      "grad_norm": 0.40465606169589835,
      "learning_rate": 4.885423000651174e-06,
      "loss": 0.3573,
      "step": 142
    },
    {
      "epoch": 0.40625,
      "grad_norm": 0.3643063680731307,
      "learning_rate": 4.883091253842592e-06,
      "loss": 0.2861,
      "step": 143
    },
    {
      "epoch": 0.4090909090909091,
      "grad_norm": 0.2855806950882976,
      "learning_rate": 4.8807365866874825e-06,
      "loss": 0.2856,
      "step": 144
    },
    {
      "epoch": 0.4119318181818182,
      "grad_norm": 0.43700846878534866,
      "learning_rate": 4.878359021832812e-06,
      "loss": 0.3025,
      "step": 145
    },
    {
      "epoch": 0.4147727272727273,
      "grad_norm": 0.3691328488500052,
      "learning_rate": 4.875958582145775e-06,
      "loss": 0.3516,
      "step": 146
    },
    {
      "epoch": 0.41761363636363635,
      "grad_norm": 0.3602263970719629,
      "learning_rate": 4.873535290713571e-06,
      "loss": 0.3276,
      "step": 147
    },
    {
      "epoch": 0.42045454545454547,
      "grad_norm": 0.2873285630204768,
      "learning_rate": 4.871089170843192e-06,
      "loss": 0.272,
      "step": 148
    },
    {
      "epoch": 0.42329545454545453,
      "grad_norm": 0.3275589221978115,
      "learning_rate": 4.868620246061185e-06,
      "loss": 0.3127,
      "step": 149
    },
    {
      "epoch": 0.42613636363636365,
      "grad_norm": 0.3595600686315243,
      "learning_rate": 4.866128540113436e-06,
      "loss": 0.293,
      "step": 150
    },
    {
      "epoch": 0.4289772727272727,
      "grad_norm": 0.39412366891247624,
      "learning_rate": 4.863614076964937e-06,
      "loss": 0.3105,
      "step": 151
    },
    {
      "epoch": 0.4318181818181818,
      "grad_norm": 0.2967856642106585,
      "learning_rate": 4.8610768807995575e-06,
      "loss": 0.2488,
      "step": 152
    },
    {
      "epoch": 0.4346590909090909,
      "grad_norm": 0.3353960107255814,
      "learning_rate": 4.85851697601981e-06,
      "loss": 0.31,
      "step": 153
    },
    {
      "epoch": 0.4375,
      "grad_norm": 0.3293934153604414,
      "learning_rate": 4.855934387246619e-06,
      "loss": 0.31,
      "step": 154
    },
    {
      "epoch": 0.4403409090909091,
      "grad_norm": 0.4020477745824599,
      "learning_rate": 4.853329139319076e-06,
      "loss": 0.3607,
      "step": 155
    },
    {
      "epoch": 0.4431818181818182,
      "grad_norm": 0.40194438779646285,
      "learning_rate": 4.850701257294212e-06,
      "loss": 0.3194,
      "step": 156
    },
    {
      "epoch": 0.4460227272727273,
      "grad_norm": 0.35880107189234606,
      "learning_rate": 4.848050766446746e-06,
      "loss": 0.3257,
      "step": 157
    },
    {
      "epoch": 0.44886363636363635,
      "grad_norm": 0.3225921590602741,
      "learning_rate": 4.84537769226885e-06,
      "loss": 0.2865,
      "step": 158
    },
    {
      "epoch": 0.45170454545454547,
      "grad_norm": 0.43105913904133064,
      "learning_rate": 4.842682060469899e-06,
      "loss": 0.2917,
      "step": 159
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.3984098156673031,
      "learning_rate": 4.839963896976223e-06,
      "loss": 0.3137,
      "step": 160
    },
    {
      "epoch": 0.45738636363636365,
      "grad_norm": 0.34203541957482897,
      "learning_rate": 4.837223227930864e-06,
      "loss": 0.3021,
      "step": 161
    },
    {
      "epoch": 0.4602272727272727,
      "grad_norm": 0.3410914811625815,
      "learning_rate": 4.834460079693317e-06,
      "loss": 0.3197,
      "step": 162
    },
    {
      "epoch": 0.4630681818181818,
      "grad_norm": 0.3668120756523038,
      "learning_rate": 4.831674478839281e-06,
      "loss": 0.3242,
      "step": 163
    },
    {
      "epoch": 0.4659090909090909,
      "grad_norm": 0.34128762447014865,
      "learning_rate": 4.828866452160402e-06,
      "loss": 0.2626,
      "step": 164
    },
    {
      "epoch": 0.46875,
      "grad_norm": 0.34134817423813496,
      "learning_rate": 4.826036026664014e-06,
      "loss": 0.2771,
      "step": 165
    },
    {
      "epoch": 0.4715909090909091,
      "grad_norm": 0.3270025125687817,
      "learning_rate": 4.823183229572883e-06,
      "loss": 0.2921,
      "step": 166
    },
    {
      "epoch": 0.4744318181818182,
      "grad_norm": 0.3701876487404051,
      "learning_rate": 4.820308088324942e-06,
      "loss": 0.3315,
      "step": 167
    },
    {
      "epoch": 0.4772727272727273,
      "grad_norm": 0.4223541290676315,
      "learning_rate": 4.8174106305730284e-06,
      "loss": 0.3458,
      "step": 168
    },
    {
      "epoch": 0.48011363636363635,
      "grad_norm": 0.36826807946452467,
      "learning_rate": 4.814490884184615e-06,
      "loss": 0.3098,
      "step": 169
    },
    {
      "epoch": 0.48295454545454547,
      "grad_norm": 0.34247450811498126,
      "learning_rate": 4.811548877241549e-06,
      "loss": 0.2794,
      "step": 170
    },
    {
      "epoch": 0.48579545454545453,
      "grad_norm": 0.36931394013248037,
      "learning_rate": 4.808584638039774e-06,
      "loss": 0.3075,
      "step": 171
    },
    {
      "epoch": 0.48863636363636365,
      "grad_norm": 0.38654212773141833,
      "learning_rate": 4.805598195089063e-06,
      "loss": 0.2957,
      "step": 172
    },
    {
      "epoch": 0.4914772727272727,
      "grad_norm": 0.327791247654709,
      "learning_rate": 4.802589577112742e-06,
      "loss": 0.317,
      "step": 173
    },
    {
      "epoch": 0.4943181818181818,
      "grad_norm": 0.4180368575468772,
      "learning_rate": 4.7995588130474145e-06,
      "loss": 0.2873,
      "step": 174
    },
    {
      "epoch": 0.4971590909090909,
      "grad_norm": 0.41772200012858535,
      "learning_rate": 4.7965059320426825e-06,
      "loss": 0.3365,
      "step": 175
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3622810863279747,
      "learning_rate": 4.7934309634608676e-06,
      "loss": 0.3406,
      "step": 176
    },
    {
      "epoch": 0.5028409090909091,
      "grad_norm": 0.33039829085718986,
      "learning_rate": 4.790333936876727e-06,
      "loss": 0.2582,
      "step": 177
    },
    {
      "epoch": 0.5056818181818182,
      "grad_norm": 0.2963847161562058,
      "learning_rate": 4.78721488207717e-06,
      "loss": 0.2621,
      "step": 178
    },
    {
      "epoch": 0.5085227272727273,
      "grad_norm": 0.3688579036529526,
      "learning_rate": 4.7840738290609714e-06,
      "loss": 0.3106,
      "step": 179
    },
    {
      "epoch": 0.5113636363636364,
      "grad_norm": 0.3882009236138182,
      "learning_rate": 4.78091080803848e-06,
      "loss": 0.2615,
      "step": 180
    },
    {
      "epoch": 0.5142045454545454,
      "grad_norm": 0.35367280178437593,
      "learning_rate": 4.777725849431336e-06,
      "loss": 0.3045,
      "step": 181
    },
    {
      "epoch": 0.5170454545454546,
      "grad_norm": 0.3874603305325755,
      "learning_rate": 4.774518983872169e-06,
      "loss": 0.3151,
      "step": 182
    },
    {
      "epoch": 0.5198863636363636,
      "grad_norm": 0.3089601400335368,
      "learning_rate": 4.77129024220431e-06,
      "loss": 0.2565,
      "step": 183
    },
    {
      "epoch": 0.5227272727272727,
      "grad_norm": 0.3741939570187776,
      "learning_rate": 4.7680396554814886e-06,
      "loss": 0.2824,
      "step": 184
    },
    {
      "epoch": 0.5255681818181818,
      "grad_norm": 0.3684238808190501,
      "learning_rate": 4.764767254967544e-06,
      "loss": 0.2717,
      "step": 185
    },
    {
      "epoch": 0.5284090909090909,
      "grad_norm": 0.34181925499552346,
      "learning_rate": 4.761473072136114e-06,
      "loss": 0.2984,
      "step": 186
    },
    {
      "epoch": 0.53125,
      "grad_norm": 0.44267647661167453,
      "learning_rate": 4.758157138670337e-06,
      "loss": 0.3472,
      "step": 187
    },
    {
      "epoch": 0.5340909090909091,
      "grad_norm": 0.3887831736377981,
      "learning_rate": 4.75481948646255e-06,
      "loss": 0.3111,
      "step": 188
    },
    {
      "epoch": 0.5369318181818182,
      "grad_norm": 0.3683856304101638,
      "learning_rate": 4.751460147613973e-06,
      "loss": 0.3146,
      "step": 189
    },
    {
      "epoch": 0.5397727272727273,
      "grad_norm": 0.38527593119976,
      "learning_rate": 4.748079154434413e-06,
      "loss": 0.3314,
      "step": 190
    },
    {
      "epoch": 0.5426136363636364,
      "grad_norm": 0.4031772051747187,
      "learning_rate": 4.744676539441941e-06,
      "loss": 0.315,
      "step": 191
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.3353722780310112,
      "learning_rate": 4.741252335362588e-06,
      "loss": 0.269,
      "step": 192
    },
    {
      "epoch": 0.5482954545454546,
      "grad_norm": 0.3394618273632171,
      "learning_rate": 4.737806575130024e-06,
      "loss": 0.2745,
      "step": 193
    },
    {
      "epoch": 0.5511363636363636,
      "grad_norm": 0.4450532210463518,
      "learning_rate": 4.734339291885246e-06,
      "loss": 0.3188,
      "step": 194
    },
    {
      "epoch": 0.5539772727272727,
      "grad_norm": 0.397975066441739,
      "learning_rate": 4.7308505189762565e-06,
      "loss": 0.2985,
      "step": 195
    },
    {
      "epoch": 0.5568181818181818,
      "grad_norm": 0.3440535351319966,
      "learning_rate": 4.727340289957744e-06,
      "loss": 0.2809,
      "step": 196
    },
    {
      "epoch": 0.5596590909090909,
      "grad_norm": 0.6446149440778554,
      "learning_rate": 4.723808638590759e-06,
      "loss": 0.3218,
      "step": 197
    },
    {
      "epoch": 0.5625,
      "grad_norm": 0.37606508969708213,
      "learning_rate": 4.720255598842392e-06,
      "loss": 0.3176,
      "step": 198
    },
    {
      "epoch": 0.5653409090909091,
      "grad_norm": 0.43147254520622674,
      "learning_rate": 4.716681204885442e-06,
      "loss": 0.3268,
      "step": 199
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.41993041372097106,
      "learning_rate": 4.713085491098093e-06,
      "loss": 0.2804,
      "step": 200
    },
    {
      "epoch": 0.5710227272727273,
      "grad_norm": 0.48960282010679945,
      "learning_rate": 4.70946849206358e-06,
      "loss": 0.3996,
      "step": 201
    },
    {
      "epoch": 0.5738636363636364,
      "grad_norm": 0.3375570582028718,
      "learning_rate": 4.705830242569859e-06,
      "loss": 0.2914,
      "step": 202
    },
    {
      "epoch": 0.5767045454545454,
      "grad_norm": 0.33067898836626264,
      "learning_rate": 4.70217077760927e-06,
      "loss": 0.2717,
      "step": 203
    },
    {
      "epoch": 0.5795454545454546,
      "grad_norm": 0.3919628586280393,
      "learning_rate": 4.6984901323781996e-06,
      "loss": 0.2758,
      "step": 204
    },
    {
      "epoch": 0.5823863636363636,
      "grad_norm": 0.37621132131624546,
      "learning_rate": 4.6947883422767475e-06,
      "loss": 0.2927,
      "step": 205
    },
    {
      "epoch": 0.5852272727272727,
      "grad_norm": 0.3588621280506994,
      "learning_rate": 4.69106544290838e-06,
      "loss": 0.3202,
      "step": 206
    },
    {
      "epoch": 0.5880681818181818,
      "grad_norm": 0.36135048731331515,
      "learning_rate": 4.687321470079593e-06,
      "loss": 0.3075,
      "step": 207
    },
    {
      "epoch": 0.5909090909090909,
      "grad_norm": 0.3804960320633388,
      "learning_rate": 4.683556459799562e-06,
      "loss": 0.304,
      "step": 208
    },
    {
      "epoch": 0.59375,
      "grad_norm": 0.32482777456644224,
      "learning_rate": 4.679770448279801e-06,
      "loss": 0.2333,
      "step": 209
    },
    {
      "epoch": 0.5965909090909091,
      "grad_norm": 0.38423666885394503,
      "learning_rate": 4.6759634719338106e-06,
      "loss": 0.3079,
      "step": 210
    },
    {
      "epoch": 0.5994318181818182,
      "grad_norm": 0.3584077009643052,
      "learning_rate": 4.672135567376729e-06,
      "loss": 0.3078,
      "step": 211
    },
    {
      "epoch": 0.6022727272727273,
      "grad_norm": 0.43190228684358967,
      "learning_rate": 4.668286771424982e-06,
      "loss": 0.3693,
      "step": 212
    },
    {
      "epoch": 0.6051136363636364,
      "grad_norm": 0.3335333217535499,
      "learning_rate": 4.664417121095925e-06,
      "loss": 0.2978,
      "step": 213
    },
    {
      "epoch": 0.6079545454545454,
      "grad_norm": 0.3343126694937098,
      "learning_rate": 4.660526653607489e-06,
      "loss": 0.2654,
      "step": 214
    },
    {
      "epoch": 0.6107954545454546,
      "grad_norm": 0.400588578067547,
      "learning_rate": 4.656615406377824e-06,
      "loss": 0.3541,
      "step": 215
    },
    {
      "epoch": 0.6136363636363636,
      "grad_norm": 0.28366454469863744,
      "learning_rate": 4.652683417024933e-06,
      "loss": 0.2595,
      "step": 216
    },
    {
      "epoch": 0.6164772727272727,
      "grad_norm": 0.3333388085745537,
      "learning_rate": 4.648730723366321e-06,
      "loss": 0.3034,
      "step": 217
    },
    {
      "epoch": 0.6193181818181818,
      "grad_norm": 0.3802324883963107,
      "learning_rate": 4.644757363418622e-06,
      "loss": 0.3149,
      "step": 218
    },
    {
      "epoch": 0.6221590909090909,
      "grad_norm": 0.3323209944938239,
      "learning_rate": 4.640763375397235e-06,
      "loss": 0.2831,
      "step": 219
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.3816473948946037,
      "learning_rate": 4.636748797715961e-06,
      "loss": 0.2901,
      "step": 220
    },
    {
      "epoch": 0.6278409090909091,
      "grad_norm": 0.45087508944423654,
      "learning_rate": 4.632713668986628e-06,
      "loss": 0.2668,
      "step": 221
    },
    {
      "epoch": 0.6306818181818182,
      "grad_norm": 0.3277834281020941,
      "learning_rate": 4.628658028018723e-06,
      "loss": 0.3115,
      "step": 222
    },
    {
      "epoch": 0.6335227272727273,
      "grad_norm": 0.4149700033604779,
      "learning_rate": 4.624581913819019e-06,
      "loss": 0.3049,
      "step": 223
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 0.2986911926260575,
      "learning_rate": 4.6204853655911945e-06,
      "loss": 0.2828,
      "step": 224
    },
    {
      "epoch": 0.6392045454545454,
      "grad_norm": 0.38662077935688544,
      "learning_rate": 4.6163684227354656e-06,
      "loss": 0.3019,
      "step": 225
    },
    {
      "epoch": 0.6420454545454546,
      "grad_norm": 0.3670137115048512,
      "learning_rate": 4.612231124848199e-06,
      "loss": 0.2998,
      "step": 226
    },
    {
      "epoch": 0.6448863636363636,
      "grad_norm": 0.3820920011764151,
      "learning_rate": 4.608073511721534e-06,
      "loss": 0.3627,
      "step": 227
    },
    {
      "epoch": 0.6477272727272727,
      "grad_norm": 0.26469955866368194,
      "learning_rate": 4.6038956233430034e-06,
      "loss": 0.2419,
      "step": 228
    },
    {
      "epoch": 0.6505681818181818,
      "grad_norm": 0.32240469660709375,
      "learning_rate": 4.59969749989514e-06,
      "loss": 0.2692,
      "step": 229
    },
    {
      "epoch": 0.6534090909090909,
      "grad_norm": 0.3896277142098736,
      "learning_rate": 4.5954791817551e-06,
      "loss": 0.2789,
      "step": 230
    },
    {
      "epoch": 0.65625,
      "grad_norm": 0.3510490299412409,
      "learning_rate": 4.591240709494269e-06,
      "loss": 0.281,
      "step": 231
    },
    {
      "epoch": 0.6590909090909091,
      "grad_norm": 0.3636438474583087,
      "learning_rate": 4.586982123877871e-06,
      "loss": 0.2998,
      "step": 232
    },
    {
      "epoch": 0.6619318181818182,
      "grad_norm": 0.3274578399993675,
      "learning_rate": 4.582703465864582e-06,
      "loss": 0.2758,
      "step": 233
    },
    {
      "epoch": 0.6647727272727273,
      "grad_norm": 0.3205713499503409,
      "learning_rate": 4.5784047766061305e-06,
      "loss": 0.2716,
      "step": 234
    },
    {
      "epoch": 0.6676136363636364,
      "grad_norm": 0.47159005981022434,
      "learning_rate": 4.574086097446903e-06,
      "loss": 0.3236,
      "step": 235
    },
    {
      "epoch": 0.6704545454545454,
      "grad_norm": 0.3617567220761258,
      "learning_rate": 4.569747469923547e-06,
      "loss": 0.2863,
      "step": 236
    },
    {
      "epoch": 0.6732954545454546,
      "grad_norm": 0.32166940611651096,
      "learning_rate": 4.565388935764572e-06,
      "loss": 0.31,
      "step": 237
    },
    {
      "epoch": 0.6761363636363636,
      "grad_norm": 0.3982166865116622,
      "learning_rate": 4.56101053688995e-06,
      "loss": 0.2874,
      "step": 238
    },
    {
      "epoch": 0.6789772727272727,
      "grad_norm": 0.4339388465917976,
      "learning_rate": 4.5566123154107055e-06,
      "loss": 0.3374,
      "step": 239
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.36030799942916975,
      "learning_rate": 4.552194313628518e-06,
      "loss": 0.2668,
      "step": 240
    },
    {
      "epoch": 0.6846590909090909,
      "grad_norm": 0.3940718141510353,
      "learning_rate": 4.547756574035311e-06,
      "loss": 0.3277,
      "step": 241
    },
    {
      "epoch": 0.6875,
      "grad_norm": 0.4326472723953054,
      "learning_rate": 4.5432991393128446e-06,
      "loss": 0.3227,
      "step": 242
    },
    {
      "epoch": 0.6903409090909091,
      "grad_norm": 0.41998189617141085,
      "learning_rate": 4.538822052332306e-06,
      "loss": 0.339,
      "step": 243
    },
    {
      "epoch": 0.6931818181818182,
      "grad_norm": 0.36510653915186314,
      "learning_rate": 4.534325356153892e-06,
      "loss": 0.2637,
      "step": 244
    },
    {
      "epoch": 0.6960227272727273,
      "grad_norm": 0.4748073641254545,
      "learning_rate": 4.529809094026404e-06,
      "loss": 0.3226,
      "step": 245
    },
    {
      "epoch": 0.6988636363636364,
      "grad_norm": 0.3848777680236735,
      "learning_rate": 4.525273309386825e-06,
      "loss": 0.3401,
      "step": 246
    },
    {
      "epoch": 0.7017045454545454,
      "grad_norm": 0.286675785535149,
      "learning_rate": 4.5207180458599e-06,
      "loss": 0.2495,
      "step": 247
    },
    {
      "epoch": 0.7045454545454546,
      "grad_norm": 0.3770143744991594,
      "learning_rate": 4.516143347257726e-06,
      "loss": 0.2923,
      "step": 248
    },
    {
      "epoch": 0.7073863636363636,
      "grad_norm": 0.37240976329747977,
      "learning_rate": 4.511549257579322e-06,
      "loss": 0.2968,
      "step": 249
    },
    {
      "epoch": 0.7102272727272727,
      "grad_norm": 0.53790018713925,
      "learning_rate": 4.506935821010206e-06,
      "loss": 0.298,
      "step": 250
    },
    {
      "epoch": 0.7130681818181818,
      "grad_norm": 0.3896643010491094,
      "learning_rate": 4.502303081921978e-06,
      "loss": 0.3125,
      "step": 251
    },
    {
      "epoch": 0.7159090909090909,
      "grad_norm": 0.32770126981260167,
      "learning_rate": 4.497651084871883e-06,
      "loss": 0.2781,
      "step": 252
    },
    {
      "epoch": 0.71875,
      "grad_norm": 0.3541924637393212,
      "learning_rate": 4.492979874602389e-06,
      "loss": 0.3023,
      "step": 253
    },
    {
      "epoch": 0.7215909090909091,
      "grad_norm": 0.3735099253437524,
      "learning_rate": 4.4882894960407566e-06,
      "loss": 0.3225,
      "step": 254
    },
    {
      "epoch": 0.7244318181818182,
      "grad_norm": 0.3853359485269271,
      "learning_rate": 4.483579994298602e-06,
      "loss": 0.3119,
      "step": 255
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.4232262055395998,
      "learning_rate": 4.478851414671469e-06,
      "loss": 0.2996,
      "step": 256
    },
    {
      "epoch": 0.7301136363636364,
      "grad_norm": 0.3403475343187684,
      "learning_rate": 4.474103802638389e-06,
      "loss": 0.2948,
      "step": 257
    },
    {
      "epoch": 0.7329545454545454,
      "grad_norm": 0.4197482437210073,
      "learning_rate": 4.469337203861447e-06,
      "loss": 0.2999,
      "step": 258
    },
    {
      "epoch": 0.7357954545454546,
      "grad_norm": 0.33941700168906186,
      "learning_rate": 4.464551664185339e-06,
      "loss": 0.2636,
      "step": 259
    },
    {
      "epoch": 0.7386363636363636,
      "grad_norm": 0.35067662494508334,
      "learning_rate": 4.459747229636933e-06,
      "loss": 0.3153,
      "step": 260
    },
    {
      "epoch": 0.7414772727272727,
      "grad_norm": 0.33432839847763335,
      "learning_rate": 4.454923946424827e-06,
      "loss": 0.2646,
      "step": 261
    },
    {
      "epoch": 0.7443181818181818,
      "grad_norm": 0.3486384565640427,
      "learning_rate": 4.450081860938904e-06,
      "loss": 0.3026,
      "step": 262
    },
    {
      "epoch": 0.7471590909090909,
      "grad_norm": 0.3647193452879592,
      "learning_rate": 4.4452210197498845e-06,
      "loss": 0.3208,
      "step": 263
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3621939393169193,
      "learning_rate": 4.440341469608879e-06,
      "loss": 0.3042,
      "step": 264
    },
    {
      "epoch": 0.7528409090909091,
      "grad_norm": 0.2856803312521231,
      "learning_rate": 4.43544325744694e-06,
      "loss": 0.2548,
      "step": 265
    },
    {
      "epoch": 0.7556818181818182,
      "grad_norm": 0.41636147550676134,
      "learning_rate": 4.4305264303746085e-06,
      "loss": 0.2743,
      "step": 266
    },
    {
      "epoch": 0.7585227272727273,
      "grad_norm": 0.3149004485762251,
      "learning_rate": 4.425591035681465e-06,
      "loss": 0.2768,
      "step": 267
    },
    {
      "epoch": 0.7613636363636364,
      "grad_norm": 0.39793987625802313,
      "learning_rate": 4.420637120835668e-06,
      "loss": 0.3055,
      "step": 268
    },
    {
      "epoch": 0.7642045454545454,
      "grad_norm": 0.4058178861459375,
      "learning_rate": 4.415664733483502e-06,
      "loss": 0.3168,
      "step": 269
    },
    {
      "epoch": 0.7670454545454546,
      "grad_norm": 0.3732878019312248,
      "learning_rate": 4.4106739214489195e-06,
      "loss": 0.2935,
      "step": 270
    },
    {
      "epoch": 0.7698863636363636,
      "grad_norm": 0.31801887671340195,
      "learning_rate": 4.405664732733079e-06,
      "loss": 0.2768,
      "step": 271
    },
    {
      "epoch": 0.7727272727272727,
      "grad_norm": 0.43538965465048635,
      "learning_rate": 4.400637215513883e-06,
      "loss": 0.2644,
      "step": 272
    },
    {
      "epoch": 0.7755681818181818,
      "grad_norm": 0.3619890541849985,
      "learning_rate": 4.395591418145519e-06,
      "loss": 0.2671,
      "step": 273
    },
    {
      "epoch": 0.7784090909090909,
      "grad_norm": 0.43611885998338823,
      "learning_rate": 4.390527389157989e-06,
      "loss": 0.3481,
      "step": 274
    },
    {
      "epoch": 0.78125,
      "grad_norm": 0.411038314679305,
      "learning_rate": 4.385445177256646e-06,
      "loss": 0.3283,
      "step": 275
    },
    {
      "epoch": 0.7840909090909091,
      "grad_norm": 0.4004177118376606,
      "learning_rate": 4.380344831321722e-06,
      "loss": 0.3421,
      "step": 276
    },
    {
      "epoch": 0.7869318181818182,
      "grad_norm": 0.31547958031028983,
      "learning_rate": 4.375226400407863e-06,
      "loss": 0.2541,
      "step": 277
    },
    {
      "epoch": 0.7897727272727273,
      "grad_norm": 0.36900762280860266,
      "learning_rate": 4.370089933743654e-06,
      "loss": 0.3097,
      "step": 278
    },
    {
      "epoch": 0.7926136363636364,
      "grad_norm": 0.4686945698836896,
      "learning_rate": 4.364935480731147e-06,
      "loss": 0.2918,
      "step": 279
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.3509902009735286,
      "learning_rate": 4.3597630909453835e-06,
      "loss": 0.2646,
      "step": 280
    },
    {
      "epoch": 0.7982954545454546,
      "grad_norm": 0.30875325359327965,
      "learning_rate": 4.35457281413392e-06,
      "loss": 0.2349,
      "step": 281
    },
    {
      "epoch": 0.8011363636363636,
      "grad_norm": 0.3943745151294021,
      "learning_rate": 4.349364700216346e-06,
      "loss": 0.2764,
      "step": 282
    },
    {
      "epoch": 0.8039772727272727,
      "grad_norm": 0.35558604531483284,
      "learning_rate": 4.344138799283814e-06,
      "loss": 0.2442,
      "step": 283
    },
    {
      "epoch": 0.8068181818181818,
      "grad_norm": 0.38278211936173095,
      "learning_rate": 4.338895161598541e-06,
      "loss": 0.3294,
      "step": 284
    },
    {
      "epoch": 0.8096590909090909,
      "grad_norm": 0.3932974746294698,
      "learning_rate": 4.333633837593341e-06,
      "loss": 0.2951,
      "step": 285
    },
    {
      "epoch": 0.8125,
      "grad_norm": 0.31762648150994005,
      "learning_rate": 4.328354877871131e-06,
      "loss": 0.2612,
      "step": 286
    },
    {
      "epoch": 0.8153409090909091,
      "grad_norm": 0.3405862130473983,
      "learning_rate": 4.323058333204446e-06,
      "loss": 0.2833,
      "step": 287
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 0.31883855959276614,
      "learning_rate": 4.317744254534954e-06,
      "loss": 0.2609,
      "step": 288
    },
    {
      "epoch": 0.8210227272727273,
      "grad_norm": 0.39913277335187336,
      "learning_rate": 4.312412692972959e-06,
      "loss": 0.2758,
      "step": 289
    },
    {
      "epoch": 0.8238636363636364,
      "grad_norm": 0.39064418227258985,
      "learning_rate": 4.307063699796918e-06,
      "loss": 0.2664,
      "step": 290
    },
    {
      "epoch": 0.8267045454545454,
      "grad_norm": 0.3126978473531618,
      "learning_rate": 4.301697326452942e-06,
      "loss": 0.2572,
      "step": 291
    },
    {
      "epoch": 0.8295454545454546,
      "grad_norm": 0.3641340405050646,
      "learning_rate": 4.296313624554303e-06,
      "loss": 0.286,
      "step": 292
    },
    {
      "epoch": 0.8323863636363636,
      "grad_norm": 0.4168496899263259,
      "learning_rate": 4.290912645880936e-06,
      "loss": 0.3035,
      "step": 293
    },
    {
      "epoch": 0.8352272727272727,
      "grad_norm": 0.3466683321895305,
      "learning_rate": 4.285494442378945e-06,
      "loss": 0.2853,
      "step": 294
    },
    {
      "epoch": 0.8380681818181818,
      "grad_norm": 0.3572355149237221,
      "learning_rate": 4.280059066160098e-06,
      "loss": 0.3021,
      "step": 295
    },
    {
      "epoch": 0.8409090909090909,
      "grad_norm": 0.36054386776426756,
      "learning_rate": 4.274606569501332e-06,
      "loss": 0.3041,
      "step": 296
    },
    {
      "epoch": 0.84375,
      "grad_norm": 0.3220431488871405,
      "learning_rate": 4.269137004844242e-06,
      "loss": 0.2542,
      "step": 297
    },
    {
      "epoch": 0.8465909090909091,
      "grad_norm": 0.4103185848899213,
      "learning_rate": 4.2636504247945865e-06,
      "loss": 0.2859,
      "step": 298
    },
    {
      "epoch": 0.8494318181818182,
      "grad_norm": 0.3444474167498623,
      "learning_rate": 4.258146882121772e-06,
      "loss": 0.3082,
      "step": 299
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 0.35145064032825696,
      "learning_rate": 4.252626429758354e-06,
      "loss": 0.2679,
      "step": 300
    },
    {
      "epoch": 0.8551136363636364,
      "grad_norm": 0.39931471518127176,
      "learning_rate": 4.247089120799521e-06,
      "loss": 0.3486,
      "step": 301
    },
    {
      "epoch": 0.8579545454545454,
      "grad_norm": 0.2860970262972797,
      "learning_rate": 4.241535008502587e-06,
      "loss": 0.23,
      "step": 302
    },
    {
      "epoch": 0.8607954545454546,
      "grad_norm": 0.4649020596412495,
      "learning_rate": 4.235964146286479e-06,
      "loss": 0.3252,
      "step": 303
    },
    {
      "epoch": 0.8636363636363636,
      "grad_norm": 0.3482820070437071,
      "learning_rate": 4.230376587731225e-06,
      "loss": 0.2854,
      "step": 304
    },
    {
      "epoch": 0.8664772727272727,
      "grad_norm": 0.3269410990279316,
      "learning_rate": 4.2247723865774336e-06,
      "loss": 0.2563,
      "step": 305
    },
    {
      "epoch": 0.8693181818181818,
      "grad_norm": 0.31949294830520775,
      "learning_rate": 4.219151596725782e-06,
      "loss": 0.2688,
      "step": 306
    },
    {
      "epoch": 0.8721590909090909,
      "grad_norm": 0.43502447469171057,
      "learning_rate": 4.213514272236499e-06,
      "loss": 0.3386,
      "step": 307
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.3797117211719601,
      "learning_rate": 4.207860467328835e-06,
      "loss": 0.2855,
      "step": 308
    },
    {
      "epoch": 0.8778409090909091,
      "grad_norm": 0.3799062699361923,
      "learning_rate": 4.202190236380552e-06,
      "loss": 0.2545,
      "step": 309
    },
    {
      "epoch": 0.8806818181818182,
      "grad_norm": 0.3360385792661154,
      "learning_rate": 4.196503633927398e-06,
      "loss": 0.2909,
      "step": 310
    },
    {
      "epoch": 0.8835227272727273,
      "grad_norm": 0.4188943106281552,
      "learning_rate": 4.190800714662576e-06,
      "loss": 0.3291,
      "step": 311
    },
    {
      "epoch": 0.8863636363636364,
      "grad_norm": 0.43183074366157487,
      "learning_rate": 4.185081533436226e-06,
      "loss": 0.3303,
      "step": 312
    },
    {
      "epoch": 0.8892045454545454,
      "grad_norm": 0.35087669084397133,
      "learning_rate": 4.179346145254892e-06,
      "loss": 0.3152,
      "step": 313
    },
    {
      "epoch": 0.8920454545454546,
      "grad_norm": 0.34360678080641915,
      "learning_rate": 4.173594605280995e-06,
      "loss": 0.2726,
      "step": 314
    },
    {
      "epoch": 0.8948863636363636,
      "grad_norm": 0.39638626020449463,
      "learning_rate": 4.1678269688323045e-06,
      "loss": 0.3369,
      "step": 315
    },
    {
      "epoch": 0.8977272727272727,
      "grad_norm": 0.3566510725505037,
      "learning_rate": 4.1620432913814026e-06,
      "loss": 0.2469,
      "step": 316
    },
    {
      "epoch": 0.9005681818181818,
      "grad_norm": 0.32842562735745623,
      "learning_rate": 4.156243628555151e-06,
      "loss": 0.3018,
      "step": 317
    },
    {
      "epoch": 0.9034090909090909,
      "grad_norm": 0.30679142774263857,
      "learning_rate": 4.150428036134161e-06,
      "loss": 0.2476,
      "step": 318
    },
    {
      "epoch": 0.90625,
      "grad_norm": 0.38736943533330265,
      "learning_rate": 4.144596570052249e-06,
      "loss": 0.279,
      "step": 319
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.3461038914128392,
      "learning_rate": 4.1387492863959076e-06,
      "loss": 0.262,
      "step": 320
    },
    {
      "epoch": 0.9119318181818182,
      "grad_norm": 0.3328949084965424,
      "learning_rate": 4.132886241403756e-06,
      "loss": 0.2841,
      "step": 321
    },
    {
      "epoch": 0.9147727272727273,
      "grad_norm": 0.3684252037786764,
      "learning_rate": 4.127007491466008e-06,
      "loss": 0.3032,
      "step": 322
    },
    {
      "epoch": 0.9176136363636364,
      "grad_norm": 0.44163540100916987,
      "learning_rate": 4.121113093123925e-06,
      "loss": 0.3164,
      "step": 323
    },
    {
      "epoch": 0.9204545454545454,
      "grad_norm": 0.49048074141989995,
      "learning_rate": 4.115203103069273e-06,
      "loss": 0.2623,
      "step": 324
    },
    {
      "epoch": 0.9232954545454546,
      "grad_norm": 0.34827477492871306,
      "learning_rate": 4.109277578143779e-06,
      "loss": 0.2717,
      "step": 325
    },
    {
      "epoch": 0.9261363636363636,
      "grad_norm": 0.3603610666299997,
      "learning_rate": 4.10333657533858e-06,
      "loss": 0.2783,
      "step": 326
    },
    {
      "epoch": 0.9289772727272727,
      "grad_norm": 0.3901080384564019,
      "learning_rate": 4.097380151793681e-06,
      "loss": 0.286,
      "step": 327
    },
    {
      "epoch": 0.9318181818181818,
      "grad_norm": 0.3598672604385207,
      "learning_rate": 4.0914083647974025e-06,
      "loss": 0.3375,
      "step": 328
    },
    {
      "epoch": 0.9346590909090909,
      "grad_norm": 0.32775404856254314,
      "learning_rate": 4.085421271785824e-06,
      "loss": 0.2904,
      "step": 329
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.29442351680114387,
      "learning_rate": 4.079418930342243e-06,
      "loss": 0.2629,
      "step": 330
    },
    {
      "epoch": 0.9403409090909091,
      "grad_norm": 0.4405796100351076,
      "learning_rate": 4.0734013981966125e-06,
      "loss": 0.3665,
      "step": 331
    },
    {
      "epoch": 0.9431818181818182,
      "grad_norm": 0.3334068109525356,
      "learning_rate": 4.0673687332249866e-06,
      "loss": 0.3079,
      "step": 332
    },
    {
      "epoch": 0.9460227272727273,
      "grad_norm": 0.32669985590044703,
      "learning_rate": 4.061320993448968e-06,
      "loss": 0.2904,
      "step": 333
    },
    {
      "epoch": 0.9488636363636364,
      "grad_norm": 0.3442146928076968,
      "learning_rate": 4.055258237035146e-06,
      "loss": 0.3146,
      "step": 334
    },
    {
      "epoch": 0.9517045454545454,
      "grad_norm": 0.4309052746676042,
      "learning_rate": 4.04918052229454e-06,
      "loss": 0.3446,
      "step": 335
    },
    {
      "epoch": 0.9545454545454546,
      "grad_norm": 0.35908542610160016,
      "learning_rate": 4.043087907682035e-06,
      "loss": 0.2534,
      "step": 336
    },
    {
      "epoch": 0.9573863636363636,
      "grad_norm": 0.3894188962377372,
      "learning_rate": 4.036980451795822e-06,
      "loss": 0.3262,
      "step": 337
    },
    {
      "epoch": 0.9602272727272727,
      "grad_norm": 0.37392061032103363,
      "learning_rate": 4.030858213376838e-06,
      "loss": 0.3158,
      "step": 338
    },
    {
      "epoch": 0.9630681818181818,
      "grad_norm": 0.3880624083667109,
      "learning_rate": 4.02472125130819e-06,
      "loss": 0.2908,
      "step": 339
    },
    {
      "epoch": 0.9659090909090909,
      "grad_norm": 0.4031632690009814,
      "learning_rate": 4.018569624614602e-06,
      "loss": 0.3279,
      "step": 340
    },
    {
      "epoch": 0.96875,
      "grad_norm": 0.38583919245780574,
      "learning_rate": 4.012403392461837e-06,
      "loss": 0.2657,
      "step": 341
    },
    {
      "epoch": 0.9715909090909091,
      "grad_norm": 0.4657940346556613,
      "learning_rate": 4.006222614156132e-06,
      "loss": 0.3176,
      "step": 342
    },
    {
      "epoch": 0.9744318181818182,
      "grad_norm": 0.28406132307929355,
      "learning_rate": 4.000027349143633e-06,
      "loss": 0.2261,
      "step": 343
    },
    {
      "epoch": 0.9772727272727273,
      "grad_norm": 0.3809447081607224,
      "learning_rate": 3.993817657009808e-06,
      "loss": 0.291,
      "step": 344
    },
    {
      "epoch": 0.9801136363636364,
      "grad_norm": 0.37276416289236974,
      "learning_rate": 3.987593597478894e-06,
      "loss": 0.3229,
      "step": 345
    },
    {
      "epoch": 0.9829545454545454,
      "grad_norm": 0.36213806018136363,
      "learning_rate": 3.981355230413305e-06,
      "loss": 0.2785,
      "step": 346
    },
    {
      "epoch": 0.9857954545454546,
      "grad_norm": 0.3774008729788378,
      "learning_rate": 3.975102615813068e-06,
      "loss": 0.272,
      "step": 347
    },
    {
      "epoch": 0.9886363636363636,
      "grad_norm": 0.3268419464248498,
      "learning_rate": 3.968835813815236e-06,
      "loss": 0.2468,
      "step": 348
    },
    {
      "epoch": 0.9914772727272727,
      "grad_norm": 0.401670934547313,
      "learning_rate": 3.962554884693323e-06,
      "loss": 0.2953,
      "step": 349
    },
    {
      "epoch": 0.9943181818181818,
      "grad_norm": 0.40169610324443583,
      "learning_rate": 3.956259888856708e-06,
      "loss": 0.2939,
      "step": 350
    },
    {
      "epoch": 0.9971590909090909,
      "grad_norm": 0.2891600640815435,
      "learning_rate": 3.949950886850069e-06,
      "loss": 0.2805,
      "step": 351
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3279215818041681,
      "learning_rate": 3.943627939352789e-06,
      "loss": 0.2598,
      "step": 352
    },
    {
      "epoch": 1.0028409090909092,
      "grad_norm": 0.3533913319935541,
      "learning_rate": 3.9372911071783805e-06,
      "loss": 0.2673,
      "step": 353
    },
    {
      "epoch": 1.0056818181818181,
      "grad_norm": 0.38416565428145066,
      "learning_rate": 3.930940451273898e-06,
      "loss": 0.2933,
      "step": 354
    },
    {
      "epoch": 1.0085227272727273,
      "grad_norm": 0.41220420942768127,
      "learning_rate": 3.924576032719349e-06,
      "loss": 0.2952,
      "step": 355
    },
    {
      "epoch": 1.0113636363636365,
      "grad_norm": 0.4096268298831798,
      "learning_rate": 3.9181979127271076e-06,
      "loss": 0.2575,
      "step": 356
    },
    {
      "epoch": 1.0142045454545454,
      "grad_norm": 0.45379315898269595,
      "learning_rate": 3.911806152641333e-06,
      "loss": 0.2717,
      "step": 357
    },
    {
      "epoch": 1.0170454545454546,
      "grad_norm": 0.32770827000624236,
      "learning_rate": 3.9054008139373675e-06,
      "loss": 0.266,
      "step": 358
    },
    {
      "epoch": 1.0198863636363635,
      "grad_norm": 0.2965104343367262,
      "learning_rate": 3.8989819582211555e-06,
      "loss": 0.2548,
      "step": 359
    },
    {
      "epoch": 1.0227272727272727,
      "grad_norm": 0.4054461782711258,
      "learning_rate": 3.892549647228642e-06,
      "loss": 0.3398,
      "step": 360
    },
    {
      "epoch": 1.0255681818181819,
      "grad_norm": 0.39022556113460055,
      "learning_rate": 3.886103942825189e-06,
      "loss": 0.2826,
      "step": 361
    },
    {
      "epoch": 1.0284090909090908,
      "grad_norm": 0.3374532413491821,
      "learning_rate": 3.879644907004972e-06,
      "loss": 0.2644,
      "step": 362
    },
    {
      "epoch": 1.03125,
      "grad_norm": 0.337718457045594,
      "learning_rate": 3.873172601890386e-06,
      "loss": 0.2545,
      "step": 363
    },
    {
      "epoch": 1.0340909090909092,
      "grad_norm": 0.3729922751436951,
      "learning_rate": 3.86668708973145e-06,
      "loss": 0.2951,
      "step": 364
    },
    {
      "epoch": 1.0369318181818181,
      "grad_norm": 0.31238473142978845,
      "learning_rate": 3.860188432905209e-06,
      "loss": 0.2537,
      "step": 365
    },
    {
      "epoch": 1.0397727272727273,
      "grad_norm": 0.37350151083829397,
      "learning_rate": 3.853676693915129e-06,
      "loss": 0.2614,
      "step": 366
    },
    {
      "epoch": 1.0426136363636365,
      "grad_norm": 0.3575634359205247,
      "learning_rate": 3.8471519353905025e-06,
      "loss": 0.2437,
      "step": 367
    },
    {
      "epoch": 1.0454545454545454,
      "grad_norm": 0.3537757819725644,
      "learning_rate": 3.840614220085837e-06,
      "loss": 0.2747,
      "step": 368
    },
    {
      "epoch": 1.0482954545454546,
      "grad_norm": 0.34943668518465093,
      "learning_rate": 3.834063610880263e-06,
      "loss": 0.2844,
      "step": 369
    },
    {
      "epoch": 1.0511363636363635,
      "grad_norm": 0.32611370130766987,
      "learning_rate": 3.827500170776921e-06,
      "loss": 0.2578,
      "step": 370
    },
    {
      "epoch": 1.0539772727272727,
      "grad_norm": 0.29743321074762596,
      "learning_rate": 3.8209239629023565e-06,
      "loss": 0.2361,
      "step": 371
    },
    {
      "epoch": 1.0568181818181819,
| "grad_norm": 0.3317934285561481, |
| "learning_rate": 3.814335050505916e-06, |
| "loss": 0.2645, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0596590909090908, |
| "grad_norm": 0.40729226447208133, |
| "learning_rate": 3.8077334969591377e-06, |
| "loss": 0.2929, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.0625, |
| "grad_norm": 0.35583822537265253, |
| "learning_rate": 3.801119365755138e-06, |
| "loss": 0.3036, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.0653409090909092, |
| "grad_norm": 0.47116931222172215, |
| "learning_rate": 3.7944927205080073e-06, |
| "loss": 0.2962, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0681818181818181, |
| "grad_norm": 0.4620500786524589, |
| "learning_rate": 3.7878536249521935e-06, |
| "loss": 0.3186, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.0710227272727273, |
| "grad_norm": 0.4310223125222202, |
| "learning_rate": 3.7812021429418886e-06, |
| "loss": 0.305, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.0738636363636365, |
| "grad_norm": 0.35860375920691345, |
| "learning_rate": 3.77453833845042e-06, |
| "loss": 0.3124, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0767045454545454, |
| "grad_norm": 0.40493909967111513, |
| "learning_rate": 3.7678622755696292e-06, |
| "loss": 0.2649, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0795454545454546, |
| "grad_norm": 0.3699164344949677, |
| "learning_rate": 3.7611740185092587e-06, |
| "loss": 0.3346, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0823863636363635, |
| "grad_norm": 0.5931781411606138, |
| "learning_rate": 3.754473631596332e-06, |
| "loss": 0.2729, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0852272727272727, |
| "grad_norm": 0.3122039055630976, |
| "learning_rate": 3.7477611792745384e-06, |
| "loss": 0.2816, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0880681818181819, |
| "grad_norm": 0.35273556528651445, |
| "learning_rate": 3.7410367261036094e-06, |
| "loss": 0.2765, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.361323677115818, |
| "learning_rate": 3.7343003367587e-06, |
| "loss": 0.2831, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.09375, |
| "grad_norm": 0.3776789429609578, |
| "learning_rate": 3.727552076029767e-06, |
| "loss": 0.3006, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0965909090909092, |
| "grad_norm": 0.4049848534001206, |
| "learning_rate": 3.7207920088209454e-06, |
| "loss": 0.3213, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.0994318181818181, |
| "grad_norm": 0.3541711790485223, |
| "learning_rate": 3.7140202001499214e-06, |
| "loss": 0.2902, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.1022727272727273, |
| "grad_norm": 0.3501668624619801, |
| "learning_rate": 3.707236715147312e-06, |
| "loss": 0.2809, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.1051136363636365, |
| "grad_norm": 0.38321621491594765, |
| "learning_rate": 3.700441619056035e-06, |
| "loss": 0.3163, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.1079545454545454, |
| "grad_norm": 0.4044457614031915, |
| "learning_rate": 3.693634977230681e-06, |
| "loss": 0.2862, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1107954545454546, |
| "grad_norm": 0.40951036198359486, |
| "learning_rate": 3.686816855136891e-06, |
| "loss": 0.28, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.1136363636363635, |
| "grad_norm": 0.29410719311103134, |
| "learning_rate": 3.679987318350717e-06, |
| "loss": 0.2299, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.1164772727272727, |
| "grad_norm": 0.3819079818809885, |
| "learning_rate": 3.673146432557998e-06, |
| "loss": 0.32, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.1193181818181819, |
| "grad_norm": 0.3628245440460693, |
| "learning_rate": 3.666294263553729e-06, |
| "loss": 0.2724, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.1221590909090908, |
| "grad_norm": 0.34928329721642853, |
| "learning_rate": 3.659430877241423e-06, |
| "loss": 0.248, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.442925717620733, |
| "learning_rate": 3.6525563396324826e-06, |
| "loss": 0.2942, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.1278409090909092, |
| "grad_norm": 0.4525323331322651, |
| "learning_rate": 3.6456707168455584e-06, |
| "loss": 0.3258, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.1306818181818181, |
| "grad_norm": 0.39153087965891287, |
| "learning_rate": 3.6387740751059218e-06, |
| "loss": 0.3072, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.1335227272727273, |
| "grad_norm": 0.3886102447660378, |
| "learning_rate": 3.6318664807448218e-06, |
| "loss": 0.3415, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 0.3642339507412296, |
| "learning_rate": 3.6249480001988463e-06, |
| "loss": 0.2691, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1392045454545454, |
| "grad_norm": 0.3380651370346197, |
| "learning_rate": 3.6180187000092894e-06, |
| "loss": 0.2791, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.1420454545454546, |
| "grad_norm": 0.3193544491508243, |
| "learning_rate": 3.611078646821505e-06, |
| "loss": 0.2326, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.1448863636363638, |
| "grad_norm": 0.30524333443799656, |
| "learning_rate": 3.6041279073842684e-06, |
| "loss": 0.2489, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.1477272727272727, |
| "grad_norm": 0.39683144371135337, |
| "learning_rate": 3.597166548549136e-06, |
| "loss": 0.2656, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.1505681818181819, |
| "grad_norm": 0.39975422805218463, |
| "learning_rate": 3.590194637269798e-06, |
| "loss": 0.2823, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.1534090909090908, |
| "grad_norm": 0.3781718281788356, |
| "learning_rate": 3.5832122406014398e-06, |
| "loss": 0.2545, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.15625, |
| "grad_norm": 0.39633632407524205, |
| "learning_rate": 3.576219425700092e-06, |
| "loss": 0.2656, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.1590909090909092, |
| "grad_norm": 0.503126670284463, |
| "learning_rate": 3.5692162598219877e-06, |
| "loss": 0.3106, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.1619318181818181, |
| "grad_norm": 0.3803993289484403, |
| "learning_rate": 3.5622028103229154e-06, |
| "loss": 0.2777, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.1647727272727273, |
| "grad_norm": 0.32896270814306483, |
| "learning_rate": 3.555179144657568e-06, |
| "loss": 0.2681, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1676136363636362, |
| "grad_norm": 0.45079184347220275, |
| "learning_rate": 3.548145330378901e-06, |
| "loss": 0.298, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.1704545454545454, |
| "grad_norm": 0.3409745563125651, |
| "learning_rate": 3.5411014351374735e-06, |
| "loss": 0.2829, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.1732954545454546, |
| "grad_norm": 0.3524051821269997, |
| "learning_rate": 3.5340475266808046e-06, |
| "loss": 0.2897, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.1761363636363638, |
| "grad_norm": 0.31354296956532873, |
| "learning_rate": 3.5269836728527194e-06, |
| "loss": 0.2512, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.1789772727272727, |
| "grad_norm": 0.2819333444591201, |
| "learning_rate": 3.5199099415926985e-06, |
| "loss": 0.2336, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1818181818181819, |
| "grad_norm": 0.3667062945127836, |
| "learning_rate": 3.5128264009352177e-06, |
| "loss": 0.2797, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.1846590909090908, |
| "grad_norm": 0.3717065816803459, |
| "learning_rate": 3.5057331190091036e-06, |
| "loss": 0.2625, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.1875, |
| "grad_norm": 0.34247191523071263, |
| "learning_rate": 3.4986301640368726e-06, |
| "loss": 0.2915, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1903409090909092, |
| "grad_norm": 0.28055115946196074, |
| "learning_rate": 3.4915176043340726e-06, |
| "loss": 0.2323, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.1931818181818181, |
| "grad_norm": 0.3512617852047132, |
| "learning_rate": 3.4843955083086315e-06, |
| "loss": 0.276, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1960227272727273, |
| "grad_norm": 0.3402592655838616, |
| "learning_rate": 3.477263944460196e-06, |
| "loss": 0.258, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.1988636363636362, |
| "grad_norm": 0.3440775197912379, |
| "learning_rate": 3.4701229813794744e-06, |
| "loss": 0.2686, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.2017045454545454, |
| "grad_norm": 0.32159613738142184, |
| "learning_rate": 3.4629726877475733e-06, |
| "loss": 0.2775, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.2045454545454546, |
| "grad_norm": 0.3405153808986929, |
| "learning_rate": 3.4558131323353423e-06, |
| "loss": 0.2947, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.2073863636363638, |
| "grad_norm": 0.4111884872726661, |
| "learning_rate": 3.4486443840027084e-06, |
| "loss": 0.2427, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.2102272727272727, |
| "grad_norm": 0.38692560086654654, |
| "learning_rate": 3.4414665116980167e-06, |
| "loss": 0.3084, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.2130681818181819, |
| "grad_norm": 0.4000466884476275, |
| "learning_rate": 3.4342795844573634e-06, |
| "loss": 0.2933, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.2159090909090908, |
| "grad_norm": 0.3605831840618787, |
| "learning_rate": 3.427083671403937e-06, |
| "loss": 0.2892, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.21875, |
| "grad_norm": 0.3225439729294941, |
| "learning_rate": 3.4198788417473485e-06, |
| "loss": 0.2579, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.2215909090909092, |
| "grad_norm": 0.3869565428112392, |
| "learning_rate": 3.41266516478297e-06, |
| "loss": 0.3349, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.2244318181818181, |
| "grad_norm": 0.3790938940448294, |
| "learning_rate": 3.4054427098912636e-06, |
| "loss": 0.2836, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.2272727272727273, |
| "grad_norm": 0.33485764653621325, |
| "learning_rate": 3.3982115465371185e-06, |
| "loss": 0.2465, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.2301136363636362, |
| "grad_norm": 0.3421027182025914, |
| "learning_rate": 3.390971744269181e-06, |
| "loss": 0.2436, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.2329545454545454, |
| "grad_norm": 0.3343569283936874, |
| "learning_rate": 3.3837233727191856e-06, |
| "loss": 0.2533, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.2357954545454546, |
| "grad_norm": 0.3490337805677148, |
| "learning_rate": 3.3764665016012842e-06, |
| "loss": 0.2401, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.2386363636363638, |
| "grad_norm": 0.3116736362955648, |
| "learning_rate": 3.3692012007113776e-06, |
| "loss": 0.2482, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.2414772727272727, |
| "grad_norm": 0.3963218536576595, |
| "learning_rate": 3.3619275399264444e-06, |
| "loss": 0.2944, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.2443181818181819, |
| "grad_norm": 0.39432480274886955, |
| "learning_rate": 3.3546455892038666e-06, |
| "loss": 0.2918, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.2471590909090908, |
| "grad_norm": 0.3775480283393243, |
| "learning_rate": 3.3473554185807573e-06, |
| "loss": 0.2771, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.34490450741107803, |
| "learning_rate": 3.340057098173288e-06, |
| "loss": 0.2756, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2528409090909092, |
| "grad_norm": 0.3324905873722346, |
| "learning_rate": 3.3327506981760183e-06, |
| "loss": 0.2608, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.2556818181818181, |
| "grad_norm": 0.47138267546166734, |
| "learning_rate": 3.32543628886121e-06, |
| "loss": 0.3077, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.2585227272727273, |
| "grad_norm": 0.2953842775844083, |
| "learning_rate": 3.3181139405781616e-06, |
| "loss": 0.2377, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.2613636363636362, |
| "grad_norm": 0.3612627525520785, |
| "learning_rate": 3.3107837237525274e-06, |
| "loss": 0.2427, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.2642045454545454, |
| "grad_norm": 0.3653963278501932, |
| "learning_rate": 3.3034457088856396e-06, |
| "loss": 0.2559, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2670454545454546, |
| "grad_norm": 0.3129568330696853, |
| "learning_rate": 3.2960999665538335e-06, |
| "loss": 0.2534, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.2698863636363638, |
| "grad_norm": 0.3510947430261117, |
| "learning_rate": 3.288746567407763e-06, |
| "loss": 0.2502, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.3437157582636368, |
| "learning_rate": 3.281385582171727e-06, |
| "loss": 0.2525, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.2755681818181819, |
| "grad_norm": 0.3888446263801318, |
| "learning_rate": 3.274017081642986e-06, |
| "loss": 0.2885, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.2784090909090908, |
| "grad_norm": 0.35942811400817226, |
| "learning_rate": 3.2666411366910827e-06, |
| "loss": 0.2571, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.28125, |
| "grad_norm": 0.41674090701769867, |
| "learning_rate": 3.2592578182571583e-06, |
| "loss": 0.2973, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.2840909090909092, |
| "grad_norm": 0.3702323179560626, |
| "learning_rate": 3.2518671973532704e-06, |
| "loss": 0.2415, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2869318181818181, |
| "grad_norm": 0.36007563550430505, |
| "learning_rate": 3.244469345061715e-06, |
| "loss": 0.2277, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.2897727272727273, |
| "grad_norm": 0.3914691699646844, |
| "learning_rate": 3.237064332534336e-06, |
| "loss": 0.2828, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2926136363636362, |
| "grad_norm": 0.3522104855581335, |
| "learning_rate": 3.229652230991843e-06, |
| "loss": 0.2671, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2954545454545454, |
| "grad_norm": 0.3553148108185653, |
| "learning_rate": 3.2222331117231283e-06, |
| "loss": 0.2817, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2982954545454546, |
| "grad_norm": 0.3771227330111479, |
| "learning_rate": 3.2148070460845814e-06, |
| "loss": 0.274, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.3011363636363638, |
| "grad_norm": 0.41388528735027136, |
| "learning_rate": 3.2073741054994e-06, |
| "loss": 0.3181, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.3039772727272727, |
| "grad_norm": 0.33865063205260826, |
| "learning_rate": 3.199934361456903e-06, |
| "loss": 0.2634, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.3068181818181819, |
| "grad_norm": 0.3520115660135833, |
| "learning_rate": 3.1924878855118475e-06, |
| "loss": 0.2618, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3096590909090908, |
| "grad_norm": 0.40034402955639337, |
| "learning_rate": 3.185034749283734e-06, |
| "loss": 0.2837, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.3125, |
| "grad_norm": 0.34422942980117177, |
| "learning_rate": 3.1775750244561233e-06, |
| "loss": 0.2638, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.3153409090909092, |
| "grad_norm": 0.38963794033279253, |
| "learning_rate": 3.1701087827759434e-06, |
| "loss": 0.294, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.3181818181818181, |
| "grad_norm": 0.4262376192411251, |
| "learning_rate": 3.162636096052803e-06, |
| "loss": 0.3342, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.3210227272727273, |
| "grad_norm": 0.38196782588004025, |
| "learning_rate": 3.155157036158295e-06, |
| "loss": 0.281, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.3238636363636362, |
| "grad_norm": 0.39128577037723217, |
| "learning_rate": 3.147671675025313e-06, |
| "loss": 0.2864, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.3267045454545454, |
| "grad_norm": 0.3622238856754979, |
| "learning_rate": 3.1401800846473506e-06, |
| "loss": 0.2742, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.3295454545454546, |
| "grad_norm": 0.3187408313823274, |
| "learning_rate": 3.132682337077818e-06, |
| "loss": 0.2549, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.3323863636363638, |
| "grad_norm": 0.33256196577073566, |
| "learning_rate": 3.1251785044293425e-06, |
| "loss": 0.2921, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.3352272727272727, |
| "grad_norm": 0.377119549478706, |
| "learning_rate": 3.117668658873078e-06, |
| "loss": 0.2722, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.3380681818181819, |
| "grad_norm": 0.31419013026351733, |
| "learning_rate": 3.1101528726380085e-06, |
| "loss": 0.2519, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.3409090909090908, |
| "grad_norm": 0.3471415869479363, |
| "learning_rate": 3.102631218010257e-06, |
| "loss": 0.2817, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.34375, |
| "grad_norm": 0.37953158089107286, |
| "learning_rate": 3.0951037673323863e-06, |
| "loss": 0.2642, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.3465909090909092, |
| "grad_norm": 0.34488245509452714, |
| "learning_rate": 3.0875705930027065e-06, |
| "loss": 0.2499, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.3494318181818181, |
| "grad_norm": 0.29818790329911665, |
| "learning_rate": 3.0800317674745755e-06, |
| "loss": 0.2572, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.3522727272727273, |
| "grad_norm": 0.35582979006101406, |
| "learning_rate": 3.0724873632557068e-06, |
| "loss": 0.2806, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.3551136363636362, |
| "grad_norm": 0.3886707765043663, |
| "learning_rate": 3.064937452907465e-06, |
| "loss": 0.2395, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.3579545454545454, |
| "grad_norm": 0.39452409132776717, |
| "learning_rate": 3.057382109044177e-06, |
| "loss": 0.2748, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.3607954545454546, |
| "grad_norm": 0.34362558608870675, |
| "learning_rate": 3.049821404332424e-06, |
| "loss": 0.2664, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.3923547533127044, |
| "learning_rate": 3.0422554114903514e-06, |
| "loss": 0.3134, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3664772727272727, |
| "grad_norm": 0.42311598203108824, |
| "learning_rate": 3.0346842032869624e-06, |
| "loss": 0.3227, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.3693181818181819, |
| "grad_norm": 0.49341501720924236, |
| "learning_rate": 3.0271078525414234e-06, |
| "loss": 0.2789, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.3721590909090908, |
| "grad_norm": 0.3923870792288359, |
| "learning_rate": 3.0195264321223584e-06, |
| "loss": 0.3003, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 0.5047411107384405, |
| "learning_rate": 3.0119400149471535e-06, |
| "loss": 0.2835, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.3778409090909092, |
| "grad_norm": 0.3431083613633404, |
| "learning_rate": 3.004348673981252e-06, |
| "loss": 0.2744, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.3806818181818181, |
| "grad_norm": 0.3370392701002557, |
| "learning_rate": 2.996752482237456e-06, |
| "loss": 0.2503, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.3835227272727273, |
| "grad_norm": 0.35789574905836263, |
| "learning_rate": 2.9891515127752172e-06, |
| "loss": 0.2558, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.3863636363636362, |
| "grad_norm": 0.39542709664531145, |
| "learning_rate": 2.981545838699943e-06, |
| "loss": 0.2499, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.3892045454545454, |
| "grad_norm": 0.4799271866705037, |
| "learning_rate": 2.9739355331622886e-06, |
| "loss": 0.2845, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.3920454545454546, |
| "grad_norm": 0.30250300604212543, |
| "learning_rate": 2.966320669357453e-06, |
| "loss": 0.2428, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3948863636363638, |
| "grad_norm": 0.27928557627455064, |
| "learning_rate": 2.9587013205244767e-06, |
| "loss": 0.2354, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.3977272727272727, |
| "grad_norm": 0.3254689902299252, |
| "learning_rate": 2.951077559945538e-06, |
| "loss": 0.2719, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.4005681818181819, |
| "grad_norm": 0.38918459975286523, |
| "learning_rate": 2.943449460945244e-06, |
| "loss": 0.2726, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.4034090909090908, |
| "grad_norm": 0.29871192903714955, |
| "learning_rate": 2.9358170968899323e-06, |
| "loss": 0.263, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.40625, |
| "grad_norm": 0.3943630183447143, |
| "learning_rate": 2.9281805411869573e-06, |
| "loss": 0.2931, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.4090909090909092, |
| "grad_norm": 0.34932644595142737, |
| "learning_rate": 2.920539867283992e-06, |
| "loss": 0.2577, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.4119318181818181, |
| "grad_norm": 0.36296363929883135, |
| "learning_rate": 2.9128951486683144e-06, |
| "loss": 0.2884, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.4147727272727273, |
| "grad_norm": 0.3536090241186941, |
| "learning_rate": 2.9052464588661076e-06, |
| "loss": 0.2518, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.4176136363636362, |
| "grad_norm": 0.4071123114766137, |
| "learning_rate": 2.8975938714417466e-06, |
| "loss": 0.2955, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.4204545454545454, |
| "grad_norm": 0.36319240545094117, |
| "learning_rate": 2.8899374599970943e-06, |
| "loss": 0.2933, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4232954545454546, |
| "grad_norm": 0.33541538203913807, |
| "learning_rate": 2.882277298170792e-06, |
| "loss": 0.2693, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.4261363636363638, |
| "grad_norm": 0.42293889077814073, |
| "learning_rate": 2.8746134596375534e-06, |
| "loss": 0.2907, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.4289772727272727, |
| "grad_norm": 0.3702782961580686, |
| "learning_rate": 2.866946018107453e-06, |
| "loss": 0.2701, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.4318181818181819, |
| "grad_norm": 0.3454390175085058, |
| "learning_rate": 2.8592750473252197e-06, |
| "loss": 0.2612, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.4346590909090908, |
| "grad_norm": 0.33107307095308464, |
| "learning_rate": 2.8516006210695244e-06, |
| "loss": 0.239, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.4375, |
| "grad_norm": 0.3569062909249772, |
| "learning_rate": 2.843922813152275e-06, |
| "loss": 0.2755, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.4403409090909092, |
| "grad_norm": 0.37131837135922086, |
| "learning_rate": 2.836241697417902e-06, |
| "loss": 0.2623, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.4431818181818181, |
| "grad_norm": 0.3699557028893426, |
| "learning_rate": 2.8285573477426504e-06, |
| "loss": 0.2811, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.4460227272727273, |
| "grad_norm": 0.33561480648358855, |
| "learning_rate": 2.820869838033871e-06, |
| "loss": 0.2686, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.4488636363636362, |
| "grad_norm": 0.4711840304366533, |
| "learning_rate": 2.813179242229304e-06, |
| "loss": 0.2946, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.4517045454545454, |
| "grad_norm": 0.382672820843295, |
| "learning_rate": 2.805485634296374e-06, |
| "loss": 0.2945, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.3264806302650397, |
| "learning_rate": 2.7977890882314763e-06, |
| "loss": 0.2658, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.4573863636363638, |
| "grad_norm": 0.3590459125833833, |
| "learning_rate": 2.7900896780592616e-06, |
| "loss": 0.2675, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.4602272727272727, |
| "grad_norm": 0.41777977412669154, |
| "learning_rate": 2.7823874778319316e-06, |
| "loss": 0.3133, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.4630681818181819, |
| "grad_norm": 0.3700743186678299, |
| "learning_rate": 2.774682561628519e-06, |
| "loss": 0.2781, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.4659090909090908, |
| "grad_norm": 0.3586139592020702, |
| "learning_rate": 2.7669750035541798e-06, |
| "loss": 0.2709, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.46875, |
| "grad_norm": 0.32668952210259083, |
| "learning_rate": 2.759264877739481e-06, |
| "loss": 0.2628, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.4715909090909092, |
| "grad_norm": 0.3304970455370839, |
| "learning_rate": 2.7515522583396825e-06, |
| "loss": 0.2859, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.4744318181818181, |
| "grad_norm": 0.3188860297893081, |
| "learning_rate": 2.74383721953403e-06, |
| "loss": 0.2435, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.4772727272727273, |
| "grad_norm": 0.3701340525867732, |
| "learning_rate": 2.736119835525037e-06, |
| "loss": 0.2571, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4801136363636362, |
| "grad_norm": 0.4888553988204271, |
| "learning_rate": 2.728400180537775e-06, |
| "loss": 0.2956, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.4829545454545454, |
| "grad_norm": 0.4110586830757001, |
| "learning_rate": 2.720678328819155e-06, |
| "loss": 0.2396, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.4857954545454546, |
| "grad_norm": 0.3828799651532281, |
| "learning_rate": 2.712954354637218e-06, |
| "loss": 0.2701, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.4886363636363638, |
| "grad_norm": 0.359763211121689, |
| "learning_rate": 2.705228332280418e-06, |
| "loss": 0.2387, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.4914772727272727, |
| "grad_norm": 0.3785795319364518, |
| "learning_rate": 2.6975003360569087e-06, |
| "loss": 0.2761, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.4943181818181819, |
| "grad_norm": 0.34255573500581615, |
| "learning_rate": 2.689770440293825e-06, |
| "loss": 0.267, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.4971590909090908, |
| "grad_norm": 0.37025650452574843, |
| "learning_rate": 2.6820387193365764e-06, |
| "loss": 0.2781, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.35002281689988746, |
| "learning_rate": 2.674305247548125e-06, |
| "loss": 0.2947, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.5028409090909092, |
| "grad_norm": 0.34143779580523753, |
| "learning_rate": 2.6665700993082705e-06, |
| "loss": 0.2658, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.5056818181818183, |
| "grad_norm": 0.3560924867441854, |
| "learning_rate": 2.6588333490129376e-06, |
| "loss": 0.2742, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5085227272727273, |
| "grad_norm": 0.32295396334903814, |
| "learning_rate": 2.65109507107346e-06, |
| "loss": 0.2382, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.5113636363636362, |
| "grad_norm": 0.33859114158227865, |
| "learning_rate": 2.6433553399158652e-06, |
| "loss": 0.2937, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.5142045454545454, |
| "grad_norm": 0.35244369608972004, |
| "learning_rate": 2.6356142299801544e-06, |
| "loss": 0.3037, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.5170454545454546, |
| "grad_norm": 0.3336662584141403, |
| "learning_rate": 2.6278718157195924e-06, |
| "loss": 0.2844, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.5198863636363638, |
| "grad_norm": 0.35862845558521106, |
| "learning_rate": 2.620128171599989e-06, |
| "loss": 0.246, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.5227272727272727, |
| "grad_norm": 0.31358277794725126, |
| "learning_rate": 2.6123833720989796e-06, |
| "loss": 0.2653, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.5255681818181817, |
| "grad_norm": 0.36029376106362876, |
| "learning_rate": 2.6046374917053156e-06, |
| "loss": 0.2785, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.5284090909090908, |
| "grad_norm": 0.3512123146788697, |
| "learning_rate": 2.5968906049181425e-06, |
| "loss": 0.2723, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.53125, |
| "grad_norm": 0.35559911829983626, |
| "learning_rate": 2.5891427862462853e-06, |
| "loss": 0.2939, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.5340909090909092, |
| "grad_norm": 0.3774459233336894, |
| "learning_rate": 2.581394110207532e-06, |
| "loss": 0.2593, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5369318181818183, |
| "grad_norm": 0.3213295704503383, |
| "learning_rate": 2.5736446513279166e-06, |
| "loss": 0.2615, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.5397727272727273, |
| "grad_norm": 0.33894998490392014, |
| "learning_rate": 2.5658944841410032e-06, |
| "loss": 0.2856, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.5426136363636362, |
| "grad_norm": 0.4085808452620872, |
| "learning_rate": 2.5581436831871666e-06, |
| "loss": 0.2611, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 0.3377548562078041, |
| "learning_rate": 2.5503923230128787e-06, |
| "loss": 0.2445, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.5482954545454546, |
| "grad_norm": 0.2986016210832829, |
| "learning_rate": 2.5426404781699886e-06, |
| "loss": 0.2345, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.5511363636363638, |
| "grad_norm": 0.3130189679053128, |
| "learning_rate": 2.534888223215008e-06, |
| "loss": 0.2648, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.5539772727272727, |
| "grad_norm": 0.29362772394820585, |
| "learning_rate": 2.5271356327083927e-06, |
| "loss": 0.2231, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.5568181818181817, |
| "grad_norm": 0.3371287342113354, |
| "learning_rate": 2.5193827812138268e-06, |
| "loss": 0.2801, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.5596590909090908, |
| "grad_norm": 0.438680590348071, |
| "learning_rate": 2.511629743297502e-06, |
| "loss": 0.3117, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 0.3623332826643985, |
| "learning_rate": 2.5038765935274038e-06, |
| "loss": 0.2582, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5653409090909092, |
| "grad_norm": 0.3611764461964591, |
| "learning_rate": 2.4961234064725966e-06, |
| "loss": 0.2606, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.5681818181818183, |
| "grad_norm": 0.6683755911265977, |
| "learning_rate": 2.488370256702499e-06, |
| "loss": 0.2686, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.5710227272727273, |
| "grad_norm": 0.3699878510363697, |
| "learning_rate": 2.4806172187861736e-06, |
| "loss": 0.2823, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.5738636363636362, |
| "grad_norm": 0.3603575134404355, |
| "learning_rate": 2.4728643672916073e-06, |
| "loss": 0.2696, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.5767045454545454, |
| "grad_norm": 0.5708462895257692, |
| "learning_rate": 2.465111776784993e-06, |
| "loss": 0.3003, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.5795454545454546, |
| "grad_norm": 0.414861092800249, |
| "learning_rate": 2.4573595218300127e-06, |
| "loss": 0.2878, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.5823863636363638, |
| "grad_norm": 0.36176025431242964, |
| "learning_rate": 2.4496076769871226e-06, |
| "loss": 0.2614, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.5852272727272727, |
| "grad_norm": 0.4170474058146532, |
| "learning_rate": 2.4418563168128346e-06, |
| "loss": 0.2868, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.5880681818181817, |
| "grad_norm": 0.3270649689091589, |
| "learning_rate": 2.4341055158589976e-06, |
| "loss": 0.2699, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.5909090909090908, |
| "grad_norm": 0.3807070125410976, |
| "learning_rate": 2.4263553486720838e-06, |
| "loss": 0.303, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.59375, |
| "grad_norm": 0.3848553762149162, |
| "learning_rate": 2.4186058897924685e-06, |
| "loss": 0.2748, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.5965909090909092, |
| "grad_norm": 0.3232840810454203, |
| "learning_rate": 2.410857213753715e-06, |
| "loss": 0.2445, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.5994318181818183, |
| "grad_norm": 0.3092676360533537, |
| "learning_rate": 2.4031093950818583e-06, |
| "loss": 0.2356, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.6022727272727273, |
| "grad_norm": 0.45118596036379494, |
| "learning_rate": 2.3953625082946856e-06, |
| "loss": 0.2837, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.6051136363636362, |
| "grad_norm": 0.34970482571526373, |
| "learning_rate": 2.3876166279010212e-06, |
| "loss": 0.2973, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.6079545454545454, |
| "grad_norm": 0.3364465296058301, |
| "learning_rate": 2.379871828400012e-06, |
| "loss": 0.2423, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.6107954545454546, |
| "grad_norm": 0.363328151622841, |
| "learning_rate": 2.372128184280408e-06, |
| "loss": 0.269, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.6136363636363638, |
| "grad_norm": 0.26766248199292697, |
| "learning_rate": 2.364385770019846e-06, |
| "loss": 0.2346, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.6164772727272727, |
| "grad_norm": 0.3913465078730921, |
| "learning_rate": 2.356644660084135e-06, |
| "loss": 0.2866, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.6193181818181817, |
| "grad_norm": 0.31905393138162685, |
| "learning_rate": 2.34890492892654e-06, |
| "loss": 0.2666, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6221590909090908, |
| "grad_norm": 0.3432468450311117, |
| "learning_rate": 2.341166650987064e-06, |
| "loss": 0.2443, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.34070598347786063, |
| "learning_rate": 2.333429900691731e-06, |
| "loss": 0.2968, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.6278409090909092, |
| "grad_norm": 0.4257323783577944, |
| "learning_rate": 2.3256947524518756e-06, |
| "loss": 0.275, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.6306818181818183, |
| "grad_norm": 0.35120372623976087, |
| "learning_rate": 2.317961280663424e-06, |
| "loss": 0.2779, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.6335227272727273, |
| "grad_norm": 0.3288834361465399, |
| "learning_rate": 2.3102295597061757e-06, |
| "loss": 0.262, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.3781083785525166, |
| "learning_rate": 2.3024996639430925e-06, |
| "loss": 0.2705, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.6392045454545454, |
| "grad_norm": 0.3309675255585671, |
| "learning_rate": 2.2947716677195823e-06, |
| "loss": 0.2607, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.6420454545454546, |
| "grad_norm": 0.4097606078658523, |
| "learning_rate": 2.2870456453627823e-06, |
| "loss": 0.3267, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.6448863636363638, |
| "grad_norm": 0.25572751310886616, |
| "learning_rate": 2.2793216711808456e-06, |
| "loss": 0.2278, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.6477272727272727, |
| "grad_norm": 0.3060607584281395, |
| "learning_rate": 2.2715998194622257e-06, |
| "loss": 0.2517, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6505681818181817, |
| "grad_norm": 0.37963871119456877, |
| "learning_rate": 2.2638801644749636e-06, |
| "loss": 0.2634, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.6534090909090908, |
| "grad_norm": 0.3762574705485531, |
| "learning_rate": 2.2561627804659704e-06, |
| "loss": 0.2534, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.65625, |
| "grad_norm": 0.34282885282451137, |
| "learning_rate": 2.2484477416603183e-06, |
| "loss": 0.2666, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.6590909090909092, |
| "grad_norm": 0.3508691585265268, |
| "learning_rate": 2.24073512226052e-06, |
| "loss": 0.2589, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.6619318181818183, |
| "grad_norm": 0.38903092342578377, |
| "learning_rate": 2.2330249964458202e-06, |
| "loss": 0.2853, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.6647727272727273, |
| "grad_norm": 0.3466002683474289, |
| "learning_rate": 2.2253174383714816e-06, |
| "loss": 0.2812, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.6676136363636362, |
| "grad_norm": 0.46395674632161, |
| "learning_rate": 2.21761252216807e-06, |
| "loss": 0.2692, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.6704545454545454, |
| "grad_norm": 0.3699824822038089, |
| "learning_rate": 2.2099103219407392e-06, |
| "loss": 0.2699, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.6732954545454546, |
| "grad_norm": 0.3805031596017454, |
| "learning_rate": 2.2022109117685246e-06, |
| "loss": 0.2953, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.6761363636363638, |
| "grad_norm": 0.37764726137134685, |
| "learning_rate": 2.1945143657036267e-06, |
| "loss": 0.2753, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6789772727272727, |
| "grad_norm": 0.3304479070305256, |
| "learning_rate": 2.1868207577706964e-06, |
| "loss": 0.2524, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.6818181818181817, |
| "grad_norm": 0.3587520279737923, |
| "learning_rate": 2.1791301619661297e-06, |
| "loss": 0.2602, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.6846590909090908, |
| "grad_norm": 0.3323465218687911, |
| "learning_rate": 2.17144265225735e-06, |
| "loss": 0.2692, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.6875, |
| "grad_norm": 0.3572276587914552, |
| "learning_rate": 2.1637583025820985e-06, |
| "loss": 0.2858, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.6903409090909092, |
| "grad_norm": 0.37800630772529514, |
| "learning_rate": 2.156077186847726e-06, |
| "loss": 0.294, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.6931818181818183, |
| "grad_norm": 0.3421660175170903, |
| "learning_rate": 2.148399378930476e-06, |
| "loss": 0.2573, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.6960227272727273, |
| "grad_norm": 0.34254475964042214, |
| "learning_rate": 2.1407249526747816e-06, |
| "loss": 0.275, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.6988636363636362, |
| "grad_norm": 0.3715201904697272, |
| "learning_rate": 2.133053981892547e-06, |
| "loss": 0.2833, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.7017045454545454, |
| "grad_norm": 0.36015289752626467, |
| "learning_rate": 2.125386540362447e-06, |
| "loss": 0.2828, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.7045454545454546, |
| "grad_norm": 0.40367397113055686, |
| "learning_rate": 2.1177227018292086e-06, |
| "loss": 0.2621, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7073863636363638, |
| "grad_norm": 0.32129619035430856, |
| "learning_rate": 2.110062540002906e-06, |
| "loss": 0.2757, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.7102272727272727, |
| "grad_norm": 0.3137451287766472, |
| "learning_rate": 2.1024061285582546e-06, |
| "loss": 0.2535, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.7130681818181817, |
| "grad_norm": 0.4280343421587481, |
| "learning_rate": 2.0947535411338936e-06, |
| "loss": 0.2559, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.7159090909090908, |
| "grad_norm": 0.38561258389624026, |
| "learning_rate": 2.087104851331686e-06, |
| "loss": 0.339, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.71875, |
| "grad_norm": 0.3187139343663328, |
| "learning_rate": 2.0794601327160083e-06, |
| "loss": 0.224, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.7215909090909092, |
| "grad_norm": 0.4058807325173988, |
| "learning_rate": 2.0718194588130435e-06, |
| "loss": 0.2743, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.7244318181818183, |
| "grad_norm": 0.3501025253129524, |
| "learning_rate": 2.0641829031100685e-06, |
| "loss": 0.2534, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.34621897515864436, |
| "learning_rate": 2.0565505390547558e-06, |
| "loss": 0.2565, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.7301136363636362, |
| "grad_norm": 0.2972165110796837, |
| "learning_rate": 2.0489224400544626e-06, |
| "loss": 0.2472, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.7329545454545454, |
| "grad_norm": 0.28430139406095895, |
| "learning_rate": 2.041298679475524e-06, |
| "loss": 0.2278, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7357954545454546, |
| "grad_norm": 0.3424108937746101, |
| "learning_rate": 2.033679330642548e-06, |
| "loss": 0.2708, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.7386363636363638, |
| "grad_norm": 0.34689691643105225, |
| "learning_rate": 2.026064466837712e-06, |
| "loss": 0.2489, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.7414772727272727, |
| "grad_norm": 0.36538604704717154, |
| "learning_rate": 2.018454161300058e-06, |
| "loss": 0.2959, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.7443181818181817, |
| "grad_norm": 0.3914980478603566, |
| "learning_rate": 2.0108484872247836e-06, |
| "loss": 0.2877, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.7471590909090908, |
| "grad_norm": 0.3460591534025964, |
| "learning_rate": 2.003247517762545e-06, |
| "loss": 0.2392, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.35201168894909723, |
| "learning_rate": 1.995651326018748e-06, |
| "loss": 0.2775, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.7528409090909092, |
| "grad_norm": 0.3907457148602396, |
| "learning_rate": 1.988059985052847e-06, |
| "loss": 0.2649, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.7556818181818183, |
| "grad_norm": 0.31089272434312254, |
| "learning_rate": 1.980473567877643e-06, |
| "loss": 0.2717, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.7585227272727273, |
| "grad_norm": 0.39029862965581613, |
| "learning_rate": 1.9728921474585783e-06, |
| "loss": 0.2996, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.7613636363636362, |
| "grad_norm": 0.37522254054472837, |
| "learning_rate": 1.965315796713038e-06, |
| "loss": 0.3206, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.7642045454545454, |
| "grad_norm": 0.37421333571503007, |
| "learning_rate": 1.957744588509649e-06, |
| "loss": 0.2953, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.7670454545454546, |
| "grad_norm": 0.4113713231201874, |
| "learning_rate": 1.9501785956675767e-06, |
| "loss": 0.2587, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.7698863636363638, |
| "grad_norm": 0.3775256295092349, |
| "learning_rate": 1.942617890955824e-06, |
| "loss": 0.2706, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.7727272727272727, |
| "grad_norm": 0.361676860315546, |
| "learning_rate": 1.935062547092535e-06, |
| "loss": 0.2573, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.7755681818181817, |
| "grad_norm": 0.3828484280989141, |
| "learning_rate": 1.927512636744294e-06, |
| "loss": 0.2635, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.7784090909090908, |
| "grad_norm": 0.3194894627210845, |
| "learning_rate": 1.9199682325254258e-06, |
| "loss": 0.2412, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.78125, |
| "grad_norm": 0.3467465431720772, |
| "learning_rate": 1.9124294069972947e-06, |
| "loss": 0.2558, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.7840909090909092, |
| "grad_norm": 0.40591415428499084, |
| "learning_rate": 1.9048962326676145e-06, |
| "loss": 0.2591, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.7869318181818183, |
| "grad_norm": 0.324247081690912, |
| "learning_rate": 1.897368781989744e-06, |
| "loss": 0.2525, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.7897727272727273, |
| "grad_norm": 0.30168524950243947, |
| "learning_rate": 1.889847127361992e-06, |
| "loss": 0.2414, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7926136363636362, |
| "grad_norm": 0.3391445741041072, |
| "learning_rate": 1.8823313411269226e-06, |
| "loss": 0.2666, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.7954545454545454, |
| "grad_norm": 0.3695919372425977, |
| "learning_rate": 1.874821495570658e-06, |
| "loss": 0.2738, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.7982954545454546, |
| "grad_norm": 0.41985233793486193, |
| "learning_rate": 1.8673176629221824e-06, |
| "loss": 0.2843, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.8011363636363638, |
| "grad_norm": 0.34508550168400526, |
| "learning_rate": 1.8598199153526502e-06, |
| "loss": 0.2762, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.8039772727272727, |
| "grad_norm": 0.34432258391495646, |
| "learning_rate": 1.852328324974688e-06, |
| "loss": 0.2746, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.8068181818181817, |
| "grad_norm": 0.432219335772206, |
| "learning_rate": 1.8448429638417053e-06, |
| "loss": 0.293, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.8096590909090908, |
| "grad_norm": 0.30494323840811877, |
| "learning_rate": 1.8373639039471974e-06, |
| "loss": 0.2483, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.8125, |
| "grad_norm": 0.38979888807881874, |
| "learning_rate": 1.8298912172240568e-06, |
| "loss": 0.2665, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.8153409090909092, |
| "grad_norm": 0.4409357967627925, |
| "learning_rate": 1.8224249755438773e-06, |
| "loss": 0.2979, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.40058073253392457, |
| "learning_rate": 1.8149652507162662e-06, |
| "loss": 0.2402, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8210227272727273, |
| "grad_norm": 0.3781947301668901, |
| "learning_rate": 1.807512114488153e-06, |
| "loss": 0.2815, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.8238636363636362, |
| "grad_norm": 0.32982880475917153, |
| "learning_rate": 1.8000656385430978e-06, |
| "loss": 0.274, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.8267045454545454, |
| "grad_norm": 0.34588388650165885, |
| "learning_rate": 1.7926258945006008e-06, |
| "loss": 0.2415, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.8295454545454546, |
| "grad_norm": 0.46509270816531234, |
| "learning_rate": 1.7851929539154188e-06, |
| "loss": 0.2352, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.8323863636363638, |
| "grad_norm": 0.3949892127680776, |
| "learning_rate": 1.7777668882768723e-06, |
| "loss": 0.2731, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.8352272727272727, |
| "grad_norm": 0.33118981202064834, |
| "learning_rate": 1.7703477690081584e-06, |
| "loss": 0.2062, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.8380681818181817, |
| "grad_norm": 0.41123429927968475, |
| "learning_rate": 1.762935667465665e-06, |
| "loss": 0.2603, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.8409090909090908, |
| "grad_norm": 0.4086985175493265, |
| "learning_rate": 1.7555306549382853e-06, |
| "loss": 0.2633, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.84375, |
| "grad_norm": 0.3829776136552432, |
| "learning_rate": 1.7481328026467292e-06, |
| "loss": 0.2645, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.8465909090909092, |
| "grad_norm": 0.36580249698143114, |
| "learning_rate": 1.7407421817428432e-06, |
| "loss": 0.2907, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8494318181818183, |
| "grad_norm": 0.5114322764325684, |
| "learning_rate": 1.733358863308918e-06, |
| "loss": 0.2491, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.8522727272727273, |
| "grad_norm": 0.3758211802363351, |
| "learning_rate": 1.7259829183570146e-06, |
| "loss": 0.275, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.8551136363636362, |
| "grad_norm": 0.44005362349975546, |
| "learning_rate": 1.7186144178282735e-06, |
| "loss": 0.2759, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.8579545454545454, |
| "grad_norm": 0.41121803130231066, |
| "learning_rate": 1.7112534325922381e-06, |
| "loss": 0.2835, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.8607954545454546, |
| "grad_norm": 0.37656111256141905, |
| "learning_rate": 1.7039000334461673e-06, |
| "loss": 0.2808, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.8636363636363638, |
| "grad_norm": 0.3651987202447528, |
| "learning_rate": 1.6965542911143601e-06, |
| "loss": 0.3218, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.8664772727272727, |
| "grad_norm": 0.40004844795530625, |
| "learning_rate": 1.6892162762474732e-06, |
| "loss": 0.2945, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.8693181818181817, |
| "grad_norm": 0.33043091198634184, |
| "learning_rate": 1.6818860594218396e-06, |
| "loss": 0.2277, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.8721590909090908, |
| "grad_norm": 0.3346497899463932, |
| "learning_rate": 1.674563711138791e-06, |
| "loss": 0.2324, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.32658486289094646, |
| "learning_rate": 1.6672493018239828e-06, |
| "loss": 0.242, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.8778409090909092, |
| "grad_norm": 0.3483520142042606, |
| "learning_rate": 1.659942901826712e-06, |
| "loss": 0.2724, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.8806818181818183, |
| "grad_norm": 0.3447989906256544, |
| "learning_rate": 1.6526445814192437e-06, |
| "loss": 0.2522, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.8835227272727273, |
| "grad_norm": 0.3745982582543309, |
| "learning_rate": 1.6453544107961338e-06, |
| "loss": 0.268, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.8863636363636362, |
| "grad_norm": 0.47460009049304464, |
| "learning_rate": 1.638072460073556e-06, |
| "loss": 0.3004, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.8892045454545454, |
| "grad_norm": 0.38922747831910864, |
| "learning_rate": 1.6307987992886221e-06, |
| "loss": 0.2923, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.8920454545454546, |
| "grad_norm": 0.3619334724335469, |
| "learning_rate": 1.6235334983987166e-06, |
| "loss": 0.2929, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.8948863636363638, |
| "grad_norm": 0.4134447223169521, |
| "learning_rate": 1.6162766272808153e-06, |
| "loss": 0.2443, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.8977272727272727, |
| "grad_norm": 0.37827695457409233, |
| "learning_rate": 1.6090282557308199e-06, |
| "loss": 0.2634, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.9005681818181817, |
| "grad_norm": 0.37553439336248, |
| "learning_rate": 1.6017884534628821e-06, |
| "loss": 0.2624, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.9034090909090908, |
| "grad_norm": 0.30503546597237136, |
| "learning_rate": 1.594557290108737e-06, |
| "loss": 0.2448, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.90625, |
| "grad_norm": 0.33139361815750534, |
| "learning_rate": 1.5873348352170309e-06, |
| "loss": 0.2344, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.9090909090909092, |
| "grad_norm": 0.4071705047497215, |
| "learning_rate": 1.5801211582526515e-06, |
| "loss": 0.2972, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.9119318181818183, |
| "grad_norm": 0.3520108684037794, |
| "learning_rate": 1.5729163285960636e-06, |
| "loss": 0.3064, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.9147727272727273, |
| "grad_norm": 1.044294639450523, |
| "learning_rate": 1.5657204155426372e-06, |
| "loss": 0.2764, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.9176136363636362, |
| "grad_norm": 0.2733575442921981, |
| "learning_rate": 1.5585334883019845e-06, |
| "loss": 0.2115, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.9204545454545454, |
| "grad_norm": 0.3436043484209694, |
| "learning_rate": 1.551355615997292e-06, |
| "loss": 0.2613, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.9232954545454546, |
| "grad_norm": 0.347973792440035, |
| "learning_rate": 1.5441868676646588e-06, |
| "loss": 0.2984, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.9261363636363638, |
| "grad_norm": 0.3714627621893232, |
| "learning_rate": 1.537027312252427e-06, |
| "loss": 0.2939, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.9289772727272727, |
| "grad_norm": 0.36946185178466473, |
| "learning_rate": 1.5298770186205262e-06, |
| "loss": 0.3133, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.9318181818181817, |
| "grad_norm": 0.4445653274012168, |
| "learning_rate": 1.522736055539804e-06, |
| "loss": 0.2638, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9346590909090908, |
| "grad_norm": 0.36757539498984404, |
| "learning_rate": 1.5156044916913687e-06, |
| "loss": 0.2594, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.9375, |
| "grad_norm": 0.3740375047815139, |
| "learning_rate": 1.5084823956659284e-06, |
| "loss": 0.2816, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.9403409090909092, |
| "grad_norm": 0.3691390978665013, |
| "learning_rate": 1.5013698359631284e-06, |
| "loss": 0.3269, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.9431818181818183, |
| "grad_norm": 0.3882033860276734, |
| "learning_rate": 1.4942668809908966e-06, |
| "loss": 0.2926, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.9460227272727273, |
| "grad_norm": 0.3303389257139215, |
| "learning_rate": 1.487173599064783e-06, |
| "loss": 0.2813, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.9488636363636362, |
| "grad_norm": 0.385716686112769, |
| "learning_rate": 1.4800900584073025e-06, |
| "loss": 0.3027, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.9517045454545454, |
| "grad_norm": 0.30969234063219786, |
| "learning_rate": 1.4730163271472808e-06, |
| "loss": 0.2848, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.9545454545454546, |
| "grad_norm": 0.39460846418007084, |
| "learning_rate": 1.465952473319196e-06, |
| "loss": 0.2638, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.9573863636363638, |
| "grad_norm": 0.38043423948555954, |
| "learning_rate": 1.458898564862528e-06, |
| "loss": 0.3017, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.9602272727272727, |
| "grad_norm": 0.344190102552331, |
| "learning_rate": 1.4518546696211003e-06, |
| "loss": 0.2475, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9630681818181817, |
| "grad_norm": 0.3584182768945062, |
| "learning_rate": 1.4448208553424318e-06, |
| "loss": 0.2599, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.9659090909090908, |
| "grad_norm": 0.4193998956615056, |
| "learning_rate": 1.4377971896770854e-06, |
| "loss": 0.2932, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.96875, |
| "grad_norm": 0.3183638489077071, |
| "learning_rate": 1.4307837401780129e-06, |
| "loss": 0.2353, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.9715909090909092, |
| "grad_norm": 0.551291367904842, |
| "learning_rate": 1.4237805742999078e-06, |
| "loss": 0.2888, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.9744318181818183, |
| "grad_norm": 0.3836625936106596, |
| "learning_rate": 1.4167877593985604e-06, |
| "loss": 0.2606, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.9772727272727273, |
| "grad_norm": 0.33238753899253215, |
| "learning_rate": 1.4098053627302021e-06, |
| "loss": 0.2511, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.9801136363636362, |
| "grad_norm": 0.3697431181915492, |
| "learning_rate": 1.402833451450865e-06, |
| "loss": 0.2592, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.9829545454545454, |
| "grad_norm": 0.39546306881879256, |
| "learning_rate": 1.3958720926157326e-06, |
| "loss": 0.2867, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.9857954545454546, |
| "grad_norm": 0.37081182355768993, |
| "learning_rate": 1.3889213531784967e-06, |
| "loss": 0.2774, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.9886363636363638, |
| "grad_norm": 0.31730996135018236, |
| "learning_rate": 1.3819812999907112e-06, |
| "loss": 0.2558, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9914772727272727, |
| "grad_norm": 0.48697102294004946, |
| "learning_rate": 1.3750519998011545e-06, |
| "loss": 0.2807, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.9943181818181817, |
| "grad_norm": 0.32660834038500147, |
| "learning_rate": 1.3681335192551795e-06, |
| "loss": 0.266, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.9971590909090908, |
| "grad_norm": 0.37088856838391165, |
| "learning_rate": 1.3612259248940778e-06, |
| "loss": 0.3023, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.46910986149494815, |
| "learning_rate": 1.354329283154442e-06, |
| "loss": 0.354, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.002840909090909, |
| "grad_norm": 0.3578196229806462, |
| "learning_rate": 1.3474436603675195e-06, |
| "loss": 0.2863, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.0056818181818183, |
| "grad_norm": 0.3320147175830239, |
| "learning_rate": 1.3405691227585774e-06, |
| "loss": 0.2791, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.008522727272727, |
| "grad_norm": 0.4104267883722151, |
| "learning_rate": 1.333705736446272e-06, |
| "loss": 0.2599, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.0113636363636362, |
| "grad_norm": 0.44025732665188794, |
| "learning_rate": 1.326853567442003e-06, |
| "loss": 0.2648, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.0142045454545454, |
| "grad_norm": 0.4463091829454087, |
| "learning_rate": 1.320012681649284e-06, |
| "loss": 0.3235, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.0170454545454546, |
| "grad_norm": 0.3977418006694515, |
| "learning_rate": 1.3131831448631099e-06, |
| "loss": 0.2494, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.0198863636363638, |
| "grad_norm": 0.30294420075479717, |
| "learning_rate": 1.3063650227693192e-06, |
| "loss": 0.2274, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.022727272727273, |
| "grad_norm": 0.3580935126068431, |
| "learning_rate": 1.2995583809439655e-06, |
| "loss": 0.2641, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.0255681818181817, |
| "grad_norm": 0.3633999760316955, |
| "learning_rate": 1.2927632848526892e-06, |
| "loss": 0.2664, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.028409090909091, |
| "grad_norm": 0.39362626572566367, |
| "learning_rate": 1.285979799850079e-06, |
| "loss": 0.3028, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.03125, |
| "grad_norm": 0.3732307387516034, |
| "learning_rate": 1.2792079911790554e-06, |
| "loss": 0.2903, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.034090909090909, |
| "grad_norm": 0.348231549102206, |
| "learning_rate": 1.2724479239702334e-06, |
| "loss": 0.2776, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.0369318181818183, |
| "grad_norm": 0.32154175294270404, |
| "learning_rate": 1.2656996632413e-06, |
| "loss": 0.2363, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.039772727272727, |
| "grad_norm": 0.3738689076803405, |
| "learning_rate": 1.2589632738963915e-06, |
| "loss": 0.2747, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.0426136363636362, |
| "grad_norm": 0.370533612023648, |
| "learning_rate": 1.2522388207254624e-06, |
| "loss": 0.2568, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.0454545454545454, |
| "grad_norm": 0.3839434235801676, |
| "learning_rate": 1.2455263684036687e-06, |
| "loss": 0.2792, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0482954545454546, |
| "grad_norm": 0.5003341324574189, |
| "learning_rate": 1.2388259814907421e-06, |
| "loss": 0.2769, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.0511363636363638, |
| "grad_norm": 0.3351671952514299, |
| "learning_rate": 1.2321377244303718e-06, |
| "loss": 0.2296, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.053977272727273, |
| "grad_norm": 0.2999985412422647, |
| "learning_rate": 1.22546166154958e-06, |
| "loss": 0.2284, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.0568181818181817, |
| "grad_norm": 0.3135859144132813, |
| "learning_rate": 1.2187978570581118e-06, |
| "loss": 0.251, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.059659090909091, |
| "grad_norm": 0.4125239171099722, |
| "learning_rate": 1.212146375047808e-06, |
| "loss": 0.2569, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.0625, |
| "grad_norm": 0.5126461046016878, |
| "learning_rate": 1.2055072794919927e-06, |
| "loss": 0.2867, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.065340909090909, |
| "grad_norm": 0.32428865281600694, |
| "learning_rate": 1.198880634244862e-06, |
| "loss": 0.2526, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.0681818181818183, |
| "grad_norm": 0.5892083787676873, |
| "learning_rate": 1.192266503040863e-06, |
| "loss": 0.2827, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.071022727272727, |
| "grad_norm": 0.2947475596312562, |
| "learning_rate": 1.1856649494940842e-06, |
| "loss": 0.2288, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.0738636363636362, |
| "grad_norm": 0.35972607487628616, |
| "learning_rate": 1.1790760370976445e-06, |
| "loss": 0.268, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.0767045454545454, |
| "grad_norm": 0.36619988601771414, |
| "learning_rate": 1.1724998292230804e-06, |
| "loss": 0.2832, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.0795454545454546, |
| "grad_norm": 0.3733558388597783, |
| "learning_rate": 1.1659363891197373e-06, |
| "loss": 0.2723, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.0823863636363638, |
| "grad_norm": 0.39404340487463446, |
| "learning_rate": 1.1593857799141635e-06, |
| "loss": 0.2823, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.085227272727273, |
| "grad_norm": 0.39535002691603904, |
| "learning_rate": 1.152848064609499e-06, |
| "loss": 0.2765, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.0880681818181817, |
| "grad_norm": 0.4562125910263655, |
| "learning_rate": 1.1463233060848701e-06, |
| "loss": 0.2229, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.090909090909091, |
| "grad_norm": 0.34157106543064586, |
| "learning_rate": 1.139811567094791e-06, |
| "loss": 0.251, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.09375, |
| "grad_norm": 0.3975912471137775, |
| "learning_rate": 1.1333129102685504e-06, |
| "loss": 0.2953, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.096590909090909, |
| "grad_norm": 0.4344936348962993, |
| "learning_rate": 1.1268273981096154e-06, |
| "loss": 0.2481, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.0994318181818183, |
| "grad_norm": 0.40663820339750667, |
| "learning_rate": 1.1203550929950296e-06, |
| "loss": 0.2704, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.102272727272727, |
| "grad_norm": 0.4525407147079834, |
| "learning_rate": 1.1138960571748122e-06, |
| "loss": 0.2308, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.1051136363636362, |
| "grad_norm": 0.36101599924638966, |
| "learning_rate": 1.107450352771358e-06, |
| "loss": 0.3198, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.1079545454545454, |
| "grad_norm": 0.4132570992405224, |
| "learning_rate": 1.1010180417788458e-06, |
| "loss": 0.3157, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.1107954545454546, |
| "grad_norm": 0.4296796806025471, |
| "learning_rate": 1.094599186062633e-06, |
| "loss": 0.2719, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.1136363636363638, |
| "grad_norm": 0.4115860705303619, |
| "learning_rate": 1.0881938473586672e-06, |
| "loss": 0.2588, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.116477272727273, |
| "grad_norm": 0.3341390354972397, |
| "learning_rate": 1.0818020872728935e-06, |
| "loss": 0.2803, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.1193181818181817, |
| "grad_norm": 0.386666143661149, |
| "learning_rate": 1.0754239672806526e-06, |
| "loss": 0.2954, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.122159090909091, |
| "grad_norm": 0.39729795109834065, |
| "learning_rate": 1.0690595487261032e-06, |
| "loss": 0.292, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 0.4632063849794996, |
| "learning_rate": 1.0627088928216203e-06, |
| "loss": 0.3011, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.127840909090909, |
| "grad_norm": 0.364788422480122, |
| "learning_rate": 1.0563720606472116e-06, |
| "loss": 0.2887, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.1306818181818183, |
| "grad_norm": 0.3613800764493521, |
| "learning_rate": 1.050049113149932e-06, |
| "loss": 0.2698, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.133522727272727, |
| "grad_norm": 0.4840054604670755, |
| "learning_rate": 1.0437401111432928e-06, |
| "loss": 0.2671, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.1363636363636362, |
| "grad_norm": 0.35647589283664843, |
| "learning_rate": 1.0374451153066773e-06, |
| "loss": 0.277, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.1392045454545454, |
| "grad_norm": 0.3070617647042118, |
| "learning_rate": 1.0311641861847644e-06, |
| "loss": 0.2262, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.1420454545454546, |
| "grad_norm": 0.36421008528422827, |
| "learning_rate": 1.0248973841869336e-06, |
| "loss": 0.2541, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.1448863636363638, |
| "grad_norm": 0.36442145568995793, |
| "learning_rate": 1.018644769586695e-06, |
| "loss": 0.2968, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.147727272727273, |
| "grad_norm": 0.5392899583290776, |
| "learning_rate": 1.0124064025211063e-06, |
| "loss": 0.2338, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.1505681818181817, |
| "grad_norm": 0.40485627469450297, |
| "learning_rate": 1.006182342990192e-06, |
| "loss": 0.2734, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.153409090909091, |
| "grad_norm": 0.36165309778969656, |
| "learning_rate": 9.99972650856368e-07, |
| "loss": 0.2717, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.15625, |
| "grad_norm": 0.37054356564143653, |
| "learning_rate": 9.937773858438677e-07, |
| "loss": 0.2867, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.159090909090909, |
| "grad_norm": 0.3209190334600411, |
| "learning_rate": 9.87596607538164e-07, |
| "loss": 0.2026, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.1619318181818183, |
| "grad_norm": 0.33862908014599463, |
| "learning_rate": 9.81430375385399e-07, |
| "loss": 0.2589, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.164772727272727, |
| "grad_norm": 0.33768216225160724, |
| "learning_rate": 9.752787486918108e-07, |
| "loss": 0.2832, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.1676136363636362, |
| "grad_norm": 0.33566640920720886, |
| "learning_rate": 9.691417866231633e-07, |
| "loss": 0.2646, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.1704545454545454, |
| "grad_norm": 0.296999788237227, |
| "learning_rate": 9.630195482041778e-07, |
| "loss": 0.2405, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.1732954545454546, |
| "grad_norm": 0.36623960819597895, |
| "learning_rate": 9.569120923179661e-07, |
| "loss": 0.2997, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.1761363636363638, |
| "grad_norm": 0.35989187708509074, |
| "learning_rate": 9.508194777054613e-07, |
| "loss": 0.2627, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.178977272727273, |
| "grad_norm": 0.45558444510597795, |
| "learning_rate": 9.447417629648542e-07, |
| "loss": 0.2939, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 1.352661504436191, |
| "learning_rate": 9.386790065510326e-07, |
| "loss": 0.2674, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.184659090909091, |
| "grad_norm": 0.3484066310248953, |
| "learning_rate": 9.326312667750143e-07, |
| "loss": 0.2647, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.1875, |
| "grad_norm": 0.3372643949746599, |
| "learning_rate": 9.265986018033887e-07, |
| "loss": 0.2712, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.190340909090909, |
| "grad_norm": 0.45171014423025785, |
| "learning_rate": 9.205810696577577e-07, |
| "loss": 0.2531, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.1931818181818183, |
| "grad_norm": 0.3426033696862187, |
| "learning_rate": 9.14578728214176e-07, |
| "loss": 0.2594, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.196022727272727, |
| "grad_norm": 0.44139931309445984, |
| "learning_rate": 9.085916352025983e-07, |
| "loss": 0.2747, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.1988636363636362, |
| "grad_norm": 0.3644501914038969, |
| "learning_rate": 9.02619848206319e-07, |
| "loss": 0.3172, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.2017045454545454, |
| "grad_norm": 0.41216240398841364, |
| "learning_rate": 8.966634246614208e-07, |
| "loss": 0.2614, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.2045454545454546, |
| "grad_norm": 0.34732770899892357, |
| "learning_rate": 8.907224218562219e-07, |
| "loss": 0.248, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.2073863636363638, |
| "grad_norm": 0.34245013389418555, |
| "learning_rate": 8.847968969307283e-07, |
| "loss": 0.295, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.210227272727273, |
| "grad_norm": 0.3893001282929315, |
| "learning_rate": 8.788869068760758e-07, |
| "loss": 0.2967, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.2130681818181817, |
| "grad_norm": 0.48226856220499215, |
| "learning_rate": 8.729925085339929e-07, |
| "loss": 0.3055, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.215909090909091, |
| "grad_norm": 0.36479498548889644, |
| "learning_rate": 8.67113758596245e-07, |
| "loss": 0.2944, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.21875, |
| "grad_norm": 0.311563765449273, |
| "learning_rate": 8.612507136040926e-07, |
| "loss": 0.2208, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.221590909090909, |
| "grad_norm": 0.39153959534391375, |
| "learning_rate": 8.554034299477506e-07, |
| "loss": 0.2955, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.2244318181818183, |
| "grad_norm": 0.3752941766025436, |
| "learning_rate": 8.495719638658395e-07, |
| "loss": 0.2882, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.227272727272727, |
| "grad_norm": 0.34306207357731855, |
| "learning_rate": 8.437563714448496e-07, |
| "loss": 0.2855, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.2301136363636362, |
| "grad_norm": 0.2911256041409022, |
| "learning_rate": 8.379567086185989e-07, |
| "loss": 0.2245, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.2329545454545454, |
| "grad_norm": 0.38423726910475914, |
| "learning_rate": 8.321730311676965e-07, |
| "loss": 0.2881, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.2357954545454546, |
| "grad_norm": 0.28685959612362666, |
| "learning_rate": 8.264053947190051e-07, |
| "loss": 0.2168, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.2386363636363638, |
| "grad_norm": 0.3177020831576707, |
| "learning_rate": 8.206538547451088e-07, |
| "loss": 0.2392, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.241477272727273, |
| "grad_norm": 0.314674201211804, |
| "learning_rate": 8.149184665637746e-07, |
| "loss": 0.2244, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.2443181818181817, |
| "grad_norm": 0.34609325605203806, |
| "learning_rate": 8.091992853374239e-07, |
| "loss": 0.2506, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.247159090909091, |
| "grad_norm": 0.37417875469018747, |
| "learning_rate": 8.034963660726022e-07, |
| "loss": 0.297, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.4190001624824225, |
| "learning_rate": 7.978097636194482e-07, |
| "loss": 0.2822, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.252840909090909, |
| "grad_norm": 0.31172594700443673, |
| "learning_rate": 7.921395326711664e-07, |
| "loss": 0.2277, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.2556818181818183, |
| "grad_norm": 0.35515884644954326, |
| "learning_rate": 7.864857277635027e-07, |
| "loss": 0.252, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.2585227272727275, |
| "grad_norm": 0.48510568393864467, |
| "learning_rate": 7.808484032742184e-07, |
| "loss": 0.2661, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.2613636363636362, |
| "grad_norm": 0.40576550011180185, |
| "learning_rate": 7.75227613422567e-07, |
| "loss": 0.2624, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.2642045454545454, |
| "grad_norm": 0.3153702935106711, |
| "learning_rate": 7.696234122687756e-07, |
| "loss": 0.2423, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.2670454545454546, |
| "grad_norm": 0.45813794434618704, |
| "learning_rate": 7.640358537135214e-07, |
| "loss": 0.2773, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.2698863636363638, |
| "grad_norm": 0.43799221687287815, |
| "learning_rate": 7.584649914974132e-07, |
| "loss": 0.2543, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.36099400774254925, |
| "learning_rate": 7.5291087920048e-07, |
| "loss": 0.2554, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.2755681818181817, |
| "grad_norm": 0.3681744190202427, |
| "learning_rate": 7.47373570241646e-07, |
| "loss": 0.2393, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.278409090909091, |
| "grad_norm": 0.30088848462434675, |
| "learning_rate": 7.418531178782281e-07, |
| "loss": 0.2443, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.28125, |
| "grad_norm": 0.36658882990515207, |
| "learning_rate": 7.363495752054145e-07, |
| "loss": 0.2716, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.284090909090909, |
| "grad_norm": 0.3691396379554879, |
| "learning_rate": 7.30862995155758e-07, |
| "loss": 0.281, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.2869318181818183, |
| "grad_norm": 0.3976865364065572, |
| "learning_rate": 7.25393430498669e-07, |
| "loss": 0.3126, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.2897727272727275, |
| "grad_norm": 0.34972134382431147, |
| "learning_rate": 7.199409338399024e-07, |
| "loss": 0.2716, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.2926136363636362, |
| "grad_norm": 0.359990470488163, |
| "learning_rate": 7.145055576210552e-07, |
| "loss": 0.282, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.2954545454545454, |
| "grad_norm": 0.32127716098200765, |
| "learning_rate": 7.090873541190649e-07, |
| "loss": 0.2537, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.2982954545454546, |
| "grad_norm": 0.3386422816466643, |
| "learning_rate": 7.036863754456985e-07, |
| "loss": 0.2663, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.3011363636363638, |
| "grad_norm": 0.43294818109617667, |
| "learning_rate": 6.983026735470586e-07, |
| "loss": 0.3144, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.3039772727272725, |
| "grad_norm": 0.3668974255373313, |
| "learning_rate": 6.929363002030829e-07, |
| "loss": 0.2665, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.3068181818181817, |
| "grad_norm": 0.3372045903540735, |
| "learning_rate": 6.875873070270423e-07, |
| "loss": 0.2291, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.309659090909091, |
| "grad_norm": 0.3686361653405783, |
| "learning_rate": 6.822557454650472e-07, |
| "loss": 0.3127, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.3125, |
| "grad_norm": 0.3287416264369441, |
| "learning_rate": 6.769416667955545e-07, |
| "loss": 0.2497, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.315340909090909, |
| "grad_norm": 0.378493696975223, |
| "learning_rate": 6.7164512212887e-07, |
| "loss": 0.2538, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.3181818181818183, |
| "grad_norm": 0.3527906349071735, |
| "learning_rate": 6.6636616240666e-07, |
| "loss": 0.2759, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.3210227272727275, |
| "grad_norm": 0.3283146351073707, |
| "learning_rate": 6.611048384014601e-07, |
| "loss": 0.2787, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.3238636363636362, |
| "grad_norm": 0.4262766716182643, |
| "learning_rate": 6.558612007161876e-07, |
| "loss": 0.3367, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.3267045454545454, |
| "grad_norm": 0.29243285573134076, |
| "learning_rate": 6.506352997836537e-07, |
| "loss": 0.2312, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.3295454545454546, |
| "grad_norm": 0.3708515561207515, |
| "learning_rate": 6.454271858660816e-07, |
| "loss": 0.2947, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.3323863636363638, |
| "grad_norm": 0.3031026424988807, |
| "learning_rate": 6.402369090546173e-07, |
| "loss": 0.2376, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.3352272727272725, |
| "grad_norm": 0.40063837240803074, |
| "learning_rate": 6.350645192688531e-07, |
| "loss": 0.2706, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.3380681818181817, |
| "grad_norm": 0.3931219211524187, |
| "learning_rate": 6.299100662563459e-07, |
| "loss": 0.2245, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.340909090909091, |
| "grad_norm": 0.496053050631395, |
| "learning_rate": 6.247735995921375e-07, |
| "loss": 0.2665, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.34375, |
| "grad_norm": 0.36983619426324377, |
| "learning_rate": 6.19655168678279e-07, |
| "loss": 0.2437, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.346590909090909, |
| "grad_norm": 0.31853434490396093, |
| "learning_rate": 6.145548227433551e-07, |
| "loss": 0.237, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.3494318181818183, |
| "grad_norm": 0.3833013165526796, |
| "learning_rate": 6.094726108420105e-07, |
| "loss": 0.2321, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.3522727272727275, |
| "grad_norm": 0.34709948082141423, |
| "learning_rate": 6.044085818544807e-07, |
| "loss": 0.2435, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.3551136363636362, |
| "grad_norm": 0.346027003213824, |
| "learning_rate": 5.993627844861172e-07, |
| "loss": 0.2536, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.3579545454545454, |
| "grad_norm": 0.3350399776737133, |
| "learning_rate": 5.943352672669215e-07, |
| "loss": 0.2403, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.3607954545454546, |
| "grad_norm": 0.32396672340715865, |
| "learning_rate": 5.89326078551081e-07, |
| "loss": 0.2213, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.3844292483072848, |
| "learning_rate": 5.843352665164992e-07, |
| "loss": 0.249, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.3664772727272725, |
| "grad_norm": 0.35019451009540753, |
| "learning_rate": 5.793628791643327e-07, |
| "loss": 0.2888, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.3693181818181817, |
| "grad_norm": 0.3164025713303425, |
| "learning_rate": 5.744089643185355e-07, |
| "loss": 0.2515, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.372159090909091, |
| "grad_norm": 0.3308520526667594, |
| "learning_rate": 5.69473569625392e-07, |
| "loss": 0.2587, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.3378919456195333, |
| "learning_rate": 5.645567425530607e-07, |
| "loss": 0.2433, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.377840909090909, |
| "grad_norm": 0.3354025023866522, |
| "learning_rate": 5.596585303911217e-07, |
| "loss": 0.2542, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.3806818181818183, |
| "grad_norm": 0.3756871055057431, |
| "learning_rate": 5.547789802501164e-07, |
| "loss": 0.2755, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.3835227272727275, |
| "grad_norm": 0.3363888579054467, |
| "learning_rate": 5.499181390610958e-07, |
| "loss": 0.2545, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.3863636363636362, |
| "grad_norm": 0.3726730517187886, |
| "learning_rate": 5.450760535751734e-07, |
| "loss": 0.2679, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.3892045454545454, |
| "grad_norm": 0.34128788657594616, |
| "learning_rate": 5.402527703630681e-07, |
| "loss": 0.2744, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.3920454545454546, |
| "grad_norm": 0.3112695417600679, |
| "learning_rate": 5.354483358146617e-07, |
| "loss": 0.2231, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.3948863636363638, |
| "grad_norm": 0.34694374550516704, |
| "learning_rate": 5.306627961385538e-07, |
| "loss": 0.2571, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.3977272727272725, |
| "grad_norm": 0.302981543192964, |
| "learning_rate": 5.258961973616117e-07, |
| "loss": 0.2427, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.4005681818181817, |
| "grad_norm": 0.3008721863153869, |
| "learning_rate": 5.211485853285314e-07, |
| "loss": 0.2251, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.403409090909091, |
| "grad_norm": 0.33302783458473956, |
| "learning_rate": 5.164200057013985e-07, |
| "loss": 0.2711, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.40625, |
| "grad_norm": 0.3898327860869564, |
| "learning_rate": 5.117105039592444e-07, |
| "loss": 0.2869, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.409090909090909, |
| "grad_norm": 0.3770328552305208, |
| "learning_rate": 5.070201253976115e-07, |
| "loss": 0.2777, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.4119318181818183, |
| "grad_norm": 0.32513904942970184, |
| "learning_rate": 5.02348915128118e-07, |
| "loss": 0.2655, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.4147727272727275, |
| "grad_norm": 0.3173329184832482, |
| "learning_rate": 4.976969180780225e-07, |
| "loss": 0.2398, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.4176136363636362, |
| "grad_norm": 0.3853995789331807, |
| "learning_rate": 4.930641789897938e-07, |
| "loss": 0.2699, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.4204545454545454, |
| "grad_norm": 0.3880784265747346, |
| "learning_rate": 4.884507424206788e-07, |
| "loss": 0.2649, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.4232954545454546, |
| "grad_norm": 0.3710421332719178, |
| "learning_rate": 4.838566527422742e-07, |
| "loss": 0.2604, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.4261363636363638, |
| "grad_norm": 0.42114780257384915, |
| "learning_rate": 4.792819541400998e-07, |
| "loss": 0.2982, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.4289772727272725, |
| "grad_norm": 0.3704518376341159, |
| "learning_rate": 4.747266906131759e-07, |
| "loss": 0.2916, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.4318181818181817, |
| "grad_norm": 0.3664937063178789, |
| "learning_rate": 4.7019090597359624e-07, |
| "loss": 0.2586, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.434659090909091, |
| "grad_norm": 0.30129914419743803, |
| "learning_rate": 4.656746438461085e-07, |
| "loss": 0.233, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.4375, |
| "grad_norm": 0.3610260573998573, |
| "learning_rate": 4.611779476676956e-07, |
| "loss": 0.2295, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.440340909090909, |
| "grad_norm": 0.31555005162338934, |
| "learning_rate": 4.5670086068715564e-07, |
| "loss": 0.2324, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.4431818181818183, |
| "grad_norm": 0.38647155996115823, |
| "learning_rate": 4.522434259646896e-07, |
| "loss": 0.2509, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.4460227272727275, |
| "grad_norm": 0.3295294330692125, |
| "learning_rate": 4.4780568637148277e-07, |
| "loss": 0.2409, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.4488636363636362, |
| "grad_norm": 0.40919134523297795, |
| "learning_rate": 4.4338768458929455e-07, |
| "loss": 0.2753, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.4517045454545454, |
| "grad_norm": 0.3281509333195072, |
| "learning_rate": 4.3898946311005054e-07, |
| "loss": 0.2776, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.4545454545454546, |
| "grad_norm": 0.3003640118064134, |
| "learning_rate": 4.346110642354284e-07, |
| "loss": 0.2288, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.4573863636363638, |
| "grad_norm": 0.2856917980597871, |
| "learning_rate": 4.30252530076454e-07, |
| "loss": 0.2262, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.4602272727272725, |
| "grad_norm": 0.3716917156792666, |
| "learning_rate": 4.259139025530981e-07, |
| "loss": 0.2704, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.4630681818181817, |
| "grad_norm": 0.3646615055009088, |
| "learning_rate": 4.2159522339387027e-07, |
| "loss": 0.2422, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.465909090909091, |
| "grad_norm": 0.352013885171188, |
| "learning_rate": 4.1729653413541795e-07, |
| "loss": 0.2586, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.46875, |
| "grad_norm": 0.3341889105921635, |
| "learning_rate": 4.13017876122129e-07, |
| "loss": 0.2514, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.471590909090909, |
| "grad_norm": 0.3436869862214985, |
| "learning_rate": 4.087592905057319e-07, |
| "loss": 0.2663, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.4744318181818183, |
| "grad_norm": 0.3459285477446355, |
| "learning_rate": 4.0452081824490007e-07, |
| "loss": 0.2274, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.4772727272727275, |
| "grad_norm": 0.39474776701060227, |
| "learning_rate": 4.0030250010486106e-07, |
| "loss": 0.2635, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.4801136363636362, |
| "grad_norm": 0.3588162845171683, |
| "learning_rate": 3.9610437665699803e-07, |
| "loss": 0.2702, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.4829545454545454, |
| "grad_norm": 0.3055170644052573, |
| "learning_rate": 3.919264882784662e-07, |
| "loss": 0.2642, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.4857954545454546, |
| "grad_norm": 0.4004147388266674, |
| "learning_rate": 3.8776887515180215e-07, |
| "loss": 0.2673, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.4886363636363638, |
| "grad_norm": 0.3435684772838886, |
| "learning_rate": 3.836315772645355e-07, |
| "loss": 0.2572, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.4914772727272725, |
| "grad_norm": 0.3929983920357782, |
| "learning_rate": 3.79514634408806e-07, |
| "loss": 0.314, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.4943181818181817, |
| "grad_norm": 0.3402456651574272, |
| "learning_rate": 3.7541808618098225e-07, |
| "loss": 0.2742, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.497159090909091, |
| "grad_norm": 0.3391484648776555, |
| "learning_rate": 3.713419719812775e-07, |
| "loss": 0.2957, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.3300372482602716, |
| "learning_rate": 3.6728633101337283e-07, |
| "loss": 0.2402, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.502840909090909, |
| "grad_norm": 0.3880324057454857, |
| "learning_rate": 3.632512022840401e-07, |
| "loss": 0.225, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.5056818181818183, |
| "grad_norm": 0.40083562156829194, |
| "learning_rate": 3.592366246027654e-07, |
| "loss": 0.2885, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.5085227272727275, |
| "grad_norm": 0.3898508645513151, |
| "learning_rate": 3.552426365813791e-07, |
| "loss": 0.279, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.5113636363636362, |
| "grad_norm": 0.34747356344583896, |
| "learning_rate": 3.512692766336795e-07, |
| "loss": 0.2551, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.5142045454545454, |
| "grad_norm": 0.3697878476145354, |
| "learning_rate": 3.4731658297506717e-07, |
| "loss": 0.2584, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.5170454545454546, |
| "grad_norm": 0.3442593343497222, |
| "learning_rate": 3.433845936221772e-07, |
| "loss": 0.2323, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.5198863636363638, |
| "grad_norm": 0.4052191198887473, |
| "learning_rate": 3.394733463925115e-07, |
| "loss": 0.2895, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.5227272727272725, |
| "grad_norm": 0.3639886136390821, |
| "learning_rate": 3.355828789040752e-07, |
| "loss": 0.276, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.5255681818181817, |
| "grad_norm": 0.39883666474289897, |
| "learning_rate": 3.3171322857501796e-07, |
| "loss": 0.2858, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.528409090909091, |
| "grad_norm": 0.40889869044433336, |
| "learning_rate": 3.278644326232713e-07, |
| "loss": 0.257, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.53125, |
| "grad_norm": 0.3284086126915543, |
| "learning_rate": 3.2403652806619e-07, |
| "loss": 0.2699, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.534090909090909, |
| "grad_norm": 0.3806103148982155, |
| "learning_rate": 3.2022955172019947e-07, |
| "loss": 0.2607, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.5369318181818183, |
| "grad_norm": 0.414262076377764, |
| "learning_rate": 3.1644354020043846e-07, |
| "loss": 0.2709, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.5397727272727275, |
| "grad_norm": 0.3564646964673218, |
| "learning_rate": 3.1267852992040715e-07, |
| "loss": 0.2845, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.5426136363636362, |
| "grad_norm": 0.35912306046922576, |
| "learning_rate": 3.0893455709162023e-07, |
| "loss": 0.2466, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.3605968532309376, |
| "learning_rate": 3.052116577232533e-07, |
| "loss": 0.2868, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.5482954545454546, |
| "grad_norm": 0.4129969651465434, |
| "learning_rate": 3.015098676218009e-07, |
| "loss": 0.2738, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.5511363636363638, |
| "grad_norm": 0.38800081862705826, |
| "learning_rate": 2.9782922239073084e-07, |
| "loss": 0.274, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.5539772727272725, |
| "grad_norm": 0.28725463626604075, |
| "learning_rate": 2.9416975743014134e-07, |
| "loss": 0.246, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.5568181818181817, |
| "grad_norm": 0.33194490572792595, |
| "learning_rate": 2.9053150793642013e-07, |
| "loss": 0.2418, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.559659090909091, |
| "grad_norm": 0.31927368240055043, |
| "learning_rate": 2.8691450890190794e-07, |
| "loss": 0.259, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.5625, |
| "grad_norm": 0.4514488260064792, |
| "learning_rate": 2.833187951145588e-07, |
| "loss": 0.2674, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.565340909090909, |
| "grad_norm": 0.3952590748072181, |
| "learning_rate": 2.797444011576089e-07, |
| "loss": 0.2764, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.5681818181818183, |
| "grad_norm": 0.3035956390116324, |
| "learning_rate": 2.7619136140924153e-07, |
| "loss": 0.2361, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.5710227272727275, |
| "grad_norm": 0.365463810965996, |
| "learning_rate": 2.726597100422565e-07, |
| "loss": 0.2955, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.5738636363636362, |
| "grad_norm": 0.37417152902560946, |
| "learning_rate": 2.6914948102374384e-07, |
| "loss": 0.3007, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.5767045454545454, |
| "grad_norm": 0.36872656495257466, |
| "learning_rate": 2.656607081147547e-07, |
| "loss": 0.2647, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.5795454545454546, |
| "grad_norm": 0.381314240650295, |
| "learning_rate": 2.621934248699767e-07, |
| "loss": 0.3176, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.5823863636363638, |
| "grad_norm": 0.41529466546435734, |
| "learning_rate": 2.5874766463741263e-07, |
| "loss": 0.2482, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.5852272727272725, |
| "grad_norm": 0.4258649726301599, |
| "learning_rate": 2.553234605580593e-07, |
| "loss": 0.2618, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.5880681818181817, |
| "grad_norm": 0.3762825021021476, |
| "learning_rate": 2.5192084556558776e-07, |
| "loss": 0.2914, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.590909090909091, |
| "grad_norm": 0.3627506684619514, |
| "learning_rate": 2.4853985238602745e-07, |
| "loss": 0.2875, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.59375, |
| "grad_norm": 0.3173651745814326, |
| "learning_rate": 2.451805135374516e-07, |
| "loss": 0.2421, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.596590909090909, |
| "grad_norm": 0.44802208559240897, |
| "learning_rate": 2.4184286132966305e-07, |
| "loss": 0.2803, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.5994318181818183, |
| "grad_norm": 0.36772649044669337, |
| "learning_rate": 2.3852692786388634e-07, |
| "loss": 0.3018, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.6022727272727275, |
| "grad_norm": 0.3473737442586536, |
| "learning_rate": 2.3523274503245624e-07, |
| "loss": 0.2565, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.6051136363636362, |
| "grad_norm": 0.31723371911082704, |
| "learning_rate": 2.319603445185109e-07, |
| "loss": 0.2769, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.6079545454545454, |
| "grad_norm": 0.36837062880150556, |
| "learning_rate": 2.2870975779569066e-07, |
| "loss": 0.294, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.6107954545454546, |
| "grad_norm": 0.34124708806422904, |
| "learning_rate": 2.2548101612783147e-07, |
| "loss": 0.2516, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.6136363636363638, |
| "grad_norm": 0.3202170151424555, |
| "learning_rate": 2.2227415056866431e-07, |
| "loss": 0.254, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.6164772727272725, |
| "grad_norm": 0.4260342271233267, |
| "learning_rate": 2.1908919196152013e-07, |
| "loss": 0.2719, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.6193181818181817, |
| "grad_norm": 0.37728441327420986, |
| "learning_rate": 2.1592617093902978e-07, |
| "loss": 0.2753, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.622159090909091, |
| "grad_norm": 0.39060937907330195, |
| "learning_rate": 2.1278511792283018e-07, |
| "loss": 0.2947, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 0.30888479325881507, |
| "learning_rate": 2.0966606312327303e-07, |
| "loss": 0.2284, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.627840909090909, |
| "grad_norm": 0.40561974710485005, |
| "learning_rate": 2.065690365391329e-07, |
| "loss": 0.2943, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.6306818181818183, |
| "grad_norm": 0.355886681039042, |
| "learning_rate": 2.0349406795731774e-07, |
| "loss": 0.2462, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.6335227272727275, |
| "grad_norm": 0.37901081172880524, |
| "learning_rate": 2.0044118695258657e-07, |
| "loss": 0.2918, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.6363636363636362, |
| "grad_norm": 0.48522777901179487, |
| "learning_rate": 1.9741042288725893e-07, |
| "loss": 0.3463, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.6392045454545454, |
| "grad_norm": 0.35552067688931177, |
| "learning_rate": 1.944018049109375e-07, |
| "loss": 0.2589, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.6420454545454546, |
| "grad_norm": 0.3245196964527464, |
| "learning_rate": 1.9141536196022658e-07, |
| "loss": 0.2667, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.6448863636363638, |
| "grad_norm": 0.397373448701769, |
| "learning_rate": 1.884511227584518e-07, |
| "loss": 0.2635, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.6477272727272725, |
| "grad_norm": 0.3230165219575403, |
| "learning_rate": 1.8550911581538517e-07, |
| "loss": 0.2524, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.6505681818181817, |
| "grad_norm": 0.3201491067518106, |
| "learning_rate": 1.825893694269723e-07, |
| "loss": 0.2704, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.653409090909091, |
| "grad_norm": 0.3806372642940993, |
| "learning_rate": 1.7969191167505811e-07, |
| "loss": 0.2891, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.65625, |
| "grad_norm": 0.3315048294973883, |
| "learning_rate": 1.7681677042711732e-07, |
| "loss": 0.2469, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.659090909090909, |
| "grad_norm": 0.3429832481491404, |
| "learning_rate": 1.7396397333598657e-07, |
| "loss": 0.2344, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.6619318181818183, |
| "grad_norm": 0.31805225672924486, |
| "learning_rate": 1.711335478395984e-07, |
| "loss": 0.2301, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.6647727272727275, |
| "grad_norm": 0.347431193735004, |
| "learning_rate": 1.6832552116071905e-07, |
| "loss": 0.274, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.6676136363636362, |
| "grad_norm": 0.3276581659477082, |
| "learning_rate": 1.6553992030668293e-07, |
| "loss": 0.2569, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.6704545454545454, |
| "grad_norm": 0.4181936566989231, |
| "learning_rate": 1.6277677206913588e-07, |
| "loss": 0.2737, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.6732954545454546, |
| "grad_norm": 0.37610721012897674, |
| "learning_rate": 1.6003610302377708e-07, |
| "loss": 0.2999, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.6761363636363638, |
| "grad_norm": 0.33046264353939814, |
| "learning_rate": 1.5731793953010193e-07, |
| "loss": 0.2427, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.6789772727272725, |
| "grad_norm": 0.3494974820800891, |
| "learning_rate": 1.5462230773115066e-07, |
| "loss": 0.264, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.6818181818181817, |
| "grad_norm": 0.3468159326122336, |
| "learning_rate": 1.5194923355325464e-07, |
| "loss": 0.3076, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.684659090909091, |
| "grad_norm": 0.40045232274054987, |
| "learning_rate": 1.492987427057893e-07, |
| "loss": 0.3051, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.6875, |
| "grad_norm": 0.4030575958079979, |
| "learning_rate": 1.4667086068092446e-07, |
| "loss": 0.2437, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.690340909090909, |
| "grad_norm": 0.34082328928674294, |
| "learning_rate": 1.440656127533821e-07, |
| "loss": 0.2501, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.6931818181818183, |
| "grad_norm": 0.34010796962843276, |
| "learning_rate": 1.414830239801898e-07, |
| "loss": 0.27, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.6960227272727275, |
| "grad_norm": 0.4274695728838406, |
| "learning_rate": 1.3892311920044282e-07, |
| "loss": 0.2964, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.6988636363636362, |
| "grad_norm": 0.35443571450269734, |
| "learning_rate": 1.3638592303506364e-07, |
| "loss": 0.252, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7017045454545454, |
| "grad_norm": 0.40737204314859, |
| "learning_rate": 1.3387145988656537e-07, |
| "loss": 0.2891, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.7045454545454546, |
| "grad_norm": 0.352138799387513, |
| "learning_rate": 1.313797539388159e-07, |
| "loss": 0.2439, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.7073863636363638, |
| "grad_norm": 0.33845536331763004, |
| "learning_rate": 1.2891082915680864e-07, |
| "loss": 0.2802, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.7102272727272725, |
| "grad_norm": 0.35504925601892684, |
| "learning_rate": 1.264647092864288e-07, |
| "loss": 0.2514, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.7130681818181817, |
| "grad_norm": 0.3609121713893806, |
| "learning_rate": 1.2404141785422568e-07, |
| "loss": 0.25, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.715909090909091, |
| "grad_norm": 0.3936221787085924, |
| "learning_rate": 1.2164097816718818e-07, |
| "loss": 0.2312, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.71875, |
| "grad_norm": 0.38365034429115125, |
| "learning_rate": 1.1926341331251756e-07, |
| "loss": 0.2682, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.721590909090909, |
| "grad_norm": 0.31959559051327435, |
| "learning_rate": 1.169087461574081e-07, |
| "loss": 0.2457, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.7244318181818183, |
| "grad_norm": 0.3799557870602865, |
| "learning_rate": 1.1457699934882715e-07, |
| "loss": 0.2968, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.27723093935677195, |
| "learning_rate": 1.1226819531329342e-07, |
| "loss": 0.2219, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.7301136363636362, |
| "grad_norm": 0.3534828660456155, |
| "learning_rate": 1.0998235625666708e-07, |
| "loss": 0.2433, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.7329545454545454, |
| "grad_norm": 0.35791787748576426, |
| "learning_rate": 1.0771950416393228e-07, |
| "loss": 0.2597, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.7357954545454546, |
| "grad_norm": 0.4475649717820448, |
| "learning_rate": 1.0547966079898637e-07, |
| "loss": 0.2636, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.7386363636363638, |
| "grad_norm": 0.39027504830647813, |
| "learning_rate": 1.0326284770443063e-07, |
| "loss": 0.2728, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.7414772727272725, |
| "grad_norm": 0.4315977251477179, |
| "learning_rate": 1.0106908620136525e-07, |
| "loss": 0.2588, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.7443181818181817, |
| "grad_norm": 0.32246952155074843, |
| "learning_rate": 9.889839738918022e-08, |
| "loss": 0.2369, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.747159090909091, |
| "grad_norm": 0.3333508436923039, |
| "learning_rate": 9.675080214535559e-08, |
| "loss": 0.2574, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.3654840156563527, |
| "learning_rate": 9.46263211252596e-08, |
| "loss": 0.3222, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.752840909090909, |
| "grad_norm": 0.3366414190912868, |
| "learning_rate": 9.252497476194972e-08, |
| "loss": 0.2926, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.7556818181818183, |
| "grad_norm": 0.3243823618475195, |
| "learning_rate": 9.044678326597722e-08, |
| "loss": 0.2484, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.7585227272727275, |
| "grad_norm": 0.34777278160161157, |
| "learning_rate": 8.839176662519155e-08, |
| "loss": 0.2349, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.7613636363636362, |
| "grad_norm": 0.34671371366502046, |
| "learning_rate": 8.635994460454766e-08, |
| "loss": 0.2574, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.7642045454545454, |
| "grad_norm": 0.38617683116302787, |
| "learning_rate": 8.435133674591922e-08, |
| "loss": 0.3007, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.7670454545454546, |
| "grad_norm": 0.4218961579649425, |
| "learning_rate": 8.2365962367906e-08, |
| "loss": 0.2916, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.7698863636363638, |
| "grad_norm": 0.3971792338298757, |
| "learning_rate": 8.040384056565098e-08, |
| "loss": 0.2563, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.7727272727272725, |
| "grad_norm": 0.3226524769417545, |
| "learning_rate": 7.846499021065684e-08, |
| "loss": 0.266, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.7755681818181817, |
| "grad_norm": 0.3540519465775941, |
| "learning_rate": 7.654942995060283e-08, |
| "loss": 0.2616, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.778409090909091, |
| "grad_norm": 0.3781537081979966, |
| "learning_rate": 7.465717820916624e-08, |
| "loss": 0.2698, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.78125, |
| "grad_norm": 0.3564755050368105, |
| "learning_rate": 7.278825318584647e-08, |
| "loss": 0.27, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.784090909090909, |
| "grad_norm": 0.3510249393237661, |
| "learning_rate": 7.094267285578688e-08, |
| "loss": 0.2666, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.7869318181818183, |
| "grad_norm": 0.3998246424539849, |
| "learning_rate": 6.912045496960507e-08, |
| "loss": 0.2851, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.7897727272727275, |
| "grad_norm": 0.37123966300816885, |
| "learning_rate": 6.732161705322093e-08, |
| "loss": 0.2528, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.7926136363636362, |
| "grad_norm": 0.32607742324666744, |
| "learning_rate": 6.554617640768674e-08, |
| "loss": 0.2682, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.7954545454545454, |
| "grad_norm": 0.3091883263291907, |
| "learning_rate": 6.379415010902362e-08, |
| "loss": 0.2431, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.7982954545454546, |
| "grad_norm": 0.3896435979654701, |
| "learning_rate": 6.206555500805455e-08, |
| "loss": 0.2662, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.8011363636363638, |
| "grad_norm": 0.36244662485716045, |
| "learning_rate": 6.036040773024387e-08, |
| "loss": 0.2708, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.8039772727272725, |
| "grad_norm": 0.3558651773572941, |
| "learning_rate": 5.867872467553715e-08, |
| "loss": 0.3004, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.8068181818181817, |
| "grad_norm": 0.37311773304851065, |
| "learning_rate": 5.702052201820352e-08, |
| "loss": 0.3088, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.809659090909091, |
| "grad_norm": 0.411421481665237, |
| "learning_rate": 5.5385815706678894e-08, |
| "loss": 0.2923, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.8125, |
| "grad_norm": 0.3759229007631887, |
| "learning_rate": 5.377462146341439e-08, |
| "loss": 0.2945, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.815340909090909, |
| "grad_norm": 0.3014861546323833, |
| "learning_rate": 5.218695478472397e-08, |
| "loss": 0.2119, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.8181818181818183, |
| "grad_norm": 0.4021583403485505, |
| "learning_rate": 5.062283094063536e-08, |
| "loss": 0.2878, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.8210227272727275, |
| "grad_norm": 0.3293364475828707, |
| "learning_rate": 4.9082264974744665e-08, |
| "loss": 0.266, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.8238636363636362, |
| "grad_norm": 0.30933470398564117, |
| "learning_rate": 4.756527170406922e-08, |
| "loss": 0.2314, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.8267045454545454, |
| "grad_norm": 0.37909174739130147, |
| "learning_rate": 4.607186571890715e-08, |
| "loss": 0.2667, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.8295454545454546, |
| "grad_norm": 0.37878603560502083, |
| "learning_rate": 4.46020613826964e-08, |
| "loss": 0.2937, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.8323863636363638, |
| "grad_norm": 0.408496513297682, |
| "learning_rate": 4.3155872831875946e-08, |
| "loss": 0.2757, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.8352272727272725, |
| "grad_norm": 0.3566593848752578, |
| "learning_rate": 4.1733313975750586e-08, |
| "loss": 0.2584, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.8380681818181817, |
| "grad_norm": 0.352150696238673, |
| "learning_rate": 4.033439849635695e-08, |
| "loss": 0.2115, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.840909090909091, |
| "grad_norm": 0.39392089147895293, |
| "learning_rate": 3.895913984833216e-08, |
| "loss": 0.2816, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.84375, |
| "grad_norm": 0.3412262767323334, |
| "learning_rate": 3.760755125878368e-08, |
| "loss": 0.2431, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.846590909090909, |
| "grad_norm": 0.3325324503502811, |
| "learning_rate": 3.627964572716331e-08, |
| "loss": 0.264, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.8494318181818183, |
| "grad_norm": 0.35296040111990046, |
| "learning_rate": 3.497543602514059e-08, |
| "loss": 0.2614, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.8522727272727275, |
| "grad_norm": 0.2837474483774213, |
| "learning_rate": 3.3694934696481275e-08, |
| "loss": 0.2123, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.8551136363636362, |
| "grad_norm": 0.34272040018575495, |
| "learning_rate": 3.24381540569263e-08, |
| "loss": 0.2808, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.8579545454545454, |
| "grad_norm": 0.3498353760521046, |
| "learning_rate": 3.120510619407324e-08, |
| "loss": 0.251, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.8607954545454546, |
| "grad_norm": 0.4069913912888687, |
| "learning_rate": 2.9995802967259516e-08, |
| "loss": 0.316, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.8636363636363638, |
| "grad_norm": 0.3361233831001831, |
| "learning_rate": 2.8810256007449632e-08, |
| "loss": 0.2293, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.8664772727272725, |
| "grad_norm": 0.4519558529396144, |
| "learning_rate": 2.7648476717122287e-08, |
| "loss": 0.2792, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.8693181818181817, |
| "grad_norm": 0.409732040720535, |
| "learning_rate": 2.651047627016068e-08, |
| "loss": 0.2904, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.872159090909091, |
| "grad_norm": 0.3250171306579268, |
| "learning_rate": 2.5396265611745687e-08, |
| "loss": 0.2463, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.3856346320602474, |
| "learning_rate": 2.4305855458250373e-08, |
| "loss": 0.2356, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.877840909090909, |
| "grad_norm": 0.3526716263439721, |
| "learning_rate": 2.3239256297136193e-08, |
| "loss": 0.258, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.8806818181818183, |
| "grad_norm": 0.41510762650616695, |
| "learning_rate": 2.2196478386853624e-08, |
| "loss": 0.3018, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.8835227272727275, |
| "grad_norm": 0.2827340090469283, |
| "learning_rate": 2.117753175674142e-08, |
| "loss": 0.1949, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.8863636363636362, |
| "grad_norm": 0.42491879871002564, |
| "learning_rate": 2.0182426206932503e-08, |
| "loss": 0.2607, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.8892045454545454, |
| "grad_norm": 0.3281820518654654, |
| "learning_rate": 1.921117130825767e-08, |
| "loss": 0.266, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.8920454545454546, |
| "grad_norm": 0.5241869397210815, |
| "learning_rate": 1.82637764021551e-08, |
| "loss": 0.2566, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.8948863636363638, |
| "grad_norm": 0.36254656882284764, |
| "learning_rate": 1.7340250600579588e-08, |
| "loss": 0.2683, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.8977272727272725, |
| "grad_norm": 0.32113348760758087, |
| "learning_rate": 1.6440602785914584e-08, |
| "loss": 0.2495, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.9005681818181817, |
| "grad_norm": 0.39293475539987827, |
| "learning_rate": 1.556484161088806e-08, |
| "loss": 0.2673, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.903409090909091, |
| "grad_norm": 0.3692023050105476, |
| "learning_rate": 1.4712975498488158e-08, |
| "loss": 0.2676, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.90625, |
| "grad_norm": 0.3301143389304983, |
| "learning_rate": 1.3885012641882967e-08, |
| "loss": 0.2549, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.35000768054036296, |
| "learning_rate": 1.3080961004340308e-08, |
| "loss": 0.2769, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.9119318181818183, |
| "grad_norm": 0.3257952826732702, |
| "learning_rate": 1.2300828319153635e-08, |
| "loss": 0.2455, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.9147727272727275, |
| "grad_norm": 0.40990948777245817, |
| "learning_rate": 1.1544622089565139e-08, |
| "loss": 0.2999, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.9176136363636362, |
| "grad_norm": 0.5767979073587421, |
| "learning_rate": 1.0812349588694426e-08, |
| "loss": 0.2985, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.9204545454545454, |
| "grad_norm": 0.39277960762361686, |
| "learning_rate": 1.010401785947024e-08, |
| "loss": 0.3085, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.9232954545454546, |
| "grad_norm": 0.3487911223998262, |
| "learning_rate": 9.419633714559118e-09, |
| "loss": 0.2771, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.9261363636363638, |
| "grad_norm": 0.35424561590037207, |
| "learning_rate": 8.759203736304067e-09, |
| "loss": 0.2753, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.9289772727272725, |
| "grad_norm": 0.29485879447799396, |
| "learning_rate": 8.122734276657384e-09, |
| "loss": 0.2089, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.9318181818181817, |
| "grad_norm": 0.6294311065483419, |
| "learning_rate": 7.51023145712293e-09, |
| "loss": 0.3052, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.934659090909091, |
| "grad_norm": 0.354680706106559, |
| "learning_rate": 6.921701168694228e-09, |
| "loss": 0.2638, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.9375, |
| "grad_norm": 0.31404010890218703, |
| "learning_rate": 6.357149071800628e-09, |
| "loss": 0.2396, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.940340909090909, |
| "grad_norm": 0.3191510959590836, |
| "learning_rate": 5.816580596250676e-09, |
| "loss": 0.2652, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.9431818181818183, |
| "grad_norm": 0.33936071059872674, |
| "learning_rate": 5.300000941180494e-09, |
| "loss": 0.2761, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.9460227272727275, |
| "grad_norm": 0.6694940206582203, |
| "learning_rate": 4.807415075005206e-09, |
| "loss": 0.2716, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.9488636363636362, |
| "grad_norm": 0.3022654996639677, |
| "learning_rate": 4.338827735368423e-09, |
| "loss": 0.267, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.9517045454545454, |
| "grad_norm": 0.31223716729746726, |
| "learning_rate": 3.894243429098943e-09, |
| "loss": 0.2556, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.9545454545454546, |
| "grad_norm": 0.33999761359381697, |
| "learning_rate": 3.4736664321671777e-09, |
| "loss": 0.2234, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.9573863636363638, |
| "grad_norm": 0.38818142260184346, |
| "learning_rate": 3.0771007896424066e-09, |
| "loss": 0.2822, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.9602272727272725, |
| "grad_norm": 0.3915644733747401, |
| "learning_rate": 2.7045503156555853e-09, |
| "loss": 0.3089, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.9630681818181817, |
| "grad_norm": 0.35070734375473045, |
| "learning_rate": 2.3560185933621526e-09, |
| "loss": 0.2485, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.965909090909091, |
| "grad_norm": 0.32676962221864597, |
| "learning_rate": 2.031508974907337e-09, |
| "loss": 0.2564, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.96875, |
| "grad_norm": 0.37376434665996433, |
| "learning_rate": 1.7310245813939586e-09, |
| "loss": 0.2843, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.971590909090909, |
| "grad_norm": 0.3812123549505928, |
| "learning_rate": 1.4545683028521772e-09, |
| "loss": 0.2642, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.9744318181818183, |
| "grad_norm": 0.40366173461812144, |
| "learning_rate": 1.2021427982128463e-09, |
| "loss": 0.2714, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.9772727272727275, |
| "grad_norm": 0.38234650853272395, |
| "learning_rate": 9.737504952803124e-10, |
| "loss": 0.2483, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.9801136363636362, |
| "grad_norm": 0.3581632163317752, |
| "learning_rate": 7.693935907102102e-10, |
| "loss": 0.2448, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.9829545454545454, |
| "grad_norm": 0.44654505449503146, |
| "learning_rate": 5.890740499878145e-10, |
| "loss": 0.295, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.9857954545454546, |
| "grad_norm": 0.33560840489821, |
| "learning_rate": 4.3279360740972053e-10, |
| "loss": 0.2217, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.9886363636363638, |
| "grad_norm": 0.3283855292339783, |
| "learning_rate": 3.005537660663582e-10, |
| "loss": 0.219, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.9914772727272725, |
| "grad_norm": 0.35996516047736465, |
| "learning_rate": 1.923557978281143e-10, |
| "loss": 0.2571, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.9943181818181817, |
| "grad_norm": 0.30525860331677324, |
| "learning_rate": 1.0820074333256492e-10, |
| "loss": 0.2571, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.997159090909091, |
| "grad_norm": 0.37025001234738963, |
| "learning_rate": 4.808941197531614e-11, |
| "loss": 0.269, |
| "step": 1055 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.3619551402376093, |
| "learning_rate": 1.2022381901399815e-11, |
| "loss": 0.24, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1056, |
| "total_flos": 1454552492015616.0, |
| "train_loss": 0.28625056774101476, |
| "train_runtime": 131042.0993, |
| "train_samples_per_second": 0.258, |
| "train_steps_per_second": 0.008 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1056, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1454552492015616.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |