| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5523101433882103, |
| "eval_steps": 500, |
| "global_step": 650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 2.1944120442820862, |
| "learning_rate": 2.777777777777778e-06, |
| "loss": 1.6587, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 2.158646448617879, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 1.6685, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 2.1343466361951706, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.6836, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 2.0231531922210753, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 1.6523, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 1.6162620203223095, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 1.6636, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.3779619042576137, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 1.6289, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.223668866838671, |
| "learning_rate": 1.9444444444444445e-05, |
| "loss": 1.6201, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.5016416362830853, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 1.5596, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.465420671008811, |
| "learning_rate": 2.5e-05, |
| "loss": 1.6113, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.1965670018804309, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 1.5898, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.1117797752417102, |
| "learning_rate": 3.055555555555556e-05, |
| "loss": 1.6035, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.9878470790338667, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 1.5625, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.02494625138462, |
| "learning_rate": 3.611111111111111e-05, |
| "loss": 1.5547, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.0223917263016193, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 1.5615, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.9433437823947872, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 1.5728, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.8737056838198499, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 1.5327, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.847350291380953, |
| "learning_rate": 4.722222222222222e-05, |
| "loss": 1.4829, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.9546966542598146, |
| "learning_rate": 5e-05, |
| "loss": 1.5532, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.9232787655185869, |
| "learning_rate": 5.2777777777777784e-05, |
| "loss": 1.5303, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.879349873123116, |
| "learning_rate": 5.555555555555556e-05, |
| "loss": 1.502, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8709535184620328, |
| "learning_rate": 5.833333333333334e-05, |
| "loss": 1.4585, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8819786376627559, |
| "learning_rate": 6.111111111111112e-05, |
| "loss": 1.4858, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8589200426275411, |
| "learning_rate": 6.388888888888888e-05, |
| "loss": 1.4644, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8190182080399642, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 1.4561, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8796864611649672, |
| "learning_rate": 6.944444444444444e-05, |
| "loss": 1.4546, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8331325598252782, |
| "learning_rate": 7.222222222222222e-05, |
| "loss": 1.4624, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8345520972989295, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 1.4453, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.8176489443002161, |
| "learning_rate": 7.777777777777778e-05, |
| "loss": 1.4541, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.7528421779691234, |
| "learning_rate": 8.055555555555556e-05, |
| "loss": 1.4434, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7991219912695795, |
| "learning_rate": 8.333333333333334e-05, |
| "loss": 1.4546, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7453012031751974, |
| "learning_rate": 8.611111111111112e-05, |
| "loss": 1.4541, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7356336435000073, |
| "learning_rate": 8.888888888888889e-05, |
| "loss": 1.4565, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7239229910223912, |
| "learning_rate": 9.166666666666667e-05, |
| "loss": 1.4253, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.6995782237549312, |
| "learning_rate": 9.444444444444444e-05, |
| "loss": 1.4116, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7108394167974162, |
| "learning_rate": 9.722222222222223e-05, |
| "loss": 1.4053, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7270375728618296, |
| "learning_rate": 0.0001, |
| "loss": 1.4214, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7494051458635556, |
| "learning_rate": 9.999981014161752e-05, |
| "loss": 1.4644, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.733832068426627, |
| "learning_rate": 9.999924056791192e-05, |
| "loss": 1.4141, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.6719534359517587, |
| "learning_rate": 9.999829128320874e-05, |
| "loss": 1.4023, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7406841100980851, |
| "learning_rate": 9.999696229471716e-05, |
| "loss": 1.4263, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.6561411493710649, |
| "learning_rate": 9.999525361252996e-05, |
| "loss": 1.4126, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6703214903768667, |
| "learning_rate": 9.999316524962345e-05, |
| "loss": 1.3955, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6952049638900921, |
| "learning_rate": 9.999069722185737e-05, |
| "loss": 1.4072, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6806747265810544, |
| "learning_rate": 9.998784954797474e-05, |
| "loss": 1.4146, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6761436892518071, |
| "learning_rate": 9.998462224960175e-05, |
| "loss": 1.4009, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.694044598842866, |
| "learning_rate": 9.998101535124758e-05, |
| "loss": 1.4268, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6557563304435648, |
| "learning_rate": 9.997702888030423e-05, |
| "loss": 1.3794, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6612841638564682, |
| "learning_rate": 9.997266286704631e-05, |
| "loss": 1.3892, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6486556767977087, |
| "learning_rate": 9.996791734463077e-05, |
| "loss": 1.3652, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6483540085185676, |
| "learning_rate": 9.996279234909671e-05, |
| "loss": 1.3984, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6700432628612305, |
| "learning_rate": 9.995728791936504e-05, |
| "loss": 1.3999, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6432744026831555, |
| "learning_rate": 9.99514040972383e-05, |
| "loss": 1.356, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6216728827903856, |
| "learning_rate": 9.994514092740015e-05, |
| "loss": 1.3882, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6467739800460915, |
| "learning_rate": 9.993849845741524e-05, |
| "loss": 1.3765, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6503437970639988, |
| "learning_rate": 9.99314767377287e-05, |
| "loss": 1.373, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6657501610674698, |
| "learning_rate": 9.992407582166581e-05, |
| "loss": 1.3838, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6605689841115963, |
| "learning_rate": 9.991629576543163e-05, |
| "loss": 1.3716, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6989365655033877, |
| "learning_rate": 9.990813662811051e-05, |
| "loss": 1.3882, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6084957965701701, |
| "learning_rate": 9.989959847166567e-05, |
| "loss": 1.3545, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6699929146209974, |
| "learning_rate": 9.989068136093873e-05, |
| "loss": 1.3418, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6247455324530298, |
| "learning_rate": 9.988138536364922e-05, |
| "loss": 1.3486, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6622758669061856, |
| "learning_rate": 9.987171055039408e-05, |
| "loss": 1.3892, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6034683645026113, |
| "learning_rate": 9.986165699464705e-05, |
| "loss": 1.3491, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6266046141102322, |
| "learning_rate": 9.985122477275824e-05, |
| "loss": 1.3452, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6299383394087646, |
| "learning_rate": 9.984041396395343e-05, |
| "loss": 1.3569, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6104287148111909, |
| "learning_rate": 9.98292246503335e-05, |
| "loss": 1.333, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6519029188667027, |
| "learning_rate": 9.981765691687388e-05, |
| "loss": 1.3857, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6142161143427214, |
| "learning_rate": 9.980571085142381e-05, |
| "loss": 1.3228, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6229626554946482, |
| "learning_rate": 9.979338654470569e-05, |
| "loss": 1.3574, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6071740965934106, |
| "learning_rate": 9.978068409031449e-05, |
| "loss": 1.3379, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6180402576227992, |
| "learning_rate": 9.976760358471686e-05, |
| "loss": 1.3672, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6254634252353867, |
| "learning_rate": 9.975414512725057e-05, |
| "loss": 1.3525, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6146388668983097, |
| "learning_rate": 9.974030882012367e-05, |
| "loss": 1.3677, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.60548422624436, |
| "learning_rate": 9.972609476841367e-05, |
| "loss": 1.3271, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.5960020566477471, |
| "learning_rate": 9.97115030800669e-05, |
| "loss": 1.3203, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.5840389582557357, |
| "learning_rate": 9.969653386589748e-05, |
| "loss": 1.3457, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6017170229389899, |
| "learning_rate": 9.968118723958668e-05, |
| "loss": 1.3555, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.59548463038904, |
| "learning_rate": 9.966546331768191e-05, |
| "loss": 1.312, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6376362739222085, |
| "learning_rate": 9.96493622195959e-05, |
| "loss": 1.3896, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.5924552743524675, |
| "learning_rate": 9.963288406760582e-05, |
| "loss": 1.3882, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.5932204834859686, |
| "learning_rate": 9.961602898685226e-05, |
| "loss": 1.3228, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6343152356114848, |
| "learning_rate": 9.959879710533835e-05, |
| "loss": 1.3418, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.619176447518611, |
| "learning_rate": 9.958118855392876e-05, |
| "loss": 1.3511, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6059536670723797, |
| "learning_rate": 9.956320346634876e-05, |
| "loss": 1.3496, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.5833794255152709, |
| "learning_rate": 9.954484197918315e-05, |
| "loss": 1.3047, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6201224777123214, |
| "learning_rate": 9.952610423187516e-05, |
| "loss": 1.3486, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6007754625054771, |
| "learning_rate": 9.950699036672559e-05, |
| "loss": 1.3281, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.5942092191181105, |
| "learning_rate": 9.94875005288915e-05, |
| "loss": 1.3247, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5943876215982206, |
| "learning_rate": 9.946763486638528e-05, |
| "loss": 1.3286, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5851272119218502, |
| "learning_rate": 9.944739353007344e-05, |
| "loss": 1.333, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5736452322086703, |
| "learning_rate": 9.942677667367541e-05, |
| "loss": 1.3281, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5858368666665404, |
| "learning_rate": 9.940578445376258e-05, |
| "loss": 1.3408, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5951660769871204, |
| "learning_rate": 9.938441702975689e-05, |
| "loss": 1.332, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5673882981333603, |
| "learning_rate": 9.936267456392971e-05, |
| "loss": 1.29, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.57937244503091, |
| "learning_rate": 9.934055722140061e-05, |
| "loss": 1.3379, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5404623165150114, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 1.2832, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.570458957982483, |
| "learning_rate": 9.929519858094843e-05, |
| "loss": 1.2827, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.6087016644937208, |
| "learning_rate": 9.927195762749405e-05, |
| "loss": 1.3218, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5870682794787072, |
| "learning_rate": 9.92483424862726e-05, |
| "loss": 1.3135, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5856272723632059, |
| "learning_rate": 9.922435333662536e-05, |
| "loss": 1.2881, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.6111832032418365, |
| "learning_rate": 9.9199990360734e-05, |
| "loss": 1.3203, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5726861967258199, |
| "learning_rate": 9.917525374361912e-05, |
| "loss": 1.3179, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5884861410749106, |
| "learning_rate": 9.915014367313888e-05, |
| "loss": 1.3228, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.6135395087168646, |
| "learning_rate": 9.912466033998757e-05, |
| "loss": 1.3335, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5645793699483922, |
| "learning_rate": 9.90988039376942e-05, |
| "loss": 1.3125, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5857606819583933, |
| "learning_rate": 9.90725746626209e-05, |
| "loss": 1.2744, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5831028711698518, |
| "learning_rate": 9.904597271396162e-05, |
| "loss": 1.311, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.6348588219528606, |
| "learning_rate": 9.901899829374047e-05, |
| "loss": 1.3452, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5603970131542654, |
| "learning_rate": 9.899165160681025e-05, |
| "loss": 1.2964, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5865675897271678, |
| "learning_rate": 9.896393286085084e-05, |
| "loss": 1.3071, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5784164637201951, |
| "learning_rate": 9.893584226636772e-05, |
| "loss": 1.3008, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5710383398686641, |
| "learning_rate": 9.890738003669029e-05, |
| "loss": 1.2886, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5846130781014983, |
| "learning_rate": 9.887854638797023e-05, |
| "loss": 1.3096, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5880954219156209, |
| "learning_rate": 9.884934153917997e-05, |
| "loss": 1.3145, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5637844902513754, |
| "learning_rate": 9.88197657121109e-05, |
| "loss": 1.29, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5876469941096398, |
| "learning_rate": 9.878981913137179e-05, |
| "loss": 1.3418, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.6000214582832701, |
| "learning_rate": 9.8759502024387e-05, |
| "loss": 1.2896, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5839730365390634, |
| "learning_rate": 9.872881462139479e-05, |
| "loss": 1.2705, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5990677563429729, |
| "learning_rate": 9.869775715544562e-05, |
| "loss": 1.3071, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5953706456388055, |
| "learning_rate": 9.86663298624003e-05, |
| "loss": 1.2959, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5817561833721856, |
| "learning_rate": 9.86345329809282e-05, |
| "loss": 1.2852, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5744763014438757, |
| "learning_rate": 9.860236675250552e-05, |
| "loss": 1.2783, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.597834970808429, |
| "learning_rate": 9.856983142141339e-05, |
| "loss": 1.2925, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.58712068164488, |
| "learning_rate": 9.8536927234736e-05, |
| "loss": 1.3042, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.5750527697531876, |
| "learning_rate": 9.85036544423588e-05, |
| "loss": 1.2734, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.5809004038351853, |
| "learning_rate": 9.847001329696653e-05, |
| "loss": 1.2886, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.5913258260888848, |
| "learning_rate": 9.843600405404131e-05, |
| "loss": 1.2871, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6178847163930624, |
| "learning_rate": 9.840162697186075e-05, |
| "loss": 1.3066, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.569525516075595, |
| "learning_rate": 9.836688231149592e-05, |
| "loss": 1.2866, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6019297014242129, |
| "learning_rate": 9.833177033680944e-05, |
| "loss": 1.2881, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.5570240623213132, |
| "learning_rate": 9.829629131445342e-05, |
| "loss": 1.2739, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6690303999031133, |
| "learning_rate": 9.826044551386744e-05, |
| "loss": 1.3208, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.5869605252956118, |
| "learning_rate": 9.822423320727654e-05, |
| "loss": 1.3271, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6041810553976592, |
| "learning_rate": 9.818765466968909e-05, |
| "loss": 1.3071, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6055207100602872, |
| "learning_rate": 9.815071017889482e-05, |
| "loss": 1.3208, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.598286558624508, |
| "learning_rate": 9.811340001546251e-05, |
| "loss": 1.2842, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.581665353584805, |
| "learning_rate": 9.807572446273814e-05, |
| "loss": 1.2959, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5983711752241108, |
| "learning_rate": 9.803768380684242e-05, |
| "loss": 1.3027, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.6044826878147297, |
| "learning_rate": 9.799927833666887e-05, |
| "loss": 1.3169, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5879408855078629, |
| "learning_rate": 9.796050834388149e-05, |
| "loss": 1.2935, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5963062749845591, |
| "learning_rate": 9.792137412291265e-05, |
| "loss": 1.2979, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5897254261995125, |
| "learning_rate": 9.788187597096069e-05, |
| "loss": 1.3018, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.578076556919417, |
| "learning_rate": 9.784201418798786e-05, |
| "loss": 1.2939, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5718451016522863, |
| "learning_rate": 9.780178907671789e-05, |
| "loss": 1.2871, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5799606456697325, |
| "learning_rate": 9.776120094263376e-05, |
| "loss": 1.2803, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5668567319879788, |
| "learning_rate": 9.772025009397537e-05, |
| "loss": 1.2905, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5789124237655097, |
| "learning_rate": 9.767893684173721e-05, |
| "loss": 1.271, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.585619412138699, |
| "learning_rate": 9.763726149966596e-05, |
| "loss": 1.3115, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5859239926184487, |
| "learning_rate": 9.759522438425813e-05, |
| "loss": 1.29, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5735625871596765, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 1.2432, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5854811026507778, |
| "learning_rate": 9.751006611315356e-05, |
| "loss": 1.3008, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5900623773496287, |
| "learning_rate": 9.746694560417731e-05, |
| "loss": 1.2822, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5822405028437867, |
| "learning_rate": 9.742346461530048e-05, |
| "loss": 1.2822, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.572768675663712, |
| "learning_rate": 9.737962347673231e-05, |
| "loss": 1.2783, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.6054400604030475, |
| "learning_rate": 9.733542252141711e-05, |
| "loss": 1.292, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5813631045634108, |
| "learning_rate": 9.729086208503174e-05, |
| "loss": 1.2803, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5807050077512271, |
| "learning_rate": 9.724594250598311e-05, |
| "loss": 1.2949, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5909109801864655, |
| "learning_rate": 9.720066412540554e-05, |
| "loss": 1.2695, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5997130269760653, |
| "learning_rate": 9.715502728715826e-05, |
| "loss": 1.3262, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5949543658594536, |
| "learning_rate": 9.710903233782272e-05, |
| "loss": 1.2852, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.57306909788563, |
| "learning_rate": 9.706267962669998e-05, |
| "loss": 1.2896, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5729420362088954, |
| "learning_rate": 9.701596950580806e-05, |
| "loss": 1.2944, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5840523135217965, |
| "learning_rate": 9.696890232987931e-05, |
| "loss": 1.3315, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5838059803168936, |
| "learning_rate": 9.692147845635761e-05, |
| "loss": 1.2759, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.598956713705172, |
| "learning_rate": 9.687369824539577e-05, |
| "loss": 1.2949, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5684032035075478, |
| "learning_rate": 9.682556205985274e-05, |
| "loss": 1.2656, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.6042525592488565, |
| "learning_rate": 9.677707026529086e-05, |
| "loss": 1.2734, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5948932876750508, |
| "learning_rate": 9.672822322997305e-05, |
| "loss": 1.3013, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5849401513666068, |
| "learning_rate": 9.667902132486009e-05, |
| "loss": 1.2871, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.5735157149486471, |
| "learning_rate": 9.662946492360776e-05, |
| "loss": 1.2852, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.584305724268113, |
| "learning_rate": 9.657955440256395e-05, |
| "loss": 1.2622, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5564763345369137, |
| "learning_rate": 9.652929014076593e-05, |
| "loss": 1.2876, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5891098312264256, |
| "learning_rate": 9.647867251993734e-05, |
| "loss": 1.2642, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5686838714294669, |
| "learning_rate": 9.642770192448536e-05, |
| "loss": 1.272, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5750296942467902, |
| "learning_rate": 9.637637874149779e-05, |
| "loss": 1.2275, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5643960417977013, |
| "learning_rate": 9.632470336074009e-05, |
| "loss": 1.2671, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5852327056741696, |
| "learning_rate": 9.627267617465243e-05, |
| "loss": 1.2661, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.6187631168037819, |
| "learning_rate": 9.62202975783467e-05, |
| "loss": 1.3086, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5975346392701849, |
| "learning_rate": 9.616756796960353e-05, |
| "loss": 1.2822, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5861121272844693, |
| "learning_rate": 9.611448774886924e-05, |
| "loss": 1.2686, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5894840252008043, |
| "learning_rate": 9.606105731925283e-05, |
| "loss": 1.2729, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5611807927613827, |
| "learning_rate": 9.600727708652289e-05, |
| "loss": 1.2593, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5963397000712469, |
| "learning_rate": 9.595314745910456e-05, |
| "loss": 1.2539, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5735424943279671, |
| "learning_rate": 9.589866884807635e-05, |
| "loss": 1.2842, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5721828195245187, |
| "learning_rate": 9.584384166716714e-05, |
| "loss": 1.2588, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.581984160350711, |
| "learning_rate": 9.578866633275288e-05, |
| "loss": 1.2769, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5783634261178175, |
| "learning_rate": 9.573314326385359e-05, |
| "loss": 1.2812, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5970813411028905, |
| "learning_rate": 9.567727288213005e-05, |
| "loss": 1.2666, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.586596312906345, |
| "learning_rate": 9.562105561188069e-05, |
| "loss": 1.269, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.598789269026695, |
| "learning_rate": 9.556449188003831e-05, |
| "loss": 1.312, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5817408021330366, |
| "learning_rate": 9.550758211616684e-05, |
| "loss": 1.2749, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5877525727442802, |
| "learning_rate": 9.545032675245813e-05, |
| "loss": 1.2949, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.565735316640337, |
| "learning_rate": 9.539272622372858e-05, |
| "loss": 1.2646, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5758462655500972, |
| "learning_rate": 9.533478096741597e-05, |
| "loss": 1.2842, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5684311218285671, |
| "learning_rate": 9.527649142357596e-05, |
| "loss": 1.2607, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5618003842331919, |
| "learning_rate": 9.521785803487889e-05, |
| "loss": 1.248, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5876045436622994, |
| "learning_rate": 9.515888124660638e-05, |
| "loss": 1.2642, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5655105832120266, |
| "learning_rate": 9.509956150664796e-05, |
| "loss": 1.2764, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5729197026608559, |
| "learning_rate": 9.50398992654976e-05, |
| "loss": 1.2812, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.6001679004926507, |
| "learning_rate": 9.497989497625035e-05, |
| "loss": 1.2935, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5764744840651138, |
| "learning_rate": 9.491954909459895e-05, |
| "loss": 1.2363, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5929339319221091, |
| "learning_rate": 9.485886207883022e-05, |
| "loss": 1.2974, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5764385418022675, |
| "learning_rate": 9.479783438982172e-05, |
| "loss": 1.2925, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5837028558309609, |
| "learning_rate": 9.473646649103818e-05, |
| "loss": 1.2891, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5650735749077636, |
| "learning_rate": 9.4674758848528e-05, |
| "loss": 1.2334, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5586611238688065, |
| "learning_rate": 9.46127119309197e-05, |
| "loss": 1.2305, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.6080687697649292, |
| "learning_rate": 9.45503262094184e-05, |
| "loss": 1.2827, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5993996873094892, |
| "learning_rate": 9.448760215780217e-05, |
| "loss": 1.2695, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5778443809302056, |
| "learning_rate": 9.442454025241847e-05, |
| "loss": 1.2744, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5746167067960812, |
| "learning_rate": 9.43611409721806e-05, |
| "loss": 1.2754, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5733796335171946, |
| "learning_rate": 9.42974047985639e-05, |
| "loss": 1.2627, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5836676487926156, |
| "learning_rate": 9.42333322156023e-05, |
| "loss": 1.2583, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5553156591047226, |
| "learning_rate": 9.416892370988444e-05, |
| "loss": 1.2373, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.582882964454643, |
| "learning_rate": 9.410417977055011e-05, |
| "loss": 1.2417, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5669189146341135, |
| "learning_rate": 9.403910088928651e-05, |
| "loss": 1.248, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5851076716461637, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 1.2485, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5763454225514788, |
| "learning_rate": 9.390794028043474e-05, |
| "loss": 1.2559, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5670534619234323, |
| "learning_rate": 9.384185954892422e-05, |
| "loss": 1.2524, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5649816822215726, |
| "learning_rate": 9.377544586763215e-05, |
| "loss": 1.2646, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5654358097466196, |
| "learning_rate": 9.370869974092629e-05, |
| "loss": 1.23, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5870883099911602, |
| "learning_rate": 9.364162167569907e-05, |
| "loss": 1.2319, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5828901563721925, |
| "learning_rate": 9.357421218136386e-05, |
| "loss": 1.2515, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5601565336888377, |
| "learning_rate": 9.350647176985095e-05, |
| "loss": 1.2588, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5817838980314579, |
| "learning_rate": 9.343840095560372e-05, |
| "loss": 1.2612, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5789842413499999, |
| "learning_rate": 9.337000025557476e-05, |
| "loss": 1.2642, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5840330292514332, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 1.2705, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5505547742761182, |
| "learning_rate": 9.323221127850441e-05, |
| "loss": 1.2285, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.590833567081984, |
| "learning_rate": 9.316282404787871e-05, |
| "loss": 1.2666, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.5716485112550096, |
| "learning_rate": 9.309310902429472e-05, |
| "loss": 1.2563, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5927741075240744, |
| "learning_rate": 9.30230667371917e-05, |
| "loss": 1.2559, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5713472314685684, |
| "learning_rate": 9.295269771849427e-05, |
| "loss": 1.2632, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5553599915751299, |
| "learning_rate": 9.288200250260836e-05, |
| "loss": 1.2393, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5541231337910232, |
| "learning_rate": 9.281098162641714e-05, |
| "loss": 1.2393, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5756192048225591, |
| "learning_rate": 9.273963562927695e-05, |
| "loss": 1.2627, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5607724586820175, |
| "learning_rate": 9.266796505301322e-05, |
| "loss": 1.2319, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5829558605752644, |
| "learning_rate": 9.259597044191636e-05, |
| "loss": 1.2144, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5462589451489466, |
| "learning_rate": 9.252365234273755e-05, |
| "loss": 1.249, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5728804325543755, |
| "learning_rate": 9.24510113046847e-05, |
| "loss": 1.2725, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5661301120279436, |
| "learning_rate": 9.237804787941819e-05, |
| "loss": 1.251, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5590909151931563, |
| "learning_rate": 9.230476262104677e-05, |
| "loss": 1.2544, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.547954794791917, |
| "learning_rate": 9.223115608612325e-05, |
| "loss": 1.2505, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5532925940395454, |
| "learning_rate": 9.215722883364033e-05, |
| "loss": 1.2173, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5384117130456804, |
| "learning_rate": 9.208298142502636e-05, |
| "loss": 1.27, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5603423300490713, |
| "learning_rate": 9.200841442414106e-05, |
| "loss": 1.2266, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5367634686371322, |
| "learning_rate": 9.193352839727121e-05, |
| "loss": 1.2163, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5645847437540861, |
| "learning_rate": 9.185832391312644e-05, |
| "loss": 1.2354, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5663009948987631, |
| "learning_rate": 9.17828015428348e-05, |
| "loss": 1.2354, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5552105400298469, |
| "learning_rate": 9.17069618599385e-05, |
| "loss": 1.2383, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5754960899676003, |
| "learning_rate": 9.163080544038952e-05, |
| "loss": 1.2456, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.563524941652744, |
| "learning_rate": 9.155433286254525e-05, |
| "loss": 1.2554, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5517648385505713, |
| "learning_rate": 9.147754470716408e-05, |
| "loss": 1.2266, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5559354777913459, |
| "learning_rate": 9.140044155740101e-05, |
| "loss": 1.2661, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5533913100102068, |
| "learning_rate": 9.132302399880321e-05, |
| "loss": 1.2559, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5643293887010528, |
| "learning_rate": 9.124529261930559e-05, |
| "loss": 1.2612, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5582480686922173, |
| "learning_rate": 9.116724800922629e-05, |
| "loss": 1.2466, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.596671009095723, |
| "learning_rate": 9.108889076126226e-05, |
| "loss": 1.2827, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5549405425453204, |
| "learning_rate": 9.101022147048473e-05, |
| "loss": 1.2354, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5568898420832058, |
| "learning_rate": 9.093124073433463e-05, |
| "loss": 1.2285, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5495005874150289, |
| "learning_rate": 9.085194915261818e-05, |
| "loss": 1.2461, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5707362551733327, |
| "learning_rate": 9.077234732750224e-05, |
| "loss": 1.2231, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5598850464506612, |
| "learning_rate": 9.069243586350975e-05, |
| "loss": 1.2583, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5524944518616185, |
| "learning_rate": 9.061221536751517e-05, |
| "loss": 1.2222, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.545356383712922, |
| "learning_rate": 9.053168644873984e-05, |
| "loss": 1.2178, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5762727272844113, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 1.2349, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5725082474964974, |
| "learning_rate": 9.0369705791439e-05, |
| "loss": 1.2632, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5621844240082758, |
| "learning_rate": 9.028825528304892e-05, |
| "loss": 1.2373, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5634252832307994, |
| "learning_rate": 9.020649881213958e-05, |
| "loss": 1.2554, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5813087529333412, |
| "learning_rate": 9.012443699959705e-05, |
| "loss": 1.2505, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5606046951042896, |
| "learning_rate": 9.004207046862624e-05, |
| "loss": 1.2734, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5619672577392087, |
| "learning_rate": 8.995939984474624e-05, |
| "loss": 1.2349, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5872821822263775, |
| "learning_rate": 8.987642575578545e-05, |
| "loss": 1.2314, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5715303843479439, |
| "learning_rate": 8.979314883187693e-05, |
| "loss": 1.2227, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.5741431027758869, |
| "learning_rate": 8.970956970545355e-05, |
| "loss": 1.2271, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.6059321787013856, |
| "learning_rate": 8.962568901124327e-05, |
| "loss": 1.2534, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.556501982362245, |
| "learning_rate": 8.954150738626414e-05, |
| "loss": 1.2363, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.6086209059398, |
| "learning_rate": 8.945702546981969e-05, |
| "loss": 1.2847, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5564876767683732, |
| "learning_rate": 8.93722439034939e-05, |
| "loss": 1.2153, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5942368515326768, |
| "learning_rate": 8.928716333114643e-05, |
| "loss": 1.2588, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5662096725519149, |
| "learning_rate": 8.920178439890765e-05, |
| "loss": 1.2441, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5683718909670799, |
| "learning_rate": 8.911610775517382e-05, |
| "loss": 1.2275, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5430162332075814, |
| "learning_rate": 8.903013405060211e-05, |
| "loss": 1.2188, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5646245018782939, |
| "learning_rate": 8.894386393810563e-05, |
| "loss": 1.2305, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5775822002852916, |
| "learning_rate": 8.885729807284856e-05, |
| "loss": 1.2432, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5984775169761717, |
| "learning_rate": 8.877043711224108e-05, |
| "loss": 1.2598, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.6320516038682688, |
| "learning_rate": 8.868328171593448e-05, |
| "loss": 1.2437, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5529680708320875, |
| "learning_rate": 8.859583254581605e-05, |
| "loss": 1.2344, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5742550531241745, |
| "learning_rate": 8.85080902660041e-05, |
| "loss": 1.23, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5543548657925883, |
| "learning_rate": 8.842005554284296e-05, |
| "loss": 1.2632, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5639830569646097, |
| "learning_rate": 8.83317290448978e-05, |
| "loss": 1.2314, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5824488432279822, |
| "learning_rate": 8.824311144294965e-05, |
| "loss": 1.2661, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5586136460629528, |
| "learning_rate": 8.815420340999033e-05, |
| "loss": 1.1987, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5642272069241788, |
| "learning_rate": 8.806500562121723e-05, |
| "loss": 1.21, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5987204031332185, |
| "learning_rate": 8.797551875402827e-05, |
| "loss": 1.2246, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5530693719354123, |
| "learning_rate": 8.788574348801675e-05, |
| "loss": 1.2202, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.532182798905587, |
| "learning_rate": 8.77956805049661e-05, |
| "loss": 1.2026, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5554774523967565, |
| "learning_rate": 8.770533048884482e-05, |
| "loss": 1.2256, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5496250811271655, |
| "learning_rate": 8.761469412580125e-05, |
| "loss": 1.2197, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5513817728642723, |
| "learning_rate": 8.75237721041583e-05, |
| "loss": 1.2026, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5783631239023462, |
| "learning_rate": 8.74325651144083e-05, |
| "loss": 1.2666, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5570391531595814, |
| "learning_rate": 8.73410738492077e-05, |
| "loss": 1.2158, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5991957140636426, |
| "learning_rate": 8.724929900337186e-05, |
| "loss": 1.27, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5642120830729553, |
| "learning_rate": 8.715724127386972e-05, |
| "loss": 1.2095, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5848386239977618, |
| "learning_rate": 8.706490135981855e-05, |
| "loss": 1.2495, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5511112850774736, |
| "learning_rate": 8.697227996247861e-05, |
| "loss": 1.2305, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5846633795708265, |
| "learning_rate": 8.687937778524786e-05, |
| "loss": 1.209, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5540295140780195, |
| "learning_rate": 8.678619553365659e-05, |
| "loss": 1.2354, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5414463070611637, |
| "learning_rate": 8.669273391536204e-05, |
| "loss": 1.2344, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5427098846896791, |
| "learning_rate": 8.659899364014309e-05, |
| "loss": 1.209, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5574933062513657, |
| "learning_rate": 8.650497541989482e-05, |
| "loss": 1.2178, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5444595039607957, |
| "learning_rate": 8.641067996862311e-05, |
| "loss": 1.2363, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.5655086471261345, |
| "learning_rate": 8.631610800243926e-05, |
| "loss": 1.2236, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5676258965708015, |
| "learning_rate": 8.622126023955446e-05, |
| "loss": 1.2222, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5738164390287753, |
| "learning_rate": 8.612613740027443e-05, |
| "loss": 1.2437, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5763979630809666, |
| "learning_rate": 8.603074020699393e-05, |
| "loss": 1.2588, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5694093785228663, |
| "learning_rate": 8.59350693841912e-05, |
| "loss": 1.2305, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5668385068217152, |
| "learning_rate": 8.583912565842257e-05, |
| "loss": 1.2324, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5681247446685661, |
| "learning_rate": 8.574290975831685e-05, |
| "loss": 1.2461, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5705548704956539, |
| "learning_rate": 8.564642241456986e-05, |
| "loss": 1.2529, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5490624808218363, |
| "learning_rate": 8.554966435993882e-05, |
| "loss": 1.2119, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5327956614769455, |
| "learning_rate": 8.545263632923687e-05, |
| "loss": 1.2051, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5394754263176863, |
| "learning_rate": 8.535533905932738e-05, |
| "loss": 1.2207, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.547540468306315, |
| "learning_rate": 8.525777328911846e-05, |
| "loss": 1.2241, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5262754503627509, |
| "learning_rate": 8.515993975955727e-05, |
| "loss": 1.2227, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5839641855087577, |
| "learning_rate": 8.506183921362443e-05, |
| "loss": 1.228, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5658179501896037, |
| "learning_rate": 8.49634723963284e-05, |
| "loss": 1.2534, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5500813153743531, |
| "learning_rate": 8.486484005469977e-05, |
| "loss": 1.2104, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.547119722986805, |
| "learning_rate": 8.476594293778561e-05, |
| "loss": 1.1938, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5596429046688678, |
| "learning_rate": 8.466678179664379e-05, |
| "loss": 1.2148, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5705084720451127, |
| "learning_rate": 8.456735738433723e-05, |
| "loss": 1.2432, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5895336557197053, |
| "learning_rate": 8.44676704559283e-05, |
| "loss": 1.252, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5774427874239505, |
| "learning_rate": 8.436772176847294e-05, |
| "loss": 1.2251, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5394619079429321, |
| "learning_rate": 8.4267512081015e-05, |
| "loss": 1.2329, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5755505544475856, |
| "learning_rate": 8.416704215458043e-05, |
| "loss": 1.2471, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5725344759637591, |
| "learning_rate": 8.406631275217156e-05, |
| "loss": 1.2397, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.5518081872615708, |
| "learning_rate": 8.396532463876124e-05, |
| "loss": 1.248, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5841438683442003, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 1.2339, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5513391183560247, |
| "learning_rate": 8.376257534864553e-05, |
| "loss": 1.2373, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5702720231441866, |
| "learning_rate": 8.366081571168625e-05, |
| "loss": 1.2202, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5401170183476215, |
| "learning_rate": 8.355880044320598e-05, |
| "loss": 1.2036, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5584668011986428, |
| "learning_rate": 8.345653031794292e-05, |
| "loss": 1.2109, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5651374075473236, |
| "learning_rate": 8.335400611257067e-05, |
| "loss": 1.2305, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5576999267528816, |
| "learning_rate": 8.32512286056924e-05, |
| "loss": 1.208, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5417887475825113, |
| "learning_rate": 8.314819857783503e-05, |
| "loss": 1.2212, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5697549522190456, |
| "learning_rate": 8.304491681144306e-05, |
| "loss": 1.2227, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5916950215563477, |
| "learning_rate": 8.29413840908729e-05, |
| "loss": 1.2256, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5429495837198551, |
| "learning_rate": 8.283760120238672e-05, |
| "loss": 1.2036, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.5586180705362634, |
| "learning_rate": 8.273356893414659e-05, |
| "loss": 1.2095, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5575989259323167, |
| "learning_rate": 8.262928807620843e-05, |
| "loss": 1.2231, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5441346695675087, |
| "learning_rate": 8.252475942051605e-05, |
| "loss": 1.2056, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5639413959564339, |
| "learning_rate": 8.241998376089508e-05, |
| "loss": 1.2173, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.590712779900491, |
| "learning_rate": 8.231496189304704e-05, |
| "loss": 1.2568, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5777848614323381, |
| "learning_rate": 8.220969461454322e-05, |
| "loss": 1.2393, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5340681608181079, |
| "learning_rate": 8.210418272481859e-05, |
| "loss": 1.2041, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5677393547154248, |
| "learning_rate": 8.199842702516583e-05, |
| "loss": 1.2192, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5437413315036653, |
| "learning_rate": 8.18924283187292e-05, |
| "loss": 1.2139, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5659533238197777, |
| "learning_rate": 8.178618741049842e-05, |
| "loss": 1.207, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5070802266343845, |
| "learning_rate": 8.167970510730253e-05, |
| "loss": 1.1914, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5556418994287101, |
| "learning_rate": 8.157298221780389e-05, |
| "loss": 1.1938, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5473208831723899, |
| "learning_rate": 8.146601955249188e-05, |
| "loss": 1.2183, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5703775361988737, |
| "learning_rate": 8.135881792367686e-05, |
| "loss": 1.2417, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.561879279645762, |
| "learning_rate": 8.125137814548393e-05, |
| "loss": 1.2148, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.550588101278353, |
| "learning_rate": 8.114370103384681e-05, |
| "loss": 1.228, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5259832736436021, |
| "learning_rate": 8.103578740650156e-05, |
| "loss": 1.21, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5147619335698311, |
| "learning_rate": 8.092763808298048e-05, |
| "loss": 1.2026, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5504438033485523, |
| "learning_rate": 8.081925388460578e-05, |
| "loss": 1.2026, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5446547745345772, |
| "learning_rate": 8.07106356344834e-05, |
| "loss": 1.2236, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5282234491024778, |
| "learning_rate": 8.060178415749674e-05, |
| "loss": 1.2046, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5685704417565632, |
| "learning_rate": 8.049270028030046e-05, |
| "loss": 1.1948, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5387912641493281, |
| "learning_rate": 8.038338483131407e-05, |
| "loss": 1.1987, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.5524731924712689, |
| "learning_rate": 8.027383864071573e-05, |
| "loss": 1.2261, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5326013165738928, |
| "learning_rate": 8.016406254043595e-05, |
| "loss": 1.1987, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5694990420557361, |
| "learning_rate": 8.005405736415126e-05, |
| "loss": 1.2246, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5512702902650021, |
| "learning_rate": 7.994382394727784e-05, |
| "loss": 1.25, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5627692554190333, |
| "learning_rate": 7.983336312696522e-05, |
| "loss": 1.2344, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5723746306616886, |
| "learning_rate": 7.972267574208991e-05, |
| "loss": 1.2266, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5785199319910487, |
| "learning_rate": 7.961176263324901e-05, |
| "loss": 1.2046, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5638166540487942, |
| "learning_rate": 7.950062464275387e-05, |
| "loss": 1.2124, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5591166049939134, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 1.2251, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5505127627644203, |
| "learning_rate": 7.927767739457897e-05, |
| "loss": 1.2158, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5650623189448578, |
| "learning_rate": 7.916586983003533e-05, |
| "loss": 1.208, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5691768474472332, |
| "learning_rate": 7.905384077009693e-05, |
| "loss": 1.1875, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5660613389542086, |
| "learning_rate": 7.894159106554997e-05, |
| "loss": 1.2227, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5582910482328445, |
| "learning_rate": 7.882912156885637e-05, |
| "loss": 1.2173, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5687428665442464, |
| "learning_rate": 7.871643313414718e-05, |
| "loss": 1.2188, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5700426706301734, |
| "learning_rate": 7.860352661721619e-05, |
| "loss": 1.2534, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5640156767511431, |
| "learning_rate": 7.849040287551331e-05, |
| "loss": 1.2256, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5730028379052688, |
| "learning_rate": 7.837706276813819e-05, |
| "loss": 1.2383, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5729196218457621, |
| "learning_rate": 7.82635071558336e-05, |
| "loss": 1.2539, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5389989913836808, |
| "learning_rate": 7.814973690097893e-05, |
| "loss": 1.2114, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.563817849905785, |
| "learning_rate": 7.803575286758364e-05, |
| "loss": 1.1978, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5742098449729457, |
| "learning_rate": 7.79215559212807e-05, |
| "loss": 1.2104, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5482109799457336, |
| "learning_rate": 7.780714692932002e-05, |
| "loss": 1.1978, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5619582390386062, |
| "learning_rate": 7.769252676056187e-05, |
| "loss": 1.2197, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5707713766775032, |
| "learning_rate": 7.757769628547018e-05, |
| "loss": 1.2349, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5491475936260862, |
| "learning_rate": 7.746265637610613e-05, |
| "loss": 1.1758, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5500938802153376, |
| "learning_rate": 7.734740790612136e-05, |
| "loss": 1.2041, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5293011700429642, |
| "learning_rate": 7.723195175075136e-05, |
| "loss": 1.1821, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5596089219360537, |
| "learning_rate": 7.711628878680892e-05, |
| "loss": 1.2539, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5754494198608471, |
| "learning_rate": 7.700041989267736e-05, |
| "loss": 1.2378, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5705286060826581, |
| "learning_rate": 7.688434594830392e-05, |
| "loss": 1.2192, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5320392604087235, |
| "learning_rate": 7.676806783519304e-05, |
| "loss": 1.2021, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5573096847631821, |
| "learning_rate": 7.66515864363997e-05, |
| "loss": 1.229, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5670482309405055, |
| "learning_rate": 7.653490263652269e-05, |
| "loss": 1.2324, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5214893797779285, |
| "learning_rate": 7.641801732169795e-05, |
| "loss": 1.1968, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5729861431933309, |
| "learning_rate": 7.630093137959171e-05, |
| "loss": 1.2163, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5552719599472679, |
| "learning_rate": 7.618364569939391e-05, |
| "loss": 1.2075, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5465863524732877, |
| "learning_rate": 7.606616117181128e-05, |
| "loss": 1.1968, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5748817703147777, |
| "learning_rate": 7.594847868906076e-05, |
| "loss": 1.2227, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5439693138286017, |
| "learning_rate": 7.583059914486257e-05, |
| "loss": 1.2031, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5635085757798922, |
| "learning_rate": 7.571252343443349e-05, |
| "loss": 1.2324, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5490256924391643, |
| "learning_rate": 7.559425245448006e-05, |
| "loss": 1.1953, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5581764204211717, |
| "learning_rate": 7.547578710319174e-05, |
| "loss": 1.2158, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5554727329505762, |
| "learning_rate": 7.535712828023416e-05, |
| "loss": 1.2236, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5395464303361225, |
| "learning_rate": 7.52382768867422e-05, |
| "loss": 1.2114, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5613556527369445, |
| "learning_rate": 7.511923382531317e-05, |
| "loss": 1.1792, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5802200387551621, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 1.1899, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5673594518905887, |
| "learning_rate": 7.488057631630437e-05, |
| "loss": 1.2236, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5439832310153052, |
| "learning_rate": 7.476096368116974e-05, |
| "loss": 1.2168, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.601300632455188, |
| "learning_rate": 7.464116300297458e-05, |
| "loss": 1.2534, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5489145931334055, |
| "learning_rate": 7.452117519152542e-05, |
| "loss": 1.2007, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5398362128758979, |
| "learning_rate": 7.440100115804991e-05, |
| "loss": 1.1743, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5816646831232165, |
| "learning_rate": 7.428064181518997e-05, |
| "loss": 1.2344, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5633431228968494, |
| "learning_rate": 7.416009807699482e-05, |
| "loss": 1.2017, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5589499609875991, |
| "learning_rate": 7.403937085891397e-05, |
| "loss": 1.2095, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5493151850885487, |
| "learning_rate": 7.391846107779047e-05, |
| "loss": 1.1865, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.540833710956897, |
| "learning_rate": 7.379736965185368e-05, |
| "loss": 1.2041, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.524036751110743, |
| "learning_rate": 7.367609750071252e-05, |
| "loss": 1.1826, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.569282857600664, |
| "learning_rate": 7.355464554534837e-05, |
| "loss": 1.187, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5524428783713417, |
| "learning_rate": 7.343301470810808e-05, |
| "loss": 1.2202, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5623114711099751, |
| "learning_rate": 7.331120591269701e-05, |
| "loss": 1.1899, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5301869310349011, |
| "learning_rate": 7.318922008417203e-05, |
| "loss": 1.1919, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5687590875970512, |
| "learning_rate": 7.30670581489344e-05, |
| "loss": 1.2056, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5589775032893604, |
| "learning_rate": 7.294472103472281e-05, |
| "loss": 1.2188, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.578465226917116, |
| "learning_rate": 7.282220967060633e-05, |
| "loss": 1.2158, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.542415194752666, |
| "learning_rate": 7.269952498697734e-05, |
| "loss": 1.187, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5438469592749641, |
| "learning_rate": 7.257666791554448e-05, |
| "loss": 1.1494, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.56103545827402, |
| "learning_rate": 7.245363938932551e-05, |
| "loss": 1.2085, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5543439263354124, |
| "learning_rate": 7.233044034264034e-05, |
| "loss": 1.186, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5621095189445257, |
| "learning_rate": 7.220707171110382e-05, |
| "loss": 1.2036, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.578507048853321, |
| "learning_rate": 7.20835344316187e-05, |
| "loss": 1.2158, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5393466899110284, |
| "learning_rate": 7.195982944236851e-05, |
| "loss": 1.1807, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5390437694953595, |
| "learning_rate": 7.183595768281043e-05, |
| "loss": 1.1914, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.525445074400616, |
| "learning_rate": 7.171192009366814e-05, |
| "loss": 1.1655, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5397543259123138, |
| "learning_rate": 7.158771761692464e-05, |
| "loss": 1.2139, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5301908429870645, |
| "learning_rate": 7.146335119581523e-05, |
| "loss": 1.2163, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5528560850589617, |
| "learning_rate": 7.133882177482019e-05, |
| "loss": 1.2046, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5223068286596114, |
| "learning_rate": 7.121413029965769e-05, |
| "loss": 1.1855, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5375221567597188, |
| "learning_rate": 7.108927771727661e-05, |
| "loss": 1.1841, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5528153279757392, |
| "learning_rate": 7.096426497584933e-05, |
| "loss": 1.2002, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5641067857153084, |
| "learning_rate": 7.083909302476453e-05, |
| "loss": 1.1914, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5543757360519023, |
| "learning_rate": 7.071376281461994e-05, |
| "loss": 1.2026, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5521178435951897, |
| "learning_rate": 7.058827529721525e-05, |
| "loss": 1.1816, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.545288440889916, |
| "learning_rate": 7.04626314255447e-05, |
| "loss": 1.2202, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5437815680869471, |
| "learning_rate": 7.033683215379002e-05, |
| "loss": 1.2031, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5551130898620283, |
| "learning_rate": 7.021087843731302e-05, |
| "loss": 1.189, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5553626624921362, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 1.2261, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5452825817203325, |
| "learning_rate": 6.99585114974968e-05, |
| "loss": 1.1768, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5642586993770626, |
| "learning_rate": 6.98321001907167e-05, |
| "loss": 1.1841, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5398272756531507, |
| "learning_rate": 6.97055382723181e-05, |
| "loss": 1.1865, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5522752869750288, |
| "learning_rate": 6.957882670345458e-05, |
| "loss": 1.2061, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5308798815860719, |
| "learning_rate": 6.94519664464163e-05, |
| "loss": 1.1948, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5495493844679439, |
| "learning_rate": 6.932495846462261e-05, |
| "loss": 1.1914, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5482416736955646, |
| "learning_rate": 6.91978037226147e-05, |
| "loss": 1.2017, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.5503012321222697, |
| "learning_rate": 6.90705031860483e-05, |
| "loss": 1.2119, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5725638118414337, |
| "learning_rate": 6.894305782168638e-05, |
| "loss": 1.1899, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5624448379824697, |
| "learning_rate": 6.881546859739179e-05, |
| "loss": 1.23, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5561279665840796, |
| "learning_rate": 6.868773648211983e-05, |
| "loss": 1.2017, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5396579505393837, |
| "learning_rate": 6.855986244591104e-05, |
| "loss": 1.1733, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5593889291768533, |
| "learning_rate": 6.843184745988373e-05, |
| "loss": 1.2119, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5517593854010368, |
| "learning_rate": 6.830369249622662e-05, |
| "loss": 1.2114, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.565602467217196, |
| "learning_rate": 6.817539852819149e-05, |
| "loss": 1.1968, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5454828029904284, |
| "learning_rate": 6.804696653008575e-05, |
| "loss": 1.1938, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5312848354649821, |
| "learning_rate": 6.7918397477265e-05, |
| "loss": 1.1909, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5379892410248488, |
| "learning_rate": 6.778969234612584e-05, |
| "loss": 1.1733, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5640301128196445, |
| "learning_rate": 6.76608521140981e-05, |
| "loss": 1.1938, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5659554102145757, |
| "learning_rate": 6.753187775963773e-05, |
| "loss": 1.2192, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5330483555414703, |
| "learning_rate": 6.740277026221923e-05, |
| "loss": 1.2163, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5518117687862835, |
| "learning_rate": 6.727353060232822e-05, |
| "loss": 1.1904, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.556594787840222, |
| "learning_rate": 6.714415976145402e-05, |
| "loss": 1.2056, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5718671551889151, |
| "learning_rate": 6.701465872208216e-05, |
| "loss": 1.2271, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5512989214363787, |
| "learning_rate": 6.688502846768696e-05, |
| "loss": 1.2031, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5494732510246357, |
| "learning_rate": 6.675526998272405e-05, |
| "loss": 1.2119, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5378563166396855, |
| "learning_rate": 6.662538425262285e-05, |
| "loss": 1.1621, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5480052049772425, |
| "learning_rate": 6.649537226377915e-05, |
| "loss": 1.1841, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5636382985336955, |
| "learning_rate": 6.636523500354759e-05, |
| "loss": 1.2056, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5416198773488824, |
| "learning_rate": 6.623497346023418e-05, |
| "loss": 1.1646, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5478977270550187, |
| "learning_rate": 6.610458862308872e-05, |
| "loss": 1.1914, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.5607213727964705, |
| "learning_rate": 6.59740814822974e-05, |
| "loss": 1.2012, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5434978380272973, |
| "learning_rate": 6.584345302897523e-05, |
| "loss": 1.167, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5616431237123319, |
| "learning_rate": 6.571270425515843e-05, |
| "loss": 1.1938, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5718848045230116, |
| "learning_rate": 6.558183615379707e-05, |
| "loss": 1.1968, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5468507624315109, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 1.1719, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5648647170806229, |
| "learning_rate": 6.531974594476425e-05, |
| "loss": 1.207, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5503576509488237, |
| "learning_rate": 6.518852582749373e-05, |
| "loss": 1.1992, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5609821252964683, |
| "learning_rate": 6.505719036346539e-05, |
| "loss": 1.1997, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5498062769196067, |
| "learning_rate": 6.492574055008473e-05, |
| "loss": 1.1875, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5539230993166063, |
| "learning_rate": 6.479417738562576e-05, |
| "loss": 1.1909, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5496978328792177, |
| "learning_rate": 6.466250186922325e-05, |
| "loss": 1.2139, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.558021764032463, |
| "learning_rate": 6.45307150008652e-05, |
| "loss": 1.1943, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5762104557001627, |
| "learning_rate": 6.439881778138531e-05, |
| "loss": 1.2148, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5437539068220771, |
| "learning_rate": 6.426681121245527e-05, |
| "loss": 1.1758, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5525541610501483, |
| "learning_rate": 6.413469629657723e-05, |
| "loss": 1.1782, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5459086539364053, |
| "learning_rate": 6.400247403707617e-05, |
| "loss": 1.1768, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5447162084221712, |
| "learning_rate": 6.387014543809223e-05, |
| "loss": 1.1875, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.539362373363219, |
| "learning_rate": 6.37377115045732e-05, |
| "loss": 1.1782, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5277353968375915, |
| "learning_rate": 6.360517324226676e-05, |
| "loss": 1.1626, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5509172858352601, |
| "learning_rate": 6.34725316577129e-05, |
| "loss": 1.1992, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5399724501945063, |
| "learning_rate": 6.333978775823631e-05, |
| "loss": 1.1782, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5491551337995615, |
| "learning_rate": 6.320694255193867e-05, |
| "loss": 1.1934, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5226603555093761, |
| "learning_rate": 6.307399704769099e-05, |
| "loss": 1.1777, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5241657351802804, |
| "learning_rate": 6.294095225512603e-05, |
| "loss": 1.1631, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.567471175339963, |
| "learning_rate": 6.280780918463057e-05, |
| "loss": 1.1631, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5311367678485023, |
| "learning_rate": 6.26745688473377e-05, |
| "loss": 1.2012, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5672537989414137, |
| "learning_rate": 6.254123225511923e-05, |
| "loss": 1.1812, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5528630101019885, |
| "learning_rate": 6.240780042057796e-05, |
| "loss": 1.1895, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5414545180829297, |
| "learning_rate": 6.227427435703997e-05, |
| "loss": 1.1826, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5448803056038374, |
| "learning_rate": 6.214065507854693e-05, |
| "loss": 1.2002, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5564074455520511, |
| "learning_rate": 6.200694359984849e-05, |
| "loss": 1.167, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5579919679639724, |
| "learning_rate": 6.187314093639444e-05, |
| "loss": 1.1772, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5454994152327644, |
| "learning_rate": 6.173924810432705e-05, |
| "loss": 1.1528, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5269445652508755, |
| "learning_rate": 6.16052661204734e-05, |
| "loss": 1.1538, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5527514906339344, |
| "learning_rate": 6.147119600233758e-05, |
| "loss": 1.1929, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5719834333027595, |
| "learning_rate": 6.133703876809305e-05, |
| "loss": 1.1846, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5511795175086387, |
| "learning_rate": 6.12027954365748e-05, |
| "loss": 1.2114, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5662900403265925, |
| "learning_rate": 6.106846702727172e-05, |
| "loss": 1.1924, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5446585359175732, |
| "learning_rate": 6.09340545603188e-05, |
| "loss": 1.1875, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5272501396749111, |
| "learning_rate": 6.079955905648934e-05, |
| "loss": 1.1777, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5552116100408138, |
| "learning_rate": 6.066498153718735e-05, |
| "loss": 1.2012, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5405642777891093, |
| "learning_rate": 6.053032302443959e-05, |
| "loss": 1.1758, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5194384677157028, |
| "learning_rate": 6.0395584540887963e-05, |
| "loss": 1.1509, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5353733776852289, |
| "learning_rate": 6.026076710978171e-05, |
| "loss": 1.1733, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5659519018102039, |
| "learning_rate": 6.012587175496961e-05, |
| "loss": 1.2126, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.547582738299199, |
| "learning_rate": 5.999089950089218e-05, |
| "loss": 1.165, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5774060783436066, |
| "learning_rate": 5.985585137257401e-05, |
| "loss": 1.2031, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5467010209197095, |
| "learning_rate": 5.9720728395615875e-05, |
| "loss": 1.2197, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5450094529020909, |
| "learning_rate": 5.958553159618693e-05, |
| "loss": 1.1553, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5610956593277242, |
| "learning_rate": 5.945026200101702e-05, |
| "loss": 1.186, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5764296878494267, |
| "learning_rate": 5.9314920637388815e-05, |
| "loss": 1.189, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5349973942044762, |
| "learning_rate": 5.9179508533130004e-05, |
| "loss": 1.1992, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5714789800075968, |
| "learning_rate": 5.90440267166055e-05, |
| "loss": 1.1729, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5548036359072505, |
| "learning_rate": 5.890847621670966e-05, |
| "loss": 1.1768, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5687294444903879, |
| "learning_rate": 5.8772858062858416e-05, |
| "loss": 1.1738, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5611833030729912, |
| "learning_rate": 5.8637173284981526e-05, |
| "loss": 1.2061, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5575092635577042, |
| "learning_rate": 5.850142291351466e-05, |
| "loss": 1.1831, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5687051530623985, |
| "learning_rate": 5.8365607979391666e-05, |
| "loss": 1.1948, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.577821529713708, |
| "learning_rate": 5.8229729514036705e-05, |
| "loss": 1.2173, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.5186158179380925, |
| "learning_rate": 5.809378854935639e-05, |
| "loss": 1.1768, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5706185855275759, |
| "learning_rate": 5.795778611773197e-05, |
| "loss": 1.1956, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5379096408770375, |
| "learning_rate": 5.782172325201155e-05, |
| "loss": 1.2021, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5353383381657976, |
| "learning_rate": 5.768560098550213e-05, |
| "loss": 1.1968, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5372181300034581, |
| "learning_rate": 5.7549420351961844e-05, |
| "loss": 1.2012, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5553294742739052, |
| "learning_rate": 5.74131823855921e-05, |
| "loss": 1.1953, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5737647121690529, |
| "learning_rate": 5.727688812102967e-05, |
| "loss": 1.1914, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5544835592886659, |
| "learning_rate": 5.714053859333893e-05, |
| "loss": 1.1777, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5567302152570535, |
| "learning_rate": 5.70041348380039e-05, |
| "loss": 1.1777, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5591009062517888, |
| "learning_rate": 5.686767789092041e-05, |
| "loss": 1.1987, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5533228068390937, |
| "learning_rate": 5.67311687883883e-05, |
| "loss": 1.166, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5842917371524872, |
| "learning_rate": 5.6594608567103456e-05, |
| "loss": 1.1899, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.5497677778912127, |
| "learning_rate": 5.645799826414997e-05, |
| "loss": 1.1704, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.555335985223624, |
| "learning_rate": 5.6321338916992315e-05, |
| "loss": 1.2061, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5548137482348383, |
| "learning_rate": 5.618463156346739e-05, |
| "loss": 1.1641, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5476304334015565, |
| "learning_rate": 5.604787724177666e-05, |
| "loss": 1.2256, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5763238869488186, |
| "learning_rate": 5.5911076990478304e-05, |
| "loss": 1.1924, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.573010995844112, |
| "learning_rate": 5.577423184847932e-05, |
| "loss": 1.1758, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5839306160757675, |
| "learning_rate": 5.5637342855027554e-05, |
| "loss": 1.1768, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5549096632518822, |
| "learning_rate": 5.550041104970397e-05, |
| "loss": 1.1907, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.6129707331967412, |
| "learning_rate": 5.5363437472414595e-05, |
| "loss": 1.189, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5750836944584178, |
| "learning_rate": 5.522642316338268e-05, |
| "loss": 1.2017, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5726355632677141, |
| "learning_rate": 5.508936916314086e-05, |
| "loss": 1.1724, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5825041874796189, |
| "learning_rate": 5.495227651252315e-05, |
| "loss": 1.1792, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5273709654132104, |
| "learning_rate": 5.481514625265709e-05, |
| "loss": 1.1516, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5554175843789619, |
| "learning_rate": 5.467797942495589e-05, |
| "loss": 1.1816, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5689370332939844, |
| "learning_rate": 5.454077707111042e-05, |
| "loss": 1.1785, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5575026615875126, |
| "learning_rate": 5.440354023308134e-05, |
| "loss": 1.1768, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5808160899990858, |
| "learning_rate": 5.426626995309123e-05, |
| "loss": 1.1758, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5669830221600316, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 1.1792, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5749514269220795, |
| "learning_rate": 5.39916332373801e-05, |
| "loss": 1.2261, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5376405664372714, |
| "learning_rate": 5.3854268887342374e-05, |
| "loss": 1.1724, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5558539989580943, |
| "learning_rate": 5.371687526669439e-05, |
| "loss": 1.1729, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5320630027563065, |
| "learning_rate": 5.357945341884936e-05, |
| "loss": 1.1606, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5674454724879522, |
| "learning_rate": 5.344200438743489e-05, |
| "loss": 1.2314, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5637653657439295, |
| "learning_rate": 5.330452921628497e-05, |
| "loss": 1.1831, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5678489554012958, |
| "learning_rate": 5.316702894943221e-05, |
| "loss": 1.1963, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5508681780073812, |
| "learning_rate": 5.3029504631099694e-05, |
| "loss": 1.1865, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5384421199943779, |
| "learning_rate": 5.2891957305693205e-05, |
| "loss": 1.1743, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5729065210520914, |
| "learning_rate": 5.2754388017793274e-05, |
| "loss": 1.1968, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5344230276822168, |
| "learning_rate": 5.26167978121472e-05, |
| "loss": 1.1753, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.566288258614336, |
| "learning_rate": 5.247918773366112e-05, |
| "loss": 1.2319, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5638737439746134, |
| "learning_rate": 5.234155882739212e-05, |
| "loss": 1.1953, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5584891776108306, |
| "learning_rate": 5.220391213854028e-05, |
| "loss": 1.1943, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.533800617826596, |
| "learning_rate": 5.2066248712440656e-05, |
| "loss": 1.1865, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5496864217323237, |
| "learning_rate": 5.1928569594555524e-05, |
| "loss": 1.1655, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5480560448217406, |
| "learning_rate": 5.179087583046625e-05, |
| "loss": 1.2075, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5532088287070902, |
| "learning_rate": 5.165316846586541e-05, |
| "loss": 1.1855, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5630555316923159, |
| "learning_rate": 5.151544854654895e-05, |
| "loss": 1.23, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5392332890587693, |
| "learning_rate": 5.1377717118408105e-05, |
| "loss": 1.1699, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5472899225833604, |
| "learning_rate": 5.123997522742151e-05, |
| "loss": 1.1572, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5656144365417692, |
| "learning_rate": 5.110222391964728e-05, |
| "loss": 1.1758, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5540535837344962, |
| "learning_rate": 5.096446424121502e-05, |
| "loss": 1.1748, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.55509116396377, |
| "learning_rate": 5.0826697238317935e-05, |
| "loss": 1.1997, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5436489773935463, |
| "learning_rate": 5.068892395720483e-05, |
| "loss": 1.1729, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5585801228260714, |
| "learning_rate": 5.0551145444172186e-05, |
| "loss": 1.187, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5462531168282607, |
| "learning_rate": 5.041336274555625e-05, |
| "loss": 1.1733, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5482685278208174, |
| "learning_rate": 5.027557690772503e-05, |
| "loss": 1.1753, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5611858463960577, |
| "learning_rate": 5.0137788977070353e-05, |
| "loss": 1.1841, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.5353100416101868, |
| "learning_rate": 5e-05, |
| "loss": 1.1602, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5233236785257575, |
| "learning_rate": 4.986221102292965e-05, |
| "loss": 1.1558, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5587990303850614, |
| "learning_rate": 4.972442309227498e-05, |
| "loss": 1.187, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5637753129581402, |
| "learning_rate": 4.9586637254443756e-05, |
| "loss": 1.1851, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5423527328811115, |
| "learning_rate": 4.9448854555827825e-05, |
| "loss": 1.1748, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5497783942620118, |
| "learning_rate": 4.9311076042795185e-05, |
| "loss": 1.1851, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5807596942076099, |
| "learning_rate": 4.917330276168208e-05, |
| "loss": 1.1807, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5317366496866546, |
| "learning_rate": 4.903553575878499e-05, |
| "loss": 1.187, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5406062562287378, |
| "learning_rate": 4.889777608035273e-05, |
| "loss": 1.1616, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5427419787096719, |
| "learning_rate": 4.87600247725785e-05, |
| "loss": 1.1953, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5801014528130511, |
| "learning_rate": 4.8622282881591906e-05, |
| "loss": 1.2139, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5540961517309043, |
| "learning_rate": 4.848455145345105e-05, |
| "loss": 1.1616, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5569837113502658, |
| "learning_rate": 4.834683153413459e-05, |
| "loss": 1.1958, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5425605956374352, |
| "learning_rate": 4.820912416953377e-05, |
| "loss": 1.1572, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5554597115281151, |
| "learning_rate": 4.8071430405444474e-05, |
| "loss": 1.1772, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5485857472054, |
| "learning_rate": 4.7933751287559335e-05, |
| "loss": 1.145, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5586235960070927, |
| "learning_rate": 4.779608786145974e-05, |
| "loss": 1.1812, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5388195705943047, |
| "learning_rate": 4.7658441172607876e-05, |
| "loss": 1.1492, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5467938877160613, |
| "learning_rate": 4.7520812266338885e-05, |
| "loss": 1.1479, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5515115704576615, |
| "learning_rate": 4.738320218785281e-05, |
| "loss": 1.1831, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5642123389961948, |
| "learning_rate": 4.7245611982206724e-05, |
| "loss": 1.1958, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5319095686312612, |
| "learning_rate": 4.710804269430681e-05, |
| "loss": 1.1528, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5623903821527982, |
| "learning_rate": 4.697049536890033e-05, |
| "loss": 1.2104, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.5463696543302314, |
| "learning_rate": 4.683297105056782e-05, |
| "loss": 1.1797, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5514366100998066, |
| "learning_rate": 4.669547078371504e-05, |
| "loss": 1.1372, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.546668668176686, |
| "learning_rate": 4.6557995612565144e-05, |
| "loss": 1.1753, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5885081571165839, |
| "learning_rate": 4.642054658115067e-05, |
| "loss": 1.1787, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5560558507398484, |
| "learning_rate": 4.6283124733305624e-05, |
| "loss": 1.1521, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5259493759891583, |
| "learning_rate": 4.6145731112657644e-05, |
| "loss": 1.1553, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.564427420414382, |
| "learning_rate": 4.6008366762619926e-05, |
| "loss": 1.2036, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5668971367973982, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 1.1782, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5200826420651361, |
| "learning_rate": 4.573373004690878e-05, |
| "loss": 1.1526, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.547830495213073, |
| "learning_rate": 4.559645976691868e-05, |
| "loss": 1.1792, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5429616401498136, |
| "learning_rate": 4.545922292888959e-05, |
| "loss": 1.1587, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.5864665641735851, |
| "learning_rate": 4.5322020575044114e-05, |
| "loss": 1.2114, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.551477685551023, |
| "learning_rate": 4.518485374734292e-05, |
| "loss": 1.1592, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5614304257410367, |
| "learning_rate": 4.504772348747687e-05, |
| "loss": 1.1621, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5757278465009605, |
| "learning_rate": 4.491063083685916e-05, |
| "loss": 1.1738, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5680283164376482, |
| "learning_rate": 4.477357683661734e-05, |
| "loss": 1.1631, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5408927859941929, |
| "learning_rate": 4.463656252758542e-05, |
| "loss": 1.1504, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5633890124288261, |
| "learning_rate": 4.449958895029604e-05, |
| "loss": 1.1694, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.583375657455577, |
| "learning_rate": 4.436265714497245e-05, |
| "loss": 1.1831, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5671885330225722, |
| "learning_rate": 4.4225768151520694e-05, |
| "loss": 1.1904, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5717204637102599, |
| "learning_rate": 4.408892300952171e-05, |
| "loss": 1.1997, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5496978270089248, |
| "learning_rate": 4.3952122758223354e-05, |
| "loss": 1.1865, |
| "step": 650 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1176, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "total_flos": 5.251348662432301e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|