{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9894736842105263,
  "eval_steps": 500,
  "global_step": 378,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007894736842105263,
      "grad_norm": 7.0740203857421875,
      "learning_rate": 2.6315789473684213e-07,
      "loss": 1.1186,
      "step": 1
    },
    {
      "epoch": 0.015789473684210527,
      "grad_norm": 7.031124114990234,
      "learning_rate": 5.263157894736843e-07,
      "loss": 1.1281,
      "step": 2
    },
    {
      "epoch": 0.02368421052631579,
      "grad_norm": 6.89492654800415,
      "learning_rate": 7.894736842105263e-07,
      "loss": 1.1185,
      "step": 3
    },
    {
      "epoch": 0.031578947368421054,
      "grad_norm": 7.015303134918213,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 1.129,
      "step": 4
    },
    {
      "epoch": 0.039473684210526314,
      "grad_norm": 6.810600757598877,
      "learning_rate": 1.3157894736842106e-06,
      "loss": 1.103,
      "step": 5
    },
    {
      "epoch": 0.04736842105263158,
      "grad_norm": 6.700727462768555,
      "learning_rate": 1.5789473684210526e-06,
      "loss": 1.129,
      "step": 6
    },
    {
      "epoch": 0.05526315789473684,
      "grad_norm": 5.1340131759643555,
      "learning_rate": 1.8421052631578948e-06,
      "loss": 1.0762,
      "step": 7
    },
    {
      "epoch": 0.06315789473684211,
      "grad_norm": 4.850973606109619,
      "learning_rate": 2.105263157894737e-06,
      "loss": 1.0529,
      "step": 8
    },
    {
      "epoch": 0.07105263157894737,
      "grad_norm": 4.519778728485107,
      "learning_rate": 2.368421052631579e-06,
      "loss": 1.0531,
      "step": 9
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 2.700388193130493,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.9795,
      "step": 10
    },
    {
      "epoch": 0.0868421052631579,
      "grad_norm": 2.6331188678741455,
      "learning_rate": 2.8947368421052634e-06,
      "loss": 1.0095,
      "step": 11
    },
    {
      "epoch": 0.09473684210526316,
      "grad_norm": 2.4724442958831787,
      "learning_rate": 3.157894736842105e-06,
      "loss": 0.9728,
      "step": 12
    },
    {
      "epoch": 0.10263157894736842,
      "grad_norm": 3.1326963901519775,
      "learning_rate": 3.421052631578948e-06,
      "loss": 0.9307,
      "step": 13
    },
    {
      "epoch": 0.11052631578947368,
      "grad_norm": 3.435920476913452,
      "learning_rate": 3.6842105263157896e-06,
      "loss": 0.9612,
      "step": 14
    },
    {
      "epoch": 0.11842105263157894,
      "grad_norm": 3.3896520137786865,
      "learning_rate": 3.947368421052632e-06,
      "loss": 0.9617,
      "step": 15
    },
    {
      "epoch": 0.12631578947368421,
      "grad_norm": 2.802786111831665,
      "learning_rate": 4.210526315789474e-06,
      "loss": 0.9333,
      "step": 16
    },
    {
      "epoch": 0.13421052631578947,
      "grad_norm": 1.9803308248519897,
      "learning_rate": 4.473684210526316e-06,
      "loss": 0.9159,
      "step": 17
    },
    {
      "epoch": 0.14210526315789473,
      "grad_norm": 2.1656057834625244,
      "learning_rate": 4.736842105263158e-06,
      "loss": 0.8997,
      "step": 18
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4422030448913574,
      "learning_rate": 5e-06,
      "loss": 0.9104,
      "step": 19
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 2.1623854637145996,
      "learning_rate": 5.263157894736842e-06,
      "loss": 0.8804,
      "step": 20
    },
    {
      "epoch": 0.16578947368421051,
      "grad_norm": 1.7336848974227905,
      "learning_rate": 5.526315789473685e-06,
      "loss": 0.8454,
      "step": 21
    },
    {
      "epoch": 0.1736842105263158,
      "grad_norm": 1.314452886581421,
      "learning_rate": 5.789473684210527e-06,
      "loss": 0.8531,
      "step": 22
    },
    {
      "epoch": 0.18157894736842106,
      "grad_norm": 1.3969931602478027,
      "learning_rate": 6.0526315789473685e-06,
      "loss": 0.8296,
      "step": 23
    },
    {
      "epoch": 0.18947368421052632,
      "grad_norm": 1.5788418054580688,
      "learning_rate": 6.31578947368421e-06,
      "loss": 0.828,
      "step": 24
    },
    {
      "epoch": 0.19736842105263158,
      "grad_norm": 1.5884418487548828,
      "learning_rate": 6.578947368421054e-06,
      "loss": 0.8286,
      "step": 25
    },
    {
      "epoch": 0.20526315789473684,
      "grad_norm": 1.1489850282669067,
      "learning_rate": 6.842105263157896e-06,
      "loss": 0.8069,
      "step": 26
    },
    {
      "epoch": 0.2131578947368421,
      "grad_norm": 1.0133432149887085,
      "learning_rate": 7.1052631578947375e-06,
      "loss": 0.8102,
      "step": 27
    },
    {
      "epoch": 0.22105263157894736,
      "grad_norm": 0.9435691237449646,
      "learning_rate": 7.368421052631579e-06,
      "loss": 0.7892,
      "step": 28
    },
    {
      "epoch": 0.22894736842105262,
      "grad_norm": 1.0618520975112915,
      "learning_rate": 7.631578947368423e-06,
      "loss": 0.7961,
      "step": 29
    },
    {
      "epoch": 0.23684210526315788,
      "grad_norm": 0.953088641166687,
      "learning_rate": 7.894736842105265e-06,
      "loss": 0.8092,
      "step": 30
    },
    {
      "epoch": 0.24473684210526317,
      "grad_norm": 0.843917727470398,
      "learning_rate": 8.157894736842106e-06,
      "loss": 0.7846,
      "step": 31
    },
    {
      "epoch": 0.25263157894736843,
      "grad_norm": 0.8666284680366516,
      "learning_rate": 8.421052631578948e-06,
      "loss": 0.7538,
      "step": 32
    },
    {
      "epoch": 0.26052631578947366,
      "grad_norm": 0.8682100176811218,
      "learning_rate": 8.68421052631579e-06,
      "loss": 0.7802,
      "step": 33
    },
    {
      "epoch": 0.26842105263157895,
      "grad_norm": 0.7664394974708557,
      "learning_rate": 8.947368421052632e-06,
      "loss": 0.7718,
      "step": 34
    },
    {
      "epoch": 0.27631578947368424,
      "grad_norm": 0.7890555262565613,
      "learning_rate": 9.210526315789474e-06,
      "loss": 0.7826,
      "step": 35
    },
    {
      "epoch": 0.28421052631578947,
      "grad_norm": 0.8435359001159668,
      "learning_rate": 9.473684210526315e-06,
      "loss": 0.7763,
      "step": 36
    },
    {
      "epoch": 0.29210526315789476,
      "grad_norm": 0.8021636605262756,
      "learning_rate": 9.736842105263159e-06,
      "loss": 0.7564,
      "step": 37
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.8132019639015198,
      "learning_rate": 1e-05,
      "loss": 0.7508,
      "step": 38
    },
    {
      "epoch": 0.3078947368421053,
      "grad_norm": 0.7610636949539185,
      "learning_rate": 9.99978655851684e-06,
      "loss": 0.7492,
      "step": 39
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 0.8269503712654114,
      "learning_rate": 9.999146252290264e-06,
      "loss": 0.7627,
      "step": 40
    },
    {
      "epoch": 0.3236842105263158,
      "grad_norm": 0.7848504781723022,
      "learning_rate": 9.998079135987437e-06,
      "loss": 0.739,
      "step": 41
    },
    {
      "epoch": 0.33157894736842103,
      "grad_norm": 0.7971328496932983,
      "learning_rate": 9.996585300715117e-06,
      "loss": 0.7372,
      "step": 42
    },
    {
      "epoch": 0.3394736842105263,
      "grad_norm": 0.7159483432769775,
      "learning_rate": 9.994664874011864e-06,
      "loss": 0.7537,
      "step": 43
    },
    {
      "epoch": 0.3473684210526316,
      "grad_norm": 0.7174955606460571,
      "learning_rate": 9.992318019837171e-06,
      "loss": 0.7485,
      "step": 44
    },
    {
      "epoch": 0.35526315789473684,
      "grad_norm": 0.8257307410240173,
      "learning_rate": 9.989544938557453e-06,
      "loss": 0.7487,
      "step": 45
    },
    {
      "epoch": 0.3631578947368421,
      "grad_norm": 0.7395874857902527,
      "learning_rate": 9.98634586692894e-06,
      "loss": 0.7465,
      "step": 46
    },
    {
      "epoch": 0.37105263157894736,
      "grad_norm": 0.8656660914421082,
      "learning_rate": 9.982721078077474e-06,
      "loss": 0.7471,
      "step": 47
    },
    {
      "epoch": 0.37894736842105264,
      "grad_norm": 0.6341963410377502,
      "learning_rate": 9.978670881475173e-06,
      "loss": 0.7643,
      "step": 48
    },
    {
      "epoch": 0.3868421052631579,
      "grad_norm": 0.8313978910446167,
      "learning_rate": 9.97419562291403e-06,
      "loss": 0.7386,
      "step": 49
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 0.93310546875,
      "learning_rate": 9.96929568447637e-06,
      "loss": 0.754,
      "step": 50
    },
    {
      "epoch": 0.4026315789473684,
      "grad_norm": 0.7993932962417603,
      "learning_rate": 9.963971484502247e-06,
      "loss": 0.731,
      "step": 51
    },
    {
      "epoch": 0.4105263157894737,
      "grad_norm": 0.7355050444602966,
      "learning_rate": 9.958223477553715e-06,
      "loss": 0.7542,
      "step": 52
    },
    {
      "epoch": 0.41842105263157897,
      "grad_norm": 0.7957926392555237,
      "learning_rate": 9.952052154376027e-06,
      "loss": 0.7626,
      "step": 53
    },
    {
      "epoch": 0.4263157894736842,
      "grad_norm": 0.9310785531997681,
      "learning_rate": 9.945458041855732e-06,
      "loss": 0.7303,
      "step": 54
    },
    {
      "epoch": 0.4342105263157895,
      "grad_norm": 0.8477827906608582,
      "learning_rate": 9.938441702975689e-06,
      "loss": 0.7143,
      "step": 55
    },
    {
      "epoch": 0.4421052631578947,
      "grad_norm": 0.654358446598053,
      "learning_rate": 9.931003736767013e-06,
      "loss": 0.7097,
      "step": 56
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.8673765063285828,
      "learning_rate": 9.923144778257918e-06,
      "loss": 0.7253,
      "step": 57
    },
    {
      "epoch": 0.45789473684210524,
      "grad_norm": 0.7678876519203186,
      "learning_rate": 9.91486549841951e-06,
      "loss": 0.7212,
      "step": 58
    },
    {
      "epoch": 0.46578947368421053,
      "grad_norm": 0.7339031100273132,
      "learning_rate": 9.906166604108494e-06,
      "loss": 0.7254,
      "step": 59
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 0.7133976221084595,
      "learning_rate": 9.89704883800683e-06,
      "loss": 0.7306,
      "step": 60
    },
    {
      "epoch": 0.48157894736842105,
      "grad_norm": 0.8026192784309387,
      "learning_rate": 9.887512978558329e-06,
      "loss": 0.7245,
      "step": 61
    },
    {
      "epoch": 0.48947368421052634,
      "grad_norm": 0.659774899482727,
      "learning_rate": 9.877559839902185e-06,
      "loss": 0.7239,
      "step": 62
    },
    {
      "epoch": 0.49736842105263157,
      "grad_norm": 0.6718412637710571,
      "learning_rate": 9.867190271803466e-06,
      "loss": 0.6966,
      "step": 63
    },
    {
      "epoch": 0.5052631578947369,
      "grad_norm": 0.782952070236206,
      "learning_rate": 9.85640515958057e-06,
      "loss": 0.7246,
      "step": 64
    },
    {
      "epoch": 0.5131578947368421,
      "grad_norm": 0.749127984046936,
      "learning_rate": 9.845205424029639e-06,
      "loss": 0.7121,
      "step": 65
    },
    {
      "epoch": 0.5210526315789473,
      "grad_norm": 0.773841917514801,
      "learning_rate": 9.833592021345938e-06,
      "loss": 0.7341,
      "step": 66
    },
    {
      "epoch": 0.5289473684210526,
      "grad_norm": 0.7773014903068542,
      "learning_rate": 9.821565943042225e-06,
      "loss": 0.707,
      "step": 67
    },
    {
      "epoch": 0.5368421052631579,
      "grad_norm": 0.748015820980072,
      "learning_rate": 9.809128215864096e-06,
      "loss": 0.7118,
      "step": 68
    },
    {
      "epoch": 0.5447368421052632,
      "grad_norm": 0.6848940849304199,
      "learning_rate": 9.796279901702326e-06,
      "loss": 0.6977,
      "step": 69
    },
    {
      "epoch": 0.5526315789473685,
      "grad_norm": 0.8350101709365845,
      "learning_rate": 9.783022097502204e-06,
      "loss": 0.7213,
      "step": 70
    },
    {
      "epoch": 0.5605263157894737,
      "grad_norm": 0.7320327758789062,
      "learning_rate": 9.76935593516989e-06,
      "loss": 0.7079,
      "step": 71
    },
    {
      "epoch": 0.5684210526315789,
      "grad_norm": 0.7068721055984497,
      "learning_rate": 9.755282581475769e-06,
      "loss": 0.7304,
      "step": 72
    },
    {
      "epoch": 0.5763157894736842,
      "grad_norm": 0.7000291347503662,
      "learning_rate": 9.74080323795483e-06,
      "loss": 0.714,
      "step": 73
    },
    {
      "epoch": 0.5842105263157895,
      "grad_norm": 0.8786887526512146,
      "learning_rate": 9.7259191408041e-06,
      "loss": 0.7155,
      "step": 74
    },
    {
      "epoch": 0.5921052631578947,
      "grad_norm": 0.7386007905006409,
      "learning_rate": 9.710631560777082e-06,
      "loss": 0.7088,
      "step": 75
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6937209367752075,
      "learning_rate": 9.694941803075285e-06,
      "loss": 0.7058,
      "step": 76
    },
    {
      "epoch": 0.6078947368421053,
      "grad_norm": 0.9193783402442932,
      "learning_rate": 9.678851207236764e-06,
      "loss": 0.738,
      "step": 77
    },
    {
      "epoch": 0.6157894736842106,
      "grad_norm": 0.9059263467788696,
      "learning_rate": 9.66236114702178e-06,
      "loss": 0.7116,
      "step": 78
    },
    {
      "epoch": 0.6236842105263158,
      "grad_norm": 0.6896800994873047,
      "learning_rate": 9.645473030295496e-06,
      "loss": 0.7207,
      "step": 79
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 0.8141930103302002,
      "learning_rate": 9.628188298907782e-06,
      "loss": 0.7015,
      "step": 80
    },
    {
      "epoch": 0.6394736842105263,
      "grad_norm": 0.6706604361534119,
      "learning_rate": 9.610508428570122e-06,
      "loss": 0.7204,
      "step": 81
    },
    {
      "epoch": 0.6473684210526316,
      "grad_norm": 0.7771408557891846,
      "learning_rate": 9.592434928729617e-06,
      "loss": 0.7215,
      "step": 82
    },
    {
      "epoch": 0.6552631578947369,
      "grad_norm": 0.6452600955963135,
      "learning_rate": 9.573969342440107e-06,
      "loss": 0.7187,
      "step": 83
    },
    {
      "epoch": 0.6631578947368421,
      "grad_norm": 0.704627275466919,
      "learning_rate": 9.555113246230443e-06,
      "loss": 0.7383,
      "step": 84
    },
    {
      "epoch": 0.6710526315789473,
      "grad_norm": 0.675726056098938,
      "learning_rate": 9.535868249969882e-06,
      "loss": 0.7006,
      "step": 85
    },
    {
      "epoch": 0.6789473684210526,
      "grad_norm": 0.807037353515625,
      "learning_rate": 9.516235996730645e-06,
      "loss": 0.7098,
      "step": 86
    },
    {
      "epoch": 0.6868421052631579,
      "grad_norm": 0.6850730776786804,
      "learning_rate": 9.496218162647629e-06,
      "loss": 0.7095,
      "step": 87
    },
    {
      "epoch": 0.6947368421052632,
      "grad_norm": 0.7169205546379089,
      "learning_rate": 9.475816456775313e-06,
      "loss": 0.7209,
      "step": 88
    },
    {
      "epoch": 0.7026315789473684,
      "grad_norm": 0.8442161083221436,
      "learning_rate": 9.45503262094184e-06,
      "loss": 0.7133,
      "step": 89
    },
    {
      "epoch": 0.7105263157894737,
      "grad_norm": 0.67853182554245,
      "learning_rate": 9.43386842960031e-06,
      "loss": 0.7017,
      "step": 90
    },
    {
      "epoch": 0.718421052631579,
      "grad_norm": 0.6674996018409729,
      "learning_rate": 9.41232568967728e-06,
      "loss": 0.7039,
      "step": 91
    },
    {
      "epoch": 0.7263157894736842,
      "grad_norm": 0.6516873240470886,
      "learning_rate": 9.39040624041849e-06,
      "loss": 0.6955,
      "step": 92
    },
    {
      "epoch": 0.7342105263157894,
      "grad_norm": 0.6596349477767944,
      "learning_rate": 9.368111953231849e-06,
      "loss": 0.6895,
      "step": 93
    },
    {
      "epoch": 0.7421052631578947,
      "grad_norm": 0.6478255987167358,
      "learning_rate": 9.345444731527642e-06,
      "loss": 0.6935,
      "step": 94
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.6235653758049011,
      "learning_rate": 9.32240651055604e-06,
      "loss": 0.6787,
      "step": 95
    },
    {
      "epoch": 0.7578947368421053,
      "grad_norm": 0.638984739780426,
      "learning_rate": 9.298999257241862e-06,
      "loss": 0.7282,
      "step": 96
    },
    {
      "epoch": 0.7657894736842106,
      "grad_norm": 0.710728108882904,
      "learning_rate": 9.275224970016656e-06,
      "loss": 0.7166,
      "step": 97
    },
    {
      "epoch": 0.7736842105263158,
      "grad_norm": 0.6417115926742554,
      "learning_rate": 9.251085678648072e-06,
      "loss": 0.6881,
      "step": 98
    },
    {
      "epoch": 0.781578947368421,
      "grad_norm": 0.7576119303703308,
      "learning_rate": 9.22658344406657e-06,
      "loss": 0.7071,
      "step": 99
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.8099032640457153,
      "learning_rate": 9.201720358189464e-06,
      "loss": 0.708,
      "step": 100
    },
    {
      "epoch": 0.7973684210526316,
      "grad_norm": 0.649307131767273,
      "learning_rate": 9.176498543742328e-06,
      "loss": 0.696,
      "step": 101
    },
    {
      "epoch": 0.8052631578947368,
      "grad_norm": 0.7050330638885498,
      "learning_rate": 9.150920154077753e-06,
      "loss": 0.7138,
      "step": 102
    },
    {
      "epoch": 0.8131578947368421,
      "grad_norm": 0.6682823896408081,
      "learning_rate": 9.124987372991512e-06,
      "loss": 0.6937,
      "step": 103
    },
    {
      "epoch": 0.8210526315789474,
      "grad_norm": 0.718499481678009,
      "learning_rate": 9.098702414536107e-06,
      "loss": 0.7022,
      "step": 104
    },
    {
      "epoch": 0.8289473684210527,
      "grad_norm": 0.7294292449951172,
      "learning_rate": 9.072067522831743e-06,
      "loss": 0.6789,
      "step": 105
    },
    {
      "epoch": 0.8368421052631579,
      "grad_norm": 0.6921276450157166,
      "learning_rate": 9.045084971874738e-06,
      "loss": 0.7057,
      "step": 106
    },
    {
      "epoch": 0.8447368421052631,
      "grad_norm": 0.8754315376281738,
      "learning_rate": 9.017757065343368e-06,
      "loss": 0.7262,
      "step": 107
    },
    {
      "epoch": 0.8526315789473684,
      "grad_norm": 0.6188150644302368,
      "learning_rate": 8.990086136401199e-06,
      "loss": 0.6853,
      "step": 108
    },
    {
      "epoch": 0.8605263157894737,
      "grad_norm": 0.7342610955238342,
      "learning_rate": 8.96207454749787e-06,
      "loss": 0.7164,
      "step": 109
    },
    {
      "epoch": 0.868421052631579,
      "grad_norm": 0.7715557217597961,
      "learning_rate": 8.933724690167417e-06,
      "loss": 0.7058,
      "step": 110
    },
    {
      "epoch": 0.8763157894736842,
      "grad_norm": 0.7064869403839111,
      "learning_rate": 8.905038984824079e-06,
      "loss": 0.698,
      "step": 111
    },
    {
      "epoch": 0.8842105263157894,
      "grad_norm": 0.7494336366653442,
      "learning_rate": 8.87601988055565e-06,
      "loss": 0.6961,
      "step": 112
    },
    {
      "epoch": 0.8921052631578947,
      "grad_norm": 0.664580762386322,
      "learning_rate": 8.846669854914395e-06,
      "loss": 0.7129,
      "step": 113
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.7173993587493896,
      "learning_rate": 8.816991413705515e-06,
      "loss": 0.716,
      "step": 114
    },
    {
      "epoch": 0.9078947368421053,
      "grad_norm": 0.6451180577278137,
      "learning_rate": 8.786987090773214e-06,
      "loss": 0.7034,
      "step": 115
    },
    {
      "epoch": 0.9157894736842105,
      "grad_norm": 0.6762385368347168,
      "learning_rate": 8.756659447784367e-06,
      "loss": 0.6933,
      "step": 116
    },
    {
      "epoch": 0.9236842105263158,
      "grad_norm": 0.688220739364624,
      "learning_rate": 8.726011074009813e-06,
      "loss": 0.7154,
      "step": 117
    },
    {
      "epoch": 0.9315789473684211,
      "grad_norm": 0.6731222867965698,
      "learning_rate": 8.695044586103297e-06,
      "loss": 0.6916,
      "step": 118
    },
    {
      "epoch": 0.9394736842105263,
      "grad_norm": 0.6552385687828064,
      "learning_rate": 8.663762627878059e-06,
      "loss": 0.7074,
      "step": 119
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 0.6504393815994263,
      "learning_rate": 8.632167870081122e-06,
      "loss": 0.7028,
      "step": 120
    },
    {
      "epoch": 0.9552631578947368,
      "grad_norm": 0.7418521046638489,
      "learning_rate": 8.600263010165275e-06,
      "loss": 0.6997,
      "step": 121
    },
    {
      "epoch": 0.9631578947368421,
      "grad_norm": 0.6728154420852661,
      "learning_rate": 8.568050772058763e-06,
      "loss": 0.7066,
      "step": 122
    },
    {
      "epoch": 0.9710526315789474,
      "grad_norm": 0.6942195892333984,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.7001,
      "step": 123
    },
    {
      "epoch": 0.9789473684210527,
      "grad_norm": 0.6641739010810852,
      "learning_rate": 8.502715187966455e-06,
      "loss": 0.702,
      "step": 124
    },
    {
      "epoch": 0.9868421052631579,
      "grad_norm": 0.7299260497093201,
      "learning_rate": 8.469597420110249e-06,
      "loss": 0.6927,
      "step": 125
    },
    {
      "epoch": 0.9947368421052631,
      "grad_norm": 0.6769669055938721,
      "learning_rate": 8.436183429846314e-06,
      "loss": 0.6743,
      "step": 126
    },
    {
      "epoch": 1.0052631578947369,
      "grad_norm": 0.7567976713180542,
      "learning_rate": 8.402476069947309e-06,
      "loss": 0.6556,
      "step": 127
    },
    {
      "epoch": 1.013157894736842,
      "grad_norm": 0.7088767290115356,
      "learning_rate": 8.368478218232787e-06,
      "loss": 0.6348,
      "step": 128
    },
    {
      "epoch": 1.0210526315789474,
      "grad_norm": 0.6701803207397461,
      "learning_rate": 8.334192777323508e-06,
      "loss": 0.63,
      "step": 129
    },
    {
      "epoch": 1.0289473684210526,
      "grad_norm": 0.7436509132385254,
      "learning_rate": 8.299622674393615e-06,
      "loss": 0.621,
      "step": 130
    },
    {
      "epoch": 1.0368421052631578,
      "grad_norm": 0.6837674379348755,
      "learning_rate": 8.264770860920722e-06,
      "loss": 0.6294,
      "step": 131
    },
    {
      "epoch": 1.0447368421052632,
      "grad_norm": 0.6941933035850525,
      "learning_rate": 8.229640312433938e-06,
      "loss": 0.6003,
      "step": 132
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.6441699266433716,
      "learning_rate": 8.194234028259806e-06,
      "loss": 0.6044,
      "step": 133
    },
    {
      "epoch": 1.0605263157894738,
      "grad_norm": 0.6503862142562866,
      "learning_rate": 8.158555031266255e-06,
      "loss": 0.6059,
      "step": 134
    },
    {
      "epoch": 1.068421052631579,
      "grad_norm": 0.6873996257781982,
      "learning_rate": 8.122606367604497e-06,
      "loss": 0.5948,
      "step": 135
    },
    {
      "epoch": 1.0763157894736841,
      "grad_norm": 0.6901219487190247,
      "learning_rate": 8.086391106448965e-06,
      "loss": 0.617,
      "step": 136
    },
    {
      "epoch": 1.0842105263157895,
      "grad_norm": 0.5913821458816528,
      "learning_rate": 8.049912339735284e-06,
      "loss": 0.6205,
      "step": 137
    },
    {
      "epoch": 1.0921052631578947,
      "grad_norm": 0.6473250389099121,
      "learning_rate": 8.013173181896283e-06,
      "loss": 0.5898,
      "step": 138
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.6466673016548157,
      "learning_rate": 7.976176769596095e-06,
      "loss": 0.6283,
      "step": 139
    },
    {
      "epoch": 1.1078947368421053,
      "grad_norm": 0.562574565410614,
      "learning_rate": 7.938926261462366e-06,
      "loss": 0.5947,
      "step": 140
    },
    {
      "epoch": 1.1157894736842104,
      "grad_norm": 0.6042968034744263,
      "learning_rate": 7.90142483781658e-06,
      "loss": 0.6338,
      "step": 141
    },
    {
      "epoch": 1.1236842105263158,
      "grad_norm": 0.6527702808380127,
      "learning_rate": 7.863675700402527e-06,
      "loss": 0.6113,
      "step": 142
    },
    {
      "epoch": 1.131578947368421,
      "grad_norm": 0.5683473944664001,
      "learning_rate": 7.82568207211296e-06,
      "loss": 0.6254,
      "step": 143
    },
    {
      "epoch": 1.1394736842105262,
      "grad_norm": 0.6214303374290466,
      "learning_rate": 7.787447196714428e-06,
      "loss": 0.6213,
      "step": 144
    },
    {
      "epoch": 1.1473684210526316,
      "grad_norm": 0.5913365483283997,
      "learning_rate": 7.748974338570337e-06,
      "loss": 0.6221,
      "step": 145
    },
    {
      "epoch": 1.1552631578947368,
      "grad_norm": 0.6266749501228333,
      "learning_rate": 7.710266782362248e-06,
      "loss": 0.5909,
      "step": 146
    },
    {
      "epoch": 1.1631578947368422,
      "grad_norm": 0.6693281531333923,
      "learning_rate": 7.671327832809442e-06,
      "loss": 0.6373,
      "step": 147
    },
    {
      "epoch": 1.1710526315789473,
      "grad_norm": 0.6370000839233398,
      "learning_rate": 7.63216081438678e-06,
      "loss": 0.5908,
      "step": 148
    },
    {
      "epoch": 1.1789473684210527,
      "grad_norm": 0.6319229602813721,
      "learning_rate": 7.5927690710408606e-06,
      "loss": 0.6138,
      "step": 149
    },
    {
      "epoch": 1.186842105263158,
      "grad_norm": 0.6503207683563232,
      "learning_rate": 7.553155965904535e-06,
      "loss": 0.6165,
      "step": 150
    },
    {
      "epoch": 1.194736842105263,
      "grad_norm": 0.6979773640632629,
      "learning_rate": 7.513324881009769e-06,
      "loss": 0.6308,
      "step": 151
    },
    {
      "epoch": 1.2026315789473685,
      "grad_norm": 0.591997504234314,
      "learning_rate": 7.473279216998896e-06,
      "loss": 0.6227,
      "step": 152
    },
    {
      "epoch": 1.2105263157894737,
      "grad_norm": 0.6470649242401123,
      "learning_rate": 7.4330223928342814e-06,
      "loss": 0.6248,
      "step": 153
    },
    {
      "epoch": 1.2184210526315788,
      "grad_norm": 0.6023352742195129,
      "learning_rate": 7.392557845506433e-06,
      "loss": 0.6043,
      "step": 154
    },
    {
      "epoch": 1.2263157894736842,
      "grad_norm": 0.6112087368965149,
      "learning_rate": 7.351889029740548e-06,
      "loss": 0.6122,
      "step": 155
    },
    {
      "epoch": 1.2342105263157894,
      "grad_norm": 0.737591028213501,
      "learning_rate": 7.311019417701567e-06,
      "loss": 0.6091,
      "step": 156
    },
    {
      "epoch": 1.2421052631578948,
      "grad_norm": 0.6064261198043823,
      "learning_rate": 7.269952498697734e-06,
      "loss": 0.6092,
      "step": 157
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.6505361199378967,
      "learning_rate": 7.2286917788826926e-06,
      "loss": 0.6164,
      "step": 158
    },
    {
      "epoch": 1.2578947368421054,
      "grad_norm": 0.6192215085029602,
      "learning_rate": 7.187240780956133e-06,
      "loss": 0.6028,
      "step": 159
    },
    {
      "epoch": 1.2657894736842106,
      "grad_norm": 0.6353914141654968,
      "learning_rate": 7.145603043863045e-06,
      "loss": 0.6052,
      "step": 160
    },
    {
      "epoch": 1.2736842105263158,
      "grad_norm": 0.634981632232666,
      "learning_rate": 7.103782122491577e-06,
      "loss": 0.6188,
      "step": 161
    },
    {
      "epoch": 1.2815789473684212,
      "grad_norm": 0.6064332127571106,
      "learning_rate": 7.061781587369518e-06,
      "loss": 0.6288,
      "step": 162
    },
    {
      "epoch": 1.2894736842105263,
      "grad_norm": 0.5805593729019165,
      "learning_rate": 7.019605024359475e-06,
      "loss": 0.6037,
      "step": 163
    },
    {
      "epoch": 1.2973684210526315,
      "grad_norm": 0.7273927927017212,
      "learning_rate": 6.977256034352713e-06,
      "loss": 0.6268,
      "step": 164
    },
    {
      "epoch": 1.305263157894737,
      "grad_norm": 0.6278533339500427,
      "learning_rate": 6.934738232961728e-06,
      "loss": 0.6283,
      "step": 165
    },
    {
      "epoch": 1.313157894736842,
      "grad_norm": 0.6402047872543335,
      "learning_rate": 6.892055250211552e-06,
      "loss": 0.6157,
      "step": 166
    },
    {
      "epoch": 1.3210526315789473,
      "grad_norm": 0.6458708047866821,
      "learning_rate": 6.849210730229846e-06,
      "loss": 0.6187,
      "step": 167
    },
    {
      "epoch": 1.3289473684210527,
      "grad_norm": 0.5871455073356628,
      "learning_rate": 6.806208330935766e-06,
      "loss": 0.6162,
      "step": 168
    },
    {
      "epoch": 1.3368421052631578,
      "grad_norm": 0.5851725935935974,
      "learning_rate": 6.763051723727663e-06,
      "loss": 0.605,
      "step": 169
    },
    {
      "epoch": 1.3447368421052632,
      "grad_norm": 0.607880175113678,
      "learning_rate": 6.719744593169642e-06,
      "loss": 0.5954,
      "step": 170
    },
    {
      "epoch": 1.3526315789473684,
      "grad_norm": 0.6497223973274231,
      "learning_rate": 6.67629063667697e-06,
      "loss": 0.6233,
      "step": 171
    },
    {
      "epoch": 1.3605263157894738,
      "grad_norm": 0.6154512166976929,
      "learning_rate": 6.6326935642004165e-06,
      "loss": 0.6186,
      "step": 172
    },
    {
      "epoch": 1.368421052631579,
      "grad_norm": 0.6759777665138245,
      "learning_rate": 6.588957097909509e-06,
      "loss": 0.6171,
      "step": 173
    },
    {
      "epoch": 1.3763157894736842,
      "grad_norm": 0.6606743931770325,
      "learning_rate": 6.545084971874738e-06,
      "loss": 0.5943,
      "step": 174
    },
    {
      "epoch": 1.3842105263157896,
      "grad_norm": 0.6321778297424316,
      "learning_rate": 6.501080931748764e-06,
      "loss": 0.6191,
      "step": 175
    },
    {
      "epoch": 1.3921052631578947,
      "grad_norm": 0.622314453125,
      "learning_rate": 6.456948734446624e-06,
      "loss": 0.6134,
      "step": 176
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.6384446620941162,
      "learning_rate": 6.412692147824976e-06,
      "loss": 0.6005,
      "step": 177
    },
    {
      "epoch": 1.4078947368421053,
      "grad_norm": 0.6004672646522522,
      "learning_rate": 6.368314950360416e-06,
      "loss": 0.6082,
      "step": 178
    },
    {
      "epoch": 1.4157894736842105,
      "grad_norm": 0.6092491745948792,
      "learning_rate": 6.323820930826879e-06,
      "loss": 0.6306,
      "step": 179
    },
    {
      "epoch": 1.4236842105263157,
      "grad_norm": 0.6291044354438782,
      "learning_rate": 6.279213887972179e-06,
      "loss": 0.6188,
      "step": 180
    },
    {
      "epoch": 1.431578947368421,
      "grad_norm": 0.5878775119781494,
      "learning_rate": 6.234497630193666e-06,
      "loss": 0.6154,
      "step": 181
    },
    {
      "epoch": 1.4394736842105262,
      "grad_norm": 0.5391842126846313,
      "learning_rate": 6.189675975213094e-06,
      "loss": 0.6105,
      "step": 182
    },
    {
      "epoch": 1.4473684210526316,
      "grad_norm": 0.5454199314117432,
      "learning_rate": 6.144752749750671e-06,
      "loss": 0.6023,
      "step": 183
    },
    {
      "epoch": 1.4552631578947368,
      "grad_norm": 0.6040089130401611,
      "learning_rate": 6.099731789198344e-06,
      "loss": 0.6081,
      "step": 184
    },
    {
      "epoch": 1.4631578947368422,
      "grad_norm": 0.6564128398895264,
      "learning_rate": 6.05461693729235e-06,
      "loss": 0.6115,
      "step": 185
    },
    {
      "epoch": 1.4710526315789474,
      "grad_norm": 0.5978277325630188,
      "learning_rate": 6.009412045785051e-06,
      "loss": 0.6044,
      "step": 186
    },
    {
      "epoch": 1.4789473684210526,
      "grad_norm": 0.6195292472839355,
      "learning_rate": 5.964120974116085e-06,
      "loss": 0.6253,
      "step": 187
    },
    {
      "epoch": 1.486842105263158,
      "grad_norm": 0.5672737956047058,
      "learning_rate": 5.918747589082853e-06,
      "loss": 0.614,
      "step": 188
    },
    {
      "epoch": 1.4947368421052631,
      "grad_norm": 0.5825613737106323,
      "learning_rate": 5.8732957645103946e-06,
      "loss": 0.5987,
      "step": 189
    },
    {
      "epoch": 1.5026315789473683,
      "grad_norm": 0.5839316844940186,
      "learning_rate": 5.82776938092065e-06,
      "loss": 0.6099,
      "step": 190
    },
    {
      "epoch": 1.5105263157894737,
      "grad_norm": 0.592494010925293,
      "learning_rate": 5.782172325201155e-06,
      "loss": 0.6272,
      "step": 191
    },
    {
      "epoch": 1.518421052631579,
      "grad_norm": 0.6230676174163818,
      "learning_rate": 5.736508490273189e-06,
      "loss": 0.6132,
      "step": 192
    },
    {
      "epoch": 1.526315789473684,
      "grad_norm": 0.5931450724601746,
      "learning_rate": 5.690781774759412e-06,
      "loss": 0.6022,
      "step": 193
    },
    {
      "epoch": 1.5342105263157895,
      "grad_norm": 0.6010186076164246,
      "learning_rate": 5.644996082651018e-06,
      "loss": 0.6144,
      "step": 194
    },
    {
      "epoch": 1.5421052631578949,
      "grad_norm": 0.6734380722045898,
      "learning_rate": 5.5991553229744166e-06,
      "loss": 0.6343,
      "step": 195
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.5894003510475159,
      "learning_rate": 5.553263409457504e-06,
      "loss": 0.6165,
      "step": 196
    },
    {
      "epoch": 1.5578947368421052,
      "grad_norm": 0.5955084562301636,
      "learning_rate": 5.507324260195516e-06,
      "loss": 0.611,
      "step": 197
    },
    {
      "epoch": 1.5657894736842106,
      "grad_norm": 0.5771483778953552,
      "learning_rate": 5.46134179731651e-06,
      "loss": 0.6079,
      "step": 198
    },
    {
      "epoch": 1.5736842105263158,
      "grad_norm": 0.6015431880950928,
      "learning_rate": 5.41531994664652e-06,
      "loss": 0.5994,
      "step": 199
    },
    {
      "epoch": 1.581578947368421,
      "grad_norm": 0.5537875890731812,
      "learning_rate": 5.36926263737437e-06,
      "loss": 0.597,
      "step": 200
    },
    {
      "epoch": 1.5894736842105264,
      "grad_norm": 0.5928704142570496,
      "learning_rate": 5.323173801716222e-06,
      "loss": 0.596,
      "step": 201
    },
    {
      "epoch": 1.5973684210526315,
      "grad_norm": 0.6157421469688416,
      "learning_rate": 5.27705737457985e-06,
      "loss": 0.6261,
      "step": 202
    },
    {
      "epoch": 1.6052631578947367,
      "grad_norm": 0.612349271774292,
      "learning_rate": 5.230917293228699e-06,
      "loss": 0.6079,
      "step": 203
    },
    {
      "epoch": 1.6131578947368421,
      "grad_norm": 0.54092937707901,
      "learning_rate": 5.184757496945726e-06,
      "loss": 0.5942,
      "step": 204
    },
    {
      "epoch": 1.6210526315789475,
      "grad_norm": 0.5439791083335876,
      "learning_rate": 5.138581926697083e-06,
      "loss": 0.6135,
      "step": 205
    },
    {
      "epoch": 1.6289473684210525,
      "grad_norm": 0.5337532758712769,
      "learning_rate": 5.09239452479565e-06,
      "loss": 0.587,
      "step": 206
    },
    {
      "epoch": 1.6368421052631579,
      "grad_norm": 0.5457773208618164,
      "learning_rate": 5.046199234564455e-06,
      "loss": 0.5961,
      "step": 207
    },
    {
      "epoch": 1.6447368421052633,
      "grad_norm": 0.5572032332420349,
      "learning_rate": 5e-06,
      "loss": 0.6022,
      "step": 208
    },
    {
      "epoch": 1.6526315789473685,
      "grad_norm": 0.5409188270568848,
      "learning_rate": 4.953800765435547e-06,
      "loss": 0.5966,
      "step": 209
    },
    {
      "epoch": 1.6605263157894736,
      "grad_norm": 0.5420222878456116,
      "learning_rate": 4.907605475204352e-06,
      "loss": 0.6069,
      "step": 210
    },
    {
      "epoch": 1.668421052631579,
      "grad_norm": 0.5424708724021912,
      "learning_rate": 4.861418073302919e-06,
      "loss": 0.6009,
      "step": 211
    },
    {
      "epoch": 1.6763157894736842,
      "grad_norm": 0.5323647856712341,
      "learning_rate": 4.815242503054277e-06,
      "loss": 0.6183,
      "step": 212
    },
    {
      "epoch": 1.6842105263157894,
      "grad_norm": 0.5776340961456299,
      "learning_rate": 4.7690827067713035e-06,
      "loss": 0.6109,
      "step": 213
    },
    {
      "epoch": 1.6921052631578948,
      "grad_norm": 0.5825272798538208,
      "learning_rate": 4.7229426254201504e-06,
      "loss": 0.615,
      "step": 214
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.5867627859115601,
      "learning_rate": 4.676826198283779e-06,
      "loss": 0.6114,
      "step": 215
    },
    {
      "epoch": 1.7078947368421051,
      "grad_norm": 0.5459916591644287,
      "learning_rate": 4.630737362625631e-06,
      "loss": 0.6039,
      "step": 216
    },
    {
      "epoch": 1.7157894736842105,
      "grad_norm": 0.5879709124565125,
      "learning_rate": 4.584680053353481e-06,
      "loss": 0.6154,
      "step": 217
    },
    {
      "epoch": 1.723684210526316,
      "grad_norm": 0.5895808339118958,
      "learning_rate": 4.53865820268349e-06,
      "loss": 0.6133,
      "step": 218
    },
    {
      "epoch": 1.731578947368421,
      "grad_norm": 0.6173647046089172,
      "learning_rate": 4.492675739804486e-06,
      "loss": 0.6018,
      "step": 219
    },
    {
      "epoch": 1.7394736842105263,
      "grad_norm": 0.5456452965736389,
      "learning_rate": 4.446736590542497e-06,
      "loss": 0.6083,
      "step": 220
    },
    {
      "epoch": 1.7473684210526317,
      "grad_norm": 0.5733683705329895,
      "learning_rate": 4.400844677025585e-06,
      "loss": 0.6015,
      "step": 221
    },
    {
      "epoch": 1.7552631578947369,
      "grad_norm": 0.5852586627006531,
      "learning_rate": 4.355003917348985e-06,
      "loss": 0.5983,
      "step": 222
    },
    {
      "epoch": 1.763157894736842,
      "grad_norm": 0.5621438026428223,
      "learning_rate": 4.309218225240591e-06,
      "loss": 0.5891,
      "step": 223
    },
    {
      "epoch": 1.7710526315789474,
      "grad_norm": 0.582830011844635,
      "learning_rate": 4.263491509726812e-06,
      "loss": 0.5864,
      "step": 224
    },
    {
      "epoch": 1.7789473684210526,
      "grad_norm": 0.5376387238502502,
      "learning_rate": 4.217827674798845e-06,
      "loss": 0.6084,
      "step": 225
    },
    {
      "epoch": 1.7868421052631578,
      "grad_norm": 0.6110755801200867,
      "learning_rate": 4.17223061907935e-06,
      "loss": 0.6055,
      "step": 226
    },
    {
      "epoch": 1.7947368421052632,
      "grad_norm": 0.5580980777740479,
      "learning_rate": 4.126704235489606e-06,
      "loss": 0.616,
      "step": 227
    },
    {
      "epoch": 1.8026315789473686,
      "grad_norm": 0.6138029098510742,
      "learning_rate": 4.081252410917148e-06,
      "loss": 0.6088,
      "step": 228
    },
    {
      "epoch": 1.8105263157894735,
      "grad_norm": 0.5521398186683655,
      "learning_rate": 4.035879025883916e-06,
      "loss": 0.6146,
      "step": 229
    },
    {
      "epoch": 1.818421052631579,
      "grad_norm": 0.5325535535812378,
      "learning_rate": 3.99058795421495e-06,
      "loss": 0.5895,
      "step": 230
    },
    {
      "epoch": 1.8263157894736843,
      "grad_norm": 0.6027734875679016,
      "learning_rate": 3.945383062707652e-06,
      "loss": 0.5979,
      "step": 231
    },
    {
      "epoch": 1.8342105263157895,
      "grad_norm": 0.5285260677337646,
      "learning_rate": 3.9002682108016585e-06,
      "loss": 0.6119,
      "step": 232
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.5392246246337891,
      "learning_rate": 3.855247250249331e-06,
      "loss": 0.6104,
      "step": 233
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.5578750967979431,
      "learning_rate": 3.8103240247869077e-06,
      "loss": 0.606,
      "step": 234
    },
    {
      "epoch": 1.8578947368421053,
      "grad_norm": 0.5479610562324524,
      "learning_rate": 3.765502369806334e-06,
      "loss": 0.6147,
      "step": 235
    },
    {
      "epoch": 1.8657894736842104,
      "grad_norm": 0.5477908253669739,
      "learning_rate": 3.720786112027822e-06,
      "loss": 0.5947,
      "step": 236
    },
    {
      "epoch": 1.8736842105263158,
      "grad_norm": 0.5339275598526001,
      "learning_rate": 3.6761790691731207e-06,
      "loss": 0.5975,
      "step": 237
    },
    {
      "epoch": 1.881578947368421,
      "grad_norm": 0.5823328495025635,
      "learning_rate": 3.6316850496395863e-06,
      "loss": 0.5898,
      "step": 238
    },
    {
      "epoch": 1.8894736842105262,
      "grad_norm": 0.5550434589385986,
      "learning_rate": 3.587307852175025e-06,
      "loss": 0.5953,
      "step": 239
    },
    {
      "epoch": 1.8973684210526316,
      "grad_norm": 0.5742660164833069,
      "learning_rate": 3.5430512655533774e-06,
      "loss": 0.6185,
      "step": 240
    },
    {
      "epoch": 1.905263157894737,
      "grad_norm": 0.5972154140472412,
      "learning_rate": 3.498919068251237e-06,
      "loss": 0.5985,
      "step": 241
    },
    {
      "epoch": 1.913157894736842,
      "grad_norm": 0.5590418577194214,
      "learning_rate": 3.4549150281252635e-06,
      "loss": 0.6052,
      "step": 242
    },
    {
      "epoch": 1.9210526315789473,
      "grad_norm": 0.58773273229599,
      "learning_rate": 3.4110429020904924e-06,
      "loss": 0.6158,
      "step": 243
    },
    {
      "epoch": 1.9289473684210527,
      "grad_norm": 0.5843963027000427,
      "learning_rate": 3.3673064357995844e-06,
      "loss": 0.6079,
      "step": 244
    },
    {
      "epoch": 1.936842105263158,
      "grad_norm": 0.5518509745597839,
      "learning_rate": 3.3237093633230323e-06,
      "loss": 0.6023,
      "step": 245
    },
    {
      "epoch": 1.944736842105263,
      "grad_norm": 0.5634933114051819,
      "learning_rate": 3.2802554068303595e-06,
      "loss": 0.5972,
      "step": 246
    },
    {
      "epoch": 1.9526315789473685,
      "grad_norm": 0.5546795129776001,
      "learning_rate": 3.236948276272337e-06,
      "loss": 0.6012,
      "step": 247
    },
    {
      "epoch": 1.9605263157894737,
      "grad_norm": 0.55860435962677,
      "learning_rate": 3.1937916690642356e-06,
      "loss": 0.6186,
      "step": 248
    },
    {
      "epoch": 1.9684210526315788,
      "grad_norm": 0.5482208728790283,
      "learning_rate": 3.150789269770155e-06,
      "loss": 0.6218,
      "step": 249
    },
    {
      "epoch": 1.9763157894736842,
      "grad_norm": 0.6123985648155212,
      "learning_rate": 3.107944749788449e-06,
      "loss": 0.6088,
      "step": 250
    },
    {
      "epoch": 1.9842105263157894,
      "grad_norm": 0.5692940354347229,
      "learning_rate": 3.0652617670382745e-06,
      "loss": 0.5958,
      "step": 251
    },
    {
      "epoch": 1.9921052631578946,
      "grad_norm": 0.5609405040740967,
      "learning_rate": 3.0227439656472878e-06,
      "loss": 0.6141,
      "step": 252
    },
    {
      "epoch": 2.0026315789473683,
      "grad_norm": 0.5651322603225708,
      "learning_rate": 2.980394975640526e-06,
      "loss": 0.5795,
      "step": 253
    },
    {
      "epoch": 2.0105263157894737,
      "grad_norm": 0.6917658448219299,
      "learning_rate": 2.9382184126304834e-06,
      "loss": 0.5615,
      "step": 254
    },
    {
      "epoch": 2.018421052631579,
      "grad_norm": 0.6464757323265076,
      "learning_rate": 2.8962178775084267e-06,
      "loss": 0.517,
      "step": 255
    },
    {
      "epoch": 2.026315789473684,
      "grad_norm": 0.5215118527412415,
      "learning_rate": 2.8543969561369556e-06,
      "loss": 0.5421,
      "step": 256
    },
    {
      "epoch": 2.0342105263157895,
      "grad_norm": 0.5354113578796387,
      "learning_rate": 2.812759219043869e-06,
      "loss": 0.5397,
      "step": 257
    },
    {
      "epoch": 2.042105263157895,
      "grad_norm": 0.6157158613204956,
      "learning_rate": 2.771308221117309e-06,
      "loss": 0.5523,
      "step": 258
    },
    {
      "epoch": 2.05,
      "grad_norm": 0.646392822265625,
      "learning_rate": 2.7300475013022666e-06,
      "loss": 0.518,
      "step": 259
    },
    {
      "epoch": 2.057894736842105,
      "grad_norm": 0.5618102550506592,
      "learning_rate": 2.6889805822984348e-06,
      "loss": 0.5343,
      "step": 260
    },
    {
      "epoch": 2.0657894736842106,
      "grad_norm": 0.5714638233184814,
      "learning_rate": 2.648110970259454e-06,
      "loss": 0.5421,
      "step": 261
    },
    {
      "epoch": 2.0736842105263156,
      "grad_norm": 0.55697101354599,
      "learning_rate": 2.607442154493568e-06,
      "loss": 0.5382,
      "step": 262
    },
    {
      "epoch": 2.081578947368421,
      "grad_norm": 0.6028394103050232,
      "learning_rate": 2.5669776071657194e-06,
      "loss": 0.5367,
      "step": 263
    },
    {
      "epoch": 2.0894736842105264,
      "grad_norm": 0.580823540687561,
      "learning_rate": 2.526720783001107e-06,
      "loss": 0.5371,
      "step": 264
    },
    {
      "epoch": 2.0973684210526318,
      "grad_norm": 0.558423638343811,
      "learning_rate": 2.486675118990233e-06,
      "loss": 0.5304,
      "step": 265
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.5470890998840332,
      "learning_rate": 2.4468440340954664e-06,
      "loss": 0.5286,
      "step": 266
    },
    {
      "epoch": 2.113157894736842,
      "grad_norm": 0.5406467914581299,
      "learning_rate": 2.4072309289591394e-06,
      "loss": 0.5733,
      "step": 267
    },
    {
      "epoch": 2.1210526315789475,
      "grad_norm": 0.5414575934410095,
      "learning_rate": 2.3678391856132203e-06,
      "loss": 0.5464,
      "step": 268
    },
    {
      "epoch": 2.1289473684210525,
      "grad_norm": 0.5568278431892395,
      "learning_rate": 2.328672167190558e-06,
      "loss": 0.5326,
      "step": 269
    },
    {
      "epoch": 2.136842105263158,
      "grad_norm": 0.5567864775657654,
      "learning_rate": 2.289733217637753e-06,
      "loss": 0.5045,
      "step": 270
    },
    {
      "epoch": 2.1447368421052633,
      "grad_norm": 0.5617071986198425,
      "learning_rate": 2.2510256614296638e-06,
      "loss": 0.5214,
      "step": 271
    },
    {
      "epoch": 2.1526315789473682,
      "grad_norm": 0.5503396391868591,
      "learning_rate": 2.2125528032855727e-06,
      "loss": 0.5436,
      "step": 272
    },
    {
      "epoch": 2.1605263157894736,
      "grad_norm": 0.5096573829650879,
      "learning_rate": 2.174317927887041e-06,
      "loss": 0.5268,
      "step": 273
    },
    {
      "epoch": 2.168421052631579,
      "grad_norm": 0.5499228239059448,
      "learning_rate": 2.136324299597474e-06,
      "loss": 0.5281,
      "step": 274
    },
    {
      "epoch": 2.1763157894736844,
      "grad_norm": 0.4998083710670471,
      "learning_rate": 2.098575162183422e-06,
      "loss": 0.5177,
      "step": 275
    },
    {
      "epoch": 2.1842105263157894,
      "grad_norm": 0.4903915524482727,
      "learning_rate": 2.061073738537635e-06,
      "loss": 0.533,
      "step": 276
    },
    {
      "epoch": 2.192105263157895,
      "grad_norm": 0.5267230868339539,
      "learning_rate": 2.023823230403907e-06,
      "loss": 0.5286,
      "step": 277
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.5258969664573669,
      "learning_rate": 1.9868268181037186e-06,
      "loss": 0.5249,
      "step": 278
    },
    {
      "epoch": 2.207894736842105,
      "grad_norm": 0.5024856925010681,
      "learning_rate": 1.9500876602647167e-06,
      "loss": 0.5307,
      "step": 279
    },
    {
      "epoch": 2.2157894736842105,
      "grad_norm": 0.5637997984886169,
      "learning_rate": 1.913608893551036e-06,
      "loss": 0.5511,
      "step": 280
    },
    {
      "epoch": 2.223684210526316,
      "grad_norm": 0.5119351744651794,
      "learning_rate": 1.8773936323955055e-06,
      "loss": 0.5424,
      "step": 281
    },
    {
      "epoch": 2.231578947368421,
      "grad_norm": 0.5853419303894043,
      "learning_rate": 1.8414449687337467e-06,
      "loss": 0.5365,
      "step": 282
    },
    {
      "epoch": 2.2394736842105263,
      "grad_norm": 0.496768593788147,
      "learning_rate": 1.8057659717401948e-06,
      "loss": 0.5303,
      "step": 283
    },
    {
      "epoch": 2.2473684210526317,
      "grad_norm": 0.5818266272544861,
      "learning_rate": 1.7703596875660645e-06,
      "loss": 0.5386,
      "step": 284
    },
    {
      "epoch": 2.2552631578947366,
      "grad_norm": 0.5075578689575195,
      "learning_rate": 1.7352291390792798e-06,
      "loss": 0.5349,
      "step": 285
    },
    {
      "epoch": 2.263157894736842,
      "grad_norm": 0.4751499593257904,
      "learning_rate": 1.7003773256063882e-06,
      "loss": 0.5159,
      "step": 286
    },
    {
      "epoch": 2.2710526315789474,
      "grad_norm": 0.4846212565898895,
      "learning_rate": 1.6658072226764949e-06,
      "loss": 0.5432,
      "step": 287
    },
    {
      "epoch": 2.2789473684210524,
      "grad_norm": 0.5411502122879028,
      "learning_rate": 1.6315217817672142e-06,
      "loss": 0.5133,
      "step": 288
    },
    {
      "epoch": 2.286842105263158,
      "grad_norm": 0.5099013447761536,
      "learning_rate": 1.5975239300526924e-06,
      "loss": 0.5364,
      "step": 289
    },
    {
      "epoch": 2.294736842105263,
      "grad_norm": 0.5247920155525208,
      "learning_rate": 1.5638165701536866e-06,
      "loss": 0.5325,
      "step": 290
    },
    {
      "epoch": 2.3026315789473686,
      "grad_norm": 0.5047224760055542,
      "learning_rate": 1.5304025798897521e-06,
      "loss": 0.5429,
      "step": 291
    },
    {
      "epoch": 2.3105263157894735,
      "grad_norm": 0.4913298487663269,
      "learning_rate": 1.4972848120335453e-06,
      "loss": 0.5384,
      "step": 292
    },
    {
      "epoch": 2.318421052631579,
      "grad_norm": 0.4855342507362366,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.5449,
      "step": 293
    },
    {
      "epoch": 2.3263157894736843,
      "grad_norm": 0.5031416416168213,
      "learning_rate": 1.4319492279412388e-06,
      "loss": 0.5404,
      "step": 294
    },
    {
      "epoch": 2.3342105263157893,
      "grad_norm": 0.4946608245372772,
      "learning_rate": 1.399736989834728e-06,
      "loss": 0.5431,
      "step": 295
    },
    {
      "epoch": 2.3421052631578947,
      "grad_norm": 0.5071939826011658,
      "learning_rate": 1.3678321299188802e-06,
      "loss": 0.5221,
      "step": 296
    },
    {
      "epoch": 2.35,
      "grad_norm": 0.48161017894744873,
      "learning_rate": 1.336237372121944e-06,
      "loss": 0.5229,
      "step": 297
    },
    {
      "epoch": 2.3578947368421055,
      "grad_norm": 0.5062857270240784,
      "learning_rate": 1.3049554138967052e-06,
      "loss": 0.5344,
      "step": 298
    },
    {
      "epoch": 2.3657894736842104,
      "grad_norm": 0.5024528503417969,
      "learning_rate": 1.2739889259901866e-06,
      "loss": 0.5432,
      "step": 299
    },
    {
      "epoch": 2.373684210526316,
      "grad_norm": 0.5160316824913025,
      "learning_rate": 1.2433405522156334e-06,
      "loss": 0.5374,
      "step": 300
    },
    {
      "epoch": 2.3815789473684212,
      "grad_norm": 0.527028501033783,
      "learning_rate": 1.213012909226786e-06,
      "loss": 0.5286,
      "step": 301
    },
    {
      "epoch": 2.389473684210526,
      "grad_norm": 0.5148992538452148,
      "learning_rate": 1.1830085862944851e-06,
      "loss": 0.5527,
      "step": 302
    },
    {
      "epoch": 2.3973684210526316,
      "grad_norm": 0.47975584864616394,
      "learning_rate": 1.1533301450856054e-06,
      "loss": 0.567,
      "step": 303
    },
    {
      "epoch": 2.405263157894737,
      "grad_norm": 0.4557743966579437,
      "learning_rate": 1.1239801194443507e-06,
      "loss": 0.5406,
      "step": 304
    },
    {
      "epoch": 2.413157894736842,
      "grad_norm": 0.5039398074150085,
      "learning_rate": 1.0949610151759233e-06,
      "loss": 0.5395,
      "step": 305
    },
    {
      "epoch": 2.4210526315789473,
      "grad_norm": 0.5025436282157898,
      "learning_rate": 1.066275309832584e-06,
      "loss": 0.5485,
      "step": 306
    },
    {
      "epoch": 2.4289473684210527,
      "grad_norm": 0.48488861322402954,
      "learning_rate": 1.037925452502131e-06,
      "loss": 0.5344,
      "step": 307
    },
    {
      "epoch": 2.4368421052631577,
      "grad_norm": 0.47348925471305847,
      "learning_rate": 1.0099138635988026e-06,
      "loss": 0.5647,
      "step": 308
    },
    {
      "epoch": 2.444736842105263,
      "grad_norm": 0.5118060111999512,
      "learning_rate": 9.822429346566314e-07,
      "loss": 0.5219,
      "step": 309
    },
    {
      "epoch": 2.4526315789473685,
      "grad_norm": 0.48763391375541687,
      "learning_rate": 9.549150281252633e-07,
      "loss": 0.5413,
      "step": 310
    },
    {
      "epoch": 2.4605263157894735,
      "grad_norm": 0.4737011194229126,
      "learning_rate": 9.279324771682586e-07,
      "loss": 0.5288,
      "step": 311
    },
    {
      "epoch": 2.468421052631579,
      "grad_norm": 0.501611590385437,
      "learning_rate": 9.01297585463895e-07,
      "loss": 0.5194,
      "step": 312
    },
    {
      "epoch": 2.4763157894736842,
      "grad_norm": 0.4514743983745575,
      "learning_rate": 8.750126270084891e-07,
      "loss": 0.5446,
      "step": 313
    },
    {
      "epoch": 2.4842105263157896,
      "grad_norm": 0.4870568811893463,
      "learning_rate": 8.490798459222477e-07,
      "loss": 0.5544,
      "step": 314
    },
    {
      "epoch": 2.4921052631578946,
      "grad_norm": 0.4649040102958679,
      "learning_rate": 8.235014562576732e-07,
      "loss": 0.5344,
      "step": 315
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.49083462357521057,
      "learning_rate": 7.98279641810537e-07,
      "loss": 0.5304,
      "step": 316
    },
    {
      "epoch": 2.5078947368421054,
      "grad_norm": 0.49484091997146606,
      "learning_rate": 7.734165559334327e-07,
      "loss": 0.5423,
      "step": 317
    },
    {
      "epoch": 2.515789473684211,
      "grad_norm": 0.5281040072441101,
      "learning_rate": 7.489143213519301e-07,
      "loss": 0.5383,
      "step": 318
    },
    {
      "epoch": 2.5236842105263158,
      "grad_norm": 0.44312354922294617,
      "learning_rate": 7.24775029983345e-07,
      "loss": 0.5626,
      "step": 319
    },
    {
      "epoch": 2.531578947368421,
      "grad_norm": 0.47553321719169617,
      "learning_rate": 7.010007427581378e-07,
      "loss": 0.5321,
      "step": 320
    },
    {
      "epoch": 2.5394736842105265,
      "grad_norm": 0.4963271915912628,
      "learning_rate": 6.775934894439606e-07,
      "loss": 0.5305,
      "step": 321
    },
    {
      "epoch": 2.5473684210526315,
      "grad_norm": 0.4486421048641205,
      "learning_rate": 6.545552684723583e-07,
      "loss": 0.5698,
      "step": 322
    },
    {
      "epoch": 2.555263157894737,
      "grad_norm": 0.49345487356185913,
      "learning_rate": 6.318880467681527e-07,
      "loss": 0.5368,
      "step": 323
    },
    {
      "epoch": 2.5631578947368423,
      "grad_norm": 0.4889611601829529,
      "learning_rate": 6.095937595815104e-07,
      "loss": 0.5099,
      "step": 324
    },
    {
      "epoch": 2.5710526315789473,
      "grad_norm": 0.4943506419658661,
      "learning_rate": 5.876743103227217e-07,
      "loss": 0.5048,
      "step": 325
    },
    {
      "epoch": 2.5789473684210527,
      "grad_norm": 0.48465171456336975,
      "learning_rate": 5.661315703996905e-07,
      "loss": 0.5433,
      "step": 326
    },
    {
      "epoch": 2.586842105263158,
      "grad_norm": 0.49014216661453247,
      "learning_rate": 5.449673790581611e-07,
      "loss": 0.5684,
      "step": 327
    },
    {
      "epoch": 2.594736842105263,
      "grad_norm": 0.47829702496528625,
      "learning_rate": 5.241835432246888e-07,
      "loss": 0.5395,
      "step": 328
    },
    {
      "epoch": 2.6026315789473684,
      "grad_norm": 0.507520854473114,
      "learning_rate": 5.037818373523723e-07,
      "loss": 0.5369,
      "step": 329
    },
    {
      "epoch": 2.610526315789474,
      "grad_norm": 0.4816722869873047,
      "learning_rate": 4.837640032693558e-07,
      "loss": 0.5295,
      "step": 330
    },
    {
      "epoch": 2.6184210526315788,
      "grad_norm": 0.484370619058609,
      "learning_rate": 4.641317500301173e-07,
      "loss": 0.5354,
      "step": 331
    },
    {
      "epoch": 2.626315789473684,
      "grad_norm": 0.45856356620788574,
      "learning_rate": 4.448867537695578e-07,
      "loss": 0.563,
      "step": 332
    },
    {
      "epoch": 2.6342105263157896,
      "grad_norm": 0.47436991333961487,
      "learning_rate": 4.2603065755989493e-07,
      "loss": 0.5453,
      "step": 333
    },
    {
      "epoch": 2.6421052631578945,
      "grad_norm": 0.48465612530708313,
      "learning_rate": 4.0756507127038494e-07,
      "loss": 0.5348,
      "step": 334
    },
    {
      "epoch": 2.65,
      "grad_norm": 0.4726357161998749,
      "learning_rate": 3.894915714298775e-07,
      "loss": 0.5247,
      "step": 335
    },
    {
      "epoch": 2.6578947368421053,
      "grad_norm": 0.4793488681316376,
      "learning_rate": 3.71811701092219e-07,
      "loss": 0.5541,
      "step": 336
    },
    {
      "epoch": 2.6657894736842103,
      "grad_norm": 0.5060055255889893,
      "learning_rate": 3.5452696970450674e-07,
      "loss": 0.5609,
      "step": 337
    },
    {
      "epoch": 2.6736842105263157,
      "grad_norm": 0.5144080519676208,
      "learning_rate": 3.3763885297822153e-07,
      "loss": 0.5143,
      "step": 338
    },
    {
      "epoch": 2.681578947368421,
      "grad_norm": 0.461753249168396,
      "learning_rate": 3.2114879276323783e-07,
      "loss": 0.5422,
      "step": 339
    },
    {
      "epoch": 2.6894736842105265,
      "grad_norm": 0.48919305205345154,
      "learning_rate": 3.0505819692471797e-07,
      "loss": 0.5403,
      "step": 340
    },
    {
      "epoch": 2.6973684210526314,
      "grad_norm": 0.4654390811920166,
      "learning_rate": 2.893684392229185e-07,
      "loss": 0.5299,
      "step": 341
    },
    {
      "epoch": 2.705263157894737,
      "grad_norm": 0.45241108536720276,
      "learning_rate": 2.7408085919590265e-07,
      "loss": 0.5385,
      "step": 342
    },
    {
      "epoch": 2.713157894736842,
      "grad_norm": 0.4543478190898895,
      "learning_rate": 2.5919676204517073e-07,
      "loss": 0.5522,
      "step": 343
    },
    {
      "epoch": 2.7210526315789476,
      "grad_norm": 0.4557345509529114,
      "learning_rate": 2.447174185242324e-07,
      "loss": 0.5499,
      "step": 344
    },
    {
      "epoch": 2.7289473684210526,
      "grad_norm": 0.47247329354286194,
      "learning_rate": 2.3064406483010947e-07,
      "loss": 0.539,
      "step": 345
    },
    {
      "epoch": 2.736842105263158,
      "grad_norm": 0.5086033344268799,
      "learning_rate": 2.1697790249779638e-07,
      "loss": 0.5388,
      "step": 346
    },
    {
      "epoch": 2.7447368421052634,
      "grad_norm": 0.45670172572135925,
      "learning_rate": 2.0372009829767558e-07,
      "loss": 0.5378,
      "step": 347
    },
    {
      "epoch": 2.7526315789473683,
      "grad_norm": 0.47339746356010437,
      "learning_rate": 1.908717841359048e-07,
      "loss": 0.5362,
      "step": 348
    },
    {
      "epoch": 2.7605263157894737,
      "grad_norm": 0.47306424379348755,
      "learning_rate": 1.7843405695777582e-07,
      "loss": 0.54,
      "step": 349
    },
    {
      "epoch": 2.768421052631579,
      "grad_norm": 0.49057334661483765,
      "learning_rate": 1.664079786540629e-07,
      "loss": 0.5466,
      "step": 350
    },
    {
      "epoch": 2.776315789473684,
      "grad_norm": 0.4364169239997864,
      "learning_rate": 1.547945759703623e-07,
      "loss": 0.5468,
      "step": 351
    },
    {
      "epoch": 2.7842105263157895,
      "grad_norm": 0.45972496271133423,
      "learning_rate": 1.435948404194304e-07,
      "loss": 0.5359,
      "step": 352
    },
    {
      "epoch": 2.792105263157895,
      "grad_norm": 0.4681927561759949,
      "learning_rate": 1.328097281965357e-07,
      "loss": 0.5218,
      "step": 353
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.48078453540802,
      "learning_rate": 1.22440160097817e-07,
      "loss": 0.5318,
      "step": 354
    },
    {
      "epoch": 2.807894736842105,
      "grad_norm": 0.5014187097549438,
      "learning_rate": 1.1248702144167123e-07,
      "loss": 0.5175,
      "step": 355
    },
    {
      "epoch": 2.8157894736842106,
      "grad_norm": 0.4723798632621765,
      "learning_rate": 1.0295116199317057e-07,
      "loss": 0.5459,
      "step": 356
    },
    {
      "epoch": 2.8236842105263156,
      "grad_norm": 0.485458105802536,
      "learning_rate": 9.383339589150776e-08,
      "loss": 0.5221,
      "step": 357
    },
    {
      "epoch": 2.831578947368421,
      "grad_norm": 0.4910981059074402,
      "learning_rate": 8.513450158049109e-08,
      "loss": 0.5192,
      "step": 358
    },
    {
      "epoch": 2.8394736842105264,
      "grad_norm": 0.49855437874794006,
      "learning_rate": 7.685522174208205e-08,
      "loss": 0.5347,
      "step": 359
    },
    {
      "epoch": 2.8473684210526313,
      "grad_norm": 0.46188458800315857,
      "learning_rate": 6.899626323298714e-08,
      "loss": 0.5286,
      "step": 360
    },
    {
      "epoch": 2.8552631578947367,
      "grad_norm": 0.488492876291275,
      "learning_rate": 6.15582970243117e-08,
      "loss": 0.5202,
      "step": 361
    },
    {
      "epoch": 2.863157894736842,
      "grad_norm": 0.516136109828949,
      "learning_rate": 5.454195814427021e-08,
      "loss": 0.5044,
      "step": 362
    },
    {
      "epoch": 2.8710526315789475,
      "grad_norm": 0.4855652451515198,
      "learning_rate": 4.794784562397459e-08,
      "loss": 0.535,
      "step": 363
    },
    {
      "epoch": 2.8789473684210525,
      "grad_norm": 0.49933773279190063,
      "learning_rate": 4.177652244628627e-08,
      "loss": 0.5383,
      "step": 364
    },
    {
      "epoch": 2.886842105263158,
      "grad_norm": 0.48090240359306335,
      "learning_rate": 3.602851549775521e-08,
      "loss": 0.5396,
      "step": 365
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.4793301820755005,
      "learning_rate": 3.0704315523631956e-08,
      "loss": 0.5389,
      "step": 366
    },
    {
      "epoch": 2.9026315789473687,
      "grad_norm": 0.5019791722297668,
      "learning_rate": 2.5804377085972278e-08,
      "loss": 0.5565,
      "step": 367
    },
    {
      "epoch": 2.9105263157894736,
      "grad_norm": 0.4860020577907562,
      "learning_rate": 2.1329118524827662e-08,
      "loss": 0.522,
      "step": 368
    },
    {
      "epoch": 2.918421052631579,
      "grad_norm": 0.49818456172943115,
      "learning_rate": 1.7278921922527224e-08,
      "loss": 0.5146,
      "step": 369
    },
    {
      "epoch": 2.9263157894736844,
      "grad_norm": 0.46853312849998474,
      "learning_rate": 1.3654133071059894e-08,
      "loss": 0.5414,
      "step": 370
    },
    {
      "epoch": 2.9342105263157894,
      "grad_norm": 0.47929537296295166,
      "learning_rate": 1.0455061442548597e-08,
      "loss": 0.5294,
      "step": 371
    },
    {
      "epoch": 2.942105263157895,
      "grad_norm": 0.4867682158946991,
      "learning_rate": 7.681980162830283e-09,
      "loss": 0.5533,
      "step": 372
    },
    {
      "epoch": 2.95,
      "grad_norm": 0.47420981526374817,
      "learning_rate": 5.3351259881379016e-09,
      "loss": 0.5315,
      "step": 373
    },
    {
      "epoch": 2.957894736842105,
      "grad_norm": 0.5050917863845825,
      "learning_rate": 3.41469928488547e-09,
      "loss": 0.5376,
      "step": 374
    },
    {
      "epoch": 2.9657894736842105,
      "grad_norm": 0.4844934940338135,
      "learning_rate": 1.9208640125628618e-09,
      "loss": 0.5484,
      "step": 375
    },
    {
      "epoch": 2.973684210526316,
      "grad_norm": 0.478400319814682,
      "learning_rate": 8.537477097364522e-10,
      "loss": 0.5329,
      "step": 376
    },
    {
      "epoch": 2.981578947368421,
      "grad_norm": 0.47381076216697693,
      "learning_rate": 2.1344148316060352e-10,
      "loss": 0.5301,
      "step": 377
    },
    {
      "epoch": 2.9894736842105263,
      "grad_norm": 0.5020145177841187,
      "learning_rate": 0.0,
      "loss": 0.5492,
      "step": 378
    },
    {
      "epoch": 2.9894736842105263,
      "step": 378,
      "total_flos": 3.713025592344904e+17,
      "train_loss": 0.0,
      "train_runtime": 12.3976,
      "train_samples_per_second": 2940.336,
      "train_steps_per_second": 30.49
    }
  ],
  "logging_steps": 1,
  "max_steps": 378,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.713025592344904e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}