{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2525597269624573,
  "eval_steps": 500,
  "global_step": 1110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00022753128555176336,
      "grad_norm": 12.3071932545212,
      "learning_rate": 1.25e-06,
      "loss": 0.3186,
      "step": 1
    },
    {
      "epoch": 0.0004550625711035267,
      "grad_norm": 11.959825961880057,
      "learning_rate": 1.2499999936130725e-06,
      "loss": 0.3776,
      "step": 2
    },
    {
      "epoch": 0.0006825938566552901,
      "grad_norm": 5.4315221034586365,
      "learning_rate": 1.2499999744522896e-06,
      "loss": 0.4755,
      "step": 3
    },
    {
      "epoch": 0.0009101251422070534,
      "grad_norm": 21.003860231065644,
      "learning_rate": 1.2499999425176518e-06,
      "loss": 0.3334,
      "step": 4
    },
    {
      "epoch": 0.0011376564277588168,
      "grad_norm": 9.549170994835775,
      "learning_rate": 1.2499998978091598e-06,
      "loss": 0.375,
      "step": 5
    },
    {
      "epoch": 0.0013651877133105802,
      "grad_norm": 3.400827392368318,
      "learning_rate": 1.2499998403268147e-06,
      "loss": 0.2286,
      "step": 6
    },
    {
      "epoch": 0.0015927189988623437,
      "grad_norm": 8.451175634489234,
      "learning_rate": 1.2499997700706173e-06,
      "loss": 0.3216,
      "step": 7
    },
    {
      "epoch": 0.0018202502844141069,
      "grad_norm": 7.494987211346803,
      "learning_rate": 1.2499996870405692e-06,
      "loss": 0.2339,
      "step": 8
    },
    {
      "epoch": 0.0020477815699658703,
      "grad_norm": 9.138399201835718,
      "learning_rate": 1.2499995912366722e-06,
      "loss": 0.326,
      "step": 9
    },
    {
      "epoch": 0.0022753128555176336,
      "grad_norm": 3.2188295955534123,
      "learning_rate": 1.2499994826589282e-06,
      "loss": 0.2514,
      "step": 10
    },
    {
      "epoch": 0.002502844141069397,
      "grad_norm": 22.66663249526738,
      "learning_rate": 1.2499993613073393e-06,
      "loss": 0.4005,
      "step": 11
    },
    {
      "epoch": 0.0027303754266211604,
      "grad_norm": 21.954799290073527,
      "learning_rate": 1.2499992271819083e-06,
      "loss": 0.1492,
      "step": 12
    },
    {
      "epoch": 0.0029579067121729237,
      "grad_norm": 6.624298118045555,
      "learning_rate": 1.2499990802826377e-06,
      "loss": 0.3024,
      "step": 13
    },
    {
      "epoch": 0.0031854379977246873,
      "grad_norm": 3.923975454400113,
      "learning_rate": 1.2499989206095304e-06,
      "loss": 0.2411,
      "step": 14
    },
    {
      "epoch": 0.0034129692832764505,
      "grad_norm": 1.9398605915746092,
      "learning_rate": 1.2499987481625899e-06,
      "loss": 0.1849,
      "step": 15
    },
    {
      "epoch": 0.0036405005688282138,
      "grad_norm": 5.482695785208493,
      "learning_rate": 1.2499985629418195e-06,
      "loss": 0.3122,
      "step": 16
    },
    {
      "epoch": 0.0038680318543799774,
      "grad_norm": 5.552109300872593,
      "learning_rate": 1.2499983649472233e-06,
      "loss": 0.3393,
      "step": 17
    },
    {
      "epoch": 0.004095563139931741,
      "grad_norm": 4.610318891370888,
      "learning_rate": 1.249998154178805e-06,
      "loss": 0.3,
      "step": 18
    },
    {
      "epoch": 0.004323094425483504,
      "grad_norm": 8.793267315718285,
      "learning_rate": 1.2499979306365692e-06,
      "loss": 0.2266,
      "step": 19
    },
    {
      "epoch": 0.004550625711035267,
      "grad_norm": 11.785540460314868,
      "learning_rate": 1.2499976943205202e-06,
      "loss": 0.258,
      "step": 20
    },
    {
      "epoch": 0.00477815699658703,
      "grad_norm": 7.848333910468807,
      "learning_rate": 1.249997445230663e-06,
      "loss": 0.3733,
      "step": 21
    },
    {
      "epoch": 0.005005688282138794,
      "grad_norm": 11.509651474854413,
      "learning_rate": 1.2499971833670026e-06,
      "loss": 0.3606,
      "step": 22
    },
    {
      "epoch": 0.005233219567690558,
      "grad_norm": 8.662973783002895,
      "learning_rate": 1.2499969087295443e-06,
      "loss": 0.3884,
      "step": 23
    },
    {
      "epoch": 0.005460750853242321,
      "grad_norm": 5.341258812295752,
      "learning_rate": 1.249996621318294e-06,
      "loss": 0.2677,
      "step": 24
    },
    {
      "epoch": 0.005688282138794084,
      "grad_norm": 4.742018594757072,
      "learning_rate": 1.2499963211332573e-06,
      "loss": 0.3253,
      "step": 25
    },
    {
      "epoch": 0.005915813424345847,
      "grad_norm": 2.4536573603250624,
      "learning_rate": 1.2499960081744405e-06,
      "loss": 0.2393,
      "step": 26
    },
    {
      "epoch": 0.0061433447098976105,
      "grad_norm": 6.34705088291597,
      "learning_rate": 1.24999568244185e-06,
      "loss": 0.4326,
      "step": 27
    },
    {
      "epoch": 0.006370875995449375,
      "grad_norm": 9.775833264439491,
      "learning_rate": 1.249995343935492e-06,
      "loss": 0.4252,
      "step": 28
    },
    {
      "epoch": 0.006598407281001138,
      "grad_norm": 6.064212735225404,
      "learning_rate": 1.2499949926553743e-06,
      "loss": 0.2988,
      "step": 29
    },
    {
      "epoch": 0.006825938566552901,
      "grad_norm": 4.4254830237015845,
      "learning_rate": 1.2499946286015032e-06,
      "loss": 0.2988,
      "step": 30
    },
    {
      "epoch": 0.007053469852104664,
      "grad_norm": 4.883047495609927,
      "learning_rate": 1.2499942517738867e-06,
      "loss": 0.2285,
      "step": 31
    },
    {
      "epoch": 0.0072810011376564275,
      "grad_norm": 8.135398866699179,
      "learning_rate": 1.2499938621725322e-06,
      "loss": 0.1529,
      "step": 32
    },
    {
      "epoch": 0.007508532423208191,
      "grad_norm": 2.973365765084456,
      "learning_rate": 1.2499934597974478e-06,
      "loss": 0.2436,
      "step": 33
    },
    {
      "epoch": 0.007736063708759955,
      "grad_norm": 5.612693729952574,
      "learning_rate": 1.2499930446486416e-06,
      "loss": 0.3466,
      "step": 34
    },
    {
      "epoch": 0.007963594994311717,
      "grad_norm": 3.022290156639827,
      "learning_rate": 1.2499926167261224e-06,
      "loss": 0.2728,
      "step": 35
    },
    {
      "epoch": 0.008191126279863481,
      "grad_norm": 3.1279992715224467,
      "learning_rate": 1.2499921760298987e-06,
      "loss": 0.2469,
      "step": 36
    },
    {
      "epoch": 0.008418657565415245,
      "grad_norm": 14.845448376418034,
      "learning_rate": 1.2499917225599796e-06,
      "loss": 0.5145,
      "step": 37
    },
    {
      "epoch": 0.008646188850967008,
      "grad_norm": 14.138433401115075,
      "learning_rate": 1.2499912563163742e-06,
      "loss": 0.2705,
      "step": 38
    },
    {
      "epoch": 0.008873720136518772,
      "grad_norm": 4.324563647824762,
      "learning_rate": 1.249990777299092e-06,
      "loss": 0.1563,
      "step": 39
    },
    {
      "epoch": 0.009101251422070534,
      "grad_norm": 11.315529959215173,
      "learning_rate": 1.249990285508143e-06,
      "loss": 0.4123,
      "step": 40
    },
    {
      "epoch": 0.009328782707622298,
      "grad_norm": 6.3112839729366765,
      "learning_rate": 1.2499897809435374e-06,
      "loss": 0.1742,
      "step": 41
    },
    {
      "epoch": 0.00955631399317406,
      "grad_norm": 8.25726966946455,
      "learning_rate": 1.249989263605285e-06,
      "loss": 0.3229,
      "step": 42
    },
    {
      "epoch": 0.009783845278725825,
      "grad_norm": 6.3545712967505334,
      "learning_rate": 1.249988733493397e-06,
      "loss": 0.3055,
      "step": 43
    },
    {
      "epoch": 0.010011376564277589,
      "grad_norm": 5.356373706603287,
      "learning_rate": 1.2499881906078836e-06,
      "loss": 0.2601,
      "step": 44
    },
    {
      "epoch": 0.010238907849829351,
      "grad_norm": 1.9215795165819936,
      "learning_rate": 1.2499876349487564e-06,
      "loss": 0.1517,
      "step": 45
    },
    {
      "epoch": 0.010466439135381115,
      "grad_norm": 8.506503892761648,
      "learning_rate": 1.2499870665160262e-06,
      "loss": 0.2831,
      "step": 46
    },
    {
      "epoch": 0.010693970420932878,
      "grad_norm": 5.909503420571465,
      "learning_rate": 1.2499864853097054e-06,
      "loss": 0.2252,
      "step": 47
    },
    {
      "epoch": 0.010921501706484642,
      "grad_norm": 5.488265194188453,
      "learning_rate": 1.2499858913298053e-06,
      "loss": 0.3466,
      "step": 48
    },
    {
      "epoch": 0.011149032992036406,
      "grad_norm": 12.162427245650075,
      "learning_rate": 1.249985284576338e-06,
      "loss": 0.2426,
      "step": 49
    },
    {
      "epoch": 0.011376564277588168,
      "grad_norm": 9.969211407495816,
      "learning_rate": 1.2499846650493164e-06,
      "loss": 0.2801,
      "step": 50
    },
    {
      "epoch": 0.011604095563139932,
      "grad_norm": 5.741578552447352,
      "learning_rate": 1.2499840327487528e-06,
      "loss": 0.2664,
      "step": 51
    },
    {
      "epoch": 0.011831626848691695,
      "grad_norm": 2.937767840084915,
      "learning_rate": 1.24998338767466e-06,
      "loss": 0.1834,
      "step": 52
    },
    {
      "epoch": 0.012059158134243459,
      "grad_norm": 4.130655112830682,
      "learning_rate": 1.2499827298270515e-06,
      "loss": 0.2675,
      "step": 53
    },
    {
      "epoch": 0.012286689419795221,
      "grad_norm": 4.5227789119131625,
      "learning_rate": 1.2499820592059405e-06,
      "loss": 0.3205,
      "step": 54
    },
    {
      "epoch": 0.012514220705346985,
      "grad_norm": 4.653850683576537,
      "learning_rate": 1.2499813758113409e-06,
      "loss": 0.1921,
      "step": 55
    },
    {
      "epoch": 0.01274175199089875,
      "grad_norm": 6.204991552012506,
      "learning_rate": 1.2499806796432665e-06,
      "loss": 0.1989,
      "step": 56
    },
    {
      "epoch": 0.012969283276450512,
      "grad_norm": 7.81696538748595,
      "learning_rate": 1.2499799707017315e-06,
      "loss": 0.1301,
      "step": 57
    },
    {
      "epoch": 0.013196814562002276,
      "grad_norm": 6.427887275035889,
      "learning_rate": 1.2499792489867508e-06,
      "loss": 0.3376,
      "step": 58
    },
    {
      "epoch": 0.013424345847554038,
      "grad_norm": 4.713573539887475,
      "learning_rate": 1.2499785144983386e-06,
      "loss": 0.1673,
      "step": 59
    },
    {
      "epoch": 0.013651877133105802,
      "grad_norm": 6.7169275734426055,
      "learning_rate": 1.24997776723651e-06,
      "loss": 0.2501,
      "step": 60
    },
    {
      "epoch": 0.013879408418657566,
      "grad_norm": 11.702392641770421,
      "learning_rate": 1.2499770072012809e-06,
      "loss": 0.293,
      "step": 61
    },
    {
      "epoch": 0.014106939704209329,
      "grad_norm": 5.86563350345107,
      "learning_rate": 1.2499762343926661e-06,
      "loss": 0.2346,
      "step": 62
    },
    {
      "epoch": 0.014334470989761093,
      "grad_norm": 4.562933746130791,
      "learning_rate": 1.2499754488106817e-06,
      "loss": 0.1349,
      "step": 63
    },
    {
      "epoch": 0.014562002275312855,
      "grad_norm": 16.935870758573948,
      "learning_rate": 1.2499746504553436e-06,
      "loss": 0.2869,
      "step": 64
    },
    {
      "epoch": 0.01478953356086462,
      "grad_norm": 3.252674290241083,
      "learning_rate": 1.2499738393266684e-06,
      "loss": 0.2125,
      "step": 65
    },
    {
      "epoch": 0.015017064846416382,
      "grad_norm": 3.767321260449828,
      "learning_rate": 1.2499730154246726e-06,
      "loss": 0.2049,
      "step": 66
    },
    {
      "epoch": 0.015244596131968146,
      "grad_norm": 7.264091175555215,
      "learning_rate": 1.2499721787493726e-06,
      "loss": 0.2521,
      "step": 67
    },
    {
      "epoch": 0.01547212741751991,
      "grad_norm": 2.846384337735166,
      "learning_rate": 1.2499713293007862e-06,
      "loss": 0.1745,
      "step": 68
    },
    {
      "epoch": 0.015699658703071672,
      "grad_norm": 30.829215228751778,
      "learning_rate": 1.2499704670789301e-06,
      "loss": 0.1514,
      "step": 69
    },
    {
      "epoch": 0.015927189988623434,
      "grad_norm": 7.168923083631056,
      "learning_rate": 1.2499695920838225e-06,
      "loss": 0.2393,
      "step": 70
    },
    {
      "epoch": 0.0161547212741752,
      "grad_norm": 3.418723817035884,
      "learning_rate": 1.2499687043154809e-06,
      "loss": 0.1342,
      "step": 71
    },
    {
      "epoch": 0.016382252559726963,
      "grad_norm": 6.316537441364383,
      "learning_rate": 1.2499678037739235e-06,
      "loss": 0.1698,
      "step": 72
    },
    {
      "epoch": 0.016609783845278725,
      "grad_norm": 3.8561981086650596,
      "learning_rate": 1.2499668904591688e-06,
      "loss": 0.3104,
      "step": 73
    },
    {
      "epoch": 0.01683731513083049,
      "grad_norm": 4.679806938064617,
      "learning_rate": 1.2499659643712356e-06,
      "loss": 0.2139,
      "step": 74
    },
    {
      "epoch": 0.017064846416382253,
      "grad_norm": 4.26137230837329,
      "learning_rate": 1.2499650255101425e-06,
      "loss": 0.2433,
      "step": 75
    },
    {
      "epoch": 0.017292377701934016,
      "grad_norm": 3.7227188471827914,
      "learning_rate": 1.2499640738759088e-06,
      "loss": 0.2334,
      "step": 76
    },
    {
      "epoch": 0.017519908987485778,
      "grad_norm": 6.044525591826923,
      "learning_rate": 1.249963109468554e-06,
      "loss": 0.3106,
      "step": 77
    },
    {
      "epoch": 0.017747440273037544,
      "grad_norm": 6.248705646938244,
      "learning_rate": 1.2499621322880979e-06,
      "loss": 0.2025,
      "step": 78
    },
    {
      "epoch": 0.017974971558589306,
      "grad_norm": 2.8368621495357313,
      "learning_rate": 1.2499611423345604e-06,
      "loss": 0.1492,
      "step": 79
    },
    {
      "epoch": 0.01820250284414107,
      "grad_norm": 5.049736361542706,
      "learning_rate": 1.2499601396079617e-06,
      "loss": 0.1341,
      "step": 80
    },
    {
      "epoch": 0.018430034129692834,
      "grad_norm": 6.760221850362585,
      "learning_rate": 1.2499591241083222e-06,
      "loss": 0.2092,
      "step": 81
    },
    {
      "epoch": 0.018657565415244597,
      "grad_norm": 6.630540720646431,
      "learning_rate": 1.2499580958356628e-06,
      "loss": 0.2181,
      "step": 82
    },
    {
      "epoch": 0.01888509670079636,
      "grad_norm": 3.8482585047631863,
      "learning_rate": 1.2499570547900045e-06,
      "loss": 0.1613,
      "step": 83
    },
    {
      "epoch": 0.01911262798634812,
      "grad_norm": 6.605304588968454,
      "learning_rate": 1.2499560009713684e-06,
      "loss": 0.2959,
      "step": 84
    },
    {
      "epoch": 0.019340159271899887,
      "grad_norm": 6.012809221970948,
      "learning_rate": 1.2499549343797764e-06,
      "loss": 0.2393,
      "step": 85
    },
    {
      "epoch": 0.01956769055745165,
      "grad_norm": 6.254621323206641,
      "learning_rate": 1.24995385501525e-06,
      "loss": 0.2285,
      "step": 86
    },
    {
      "epoch": 0.019795221843003412,
      "grad_norm": 3.4046999226542733,
      "learning_rate": 1.2499527628778116e-06,
      "loss": 0.1187,
      "step": 87
    },
    {
      "epoch": 0.020022753128555178,
      "grad_norm": 7.419781715158706,
      "learning_rate": 1.2499516579674831e-06,
      "loss": 0.2817,
      "step": 88
    },
    {
      "epoch": 0.02025028441410694,
      "grad_norm": 21.819719933471735,
      "learning_rate": 1.2499505402842872e-06,
      "loss": 0.2469,
      "step": 89
    },
    {
      "epoch": 0.020477815699658702,
      "grad_norm": 2.8418419055080766,
      "learning_rate": 1.2499494098282469e-06,
      "loss": 0.2955,
      "step": 90
    },
    {
      "epoch": 0.020705346985210465,
      "grad_norm": 7.066317637431583,
      "learning_rate": 1.2499482665993851e-06,
      "loss": 0.2044,
      "step": 91
    },
    {
      "epoch": 0.02093287827076223,
      "grad_norm": 5.925737098985834,
      "learning_rate": 1.2499471105977252e-06,
      "loss": 0.2335,
      "step": 92
    },
    {
      "epoch": 0.021160409556313993,
      "grad_norm": 3.0480275776898473,
      "learning_rate": 1.249945941823291e-06,
      "loss": 0.3633,
      "step": 93
    },
    {
      "epoch": 0.021387940841865755,
      "grad_norm": 2.946352549362824,
      "learning_rate": 1.2499447602761063e-06,
      "loss": 0.2011,
      "step": 94
    },
    {
      "epoch": 0.02161547212741752,
      "grad_norm": 6.07129225638081,
      "learning_rate": 1.2499435659561954e-06,
      "loss": 0.2585,
      "step": 95
    },
    {
      "epoch": 0.021843003412969283,
      "grad_norm": 4.592794032374342,
      "learning_rate": 1.2499423588635823e-06,
      "loss": 0.2336,
      "step": 96
    },
    {
      "epoch": 0.022070534698521046,
      "grad_norm": 19.61835193566366,
      "learning_rate": 1.2499411389982919e-06,
      "loss": 0.2438,
      "step": 97
    },
    {
      "epoch": 0.02229806598407281,
      "grad_norm": 4.697964666160796,
      "learning_rate": 1.2499399063603492e-06,
      "loss": 0.26,
      "step": 98
    },
    {
      "epoch": 0.022525597269624574,
      "grad_norm": 6.831528796415563,
      "learning_rate": 1.2499386609497793e-06,
      "loss": 0.1291,
      "step": 99
    },
    {
      "epoch": 0.022753128555176336,
      "grad_norm": 3.3770537551655653,
      "learning_rate": 1.2499374027666078e-06,
      "loss": 0.1919,
      "step": 100
    },
    {
      "epoch": 0.0229806598407281,
      "grad_norm": 10.54402988548413,
      "learning_rate": 1.2499361318108602e-06,
      "loss": 0.2695,
      "step": 101
    },
    {
      "epoch": 0.023208191126279865,
      "grad_norm": 6.4464740357818116,
      "learning_rate": 1.2499348480825627e-06,
      "loss": 0.1883,
      "step": 102
    },
    {
      "epoch": 0.023435722411831627,
      "grad_norm": 5.7228283849137895,
      "learning_rate": 1.2499335515817413e-06,
      "loss": 0.225,
      "step": 103
    },
    {
      "epoch": 0.02366325369738339,
      "grad_norm": 8.575195167369158,
      "learning_rate": 1.2499322423084226e-06,
      "loss": 0.1988,
      "step": 104
    },
    {
      "epoch": 0.023890784982935155,
      "grad_norm": 5.524822469569831,
      "learning_rate": 1.2499309202626336e-06,
      "loss": 0.1362,
      "step": 105
    },
    {
      "epoch": 0.024118316268486917,
      "grad_norm": 1.4259194554286314,
      "learning_rate": 1.249929585444401e-06,
      "loss": 0.1341,
      "step": 106
    },
    {
      "epoch": 0.02434584755403868,
      "grad_norm": 5.569399731315438,
      "learning_rate": 1.2499282378537522e-06,
      "loss": 0.1823,
      "step": 107
    },
    {
      "epoch": 0.024573378839590442,
      "grad_norm": 5.131038290322419,
      "learning_rate": 1.2499268774907144e-06,
      "loss": 0.1674,
      "step": 108
    },
    {
      "epoch": 0.024800910125142208,
      "grad_norm": 2.9740215362829368,
      "learning_rate": 1.249925504355316e-06,
      "loss": 0.1443,
      "step": 109
    },
    {
      "epoch": 0.02502844141069397,
      "grad_norm": 7.125610878241638,
      "learning_rate": 1.2499241184475848e-06,
      "loss": 0.1993,
      "step": 110
    },
    {
      "epoch": 0.025255972696245733,
      "grad_norm": 3.5104920582246284,
      "learning_rate": 1.249922719767549e-06,
      "loss": 0.1387,
      "step": 111
    },
    {
      "epoch": 0.0254835039817975,
      "grad_norm": 15.180689323576399,
      "learning_rate": 1.2499213083152374e-06,
      "loss": 0.1609,
      "step": 112
    },
    {
      "epoch": 0.02571103526734926,
      "grad_norm": 2.6467486780240077,
      "learning_rate": 1.2499198840906787e-06,
      "loss": 0.0766,
      "step": 113
    },
    {
      "epoch": 0.025938566552901023,
      "grad_norm": 6.947833673299234,
      "learning_rate": 1.249918447093902e-06,
      "loss": 0.1988,
      "step": 114
    },
    {
      "epoch": 0.026166097838452786,
      "grad_norm": 3.236155694827761,
      "learning_rate": 1.249916997324937e-06,
      "loss": 0.2822,
      "step": 115
    },
    {
      "epoch": 0.02639362912400455,
      "grad_norm": 4.424229361394889,
      "learning_rate": 1.2499155347838129e-06,
      "loss": 0.2639,
      "step": 116
    },
    {
      "epoch": 0.026621160409556314,
      "grad_norm": 6.7125880752306,
      "learning_rate": 1.2499140594705596e-06,
      "loss": 0.1758,
      "step": 117
    },
    {
      "epoch": 0.026848691695108076,
      "grad_norm": 12.978485247890044,
      "learning_rate": 1.2499125713852076e-06,
      "loss": 0.2966,
      "step": 118
    },
    {
      "epoch": 0.027076222980659842,
      "grad_norm": 2.4562187666064297,
      "learning_rate": 1.2499110705277869e-06,
      "loss": 0.1317,
      "step": 119
    },
    {
      "epoch": 0.027303754266211604,
      "grad_norm": 2.450514697648912,
      "learning_rate": 1.2499095568983284e-06,
      "loss": 0.2491,
      "step": 120
    },
    {
      "epoch": 0.027531285551763367,
      "grad_norm": 2.962900989508568,
      "learning_rate": 1.2499080304968634e-06,
      "loss": 0.1782,
      "step": 121
    },
    {
      "epoch": 0.027758816837315133,
      "grad_norm": 4.706451675787787,
      "learning_rate": 1.2499064913234222e-06,
      "loss": 0.2063,
      "step": 122
    },
    {
      "epoch": 0.027986348122866895,
      "grad_norm": 4.848247166198472,
      "learning_rate": 1.249904939378037e-06,
      "loss": 0.1873,
      "step": 123
    },
    {
      "epoch": 0.028213879408418657,
      "grad_norm": 5.57275566955423,
      "learning_rate": 1.2499033746607395e-06,
      "loss": 0.2362,
      "step": 124
    },
    {
      "epoch": 0.02844141069397042,
      "grad_norm": 4.528761927217566,
      "learning_rate": 1.2499017971715614e-06,
      "loss": 0.2686,
      "step": 125
    },
    {
      "epoch": 0.028668941979522185,
      "grad_norm": 7.35859467900191,
      "learning_rate": 1.2499002069105348e-06,
      "loss": 0.275,
      "step": 126
    },
    {
      "epoch": 0.028896473265073948,
      "grad_norm": 4.494727686955716,
      "learning_rate": 1.2498986038776926e-06,
      "loss": 0.1759,
      "step": 127
    },
    {
      "epoch": 0.02912400455062571,
      "grad_norm": 7.273216392666622,
      "learning_rate": 1.2498969880730671e-06,
      "loss": 0.2159,
      "step": 128
    },
    {
      "epoch": 0.029351535836177476,
      "grad_norm": 4.955227920384567,
      "learning_rate": 1.249895359496692e-06,
      "loss": 0.1888,
      "step": 129
    },
    {
      "epoch": 0.02957906712172924,
      "grad_norm": 6.321445200949685,
      "learning_rate": 1.2498937181486e-06,
      "loss": 0.3007,
      "step": 130
    },
    {
      "epoch": 0.029806598407281,
      "grad_norm": 2.76312902269676,
      "learning_rate": 1.2498920640288248e-06,
      "loss": 0.2442,
      "step": 131
    },
    {
      "epoch": 0.030034129692832763,
      "grad_norm": 56.774720129580295,
      "learning_rate": 1.2498903971374005e-06,
      "loss": 0.223,
      "step": 132
    },
    {
      "epoch": 0.03026166097838453,
      "grad_norm": 3.9468490187056324,
      "learning_rate": 1.2498887174743606e-06,
      "loss": 0.2504,
      "step": 133
    },
    {
      "epoch": 0.03048919226393629,
      "grad_norm": 3.9118814976883542,
      "learning_rate": 1.24988702503974e-06,
      "loss": 0.1939,
      "step": 134
    },
    {
      "epoch": 0.030716723549488054,
      "grad_norm": 3.7837188268010506,
      "learning_rate": 1.2498853198335728e-06,
      "loss": 0.2199,
      "step": 135
    },
    {
      "epoch": 0.03094425483503982,
      "grad_norm": 4.0297942240817175,
      "learning_rate": 1.2498836018558942e-06,
      "loss": 0.1566,
      "step": 136
    },
    {
      "epoch": 0.031171786120591582,
      "grad_norm": 3.4754550482446698,
      "learning_rate": 1.2498818711067392e-06,
      "loss": 0.2666,
      "step": 137
    },
    {
      "epoch": 0.031399317406143344,
      "grad_norm": 3.864651244769,
      "learning_rate": 1.2498801275861433e-06,
      "loss": 0.1173,
      "step": 138
    },
    {
      "epoch": 0.03162684869169511,
      "grad_norm": 8.216814820623972,
      "learning_rate": 1.2498783712941418e-06,
      "loss": 0.1879,
      "step": 139
    },
    {
      "epoch": 0.03185437997724687,
      "grad_norm": 3.637457358045326,
      "learning_rate": 1.2498766022307709e-06,
      "loss": 0.2047,
      "step": 140
    },
    {
      "epoch": 0.032081911262798635,
      "grad_norm": 2.58051980801193,
      "learning_rate": 1.2498748203960665e-06,
      "loss": 0.1008,
      "step": 141
    },
    {
      "epoch": 0.0323094425483504,
      "grad_norm": 3.8775724824241764,
      "learning_rate": 1.2498730257900655e-06,
      "loss": 0.2042,
      "step": 142
    },
    {
      "epoch": 0.03253697383390216,
      "grad_norm": 5.772591680829651,
      "learning_rate": 1.249871218412804e-06,
      "loss": 0.2352,
      "step": 143
    },
    {
      "epoch": 0.032764505119453925,
      "grad_norm": 2.210254874393301,
      "learning_rate": 1.2498693982643192e-06,
      "loss": 0.1803,
      "step": 144
    },
    {
      "epoch": 0.03299203640500569,
      "grad_norm": 6.540771980552272,
      "learning_rate": 1.2498675653446485e-06,
      "loss": 0.2304,
      "step": 145
    },
    {
      "epoch": 0.03321956769055745,
      "grad_norm": 2.904522388367919,
      "learning_rate": 1.249865719653829e-06,
      "loss": 0.1707,
      "step": 146
    },
    {
      "epoch": 0.033447098976109216,
      "grad_norm": 9.318986716894935,
      "learning_rate": 1.2498638611918985e-06,
      "loss": 0.2038,
      "step": 147
    },
    {
      "epoch": 0.03367463026166098,
      "grad_norm": 9.58516027118141,
      "learning_rate": 1.249861989958895e-06,
      "loss": 0.2357,
      "step": 148
    },
    {
      "epoch": 0.03390216154721274,
      "grad_norm": 3.559770501878285,
      "learning_rate": 1.2498601059548572e-06,
      "loss": 0.1613,
      "step": 149
    },
    {
      "epoch": 0.034129692832764506,
      "grad_norm": 3.348814329958542,
      "learning_rate": 1.2498582091798228e-06,
      "loss": 0.2016,
      "step": 150
    },
    {
      "epoch": 0.034357224118316265,
      "grad_norm": 6.375342543891093,
      "learning_rate": 1.2498562996338312e-06,
      "loss": 0.2231,
      "step": 151
    },
    {
      "epoch": 0.03458475540386803,
      "grad_norm": 7.488809251815451,
      "learning_rate": 1.249854377316921e-06,
      "loss": 0.1819,
      "step": 152
    },
    {
      "epoch": 0.0348122866894198,
      "grad_norm": 2.508487580474721,
      "learning_rate": 1.2498524422291319e-06,
      "loss": 0.182,
      "step": 153
    },
    {
      "epoch": 0.035039817974971556,
      "grad_norm": 3.656563964135558,
      "learning_rate": 1.2498504943705033e-06,
      "loss": 0.165,
      "step": 154
    },
    {
      "epoch": 0.03526734926052332,
      "grad_norm": 2.771070563762278,
      "learning_rate": 1.249848533741075e-06,
      "loss": 0.2569,
      "step": 155
    },
    {
      "epoch": 0.03549488054607509,
      "grad_norm": 5.610529774003187,
      "learning_rate": 1.2498465603408865e-06,
      "loss": 0.2873,
      "step": 156
    },
    {
      "epoch": 0.035722411831626846,
      "grad_norm": 3.6657793262286638,
      "learning_rate": 1.2498445741699792e-06,
      "loss": 0.1086,
      "step": 157
    },
    {
      "epoch": 0.03594994311717861,
      "grad_norm": 11.136381961854878,
      "learning_rate": 1.249842575228393e-06,
      "loss": 0.1653,
      "step": 158
    },
    {
      "epoch": 0.03617747440273038,
      "grad_norm": 4.607920317694178,
      "learning_rate": 1.249840563516169e-06,
      "loss": 0.1816,
      "step": 159
    },
    {
      "epoch": 0.03640500568828214,
      "grad_norm": 4.765507333684582,
      "learning_rate": 1.249838539033348e-06,
      "loss": 0.1735,
      "step": 160
    },
    {
      "epoch": 0.0366325369738339,
      "grad_norm": 3.024559515436515,
      "learning_rate": 1.2498365017799715e-06,
      "loss": 0.0997,
      "step": 161
    },
    {
      "epoch": 0.03686006825938567,
      "grad_norm": 3.0006086205585594,
      "learning_rate": 1.2498344517560815e-06,
      "loss": 0.2742,
      "step": 162
    },
    {
      "epoch": 0.03708759954493743,
      "grad_norm": 4.390575337778858,
      "learning_rate": 1.2498323889617198e-06,
      "loss": 0.2112,
      "step": 163
    },
    {
      "epoch": 0.03731513083048919,
      "grad_norm": 4.987032274568943,
      "learning_rate": 1.2498303133969281e-06,
      "loss": 0.2282,
      "step": 164
    },
    {
      "epoch": 0.03754266211604096,
      "grad_norm": 3.813775711394782,
      "learning_rate": 1.2498282250617492e-06,
      "loss": 0.1944,
      "step": 165
    },
    {
      "epoch": 0.03777019340159272,
      "grad_norm": 3.361678763128891,
      "learning_rate": 1.2498261239562257e-06,
      "loss": 0.2018,
      "step": 166
    },
    {
      "epoch": 0.037997724687144484,
      "grad_norm": 4.992072192203259,
      "learning_rate": 1.2498240100804005e-06,
      "loss": 0.2089,
      "step": 167
    },
    {
      "epoch": 0.03822525597269624,
      "grad_norm": 8.050790934059092,
      "learning_rate": 1.249821883434317e-06,
      "loss": 0.2696,
      "step": 168
    },
    {
      "epoch": 0.03845278725824801,
      "grad_norm": 2.642297340192281,
      "learning_rate": 1.2498197440180182e-06,
      "loss": 0.2691,
      "step": 169
    },
    {
      "epoch": 0.038680318543799774,
      "grad_norm": 3.35790306734272,
      "learning_rate": 1.2498175918315484e-06,
      "loss": 0.1851,
      "step": 170
    },
    {
      "epoch": 0.03890784982935153,
      "grad_norm": 3.524642269348137,
      "learning_rate": 1.2498154268749513e-06,
      "loss": 0.2276,
      "step": 171
    },
    {
      "epoch": 0.0391353811149033,
      "grad_norm": 2.188667506818875,
      "learning_rate": 1.249813249148271e-06,
      "loss": 0.1616,
      "step": 172
    },
    {
      "epoch": 0.039362912400455065,
      "grad_norm": 5.1958946099491845,
      "learning_rate": 1.2498110586515525e-06,
      "loss": 0.1987,
      "step": 173
    },
    {
      "epoch": 0.039590443686006824,
      "grad_norm": 5.09328084896296,
      "learning_rate": 1.2498088553848398e-06,
      "loss": 0.195,
      "step": 174
    },
    {
      "epoch": 0.03981797497155859,
      "grad_norm": 2.8290595777512952,
      "learning_rate": 1.2498066393481787e-06,
      "loss": 0.1568,
      "step": 175
    },
    {
      "epoch": 0.040045506257110355,
      "grad_norm": 2.360697357040943,
      "learning_rate": 1.249804410541614e-06,
      "loss": 0.2065,
      "step": 176
    },
    {
      "epoch": 0.040273037542662114,
      "grad_norm": 4.718810327826489,
      "learning_rate": 1.2498021689651916e-06,
      "loss": 0.2003,
      "step": 177
    },
    {
      "epoch": 0.04050056882821388,
      "grad_norm": 2.6458436624930237,
      "learning_rate": 1.249799914618957e-06,
      "loss": 0.1589,
      "step": 178
    },
    {
      "epoch": 0.040728100113765646,
      "grad_norm": 3.289621635927127,
      "learning_rate": 1.2497976475029566e-06,
      "loss": 0.1905,
      "step": 179
    },
    {
      "epoch": 0.040955631399317405,
      "grad_norm": 2.7547654896260028,
      "learning_rate": 1.2497953676172364e-06,
      "loss": 0.1538,
      "step": 180
    },
    {
      "epoch": 0.04118316268486917,
      "grad_norm": 4.715970073162376,
      "learning_rate": 1.2497930749618431e-06,
      "loss": 0.1297,
      "step": 181
    },
    {
      "epoch": 0.04141069397042093,
      "grad_norm": 13.147614048372157,
      "learning_rate": 1.2497907695368238e-06,
      "loss": 0.164,
      "step": 182
    },
    {
      "epoch": 0.041638225255972695,
      "grad_norm": 2.692225418023433,
      "learning_rate": 1.2497884513422253e-06,
      "loss": 0.2537,
      "step": 183
    },
    {
      "epoch": 0.04186575654152446,
      "grad_norm": 5.166049507007355,
      "learning_rate": 1.249786120378095e-06,
      "loss": 0.074,
      "step": 184
    },
    {
      "epoch": 0.04209328782707622,
      "grad_norm": 3.0648916024092596,
      "learning_rate": 1.2497837766444806e-06,
      "loss": 0.1639,
      "step": 185
    },
    {
      "epoch": 0.042320819112627986,
      "grad_norm": 4.567688921451397,
      "learning_rate": 1.2497814201414304e-06,
      "loss": 0.2905,
      "step": 186
    },
    {
      "epoch": 0.04254835039817975,
      "grad_norm": 3.970377559361967,
      "learning_rate": 1.249779050868992e-06,
      "loss": 0.2001,
      "step": 187
    },
    {
      "epoch": 0.04277588168373151,
      "grad_norm": 2.2768846909587763,
      "learning_rate": 1.249776668827214e-06,
      "loss": 0.0951,
      "step": 188
    },
    {
      "epoch": 0.043003412969283276,
      "grad_norm": 6.438142708090974,
      "learning_rate": 1.249774274016145e-06,
      "loss": 0.203,
      "step": 189
    },
    {
      "epoch": 0.04323094425483504,
      "grad_norm": 2.4175466744317977,
      "learning_rate": 1.2497718664358341e-06,
      "loss": 0.1713,
      "step": 190
    },
    {
      "epoch": 0.0434584755403868,
      "grad_norm": 4.37204480901975,
      "learning_rate": 1.2497694460863307e-06,
      "loss": 0.2986,
      "step": 191
    },
    {
      "epoch": 0.04368600682593857,
      "grad_norm": 3.2046762676937255,
      "learning_rate": 1.2497670129676838e-06,
      "loss": 0.1288,
      "step": 192
    },
    {
      "epoch": 0.04391353811149033,
      "grad_norm": 3.901472238917995,
      "learning_rate": 1.2497645670799436e-06,
      "loss": 0.1291,
      "step": 193
    },
    {
      "epoch": 0.04414106939704209,
      "grad_norm": 3.891177273974114,
      "learning_rate": 1.2497621084231595e-06,
      "loss": 0.1165,
      "step": 194
    },
    {
      "epoch": 0.04436860068259386,
      "grad_norm": 3.831124951630966,
      "learning_rate": 1.2497596369973823e-06,
      "loss": 0.175,
      "step": 195
    },
    {
      "epoch": 0.04459613196814562,
      "grad_norm": 7.137497588920377,
      "learning_rate": 1.2497571528026623e-06,
      "loss": 0.2319,
      "step": 196
    },
    {
      "epoch": 0.04482366325369738,
      "grad_norm": 2.9787063992991256,
      "learning_rate": 1.2497546558390503e-06,
      "loss": 0.2044,
      "step": 197
    },
    {
      "epoch": 0.04505119453924915,
      "grad_norm": 2.5728244375494413,
      "learning_rate": 1.2497521461065973e-06,
      "loss": 0.1395,
      "step": 198
    },
    {
      "epoch": 0.04527872582480091,
      "grad_norm": 7.102221321561537,
      "learning_rate": 1.2497496236053547e-06,
      "loss": 0.1969,
      "step": 199
    },
    {
      "epoch": 0.04550625711035267,
      "grad_norm": 2.579422809989494,
      "learning_rate": 1.2497470883353738e-06,
      "loss": 0.1019,
      "step": 200
    },
    {
      "epoch": 0.04573378839590444,
      "grad_norm": 4.340132040430137,
      "learning_rate": 1.2497445402967068e-06,
      "loss": 0.241,
      "step": 201
    },
    {
      "epoch": 0.0459613196814562,
      "grad_norm": 2.2195665044126276,
      "learning_rate": 1.2497419794894053e-06,
      "loss": 0.2059,
      "step": 202
    },
    {
      "epoch": 0.04618885096700796,
      "grad_norm": 3.274345001247324,
      "learning_rate": 1.249739405913522e-06,
      "loss": 0.1328,
      "step": 203
    },
    {
      "epoch": 0.04641638225255973,
      "grad_norm": 2.527264534705696,
      "learning_rate": 1.2497368195691095e-06,
      "loss": 0.1408,
      "step": 204
    },
    {
      "epoch": 0.04664391353811149,
      "grad_norm": 3.306757570747259,
      "learning_rate": 1.2497342204562205e-06,
      "loss": 0.2233,
      "step": 205
    },
    {
      "epoch": 0.046871444823663254,
      "grad_norm": 3.6647451852915336,
      "learning_rate": 1.2497316085749081e-06,
      "loss": 0.1239,
      "step": 206
    },
    {
      "epoch": 0.04709897610921502,
      "grad_norm": 4.68508784917087,
      "learning_rate": 1.249728983925226e-06,
      "loss": 0.1707,
      "step": 207
    },
    {
      "epoch": 0.04732650739476678,
      "grad_norm": 3.18438034976801,
      "learning_rate": 1.2497263465072274e-06,
      "loss": 0.1325,
      "step": 208
    },
    {
      "epoch": 0.047554038680318544,
      "grad_norm": 2.665536371480516,
      "learning_rate": 1.2497236963209663e-06,
      "loss": 0.247,
      "step": 209
    },
    {
      "epoch": 0.04778156996587031,
      "grad_norm": 3.6305897675111822,
      "learning_rate": 1.2497210333664972e-06,
      "loss": 0.1399,
      "step": 210
    },
    {
      "epoch": 0.04800910125142207,
      "grad_norm": 3.427786312260657,
      "learning_rate": 1.2497183576438743e-06,
      "loss": 0.1595,
      "step": 211
    },
    {
      "epoch": 0.048236632536973835,
      "grad_norm": 3.501593030667954,
      "learning_rate": 1.2497156691531523e-06,
      "loss": 0.1895,
      "step": 212
    },
    {
      "epoch": 0.048464163822525594,
      "grad_norm": 2.29399983953313,
      "learning_rate": 1.249712967894386e-06,
      "loss": 0.1273,
      "step": 213
    },
    {
      "epoch": 0.04869169510807736,
      "grad_norm": 4.248497703608046,
      "learning_rate": 1.2497102538676308e-06,
      "loss": 0.2118,
      "step": 214
    },
    {
      "epoch": 0.048919226393629126,
      "grad_norm": 5.009911727752511,
      "learning_rate": 1.249707527072942e-06,
      "loss": 0.1533,
      "step": 215
    },
    {
      "epoch": 0.049146757679180884,
      "grad_norm": 3.254064879259487,
      "learning_rate": 1.2497047875103757e-06,
      "loss": 0.3042,
      "step": 216
    },
    {
      "epoch": 0.04937428896473265,
      "grad_norm": 2.700363753095535,
      "learning_rate": 1.2497020351799875e-06,
      "loss": 0.1933,
      "step": 217
    },
    {
      "epoch": 0.049601820250284416,
      "grad_norm": 2.2159854350533763,
      "learning_rate": 1.2496992700818335e-06,
      "loss": 0.1733,
      "step": 218
    },
    {
      "epoch": 0.049829351535836175,
      "grad_norm": 6.438623712108173,
      "learning_rate": 1.249696492215971e-06,
      "loss": 0.2233,
      "step": 219
    },
    {
      "epoch": 0.05005688282138794,
      "grad_norm": 3.6403163135182552,
      "learning_rate": 1.249693701582456e-06,
      "loss": 0.1542,
      "step": 220
    },
    {
      "epoch": 0.05028441410693971,
      "grad_norm": 3.280631643810882,
      "learning_rate": 1.2496908981813458e-06,
      "loss": 0.1799,
      "step": 221
    },
    {
      "epoch": 0.050511945392491465,
      "grad_norm": 2.5684306853319687,
      "learning_rate": 1.2496880820126977e-06,
      "loss": 0.2051,
      "step": 222
    },
    {
      "epoch": 0.05073947667804323,
      "grad_norm": 2.7401430199461108,
      "learning_rate": 1.2496852530765695e-06,
      "loss": 0.1828,
      "step": 223
    },
    {
      "epoch": 0.050967007963595,
      "grad_norm": 2.95485123311806,
      "learning_rate": 1.2496824113730186e-06,
      "loss": 0.2602,
      "step": 224
    },
    {
      "epoch": 0.051194539249146756,
      "grad_norm": 2.5679914292312738,
      "learning_rate": 1.2496795569021033e-06,
      "loss": 0.1838,
      "step": 225
    },
    {
      "epoch": 0.05142207053469852,
      "grad_norm": 4.2106953289503055,
      "learning_rate": 1.2496766896638819e-06,
      "loss": 0.1831,
      "step": 226
    },
    {
      "epoch": 0.05164960182025029,
      "grad_norm": 2.4133590857510603,
      "learning_rate": 1.249673809658413e-06,
      "loss": 0.1869,
      "step": 227
    },
    {
      "epoch": 0.05187713310580205,
      "grad_norm": 2.009672236932174,
      "learning_rate": 1.2496709168857555e-06,
      "loss": 0.1297,
      "step": 228
    },
    {
      "epoch": 0.05210466439135381,
      "grad_norm": 2.57569428799923,
      "learning_rate": 1.2496680113459683e-06,
      "loss": 0.1887,
      "step": 229
    },
    {
      "epoch": 0.05233219567690557,
      "grad_norm": 3.3094428680937464,
      "learning_rate": 1.2496650930391113e-06,
      "loss": 0.2654,
      "step": 230
    },
    {
      "epoch": 0.05255972696245734,
      "grad_norm": 2.847650693015463,
      "learning_rate": 1.2496621619652435e-06,
      "loss": 0.1704,
      "step": 231
    },
    {
      "epoch": 0.0527872582480091,
      "grad_norm": 2.9888611972362167,
      "learning_rate": 1.2496592181244253e-06,
      "loss": 0.1601,
      "step": 232
    },
    {
      "epoch": 0.05301478953356086,
      "grad_norm": 2.08648737949565,
      "learning_rate": 1.249656261516717e-06,
      "loss": 0.1953,
      "step": 233
    },
    {
      "epoch": 0.05324232081911263,
      "grad_norm": 2.531082669247976,
      "learning_rate": 1.2496532921421781e-06,
      "loss": 0.1717,
      "step": 234
    },
    {
      "epoch": 0.053469852104664393,
      "grad_norm": 2.7509933573597896,
      "learning_rate": 1.2496503100008704e-06,
      "loss": 0.2469,
      "step": 235
    },
    {
      "epoch": 0.05369738339021615,
      "grad_norm": 3.5155091690123923,
      "learning_rate": 1.249647315092854e-06,
      "loss": 0.1314,
      "step": 236
    },
    {
      "epoch": 0.05392491467576792,
      "grad_norm": 3.2336581137529135,
      "learning_rate": 1.2496443074181905e-06,
      "loss": 0.1479,
      "step": 237
    },
    {
      "epoch": 0.054152445961319684,
      "grad_norm": 1.9727228995954271,
      "learning_rate": 1.2496412869769415e-06,
      "loss": 0.1072,
      "step": 238
    },
    {
      "epoch": 0.05437997724687144,
      "grad_norm": 9.030280638699303,
      "learning_rate": 1.2496382537691686e-06,
      "loss": 0.1993,
      "step": 239
    },
    {
      "epoch": 0.05460750853242321,
      "grad_norm": 2.012237999972146,
      "learning_rate": 1.2496352077949336e-06,
      "loss": 0.2021,
      "step": 240
    },
    {
      "epoch": 0.054835039817974975,
      "grad_norm": 2.875480352440569,
      "learning_rate": 1.249632149054299e-06,
      "loss": 0.1071,
      "step": 241
    },
    {
      "epoch": 0.05506257110352673,
      "grad_norm": 3.027078266755971,
      "learning_rate": 1.249629077547327e-06,
      "loss": 0.2081,
      "step": 242
    },
    {
      "epoch": 0.0552901023890785,
      "grad_norm": 3.212706521917931,
      "learning_rate": 1.2496259932740813e-06,
      "loss": 0.235,
      "step": 243
    },
    {
      "epoch": 0.055517633674630265,
      "grad_norm": 1.5899391805286471,
      "learning_rate": 1.2496228962346236e-06,
      "loss": 0.1498,
      "step": 244
    },
    {
      "epoch": 0.055745164960182024,
      "grad_norm": 2.252897408154709,
      "learning_rate": 1.249619786429018e-06,
      "loss": 0.0875,
      "step": 245
    },
    {
      "epoch": 0.05597269624573379,
      "grad_norm": 1.7851217439709355,
      "learning_rate": 1.2496166638573278e-06,
      "loss": 0.163,
      "step": 246
    },
    {
      "epoch": 0.05620022753128555,
      "grad_norm": 4.076208180076855,
      "learning_rate": 1.2496135285196172e-06,
      "loss": 0.1298,
      "step": 247
    },
    {
      "epoch": 0.056427758816837315,
      "grad_norm": 8.235783447081577,
      "learning_rate": 1.2496103804159497e-06,
      "loss": 0.1994,
      "step": 248
    },
    {
      "epoch": 0.05665529010238908,
      "grad_norm": 4.224863516307238,
      "learning_rate": 1.2496072195463904e-06,
      "loss": 0.1917,
      "step": 249
    },
    {
      "epoch": 0.05688282138794084,
      "grad_norm": 2.600108393969465,
      "learning_rate": 1.249604045911003e-06,
      "loss": 0.1728,
      "step": 250
    },
    {
      "epoch": 0.057110352673492605,
      "grad_norm": 4.193154020881599,
      "learning_rate": 1.249600859509853e-06,
      "loss": 0.1469,
      "step": 251
    },
    {
      "epoch": 0.05733788395904437,
      "grad_norm": 3.3023049454358957,
      "learning_rate": 1.2495976603430054e-06,
      "loss": 0.3015,
      "step": 252
    },
    {
      "epoch": 0.05756541524459613,
      "grad_norm": 2.1335803404002815,
      "learning_rate": 1.2495944484105254e-06,
      "loss": 0.1237,
      "step": 253
    },
    {
      "epoch": 0.057792946530147896,
      "grad_norm": 5.342229724882705,
      "learning_rate": 1.2495912237124787e-06,
      "loss": 0.1134,
      "step": 254
    },
    {
      "epoch": 0.05802047781569966,
      "grad_norm": 4.8799722775641765,
      "learning_rate": 1.2495879862489312e-06,
      "loss": 0.1865,
      "step": 255
    },
    {
      "epoch": 0.05824800910125142,
      "grad_norm": 5.731543371657422,
      "learning_rate": 1.2495847360199495e-06,
      "loss": 0.2008,
      "step": 256
    },
    {
      "epoch": 0.058475540386803186,
      "grad_norm": 2.313924736001694,
      "learning_rate": 1.2495814730255993e-06,
      "loss": 0.1361,
      "step": 257
    },
    {
      "epoch": 0.05870307167235495,
      "grad_norm": 1.3942403935107488,
      "learning_rate": 1.2495781972659479e-06,
      "loss": 0.1103,
      "step": 258
    },
    {
      "epoch": 0.05893060295790671,
      "grad_norm": 1.8635600367271647,
      "learning_rate": 1.2495749087410618e-06,
      "loss": 0.1736,
      "step": 259
    },
    {
      "epoch": 0.05915813424345848,
      "grad_norm": 3.934800507138662,
      "learning_rate": 1.2495716074510087e-06,
      "loss": 0.1706,
      "step": 260
    },
    {
      "epoch": 0.059385665529010236,
      "grad_norm": 7.067913001607123,
      "learning_rate": 1.2495682933958555e-06,
      "loss": 0.1963,
      "step": 261
    },
    {
      "epoch": 0.059613196814562,
      "grad_norm": 2.692944909371077,
      "learning_rate": 1.2495649665756705e-06,
      "loss": 0.2486,
      "step": 262
    },
    {
      "epoch": 0.05984072810011377,
      "grad_norm": 2.4930462253175305,
      "learning_rate": 1.2495616269905212e-06,
      "loss": 0.1447,
      "step": 263
    },
    {
      "epoch": 0.060068259385665526,
      "grad_norm": 1.7948148568482771,
      "learning_rate": 1.2495582746404762e-06,
      "loss": 0.0994,
      "step": 264
    },
    {
      "epoch": 0.06029579067121729,
      "grad_norm": 2.021876252112372,
      "learning_rate": 1.249554909525604e-06,
      "loss": 0.1386,
      "step": 265
    },
    {
      "epoch": 0.06052332195676906,
      "grad_norm": 2.069960058640526,
      "learning_rate": 1.249551531645973e-06,
      "loss": 0.1866,
      "step": 266
    },
    {
      "epoch": 0.06075085324232082,
      "grad_norm": 8.549797598789278,
      "learning_rate": 1.2495481410016527e-06,
      "loss": 0.3426,
      "step": 267
    },
    {
      "epoch": 0.06097838452787258,
      "grad_norm": 6.033524800668443,
      "learning_rate": 1.2495447375927122e-06,
      "loss": 0.2039,
      "step": 268
    },
    {
      "epoch": 0.06120591581342435,
      "grad_norm": 3.3984019223631656,
      "learning_rate": 1.2495413214192209e-06,
      "loss": 0.1562,
      "step": 269
    },
    {
      "epoch": 0.06143344709897611,
      "grad_norm": 2.78909231360363,
      "learning_rate": 1.2495378924812486e-06,
      "loss": 0.2056,
      "step": 270
    },
    {
      "epoch": 0.06166097838452787,
      "grad_norm": 5.781877877875473,
      "learning_rate": 1.2495344507788662e-06,
      "loss": 0.2293,
      "step": 271
    },
    {
      "epoch": 0.06188850967007964,
      "grad_norm": 2.3180826263300607,
      "learning_rate": 1.249530996312143e-06,
      "loss": 0.1489,
      "step": 272
    },
    {
      "epoch": 0.0621160409556314,
      "grad_norm": 7.2617460886104475,
      "learning_rate": 1.2495275290811499e-06,
      "loss": 0.2172,
      "step": 273
    },
    {
      "epoch": 0.062343572241183164,
      "grad_norm": 2.1316035699431173,
      "learning_rate": 1.2495240490859581e-06,
      "loss": 0.2176,
      "step": 274
    },
    {
      "epoch": 0.06257110352673492,
      "grad_norm": 2.5542857532037235,
      "learning_rate": 1.2495205563266384e-06,
      "loss": 0.1521,
      "step": 275
    },
    {
      "epoch": 0.06279863481228669,
      "grad_norm": 3.5696131149812644,
      "learning_rate": 1.2495170508032624e-06,
      "loss": 0.2817,
      "step": 276
    },
    {
      "epoch": 0.06302616609783845,
      "grad_norm": 4.055804927691344,
      "learning_rate": 1.2495135325159015e-06,
      "loss": 0.1484,
      "step": 277
    },
    {
      "epoch": 0.06325369738339022,
      "grad_norm": 2.830287596995614,
      "learning_rate": 1.2495100014646277e-06,
      "loss": 0.1714,
      "step": 278
    },
    {
      "epoch": 0.06348122866894199,
      "grad_norm": 5.2323794095215685,
      "learning_rate": 1.2495064576495134e-06,
      "loss": 0.3121,
      "step": 279
    },
    {
      "epoch": 0.06370875995449374,
      "grad_norm": 2.500465425444752,
      "learning_rate": 1.2495029010706306e-06,
      "loss": 0.1005,
      "step": 280
    },
    {
      "epoch": 0.0639362912400455,
      "grad_norm": 2.7474098845449433,
      "learning_rate": 1.2494993317280524e-06,
      "loss": 0.1755,
      "step": 281
    },
    {
      "epoch": 0.06416382252559727,
      "grad_norm": 3.1110646620479967,
      "learning_rate": 1.2494957496218516e-06,
      "loss": 0.194,
      "step": 282
    },
    {
      "epoch": 0.06439135381114904,
      "grad_norm": 1.162926170243262,
      "learning_rate": 1.2494921547521013e-06,
      "loss": 0.1667,
      "step": 283
    },
    {
      "epoch": 0.0646188850967008,
      "grad_norm": 2.034958588386092,
      "learning_rate": 1.249488547118875e-06,
      "loss": 0.1031,
      "step": 284
    },
    {
      "epoch": 0.06484641638225255,
      "grad_norm": 2.8585727096596214,
      "learning_rate": 1.2494849267222466e-06,
      "loss": 0.1199,
      "step": 285
    },
    {
      "epoch": 0.06507394766780432,
      "grad_norm": 2.3756686418598916,
      "learning_rate": 1.24948129356229e-06,
      "loss": 0.203,
      "step": 286
    },
    {
      "epoch": 0.06530147895335608,
      "grad_norm": 6.080154909085321,
      "learning_rate": 1.2494776476390793e-06,
      "loss": 0.2723,
      "step": 287
    },
    {
      "epoch": 0.06552901023890785,
      "grad_norm": 3.1578927707769684,
      "learning_rate": 1.2494739889526894e-06,
      "loss": 0.1218,
      "step": 288
    },
    {
      "epoch": 0.06575654152445962,
      "grad_norm": 2.7745317736308373,
      "learning_rate": 1.2494703175031946e-06,
      "loss": 0.194,
      "step": 289
    },
    {
      "epoch": 0.06598407281001138,
      "grad_norm": 2.872306438815133,
      "learning_rate": 1.2494666332906702e-06,
      "loss": 0.143,
      "step": 290
    },
    {
      "epoch": 0.06621160409556313,
      "grad_norm": 2.2661659384858277,
      "learning_rate": 1.2494629363151916e-06,
      "loss": 0.1497,
      "step": 291
    },
    {
      "epoch": 0.0664391353811149,
      "grad_norm": 2.7978250826969586,
      "learning_rate": 1.2494592265768343e-06,
      "loss": 0.1817,
      "step": 292
    },
    {
      "epoch": 0.06666666666666667,
      "grad_norm": 2.9435086338480496,
      "learning_rate": 1.2494555040756737e-06,
      "loss": 0.1195,
      "step": 293
    },
    {
      "epoch": 0.06689419795221843,
      "grad_norm": 2.525871560805257,
      "learning_rate": 1.2494517688117867e-06,
      "loss": 0.2054,
      "step": 294
    },
    {
      "epoch": 0.0671217292377702,
      "grad_norm": 3.3530486331117126,
      "learning_rate": 1.2494480207852489e-06,
      "loss": 0.1186,
      "step": 295
    },
    {
      "epoch": 0.06734926052332196,
      "grad_norm": 3.791549905681902,
      "learning_rate": 1.249444259996137e-06,
      "loss": 0.1616,
      "step": 296
    },
    {
      "epoch": 0.06757679180887372,
      "grad_norm": 2.3603348366809236,
      "learning_rate": 1.2494404864445284e-06,
      "loss": 0.1392,
      "step": 297
    },
    {
      "epoch": 0.06780432309442548,
      "grad_norm": 2.161901751847752,
      "learning_rate": 1.2494367001304996e-06,
      "loss": 0.1548,
      "step": 298
    },
    {
      "epoch": 0.06803185437997725,
      "grad_norm": 2.3978175716297634,
      "learning_rate": 1.2494329010541284e-06,
      "loss": 0.1634,
      "step": 299
    },
    {
      "epoch": 0.06825938566552901,
      "grad_norm": 5.413503442113624,
      "learning_rate": 1.2494290892154922e-06,
      "loss": 0.2876,
      "step": 300
    },
    {
      "epoch": 0.06848691695108078,
      "grad_norm": 1.904095426332445,
      "learning_rate": 1.2494252646146692e-06,
      "loss": 0.1942,
      "step": 301
    },
    {
      "epoch": 0.06871444823663253,
      "grad_norm": 2.0091735504190504,
      "learning_rate": 1.249421427251737e-06,
      "loss": 0.1403,
      "step": 302
    },
    {
      "epoch": 0.0689419795221843,
      "grad_norm": 2.6001586830103123,
      "learning_rate": 1.2494175771267748e-06,
      "loss": 0.2376,
      "step": 303
    },
    {
      "epoch": 0.06916951080773606,
      "grad_norm": 2.8009063420794265,
      "learning_rate": 1.2494137142398607e-06,
      "loss": 0.1877,
      "step": 304
    },
    {
      "epoch": 0.06939704209328783,
      "grad_norm": 2.0648464255318517,
      "learning_rate": 1.249409838591074e-06,
      "loss": 0.1462,
      "step": 305
    },
    {
      "epoch": 0.0696245733788396,
      "grad_norm": 2.6396516124770657,
      "learning_rate": 1.2494059501804937e-06,
      "loss": 0.256,
      "step": 306
    },
    {
      "epoch": 0.06985210466439136,
      "grad_norm": 2.9901343092043837,
      "learning_rate": 1.249402049008199e-06,
      "loss": 0.1483,
      "step": 307
    },
    {
      "epoch": 0.07007963594994311,
      "grad_norm": 3.0343546498099356,
      "learning_rate": 1.2493981350742704e-06,
      "loss": 0.1561,
      "step": 308
    },
    {
      "epoch": 0.07030716723549488,
      "grad_norm": 3.2148889672864636,
      "learning_rate": 1.2493942083787872e-06,
      "loss": 0.1856,
      "step": 309
    },
    {
      "epoch": 0.07053469852104664,
      "grad_norm": 2.795539793994042,
      "learning_rate": 1.2493902689218299e-06,
      "loss": 0.1294,
      "step": 310
    },
    {
      "epoch": 0.07076222980659841,
      "grad_norm": 2.1866434219410307,
      "learning_rate": 1.249386316703479e-06,
      "loss": 0.1789,
      "step": 311
    },
    {
      "epoch": 0.07098976109215017,
      "grad_norm": 4.93386744278198,
      "learning_rate": 1.2493823517238154e-06,
      "loss": 0.1529,
      "step": 312
    },
    {
      "epoch": 0.07121729237770194,
      "grad_norm": 2.127480030167813,
      "learning_rate": 1.2493783739829202e-06,
      "loss": 0.1593,
      "step": 313
    },
    {
      "epoch": 0.07144482366325369,
      "grad_norm": 2.565861378561538,
      "learning_rate": 1.2493743834808741e-06,
      "loss": 0.1442,
      "step": 314
    },
    {
      "epoch": 0.07167235494880546,
      "grad_norm": 3.129314599970171,
      "learning_rate": 1.2493703802177594e-06,
      "loss": 0.1936,
      "step": 315
    },
    {
      "epoch": 0.07189988623435722,
      "grad_norm": 4.26603531282599,
      "learning_rate": 1.2493663641936576e-06,
      "loss": 0.1343,
      "step": 316
    },
    {
      "epoch": 0.07212741751990899,
      "grad_norm": 1.778626655821605,
      "learning_rate": 1.2493623354086507e-06,
      "loss": 0.1751,
      "step": 317
    },
    {
      "epoch": 0.07235494880546076,
      "grad_norm": 2.576979617695665,
      "learning_rate": 1.2493582938628213e-06,
      "loss": 0.1405,
      "step": 318
    },
    {
      "epoch": 0.07258248009101251,
      "grad_norm": 2.528946823784448,
      "learning_rate": 1.2493542395562516e-06,
      "loss": 0.1207,
      "step": 319
    },
    {
      "epoch": 0.07281001137656427,
      "grad_norm": 1.7105561186222351,
      "learning_rate": 1.2493501724890247e-06,
      "loss": 0.1067,
      "step": 320
    },
    {
      "epoch": 0.07303754266211604,
      "grad_norm": 3.0021555230652144,
      "learning_rate": 1.249346092661224e-06,
      "loss": 0.1769,
      "step": 321
    },
    {
      "epoch": 0.0732650739476678,
      "grad_norm": 3.2473648686733787,
      "learning_rate": 1.2493420000729322e-06,
      "loss": 0.1797,
      "step": 322
    },
    {
      "epoch": 0.07349260523321957,
      "grad_norm": 2.9141882965376644,
      "learning_rate": 1.2493378947242336e-06,
      "loss": 0.1936,
      "step": 323
    },
    {
      "epoch": 0.07372013651877134,
      "grad_norm": 2.139000059452357,
      "learning_rate": 1.2493337766152119e-06,
      "loss": 0.1323,
      "step": 324
    },
    {
      "epoch": 0.07394766780432309,
      "grad_norm": 3.7562365963393773,
      "learning_rate": 1.249329645745951e-06,
      "loss": 0.1521,
      "step": 325
    },
    {
      "epoch": 0.07417519908987485,
      "grad_norm": 3.1427328506374343,
      "learning_rate": 1.2493255021165357e-06,
      "loss": 0.1426,
      "step": 326
    },
    {
      "epoch": 0.07440273037542662,
      "grad_norm": 2.5928821859504225,
      "learning_rate": 1.2493213457270504e-06,
      "loss": 0.1492,
      "step": 327
    },
    {
      "epoch": 0.07463026166097839,
      "grad_norm": 2.6116349350740773,
      "learning_rate": 1.2493171765775804e-06,
      "loss": 0.1079,
      "step": 328
    },
    {
      "epoch": 0.07485779294653015,
      "grad_norm": 2.5063754100070796,
      "learning_rate": 1.2493129946682107e-06,
      "loss": 0.1449,
      "step": 329
    },
    {
      "epoch": 0.07508532423208192,
      "grad_norm": 2.7029390289735247,
      "learning_rate": 1.2493087999990263e-06,
      "loss": 0.2012,
      "step": 330
    },
    {
      "epoch": 0.07531285551763367,
      "grad_norm": 3.168250561710959,
      "learning_rate": 1.249304592570114e-06,
      "loss": 0.135,
      "step": 331
    },
    {
      "epoch": 0.07554038680318544,
      "grad_norm": 3.358825282989208,
      "learning_rate": 1.2493003723815588e-06,
      "loss": 0.202,
      "step": 332
    },
    {
      "epoch": 0.0757679180887372,
      "grad_norm": 3.4712230061099367,
      "learning_rate": 1.2492961394334474e-06,
      "loss": 0.1796,
      "step": 333
    },
    {
      "epoch": 0.07599544937428897,
      "grad_norm": 2.7447934095202586,
      "learning_rate": 1.2492918937258663e-06,
      "loss": 0.1529,
      "step": 334
    },
    {
      "epoch": 0.07622298065984073,
      "grad_norm": 4.884489478774658,
      "learning_rate": 1.2492876352589024e-06,
      "loss": 0.1983,
      "step": 335
    },
    {
      "epoch": 0.07645051194539249,
      "grad_norm": 11.840111431867928,
      "learning_rate": 1.2492833640326424e-06,
      "loss": 0.1701,
      "step": 336
    },
    {
      "epoch": 0.07667804323094425,
      "grad_norm": 3.6493332372043032,
      "learning_rate": 1.2492790800471738e-06,
      "loss": 0.1894,
      "step": 337
    },
    {
      "epoch": 0.07690557451649602,
      "grad_norm": 2.2273861687776657,
      "learning_rate": 1.249274783302584e-06,
      "loss": 0.1168,
      "step": 338
    },
    {
      "epoch": 0.07713310580204778,
      "grad_norm": 3.0155968100929016,
      "learning_rate": 1.249270473798961e-06,
      "loss": 0.1877,
      "step": 339
    },
    {
      "epoch": 0.07736063708759955,
      "grad_norm": 3.6811309004263197,
      "learning_rate": 1.249266151536393e-06,
      "loss": 0.1841,
      "step": 340
    },
    {
      "epoch": 0.07758816837315131,
      "grad_norm": 3.3318670131929355,
      "learning_rate": 1.249261816514968e-06,
      "loss": 0.1425,
      "step": 341
    },
    {
      "epoch": 0.07781569965870307,
      "grad_norm": 1.542707864707429,
      "learning_rate": 1.2492574687347747e-06,
      "loss": 0.0954,
      "step": 342
    },
    {
      "epoch": 0.07804323094425483,
      "grad_norm": 5.219514434003638,
      "learning_rate": 1.249253108195902e-06,
      "loss": 0.1523,
      "step": 343
    },
    {
      "epoch": 0.0782707622298066,
      "grad_norm": 2.685054702258556,
      "learning_rate": 1.249248734898439e-06,
      "loss": 0.1932,
      "step": 344
    },
    {
      "epoch": 0.07849829351535836,
      "grad_norm": 3.782143044532345,
      "learning_rate": 1.2492443488424753e-06,
      "loss": 0.1782,
      "step": 345
    },
    {
      "epoch": 0.07872582480091013,
      "grad_norm": 2.987081909452687,
      "learning_rate": 1.2492399500281002e-06,
      "loss": 0.1174,
      "step": 346
    },
    {
      "epoch": 0.07895335608646188,
      "grad_norm": 2.4163752446451667,
      "learning_rate": 1.2492355384554039e-06,
      "loss": 0.1864,
      "step": 347
    },
    {
      "epoch": 0.07918088737201365,
      "grad_norm": 2.881696468020635,
      "learning_rate": 1.2492311141244764e-06,
      "loss": 0.1509,
      "step": 348
    },
    {
      "epoch": 0.07940841865756541,
      "grad_norm": 4.2425549257036925,
      "learning_rate": 1.249226677035408e-06,
      "loss": 0.1384,
      "step": 349
    },
    {
      "epoch": 0.07963594994311718,
      "grad_norm": 2.999886291999185,
      "learning_rate": 1.2492222271882896e-06,
      "loss": 0.1631,
      "step": 350
    },
    {
      "epoch": 0.07986348122866894,
      "grad_norm": 4.681484131322112,
      "learning_rate": 1.2492177645832121e-06,
      "loss": 0.1752,
      "step": 351
    },
    {
      "epoch": 0.08009101251422071,
      "grad_norm": 2.921704965075288,
      "learning_rate": 1.2492132892202668e-06,
      "loss": 0.1486,
      "step": 352
    },
    {
      "epoch": 0.08031854379977246,
      "grad_norm": 5.592595582830648,
      "learning_rate": 1.2492088010995449e-06,
      "loss": 0.2707,
      "step": 353
    },
    {
      "epoch": 0.08054607508532423,
      "grad_norm": 2.9440013961704823,
      "learning_rate": 1.2492043002211385e-06,
      "loss": 0.2054,
      "step": 354
    },
    {
      "epoch": 0.080773606370876,
      "grad_norm": 2.2221784159000006,
      "learning_rate": 1.2491997865851392e-06,
      "loss": 0.1373,
      "step": 355
    },
    {
      "epoch": 0.08100113765642776,
      "grad_norm": 1.7381570114572884,
      "learning_rate": 1.2491952601916395e-06,
      "loss": 0.0858,
      "step": 356
    },
    {
      "epoch": 0.08122866894197953,
      "grad_norm": 2.930524510809462,
      "learning_rate": 1.2491907210407319e-06,
      "loss": 0.2179,
      "step": 357
    },
    {
      "epoch": 0.08145620022753129,
      "grad_norm": 1.329914120982883,
      "learning_rate": 1.249186169132509e-06,
      "loss": 0.1839,
      "step": 358
    },
    {
      "epoch": 0.08168373151308304,
      "grad_norm": 4.774637200381304,
      "learning_rate": 1.2491816044670641e-06,
      "loss": 0.1266,
      "step": 359
    },
    {
      "epoch": 0.08191126279863481,
      "grad_norm": 3.0085506218930442,
      "learning_rate": 1.24917702704449e-06,
      "loss": 0.1813,
      "step": 360
    },
    {
      "epoch": 0.08213879408418658,
      "grad_norm": 2.683588571853357,
      "learning_rate": 1.2491724368648808e-06,
      "loss": 0.1182,
      "step": 361
    },
    {
      "epoch": 0.08236632536973834,
      "grad_norm": 4.142859587264675,
      "learning_rate": 1.2491678339283303e-06,
      "loss": 0.1213,
      "step": 362
    },
    {
      "epoch": 0.08259385665529011,
      "grad_norm": 2.266538556877378,
      "learning_rate": 1.249163218234932e-06,
      "loss": 0.1669,
      "step": 363
    },
    {
      "epoch": 0.08282138794084186,
      "grad_norm": 3.340308786527698,
      "learning_rate": 1.249158589784781e-06,
      "loss": 0.1449,
      "step": 364
    },
    {
      "epoch": 0.08304891922639362,
      "grad_norm": 3.600922134824311,
      "learning_rate": 1.2491539485779713e-06,
      "loss": 0.1934,
      "step": 365
    },
    {
      "epoch": 0.08327645051194539,
      "grad_norm": 2.5603148777390796,
      "learning_rate": 1.2491492946145981e-06,
      "loss": 0.1215,
      "step": 366
    },
    {
      "epoch": 0.08350398179749716,
      "grad_norm": 1.4306937563740754,
      "learning_rate": 1.2491446278947563e-06,
| "loss": 0.1218, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.08373151308304892, |
| "grad_norm": 6.514691076015768, |
| "learning_rate": 1.2491399484185413e-06, |
| "loss": 0.1723, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.08395904436860069, |
| "grad_norm": 2.1513333963844214, |
| "learning_rate": 1.249135256186049e-06, |
| "loss": 0.242, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.08418657565415244, |
| "grad_norm": 1.697947937157404, |
| "learning_rate": 1.249130551197375e-06, |
| "loss": 0.1045, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0844141069397042, |
| "grad_norm": 1.4338559958770856, |
| "learning_rate": 1.2491258334526155e-06, |
| "loss": 0.1671, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.08464163822525597, |
| "grad_norm": 2.7532236684188773, |
| "learning_rate": 1.2491211029518672e-06, |
| "loss": 0.1034, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.08486916951080774, |
| "grad_norm": 2.665642318134447, |
| "learning_rate": 1.2491163596952264e-06, |
| "loss": 0.1737, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.0850967007963595, |
| "grad_norm": 1.5130437493435105, |
| "learning_rate": 1.2491116036827902e-06, |
| "loss": 0.0804, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.08532423208191127, |
| "grad_norm": 1.3642320073282543, |
| "learning_rate": 1.2491068349146559e-06, |
| "loss": 0.1428, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.08555176336746302, |
| "grad_norm": 2.1006895230964444, |
| "learning_rate": 1.249102053390921e-06, |
| "loss": 0.2759, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.08577929465301479, |
| "grad_norm": 1.5335225229109515, |
| "learning_rate": 1.249097259111683e-06, |
| "loss": 0.1836, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.08600682593856655, |
| "grad_norm": 4.09523641946509, |
| "learning_rate": 1.24909245207704e-06, |
| "loss": 0.2771, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.08623435722411832, |
| "grad_norm": 2.2658393838403477, |
| "learning_rate": 1.2490876322870904e-06, |
| "loss": 0.1815, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.08646188850967008, |
| "grad_norm": 3.053596441038967, |
| "learning_rate": 1.2490827997419325e-06, |
| "loss": 0.1183, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08668941979522184, |
| "grad_norm": 2.9366601199125153, |
| "learning_rate": 1.249077954441665e-06, |
| "loss": 0.1738, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.0869169510807736, |
| "grad_norm": 1.9726593738442935, |
| "learning_rate": 1.249073096386387e-06, |
| "loss": 0.1427, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.08714448236632537, |
| "grad_norm": 2.8452874204285985, |
| "learning_rate": 1.249068225576198e-06, |
| "loss": 0.2767, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.08737201365187713, |
| "grad_norm": 4.292343700500067, |
| "learning_rate": 1.2490633420111974e-06, |
| "loss": 0.127, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.0875995449374289, |
| "grad_norm": 4.105827667785258, |
| "learning_rate": 1.249058445691485e-06, |
| "loss": 0.1639, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.08782707622298067, |
| "grad_norm": 4.310698395146462, |
| "learning_rate": 1.2490535366171607e-06, |
| "loss": 0.1289, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.08805460750853242, |
| "grad_norm": 3.5788743602832795, |
| "learning_rate": 1.249048614788325e-06, |
| "loss": 0.1804, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.08828213879408418, |
| "grad_norm": 2.6616942664445413, |
| "learning_rate": 1.249043680205079e-06, |
| "loss": 0.144, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.08850967007963595, |
| "grad_norm": 2.989163897960478, |
| "learning_rate": 1.2490387328675226e-06, |
| "loss": 0.2016, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.08873720136518772, |
| "grad_norm": 4.587176162210019, |
| "learning_rate": 1.2490337727757576e-06, |
| "loss": 0.2284, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08896473265073948, |
| "grad_norm": 2.794747809075531, |
| "learning_rate": 1.249028799929885e-06, |
| "loss": 0.2002, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.08919226393629125, |
| "grad_norm": 2.0197262567230276, |
| "learning_rate": 1.2490238143300066e-06, |
| "loss": 0.1143, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.089419795221843, |
| "grad_norm": 3.184614553894442, |
| "learning_rate": 1.2490188159762243e-06, |
| "loss": 0.1913, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.08964732650739476, |
| "grad_norm": 2.518010477046937, |
| "learning_rate": 1.2490138048686405e-06, |
| "loss": 0.1981, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.08987485779294653, |
| "grad_norm": 5.010077865699377, |
| "learning_rate": 1.249008781007357e-06, |
| "loss": 0.1423, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.0901023890784983, |
| "grad_norm": 1.420461399090385, |
| "learning_rate": 1.2490037443924768e-06, |
| "loss": 0.1363, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.09032992036405006, |
| "grad_norm": 2.5810652557759863, |
| "learning_rate": 1.2489986950241032e-06, |
| "loss": 0.1002, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.09055745164960181, |
| "grad_norm": 1.8725706501255737, |
| "learning_rate": 1.2489936329023387e-06, |
| "loss": 0.1974, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.09078498293515358, |
| "grad_norm": 3.2869147678539554, |
| "learning_rate": 1.2489885580272874e-06, |
| "loss": 0.1629, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.09101251422070535, |
| "grad_norm": 1.7546095764098488, |
| "learning_rate": 1.2489834703990527e-06, |
| "loss": 0.1326, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09124004550625711, |
| "grad_norm": 3.0930989898336407, |
| "learning_rate": 1.2489783700177385e-06, |
| "loss": 0.2565, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.09146757679180888, |
| "grad_norm": 4.363886237065706, |
| "learning_rate": 1.2489732568834492e-06, |
| "loss": 0.1425, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.09169510807736064, |
| "grad_norm": 2.141413419957395, |
| "learning_rate": 1.2489681309962895e-06, |
| "loss": 0.1458, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.0919226393629124, |
| "grad_norm": 4.5478526718009205, |
| "learning_rate": 1.2489629923563637e-06, |
| "loss": 0.1655, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.09215017064846416, |
| "grad_norm": 5.253865415098631, |
| "learning_rate": 1.2489578409637774e-06, |
| "loss": 0.2702, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.09237770193401593, |
| "grad_norm": 6.114423825591168, |
| "learning_rate": 1.2489526768186352e-06, |
| "loss": 0.1364, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.09260523321956769, |
| "grad_norm": 2.4260049242900505, |
| "learning_rate": 1.2489474999210434e-06, |
| "loss": 0.1573, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.09283276450511946, |
| "grad_norm": 6.696614155480106, |
| "learning_rate": 1.2489423102711068e-06, |
| "loss": 0.2365, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.09306029579067122, |
| "grad_norm": 3.4093511525509848, |
| "learning_rate": 1.2489371078689326e-06, |
| "loss": 0.1552, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.09328782707622298, |
| "grad_norm": 3.512014449058475, |
| "learning_rate": 1.2489318927146263e-06, |
| "loss": 0.1392, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09351535836177474, |
| "grad_norm": 4.385040034701264, |
| "learning_rate": 1.2489266648082951e-06, |
| "loss": 0.1184, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.09374288964732651, |
| "grad_norm": 11.030038016242493, |
| "learning_rate": 1.2489214241500453e-06, |
| "loss": 0.2445, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.09397042093287827, |
| "grad_norm": 3.8160488235069487, |
| "learning_rate": 1.2489161707399843e-06, |
| "loss": 0.2422, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.09419795221843004, |
| "grad_norm": 2.5154081754915554, |
| "learning_rate": 1.2489109045782194e-06, |
| "loss": 0.1284, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.09442548350398179, |
| "grad_norm": 2.186602019326803, |
| "learning_rate": 1.2489056256648582e-06, |
| "loss": 0.1387, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.09465301478953356, |
| "grad_norm": 3.1244704898712223, |
| "learning_rate": 1.2489003340000089e-06, |
| "loss": 0.2695, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.09488054607508532, |
| "grad_norm": 1.9015703147093774, |
| "learning_rate": 1.2488950295837792e-06, |
| "loss": 0.2029, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.09510807736063709, |
| "grad_norm": 3.2255120343889523, |
| "learning_rate": 1.2488897124162777e-06, |
| "loss": 0.1708, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.09533560864618885, |
| "grad_norm": 2.4361554392110354, |
| "learning_rate": 1.248884382497613e-06, |
| "loss": 0.237, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.09556313993174062, |
| "grad_norm": 5.44904137240634, |
| "learning_rate": 1.2488790398278941e-06, |
| "loss": 0.2259, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09579067121729237, |
| "grad_norm": 2.5542725247665725, |
| "learning_rate": 1.2488736844072304e-06, |
| "loss": 0.1706, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.09601820250284414, |
| "grad_norm": 3.3440828684749837, |
| "learning_rate": 1.248868316235731e-06, |
| "loss": 0.166, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.0962457337883959, |
| "grad_norm": 2.837980086891423, |
| "learning_rate": 1.2488629353135059e-06, |
| "loss": 0.1974, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.09647326507394767, |
| "grad_norm": 3.0821716156484413, |
| "learning_rate": 1.2488575416406649e-06, |
| "loss": 0.2029, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.09670079635949944, |
| "grad_norm": 4.11082660525738, |
| "learning_rate": 1.2488521352173183e-06, |
| "loss": 0.1288, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.09692832764505119, |
| "grad_norm": 2.792375492899653, |
| "learning_rate": 1.2488467160435765e-06, |
| "loss": 0.1318, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.09715585893060295, |
| "grad_norm": 2.54978143800456, |
| "learning_rate": 1.2488412841195505e-06, |
| "loss": 0.2235, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.09738339021615472, |
| "grad_norm": 1.8685713785223814, |
| "learning_rate": 1.2488358394453512e-06, |
| "loss": 0.1018, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.09761092150170649, |
| "grad_norm": 2.19856597261874, |
| "learning_rate": 1.2488303820210897e-06, |
| "loss": 0.0955, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.09783845278725825, |
| "grad_norm": 2.756460140283964, |
| "learning_rate": 1.2488249118468776e-06, |
| "loss": 0.161, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09806598407281002, |
| "grad_norm": 3.1658885878432446, |
| "learning_rate": 1.248819428922827e-06, |
| "loss": 0.1707, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.09829351535836177, |
| "grad_norm": 3.574624372801338, |
| "learning_rate": 1.2488139332490495e-06, |
| "loss": 0.2412, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.09852104664391353, |
| "grad_norm": 2.63473599121384, |
| "learning_rate": 1.248808424825658e-06, |
| "loss": 0.1195, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.0987485779294653, |
| "grad_norm": 3.928170371490413, |
| "learning_rate": 1.2488029036527645e-06, |
| "loss": 0.1478, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.09897610921501707, |
| "grad_norm": 2.0459697190569583, |
| "learning_rate": 1.2487973697304822e-06, |
| "loss": 0.0868, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.09920364050056883, |
| "grad_norm": 2.2037192709560283, |
| "learning_rate": 1.248791823058924e-06, |
| "loss": 0.1911, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.0994311717861206, |
| "grad_norm": 3.549121049187713, |
| "learning_rate": 1.2487862636382034e-06, |
| "loss": 0.1218, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.09965870307167235, |
| "grad_norm": 1.4303061363329783, |
| "learning_rate": 1.248780691468434e-06, |
| "loss": 0.1116, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.09988623435722412, |
| "grad_norm": 3.8141735085769746, |
| "learning_rate": 1.2487751065497296e-06, |
| "loss": 0.2179, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.10011376564277588, |
| "grad_norm": 2.6329169063924986, |
| "learning_rate": 1.2487695088822044e-06, |
| "loss": 0.1492, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.10034129692832765, |
| "grad_norm": 2.8773216855185635, |
| "learning_rate": 1.2487638984659729e-06, |
| "loss": 0.0988, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.10056882821387941, |
| "grad_norm": 2.5448731857786284, |
| "learning_rate": 1.2487582753011496e-06, |
| "loss": 0.1023, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.10079635949943117, |
| "grad_norm": 2.4399816480891445, |
| "learning_rate": 1.2487526393878497e-06, |
| "loss": 0.2015, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.10102389078498293, |
| "grad_norm": 2.056202357783669, |
| "learning_rate": 1.248746990726188e-06, |
| "loss": 0.1376, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.1012514220705347, |
| "grad_norm": 2.489946255383071, |
| "learning_rate": 1.2487413293162803e-06, |
| "loss": 0.1389, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.10147895335608646, |
| "grad_norm": 2.3660691937468807, |
| "learning_rate": 1.2487356551582421e-06, |
| "loss": 0.2235, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.10170648464163823, |
| "grad_norm": 2.5030375037996575, |
| "learning_rate": 1.2487299682521893e-06, |
| "loss": 0.2156, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.10193401592719, |
| "grad_norm": 2.210721856008811, |
| "learning_rate": 1.2487242685982384e-06, |
| "loss": 0.1101, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.10216154721274175, |
| "grad_norm": 2.250420318734035, |
| "learning_rate": 1.2487185561965057e-06, |
| "loss": 0.1241, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.10238907849829351, |
| "grad_norm": 2.019413043508561, |
| "learning_rate": 1.248712831047108e-06, |
| "loss": 0.1217, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10261660978384528, |
| "grad_norm": 3.2295330442493713, |
| "learning_rate": 1.2487070931501624e-06, |
| "loss": 0.2304, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.10284414106939704, |
| "grad_norm": 2.444299385213433, |
| "learning_rate": 1.2487013425057858e-06, |
| "loss": 0.2084, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.10307167235494881, |
| "grad_norm": 2.8966369631126367, |
| "learning_rate": 1.2486955791140964e-06, |
| "loss": 0.1838, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.10329920364050058, |
| "grad_norm": 2.0941566856763387, |
| "learning_rate": 1.2486898029752113e-06, |
| "loss": 0.1043, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.10352673492605233, |
| "grad_norm": 2.3019250022426925, |
| "learning_rate": 1.248684014089249e-06, |
| "loss": 0.1189, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.1037542662116041, |
| "grad_norm": 2.1349092143720387, |
| "learning_rate": 1.2486782124563277e-06, |
| "loss": 0.1708, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.10398179749715586, |
| "grad_norm": 3.101054381668985, |
| "learning_rate": 1.2486723980765659e-06, |
| "loss": 0.1796, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.10420932878270762, |
| "grad_norm": 1.9574694651381292, |
| "learning_rate": 1.2486665709500826e-06, |
| "loss": 0.1762, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.10443686006825939, |
| "grad_norm": 1.9997685220641748, |
| "learning_rate": 1.2486607310769965e-06, |
| "loss": 0.1626, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.10466439135381114, |
| "grad_norm": 1.4987645243428842, |
| "learning_rate": 1.2486548784574275e-06, |
| "loss": 0.1104, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10489192263936291, |
| "grad_norm": 3.0056305765303857, |
| "learning_rate": 1.2486490130914948e-06, |
| "loss": 0.1526, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.10511945392491467, |
| "grad_norm": 1.6498658926200307, |
| "learning_rate": 1.2486431349793185e-06, |
| "loss": 0.1158, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.10534698521046644, |
| "grad_norm": 2.8097802744351035, |
| "learning_rate": 1.2486372441210188e-06, |
| "loss": 0.174, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1055745164960182, |
| "grad_norm": 2.2295425114906955, |
| "learning_rate": 1.248631340516716e-06, |
| "loss": 0.0993, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.10580204778156997, |
| "grad_norm": 1.7352971105344217, |
| "learning_rate": 1.2486254241665302e-06, |
| "loss": 0.1799, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.10602957906712172, |
| "grad_norm": 3.37890451450669, |
| "learning_rate": 1.2486194950705831e-06, |
| "loss": 0.1456, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.10625711035267349, |
| "grad_norm": 4.485196875503332, |
| "learning_rate": 1.248613553228996e-06, |
| "loss": 0.1509, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.10648464163822526, |
| "grad_norm": 3.8128664414272833, |
| "learning_rate": 1.2486075986418896e-06, |
| "loss": 0.1217, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.10671217292377702, |
| "grad_norm": 1.9049325746647565, |
| "learning_rate": 1.248601631309386e-06, |
| "loss": 0.1973, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.10693970420932879, |
| "grad_norm": 1.9433225744575688, |
| "learning_rate": 1.2485956512316072e-06, |
| "loss": 0.1422, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10716723549488055, |
| "grad_norm": 1.7542185976103952, |
| "learning_rate": 1.2485896584086754e-06, |
| "loss": 0.1187, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.1073947667804323, |
| "grad_norm": 0.985585738392577, |
| "learning_rate": 1.248583652840713e-06, |
| "loss": 0.1116, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.10762229806598407, |
| "grad_norm": 6.520293791736507, |
| "learning_rate": 1.2485776345278427e-06, |
| "loss": 0.1634, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.10784982935153584, |
| "grad_norm": 2.9958165676640935, |
| "learning_rate": 1.2485716034701876e-06, |
| "loss": 0.1468, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.1080773606370876, |
| "grad_norm": 3.496540224028896, |
| "learning_rate": 1.2485655596678712e-06, |
| "loss": 0.1444, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.10830489192263937, |
| "grad_norm": 2.6887910577996603, |
| "learning_rate": 1.2485595031210164e-06, |
| "loss": 0.2257, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.10853242320819112, |
| "grad_norm": 2.210859712757279, |
| "learning_rate": 1.2485534338297475e-06, |
| "loss": 0.0858, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.10875995449374289, |
| "grad_norm": 1.5912288577365465, |
| "learning_rate": 1.2485473517941884e-06, |
| "loss": 0.1021, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.10898748577929465, |
| "grad_norm": 2.162920899638659, |
| "learning_rate": 1.2485412570144633e-06, |
| "loss": 0.2051, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.10921501706484642, |
| "grad_norm": 2.3337569161162186, |
| "learning_rate": 1.2485351494906969e-06, |
| "loss": 0.1726, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10944254835039818, |
| "grad_norm": 1.6587972530161754, |
| "learning_rate": 1.2485290292230142e-06, |
| "loss": 0.1589, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.10967007963594995, |
| "grad_norm": 2.549443212629399, |
| "learning_rate": 1.24852289621154e-06, |
| "loss": 0.1107, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.1098976109215017, |
| "grad_norm": 1.9600173744992218, |
| "learning_rate": 1.2485167504563995e-06, |
| "loss": 0.1497, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.11012514220705347, |
| "grad_norm": 2.914488733886043, |
| "learning_rate": 1.2485105919577187e-06, |
| "loss": 0.2242, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.11035267349260523, |
| "grad_norm": 2.4334592724633475, |
| "learning_rate": 1.2485044207156233e-06, |
| "loss": 0.1326, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.110580204778157, |
| "grad_norm": 2.1918094312708374, |
| "learning_rate": 1.2484982367302395e-06, |
| "loss": 0.1611, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.11080773606370876, |
| "grad_norm": 2.2072766100880843, |
| "learning_rate": 1.2484920400016936e-06, |
| "loss": 0.1402, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.11103526734926053, |
| "grad_norm": 1.6859469474720183, |
| "learning_rate": 1.2484858305301122e-06, |
| "loss": 0.1472, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.11126279863481228, |
| "grad_norm": 1.590244696061809, |
| "learning_rate": 1.2484796083156222e-06, |
| "loss": 0.0824, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.11149032992036405, |
| "grad_norm": 4.525638347888733, |
| "learning_rate": 1.2484733733583511e-06, |
| "loss": 0.1257, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.11171786120591581, |
| "grad_norm": 2.6721724669454723, |
| "learning_rate": 1.248467125658426e-06, |
| "loss": 0.2084, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.11194539249146758, |
| "grad_norm": 2.300055245713483, |
| "learning_rate": 1.2484608652159746e-06, |
| "loss": 0.1053, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.11217292377701935, |
| "grad_norm": 3.273977920110333, |
| "learning_rate": 1.248454592031125e-06, |
| "loss": 0.1176, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1124004550625711, |
| "grad_norm": 2.101057790899636, |
| "learning_rate": 1.2484483061040054e-06, |
| "loss": 0.1277, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.11262798634812286, |
| "grad_norm": 3.6133620556599984, |
| "learning_rate": 1.2484420074347441e-06, |
| "loss": 0.1845, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.11285551763367463, |
| "grad_norm": 1.9619725915027257, |
| "learning_rate": 1.24843569602347e-06, |
| "loss": 0.1894, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.1130830489192264, |
| "grad_norm": 2.636905846270966, |
| "learning_rate": 1.2484293718703119e-06, |
| "loss": 0.1874, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.11331058020477816, |
| "grad_norm": 2.5593822043936125, |
| "learning_rate": 1.2484230349753994e-06, |
| "loss": 0.0927, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.11353811149032993, |
| "grad_norm": 2.2440609982402715, |
| "learning_rate": 1.2484166853388617e-06, |
| "loss": 0.1381, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.11376564277588168, |
| "grad_norm": 2.7232866925160506, |
| "learning_rate": 1.2484103229608288e-06, |
| "loss": 0.1758, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11399317406143344, |
| "grad_norm": 2.6484317978572816, |
| "learning_rate": 1.2484039478414305e-06, |
| "loss": 0.1259, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.11422070534698521, |
| "grad_norm": 2.1058374053464464, |
| "learning_rate": 1.2483975599807972e-06, |
| "loss": 0.1369, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.11444823663253698, |
| "grad_norm": 2.1458925241645903, |
| "learning_rate": 1.2483911593790595e-06, |
| "loss": 0.1004, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.11467576791808874, |
| "grad_norm": 3.031837353586065, |
| "learning_rate": 1.2483847460363482e-06, |
| "loss": 0.154, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.1149032992036405, |
| "grad_norm": 3.1297621875057544, |
| "learning_rate": 1.2483783199527943e-06, |
| "loss": 0.1071, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.11513083048919226, |
| "grad_norm": 2.5407911203085787, |
| "learning_rate": 1.2483718811285296e-06, |
| "loss": 0.1744, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.11535836177474403, |
| "grad_norm": 3.1175064627764377, |
| "learning_rate": 1.2483654295636848e-06, |
| "loss": 0.1072, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.11558589306029579, |
| "grad_norm": 3.0988741009535667, |
| "learning_rate": 1.2483589652583924e-06, |
| "loss": 0.1753, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.11581342434584756, |
| "grad_norm": 1.8808814641931946, |
| "learning_rate": 1.2483524882127846e-06, |
| "loss": 0.0859, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.11604095563139932, |
| "grad_norm": 2.8937543802568158, |
| "learning_rate": 1.2483459984269933e-06, |
| "loss": 0.1816, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11626848691695107, |
| "grad_norm": 2.186370885841539, |
| "learning_rate": 1.2483394959011514e-06, |
| "loss": 0.0819, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.11649601820250284, |
| "grad_norm": 1.8650801779387822, |
| "learning_rate": 1.248332980635392e-06, |
| "loss": 0.1436, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1167235494880546, |
| "grad_norm": 2.9270321544640994, |
| "learning_rate": 1.2483264526298478e-06, |
| "loss": 0.1308, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.11695108077360637, |
| "grad_norm": 1.9942689645578024, |
| "learning_rate": 1.2483199118846525e-06, |
| "loss": 0.1656, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.11717861205915814, |
| "grad_norm": 2.8104633311436116, |
| "learning_rate": 1.2483133583999399e-06, |
| "loss": 0.1681, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1174061433447099, |
| "grad_norm": 2.546169206593085, |
| "learning_rate": 1.2483067921758439e-06, |
| "loss": 0.0925, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.11763367463026166, |
| "grad_norm": 2.0758430805982178, |
| "learning_rate": 1.2483002132124983e-06, |
| "loss": 0.203, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.11786120591581342, |
| "grad_norm": 2.1497459150584386, |
| "learning_rate": 1.2482936215100382e-06, |
| "loss": 0.1056, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.11808873720136519, |
| "grad_norm": 2.197584956184683, |
| "learning_rate": 1.2482870170685978e-06, |
| "loss": 0.0933, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.11831626848691695, |
| "grad_norm": 4.944962250057973, |
| "learning_rate": 1.2482803998883122e-06, |
| "loss": 0.2129, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11854379977246872, |
| "grad_norm": 1.5333537239736301, |
| "learning_rate": 1.2482737699693168e-06, |
| "loss": 0.1729, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.11877133105802047, |
| "grad_norm": 2.5556570479037948, |
| "learning_rate": 1.248267127311747e-06, |
| "loss": 0.1607, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.11899886234357224, |
| "grad_norm": 2.0949542782407398, |
| "learning_rate": 1.2482604719157386e-06, |
| "loss": 0.1857, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.119226393629124, |
| "grad_norm": 2.2586097350216385, |
| "learning_rate": 1.2482538037814277e-06, |
| "loss": 0.1258, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.11945392491467577, |
| "grad_norm": 3.036602602741407, |
| "learning_rate": 1.2482471229089502e-06, |
| "loss": 0.161, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11968145620022753, |
| "grad_norm": 3.382002996482515, |
| "learning_rate": 1.2482404292984431e-06, |
| "loss": 0.1784, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.1199089874857793, |
| "grad_norm": 1.571226708630226, |
| "learning_rate": 1.248233722950043e-06, |
| "loss": 0.1605, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.12013651877133105, |
| "grad_norm": 3.0053996402943737, |
| "learning_rate": 1.2482270038638872e-06, |
| "loss": 0.1201, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.12036405005688282, |
| "grad_norm": 4.663906907753179, |
| "learning_rate": 1.2482202720401128e-06, |
| "loss": 0.203, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.12059158134243458, |
| "grad_norm": 2.107107186527039, |
| "learning_rate": 1.248213527478857e-06, |
| "loss": 0.1933, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.12081911262798635, |
| "grad_norm": 2.191569921182264, |
| "learning_rate": 1.2482067701802583e-06, |
| "loss": 0.1735, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.12104664391353812, |
| "grad_norm": 1.611611034864374, |
| "learning_rate": 1.2482000001444547e-06, |
| "loss": 0.1299, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.12127417519908988, |
| "grad_norm": 1.9644367618752439, |
| "learning_rate": 1.2481932173715845e-06, |
| "loss": 0.0868, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.12150170648464163, |
| "grad_norm": 1.7597689357542332, |
| "learning_rate": 1.2481864218617859e-06, |
| "loss": 0.1977, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.1217292377701934, |
| "grad_norm": 1.0455766882042379, |
| "learning_rate": 1.2481796136151984e-06, |
| "loss": 0.0856, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.12195676905574517, |
| "grad_norm": 3.2419347761543684, |
| "learning_rate": 1.2481727926319609e-06, |
| "loss": 0.2399, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.12218430034129693, |
| "grad_norm": 3.339873316715719, |
| "learning_rate": 1.2481659589122127e-06, |
| "loss": 0.186, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.1224118316268487, |
| "grad_norm": 3.4453888669974146, |
| "learning_rate": 1.2481591124560934e-06, |
| "loss": 0.2007, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.12263936291240045, |
| "grad_norm": 3.4700673703521736, |
| "learning_rate": 1.2481522532637435e-06, |
| "loss": 0.1632, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.12286689419795221, |
| "grad_norm": 2.355397510374851, |
| "learning_rate": 1.2481453813353026e-06, |
| "loss": 0.1212, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.12309442548350398, |
| "grad_norm": 5.338957920220655, |
| "learning_rate": 1.2481384966709116e-06, |
| "loss": 0.1592, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.12332195676905575, |
| "grad_norm": 2.990026650956376, |
| "learning_rate": 1.2481315992707104e-06, |
| "loss": 0.2656, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.12354948805460751, |
| "grad_norm": 1.8798810865858828, |
| "learning_rate": 1.248124689134841e-06, |
| "loss": 0.1125, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.12377701934015928, |
| "grad_norm": 1.6104299610891197, |
| "learning_rate": 1.2481177662634438e-06, |
| "loss": 0.1557, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.12400455062571103, |
| "grad_norm": 3.302283676048537, |
| "learning_rate": 1.2481108306566609e-06, |
| "loss": 0.1799, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.1242320819112628, |
| "grad_norm": 2.0532951352869513, |
| "learning_rate": 1.2481038823146338e-06, |
| "loss": 0.0815, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.12445961319681456, |
| "grad_norm": 1.4326913794879275, |
| "learning_rate": 1.2480969212375043e-06, |
| "loss": 0.177, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.12468714448236633, |
| "grad_norm": 3.5494676426295286, |
| "learning_rate": 1.2480899474254151e-06, |
| "loss": 0.136, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.12491467576791809, |
| "grad_norm": 1.3410455744599155, |
| "learning_rate": 1.2480829608785085e-06, |
| "loss": 0.1078, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.12514220705346984, |
| "grad_norm": 1.7709434217848017, |
| "learning_rate": 1.2480759615969273e-06, |
| "loss": 0.1114, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12536973833902162, |
| "grad_norm": 1.4865770903343614, |
| "learning_rate": 1.2480689495808144e-06, |
| "loss": 0.1377, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.12559726962457338, |
| "grad_norm": 1.6211826207402742, |
| "learning_rate": 1.2480619248303133e-06, |
| "loss": 0.1873, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.12582480091012513, |
| "grad_norm": 3.1755876159758794, |
| "learning_rate": 1.2480548873455675e-06, |
| "loss": 0.2135, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.1260523321956769, |
| "grad_norm": 3.6986046315140952, |
| "learning_rate": 1.248047837126721e-06, |
| "loss": 0.3549, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.12627986348122866, |
| "grad_norm": 2.782290781984551, |
| "learning_rate": 1.248040774173918e-06, |
| "loss": 0.1936, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.12650739476678044, |
| "grad_norm": 2.329760734261347, |
| "learning_rate": 1.248033698487302e-06, |
| "loss": 0.1395, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.1267349260523322, |
| "grad_norm": 2.258554836923121, |
| "learning_rate": 1.2480266100670189e-06, |
| "loss": 0.1605, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.12696245733788397, |
| "grad_norm": 3.058041285297341, |
| "learning_rate": 1.2480195089132125e-06, |
| "loss": 0.1975, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.12718998862343572, |
| "grad_norm": 2.406042057945949, |
| "learning_rate": 1.2480123950260284e-06, |
| "loss": 0.1405, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.12741751990898748, |
| "grad_norm": 1.4634033865621767, |
| "learning_rate": 1.248005268405612e-06, |
| "loss": 0.0686, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12764505119453926, |
| "grad_norm": 1.1470288222889338, |
| "learning_rate": 1.2479981290521087e-06, |
| "loss": 0.0649, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.127872582480091, |
| "grad_norm": 3.357158703331078, |
| "learning_rate": 1.2479909769656648e-06, |
| "loss": 0.1684, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.1281001137656428, |
| "grad_norm": 2.4363436867877595, |
| "learning_rate": 1.2479838121464263e-06, |
| "loss": 0.2155, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.12832764505119454, |
| "grad_norm": 4.051636355021599, |
| "learning_rate": 1.2479766345945395e-06, |
| "loss": 0.1853, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.1285551763367463, |
| "grad_norm": 1.6707836764627593, |
| "learning_rate": 1.2479694443101513e-06, |
| "loss": 0.2261, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.12878270762229807, |
| "grad_norm": 1.3008647546251737, |
| "learning_rate": 1.2479622412934087e-06, |
| "loss": 0.1606, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.12901023890784982, |
| "grad_norm": 3.421202381350775, |
| "learning_rate": 1.2479550255444586e-06, |
| "loss": 0.147, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.1292377701934016, |
| "grad_norm": 1.5157864652280186, |
| "learning_rate": 1.2479477970634487e-06, |
| "loss": 0.1536, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.12946530147895335, |
| "grad_norm": 3.27856184412377, |
| "learning_rate": 1.2479405558505267e-06, |
| "loss": 0.1931, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.1296928327645051, |
| "grad_norm": 2.5943823025048474, |
| "learning_rate": 1.247933301905841e-06, |
| "loss": 0.1384, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12992036405005689, |
| "grad_norm": 4.278003846990416, |
| "learning_rate": 1.2479260352295388e-06, |
| "loss": 0.1771, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.13014789533560864, |
| "grad_norm": 3.446486195671729, |
| "learning_rate": 1.2479187558217697e-06, |
| "loss": 0.1323, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.13037542662116042, |
| "grad_norm": 1.5099352019896337, |
| "learning_rate": 1.247911463682682e-06, |
| "loss": 0.1444, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.13060295790671217, |
| "grad_norm": 3.798908546439363, |
| "learning_rate": 1.2479041588124247e-06, |
| "loss": 0.1504, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.13083048919226395, |
| "grad_norm": 3.7532424433768754, |
| "learning_rate": 1.2478968412111471e-06, |
| "loss": 0.1518, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.1310580204778157, |
| "grad_norm": 2.056630545760187, |
| "learning_rate": 1.247889510878999e-06, |
| "loss": 0.2708, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.13128555176336745, |
| "grad_norm": 2.303355999452058, |
| "learning_rate": 1.24788216781613e-06, |
| "loss": 0.1662, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.13151308304891923, |
| "grad_norm": 2.269104241548175, |
| "learning_rate": 1.2478748120226902e-06, |
| "loss": 0.1337, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.13174061433447098, |
| "grad_norm": 3.0692597907642862, |
| "learning_rate": 1.2478674434988299e-06, |
| "loss": 0.1326, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.13196814562002276, |
| "grad_norm": 1.6865202158454742, |
| "learning_rate": 1.2478600622447001e-06, |
| "loss": 0.1647, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.13219567690557452, |
| "grad_norm": 2.939283703136826, |
| "learning_rate": 1.2478526682604512e-06, |
| "loss": 0.1303, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.13242320819112627, |
| "grad_norm": 3.1064926411391713, |
| "learning_rate": 1.2478452615462345e-06, |
| "loss": 0.1409, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.13265073947667805, |
| "grad_norm": 2.5571749562826485, |
| "learning_rate": 1.247837842102201e-06, |
| "loss": 0.1791, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.1328782707622298, |
| "grad_norm": 2.795629539563545, |
| "learning_rate": 1.2478304099285031e-06, |
| "loss": 0.1567, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.13310580204778158, |
| "grad_norm": 2.0832780528771466, |
| "learning_rate": 1.2478229650252921e-06, |
| "loss": 0.1639, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 2.9969798024524117, |
| "learning_rate": 1.2478155073927204e-06, |
| "loss": 0.2444, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.13356086461888508, |
| "grad_norm": 1.9274087851448982, |
| "learning_rate": 1.2478080370309404e-06, |
| "loss": 0.105, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.13378839590443686, |
| "grad_norm": 4.021015627831867, |
| "learning_rate": 1.2478005539401046e-06, |
| "loss": 0.1734, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.13401592718998862, |
| "grad_norm": 2.9342976021528027, |
| "learning_rate": 1.2477930581203663e-06, |
| "loss": 0.1465, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.1342434584755404, |
| "grad_norm": 2.3242426333780632, |
| "learning_rate": 1.2477855495718782e-06, |
| "loss": 0.2241, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.13447098976109215, |
| "grad_norm": 2.957504561813871, |
| "learning_rate": 1.2477780282947942e-06, |
| "loss": 0.1734, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.13469852104664393, |
| "grad_norm": 1.8788696793522301, |
| "learning_rate": 1.2477704942892677e-06, |
| "loss": 0.1469, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.13492605233219568, |
| "grad_norm": 2.339527187323086, |
| "learning_rate": 1.2477629475554532e-06, |
| "loss": 0.1312, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.13515358361774743, |
| "grad_norm": 3.707567497860105, |
| "learning_rate": 1.2477553880935043e-06, |
| "loss": 0.1916, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.1353811149032992, |
| "grad_norm": 3.2750827489523022, |
| "learning_rate": 1.2477478159035758e-06, |
| "loss": 0.1774, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.13560864618885096, |
| "grad_norm": 2.777476705753077, |
| "learning_rate": 1.2477402309858226e-06, |
| "loss": 0.1789, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.13583617747440274, |
| "grad_norm": 2.144596195630353, |
| "learning_rate": 1.2477326333403995e-06, |
| "loss": 0.147, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.1360637087599545, |
| "grad_norm": 2.3685083837175935, |
| "learning_rate": 1.2477250229674618e-06, |
| "loss": 0.1831, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.13629124004550625, |
| "grad_norm": 1.9843295041761948, |
| "learning_rate": 1.2477173998671653e-06, |
| "loss": 0.178, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.13651877133105803, |
| "grad_norm": 3.434039497211011, |
| "learning_rate": 1.2477097640396655e-06, |
| "loss": 0.1235, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13674630261660978, |
| "grad_norm": 1.4586285890850859, |
| "learning_rate": 1.2477021154851185e-06, |
| "loss": 0.0977, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.13697383390216156, |
| "grad_norm": 3.3913304667052198, |
| "learning_rate": 1.2476944542036806e-06, |
| "loss": 0.1786, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.1372013651877133, |
| "grad_norm": 2.667804003182341, |
| "learning_rate": 1.2476867801955086e-06, |
| "loss": 0.1204, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.13742889647326506, |
| "grad_norm": 2.4655446209984033, |
| "learning_rate": 1.247679093460759e-06, |
| "loss": 0.2298, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.13765642775881684, |
| "grad_norm": 3.1521634114958816, |
| "learning_rate": 1.2476713939995895e-06, |
| "loss": 0.1264, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.1378839590443686, |
| "grad_norm": 1.8219187381761075, |
| "learning_rate": 1.2476636818121568e-06, |
| "loss": 0.1028, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.13811149032992037, |
| "grad_norm": 2.337156447435568, |
| "learning_rate": 1.247655956898619e-06, |
| "loss": 0.1946, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.13833902161547212, |
| "grad_norm": 3.2562899945752966, |
| "learning_rate": 1.2476482192591335e-06, |
| "loss": 0.1465, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.1385665529010239, |
| "grad_norm": 1.8250022998173558, |
| "learning_rate": 1.247640468893859e-06, |
| "loss": 0.1467, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.13879408418657566, |
| "grad_norm": 3.5242803865119603, |
| "learning_rate": 1.2476327058029534e-06, |
| "loss": 0.1225, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.1390216154721274, |
| "grad_norm": 3.027013883019154, |
| "learning_rate": 1.2476249299865757e-06, |
| "loss": 0.1595, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.1392491467576792, |
| "grad_norm": 2.3807833370240843, |
| "learning_rate": 1.2476171414448847e-06, |
| "loss": 0.0984, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.13947667804323094, |
| "grad_norm": 3.1119739781274416, |
| "learning_rate": 1.2476093401780397e-06, |
| "loss": 0.154, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.13970420932878272, |
| "grad_norm": 3.4567643287811958, |
| "learning_rate": 1.2476015261861998e-06, |
| "loss": 0.1405, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.13993174061433447, |
| "grad_norm": 2.6730132596017504, |
| "learning_rate": 1.247593699469525e-06, |
| "loss": 0.117, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.14015927189988622, |
| "grad_norm": 2.78286071664722, |
| "learning_rate": 1.2475858600281754e-06, |
| "loss": 0.1504, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.140386803185438, |
| "grad_norm": 2.0905809356248803, |
| "learning_rate": 1.247578007862311e-06, |
| "loss": 0.1221, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.14061433447098975, |
| "grad_norm": 2.307570493464016, |
| "learning_rate": 1.2475701429720923e-06, |
| "loss": 0.1166, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.14084186575654153, |
| "grad_norm": 1.2783682538203782, |
| "learning_rate": 1.24756226535768e-06, |
| "loss": 0.1346, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.1410693970420933, |
| "grad_norm": 1.497656716954093, |
| "learning_rate": 1.2475543750192352e-06, |
| "loss": 0.2064, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.14129692832764504, |
| "grad_norm": 3.79056695480817, |
| "learning_rate": 1.2475464719569192e-06, |
| "loss": 0.2673, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.14152445961319682, |
| "grad_norm": 1.4805750856049538, |
| "learning_rate": 1.2475385561708934e-06, |
| "loss": 0.1992, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.14175199089874857, |
| "grad_norm": 1.6748002073239907, |
| "learning_rate": 1.2475306276613194e-06, |
| "loss": 0.0979, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.14197952218430035, |
| "grad_norm": 2.5674392190565736, |
| "learning_rate": 1.2475226864283596e-06, |
| "loss": 0.1337, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.1422070534698521, |
| "grad_norm": 2.656075374063454, |
| "learning_rate": 1.2475147324721764e-06, |
| "loss": 0.2501, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.14243458475540388, |
| "grad_norm": 2.03707084801983, |
| "learning_rate": 1.2475067657929319e-06, |
| "loss": 0.1673, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.14266211604095563, |
| "grad_norm": 2.975904435297751, |
| "learning_rate": 1.2474987863907894e-06, |
| "loss": 0.135, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.14288964732650739, |
| "grad_norm": 2.2205623276633295, |
| "learning_rate": 1.2474907942659116e-06, |
| "loss": 0.2149, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.14311717861205916, |
| "grad_norm": 2.271865927518249, |
| "learning_rate": 1.247482789418462e-06, |
| "loss": 0.1519, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.14334470989761092, |
| "grad_norm": 6.542697842484103, |
| "learning_rate": 1.2474747718486044e-06, |
| "loss": 0.1757, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.1435722411831627, |
| "grad_norm": 1.8493295758356152, |
| "learning_rate": 1.2474667415565022e-06, |
| "loss": 0.096, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.14379977246871445, |
| "grad_norm": 4.567549869753572, |
| "learning_rate": 1.24745869854232e-06, |
| "loss": 0.1745, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.1440273037542662, |
| "grad_norm": 3.104479250541457, |
| "learning_rate": 1.2474506428062219e-06, |
| "loss": 0.14, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.14425483503981798, |
| "grad_norm": 2.9519743566943464, |
| "learning_rate": 1.2474425743483726e-06, |
| "loss": 0.237, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.14448236632536973, |
| "grad_norm": 1.4814831832284159, |
| "learning_rate": 1.2474344931689371e-06, |
| "loss": 0.0873, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1447098976109215, |
| "grad_norm": 2.0222816327136712, |
| "learning_rate": 1.2474263992680805e-06, |
| "loss": 0.155, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.14493742889647326, |
| "grad_norm": 2.0590304829666914, |
| "learning_rate": 1.247418292645968e-06, |
| "loss": 0.107, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.14516496018202502, |
| "grad_norm": 2.5562023131920633, |
| "learning_rate": 1.2474101733027659e-06, |
| "loss": 0.2256, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.1453924914675768, |
| "grad_norm": 2.3833084873555195, |
| "learning_rate": 1.2474020412386395e-06, |
| "loss": 0.1087, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.14562002275312855, |
| "grad_norm": 1.5076273114920544, |
| "learning_rate": 1.2473938964537551e-06, |
| "loss": 0.0893, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.14584755403868033, |
| "grad_norm": 2.3708066851044887, |
| "learning_rate": 1.2473857389482797e-06, |
| "loss": 0.1247, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.14607508532423208, |
| "grad_norm": 1.5590215080673084, |
| "learning_rate": 1.2473775687223794e-06, |
| "loss": 0.1504, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.14630261660978386, |
| "grad_norm": 1.6107910166409294, |
| "learning_rate": 1.2473693857762215e-06, |
| "loss": 0.149, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.1465301478953356, |
| "grad_norm": 1.7918533159116738, |
| "learning_rate": 1.247361190109973e-06, |
| "loss": 0.1104, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.14675767918088736, |
| "grad_norm": 2.8984966135096566, |
| "learning_rate": 1.2473529817238016e-06, |
| "loss": 0.1755, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.14698521046643914, |
| "grad_norm": 1.9091822418599347, |
| "learning_rate": 1.2473447606178754e-06, |
| "loss": 0.1077, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.1472127417519909, |
| "grad_norm": 4.199288030915391, |
| "learning_rate": 1.2473365267923617e-06, |
| "loss": 0.2124, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.14744027303754267, |
| "grad_norm": 2.331859473332942, |
| "learning_rate": 1.2473282802474293e-06, |
| "loss": 0.1576, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.14766780432309443, |
| "grad_norm": 3.5722786659910577, |
| "learning_rate": 1.2473200209832465e-06, |
| "loss": 0.2027, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.14789533560864618, |
| "grad_norm": 1.5390826591189062, |
| "learning_rate": 1.2473117489999823e-06, |
| "loss": 0.161, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14812286689419796, |
| "grad_norm": 2.741044883004237, |
| "learning_rate": 1.2473034642978057e-06, |
| "loss": 0.1656, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.1483503981797497, |
| "grad_norm": 2.2681711762464034, |
| "learning_rate": 1.247295166876886e-06, |
| "loss": 0.1254, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.1485779294653015, |
| "grad_norm": 2.2254637289761194, |
| "learning_rate": 1.2472868567373924e-06, |
| "loss": 0.1291, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.14880546075085324, |
| "grad_norm": 2.213517163461755, |
| "learning_rate": 1.2472785338794953e-06, |
| "loss": 0.1541, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.149032992036405, |
| "grad_norm": 1.6789308605390307, |
| "learning_rate": 1.247270198303365e-06, |
| "loss": 0.1316, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.14926052332195677, |
| "grad_norm": 2.179149997459725, |
| "learning_rate": 1.247261850009171e-06, |
| "loss": 0.2437, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.14948805460750852, |
| "grad_norm": 2.910894270371587, |
| "learning_rate": 1.2472534889970848e-06, |
| "loss": 0.2038, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.1497155858930603, |
| "grad_norm": 1.751607816792672, |
| "learning_rate": 1.2472451152672766e-06, |
| "loss": 0.1164, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.14994311717861206, |
| "grad_norm": 1.6602009490349432, |
| "learning_rate": 1.2472367288199177e-06, |
| "loss": 0.1193, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.15017064846416384, |
| "grad_norm": 2.038150970938399, |
| "learning_rate": 1.2472283296551798e-06, |
| "loss": 0.102, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.1503981797497156, |
| "grad_norm": 2.1439804373776936, |
| "learning_rate": 1.2472199177732346e-06, |
| "loss": 0.1502, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.15062571103526734, |
| "grad_norm": 2.5777822840030358, |
| "learning_rate": 1.2472114931742537e-06, |
| "loss": 0.1168, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.15085324232081912, |
| "grad_norm": 2.4175964563163177, |
| "learning_rate": 1.2472030558584093e-06, |
| "loss": 0.1035, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.15108077360637087, |
| "grad_norm": 2.635267423704016, |
| "learning_rate": 1.2471946058258742e-06, |
| "loss": 0.1701, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.15130830489192265, |
| "grad_norm": 1.9337561786859772, |
| "learning_rate": 1.2471861430768205e-06, |
| "loss": 0.1075, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1515358361774744, |
| "grad_norm": 1.7937795679496227, |
| "learning_rate": 1.2471776676114217e-06, |
| "loss": 0.1785, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.15176336746302616, |
| "grad_norm": 3.0588710289274816, |
| "learning_rate": 1.2471691794298508e-06, |
| "loss": 0.1798, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.15199089874857794, |
| "grad_norm": 2.638986072752188, |
| "learning_rate": 1.2471606785322814e-06, |
| "loss": 0.0878, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.1522184300341297, |
| "grad_norm": 2.732712357601826, |
| "learning_rate": 1.247152164918887e-06, |
| "loss": 0.1267, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.15244596131968147, |
| "grad_norm": 1.7481991977105777, |
| "learning_rate": 1.247143638589842e-06, |
| "loss": 0.1584, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.15267349260523322, |
| "grad_norm": 2.794672743532085, |
| "learning_rate": 1.2471350995453203e-06, |
| "loss": 0.1584, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.15290102389078497, |
| "grad_norm": 3.1279366528301633, |
| "learning_rate": 1.2471265477854966e-06, |
| "loss": 0.148, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.15312855517633675, |
| "grad_norm": 3.920575109905724, |
| "learning_rate": 1.2471179833105454e-06, |
| "loss": 0.1732, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.1533560864618885, |
| "grad_norm": 1.7916571238390178, |
| "learning_rate": 1.2471094061206422e-06, |
| "loss": 0.2336, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.15358361774744028, |
| "grad_norm": 1.7363850632393116, |
| "learning_rate": 1.247100816215962e-06, |
| "loss": 0.1244, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.15381114903299203, |
| "grad_norm": 2.504377712379844, |
| "learning_rate": 1.2470922135966806e-06, |
| "loss": 0.1674, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.1540386803185438, |
| "grad_norm": 2.43043947984636, |
| "learning_rate": 1.2470835982629736e-06, |
| "loss": 0.1249, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.15426621160409557, |
| "grad_norm": 3.950497364660697, |
| "learning_rate": 1.247074970215017e-06, |
| "loss": 0.2401, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.15449374288964732, |
| "grad_norm": 3.1492013494233846, |
| "learning_rate": 1.2470663294529873e-06, |
| "loss": 0.1605, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.1547212741751991, |
| "grad_norm": 1.80598204305421, |
| "learning_rate": 1.2470576759770612e-06, |
| "loss": 0.113, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.15494880546075085, |
| "grad_norm": 2.0454054940402506, |
| "learning_rate": 1.2470490097874155e-06, |
| "loss": 0.1453, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.15517633674630263, |
| "grad_norm": 3.6952564849548053, |
| "learning_rate": 1.247040330884227e-06, |
| "loss": 0.1581, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.15540386803185438, |
| "grad_norm": 2.3655397835651075, |
| "learning_rate": 1.2470316392676738e-06, |
| "loss": 0.169, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.15563139931740613, |
| "grad_norm": 3.416348712472315, |
| "learning_rate": 1.2470229349379326e-06, |
| "loss": 0.1347, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.1558589306029579, |
| "grad_norm": 2.618995350775909, |
| "learning_rate": 1.2470142178951822e-06, |
| "loss": 0.1924, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.15608646188850966, |
| "grad_norm": 1.344663220923034, |
| "learning_rate": 1.2470054881396002e-06, |
| "loss": 0.2013, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.15631399317406144, |
| "grad_norm": 1.1568986493989724, |
| "learning_rate": 1.246996745671365e-06, |
| "loss": 0.131, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.1565415244596132, |
| "grad_norm": 3.0558312091963473, |
| "learning_rate": 1.2469879904906556e-06, |
| "loss": 0.14, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.15676905574516495, |
| "grad_norm": 4.767157427966137, |
| "learning_rate": 1.2469792225976507e-06, |
| "loss": 0.156, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.15699658703071673, |
| "grad_norm": 1.9971770266956603, |
| "learning_rate": 1.2469704419925296e-06, |
| "loss": 0.1413, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.15722411831626848, |
| "grad_norm": 3.560138993273607, |
| "learning_rate": 1.246961648675472e-06, |
| "loss": 0.2274, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.15745164960182026, |
| "grad_norm": 1.8091873297743188, |
| "learning_rate": 1.246952842646657e-06, |
| "loss": 0.2606, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.157679180887372, |
| "grad_norm": 1.9524492716137443, |
| "learning_rate": 1.2469440239062653e-06, |
| "loss": 0.1888, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.15790671217292376, |
| "grad_norm": 1.978419283294589, |
| "learning_rate": 1.2469351924544766e-06, |
| "loss": 0.168, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.15813424345847554, |
| "grad_norm": 1.909977232991382, |
| "learning_rate": 1.2469263482914716e-06, |
| "loss": 0.1302, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1583617747440273, |
| "grad_norm": 2.786836009335205, |
| "learning_rate": 1.246917491417431e-06, |
| "loss": 0.1603, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.15858930602957907, |
| "grad_norm": 2.700038379786115, |
| "learning_rate": 1.246908621832536e-06, |
| "loss": 0.2268, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.15881683731513083, |
| "grad_norm": 1.4116863857464026, |
| "learning_rate": 1.2468997395369677e-06, |
| "loss": 0.1761, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.1590443686006826, |
| "grad_norm": 2.8928190492615133, |
| "learning_rate": 1.2468908445309077e-06, |
| "loss": 0.1789, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.15927189988623436, |
| "grad_norm": 1.650749552825084, |
| "learning_rate": 1.2468819368145376e-06, |
| "loss": 0.1324, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1594994311717861, |
| "grad_norm": 2.3722473947353677, |
| "learning_rate": 1.2468730163880398e-06, |
| "loss": 0.1116, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.1597269624573379, |
| "grad_norm": 2.879822957568519, |
| "learning_rate": 1.2468640832515962e-06, |
| "loss": 0.0564, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.15995449374288964, |
| "grad_norm": 2.162764734574199, |
| "learning_rate": 1.24685513740539e-06, |
| "loss": 0.1739, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.16018202502844142, |
| "grad_norm": 2.8968364936480206, |
| "learning_rate": 1.2468461788496036e-06, |
| "loss": 0.2091, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.16040955631399317, |
| "grad_norm": 1.8559610510087743, |
| "learning_rate": 1.24683720758442e-06, |
| "loss": 0.1533, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.16063708759954493, |
| "grad_norm": 2.184281056476426, |
| "learning_rate": 1.2468282236100226e-06, |
| "loss": 0.1582, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.1608646188850967, |
| "grad_norm": 1.3209438595657337, |
| "learning_rate": 1.2468192269265955e-06, |
| "loss": 0.1914, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.16109215017064846, |
| "grad_norm": 2.1470386790088174, |
| "learning_rate": 1.246810217534322e-06, |
| "loss": 0.0831, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.16131968145620024, |
| "grad_norm": 1.594792083731403, |
| "learning_rate": 1.2468011954333864e-06, |
| "loss": 0.1349, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.161547212741752, |
| "grad_norm": 1.9899900139983586, |
| "learning_rate": 1.2467921606239734e-06, |
| "loss": 0.1406, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.16177474402730374, |
| "grad_norm": 2.161056989124219, |
| "learning_rate": 1.2467831131062672e-06, |
| "loss": 0.1186, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.16200227531285552, |
| "grad_norm": 3.2786168252573438, |
| "learning_rate": 1.2467740528804528e-06, |
| "loss": 0.1525, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.16222980659840727, |
| "grad_norm": 2.152367629184536, |
| "learning_rate": 1.2467649799467156e-06, |
| "loss": 0.1403, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.16245733788395905, |
| "grad_norm": 2.658644939282435, |
| "learning_rate": 1.246755894305241e-06, |
| "loss": 0.1287, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.1626848691695108, |
| "grad_norm": 1.8320157906526173, |
| "learning_rate": 1.2467467959562143e-06, |
| "loss": 0.1489, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.16291240045506258, |
| "grad_norm": 3.0792158572997526, |
| "learning_rate": 1.2467376848998221e-06, |
| "loss": 0.1929, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.16313993174061434, |
| "grad_norm": 2.592666663523021, |
| "learning_rate": 1.2467285611362501e-06, |
| "loss": 0.1198, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.1633674630261661, |
| "grad_norm": 2.3270639642215123, |
| "learning_rate": 1.2467194246656851e-06, |
| "loss": 0.119, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.16359499431171787, |
| "grad_norm": 1.5662096056295784, |
| "learning_rate": 1.2467102754883136e-06, |
| "loss": 0.1488, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.16382252559726962, |
| "grad_norm": 2.0754259992407174, |
| "learning_rate": 1.2467011136043228e-06, |
| "loss": 0.1206, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1640500568828214, |
| "grad_norm": 2.377809704915352, |
| "learning_rate": 1.2466919390138995e-06, |
| "loss": 0.2349, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.16427758816837315, |
| "grad_norm": 2.1373727350700205, |
| "learning_rate": 1.246682751717232e-06, |
| "loss": 0.1333, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.1645051194539249, |
| "grad_norm": 3.8601459911234697, |
| "learning_rate": 1.2466735517145074e-06, |
| "loss": 0.3259, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.16473265073947668, |
| "grad_norm": 2.1273982856593614, |
| "learning_rate": 1.2466643390059138e-06, |
| "loss": 0.199, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.16496018202502843, |
| "grad_norm": 2.274158988300012, |
| "learning_rate": 1.2466551135916398e-06, |
| "loss": 0.1351, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.16518771331058021, |
| "grad_norm": 2.1566789936379287, |
| "learning_rate": 1.2466458754718737e-06, |
| "loss": 0.219, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.16541524459613197, |
| "grad_norm": 3.388462178150055, |
| "learning_rate": 1.2466366246468045e-06, |
| "loss": 0.1456, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.16564277588168372, |
| "grad_norm": 2.792548754369155, |
| "learning_rate": 1.246627361116621e-06, |
| "loss": 0.2178, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.1658703071672355, |
| "grad_norm": 1.7787275123381943, |
| "learning_rate": 1.246618084881513e-06, |
| "loss": 0.2584, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.16609783845278725, |
| "grad_norm": 2.150845029279013, |
| "learning_rate": 1.2466087959416695e-06, |
| "loss": 0.1474, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16632536973833903, |
| "grad_norm": 3.4162019984229213, |
| "learning_rate": 1.2465994942972805e-06, |
| "loss": 0.1415, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.16655290102389078, |
| "grad_norm": 3.5172418167047743, |
| "learning_rate": 1.2465901799485366e-06, |
| "loss": 0.2267, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.16678043230944256, |
| "grad_norm": 1.9664520821504867, |
| "learning_rate": 1.2465808528956277e-06, |
| "loss": 0.1027, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.1670079635949943, |
| "grad_norm": 2.053925645911197, |
| "learning_rate": 1.2465715131387446e-06, |
| "loss": 0.1405, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.16723549488054607, |
| "grad_norm": 1.6417683696863474, |
| "learning_rate": 1.2465621606780778e-06, |
| "loss": 0.1804, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.16746302616609784, |
| "grad_norm": 1.9532511665276102, |
| "learning_rate": 1.2465527955138191e-06, |
| "loss": 0.1438, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.1676905574516496, |
| "grad_norm": 2.7978077296538295, |
| "learning_rate": 1.2465434176461596e-06, |
| "loss": 0.1806, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.16791808873720138, |
| "grad_norm": 1.7861222447513503, |
| "learning_rate": 1.2465340270752908e-06, |
| "loss": 0.0953, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.16814562002275313, |
| "grad_norm": 1.2545980680473232, |
| "learning_rate": 1.2465246238014047e-06, |
| "loss": 0.0881, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.16837315130830488, |
| "grad_norm": 2.49195685975364, |
| "learning_rate": 1.2465152078246936e-06, |
| "loss": 0.1643, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16860068259385666, |
| "grad_norm": 2.0211233157427637, |
| "learning_rate": 1.24650577914535e-06, |
| "loss": 0.1263, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.1688282138794084, |
| "grad_norm": 2.7858317155477317, |
| "learning_rate": 1.2464963377635667e-06, |
| "loss": 0.1547, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.1690557451649602, |
| "grad_norm": 1.7097291360774547, |
| "learning_rate": 1.246486883679536e-06, |
| "loss": 0.2516, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.16928327645051194, |
| "grad_norm": 3.9137648292026737, |
| "learning_rate": 1.246477416893452e-06, |
| "loss": 0.2036, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.1695108077360637, |
| "grad_norm": 3.005605654107358, |
| "learning_rate": 1.2464679374055074e-06, |
| "loss": 0.1481, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.16973833902161548, |
| "grad_norm": 3.401532765227879, |
| "learning_rate": 1.2464584452158968e-06, |
| "loss": 0.1841, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.16996587030716723, |
| "grad_norm": 2.843140048954733, |
| "learning_rate": 1.2464489403248133e-06, |
| "loss": 0.184, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.170193401592719, |
| "grad_norm": 1.515779223289782, |
| "learning_rate": 1.246439422732452e-06, |
| "loss": 0.1262, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.17042093287827076, |
| "grad_norm": 2.618293101772126, |
| "learning_rate": 1.2464298924390066e-06, |
| "loss": 0.1415, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.17064846416382254, |
| "grad_norm": 2.248269138511338, |
| "learning_rate": 1.2464203494446725e-06, |
| "loss": 0.185, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1708759954493743, |
| "grad_norm": 1.3558978429200024, |
| "learning_rate": 1.2464107937496444e-06, |
| "loss": 0.096, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.17110352673492604, |
| "grad_norm": 1.8355286869437153, |
| "learning_rate": 1.246401225354118e-06, |
| "loss": 0.0936, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.17133105802047782, |
| "grad_norm": 2.611386377303649, |
| "learning_rate": 1.2463916442582883e-06, |
| "loss": 0.2058, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.17155858930602957, |
| "grad_norm": 1.81511526173022, |
| "learning_rate": 1.2463820504623516e-06, |
| "loss": 0.0722, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.17178612059158135, |
| "grad_norm": 1.6836561465138316, |
| "learning_rate": 1.246372443966504e-06, |
| "loss": 0.1419, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.1720136518771331, |
| "grad_norm": 3.189715404864015, |
| "learning_rate": 1.246362824770941e-06, |
| "loss": 0.1604, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.17224118316268486, |
| "grad_norm": 2.8556456489625193, |
| "learning_rate": 1.2463531928758605e-06, |
| "loss": 0.1793, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.17246871444823664, |
| "grad_norm": 2.1490228034084344, |
| "learning_rate": 1.2463435482814585e-06, |
| "loss": 0.1928, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.1726962457337884, |
| "grad_norm": 1.866877451814791, |
| "learning_rate": 1.246333890987932e-06, |
| "loss": 0.2064, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.17292377701934017, |
| "grad_norm": 2.7361601673612284, |
| "learning_rate": 1.246324220995479e-06, |
| "loss": 0.1024, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.17315130830489192, |
| "grad_norm": 3.6715173407277004, |
| "learning_rate": 1.2463145383042966e-06, |
| "loss": 0.1741, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.17337883959044367, |
| "grad_norm": 4.388914943676026, |
| "learning_rate": 1.2463048429145832e-06, |
| "loss": 0.2951, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.17360637087599545, |
| "grad_norm": 3.0864567661578075, |
| "learning_rate": 1.2462951348265364e-06, |
| "loss": 0.1681, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.1738339021615472, |
| "grad_norm": 2.2429137189515487, |
| "learning_rate": 1.2462854140403553e-06, |
| "loss": 0.1698, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.17406143344709898, |
| "grad_norm": 3.7655750343422487, |
| "learning_rate": 1.2462756805562378e-06, |
| "loss": 0.1972, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.17428896473265074, |
| "grad_norm": 1.4821109763148475, |
| "learning_rate": 1.2462659343743832e-06, |
| "loss": 0.1144, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.17451649601820252, |
| "grad_norm": 2.9261323093043234, |
| "learning_rate": 1.2462561754949908e-06, |
| "loss": 0.1354, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.17474402730375427, |
| "grad_norm": 2.021278631174851, |
| "learning_rate": 1.2462464039182598e-06, |
| "loss": 0.1158, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.17497155858930602, |
| "grad_norm": 2.189903163956334, |
| "learning_rate": 1.2462366196443903e-06, |
| "loss": 0.1587, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.1751990898748578, |
| "grad_norm": 3.7285174958892364, |
| "learning_rate": 1.246226822673582e-06, |
| "loss": 0.2024, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.17542662116040955, |
| "grad_norm": 1.9007743093993184, |
| "learning_rate": 1.2462170130060351e-06, |
| "loss": 0.1025, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.17565415244596133, |
| "grad_norm": 3.3341124392840134, |
| "learning_rate": 1.24620719064195e-06, |
| "loss": 0.1718, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.17588168373151308, |
| "grad_norm": 2.271177623744295, |
| "learning_rate": 1.246197355581528e-06, |
| "loss": 0.1713, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.17610921501706484, |
| "grad_norm": 2.631276315974309, |
| "learning_rate": 1.2461875078249694e-06, |
| "loss": 0.1769, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.17633674630261661, |
| "grad_norm": 2.2924143983188765, |
| "learning_rate": 1.246177647372476e-06, |
| "loss": 0.1155, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.17656427758816837, |
| "grad_norm": 4.145219852575127, |
| "learning_rate": 1.246167774224249e-06, |
| "loss": 0.1997, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.17679180887372015, |
| "grad_norm": 3.5955716696986237, |
| "learning_rate": 1.2461578883804903e-06, |
| "loss": 0.1434, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.1770193401592719, |
| "grad_norm": 3.5823237759342477, |
| "learning_rate": 1.246147989841402e-06, |
| "loss": 0.131, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.17724687144482365, |
| "grad_norm": 1.7885388560764315, |
| "learning_rate": 1.2461380786071863e-06, |
| "loss": 0.0755, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.17747440273037543, |
| "grad_norm": 2.362853335883513, |
| "learning_rate": 1.246128154678046e-06, |
| "loss": 0.1285, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.17770193401592718, |
| "grad_norm": 2.826403481752188, |
| "learning_rate": 1.2461182180541835e-06, |
| "loss": 0.0898, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.17792946530147896, |
| "grad_norm": 5.793503549962082, |
| "learning_rate": 1.2461082687358022e-06, |
| "loss": 0.0971, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.1781569965870307, |
| "grad_norm": 1.8035940463938722, |
| "learning_rate": 1.2460983067231055e-06, |
| "loss": 0.1105, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.1783845278725825, |
| "grad_norm": 2.3286047675537613, |
| "learning_rate": 1.246088332016297e-06, |
| "loss": 0.0997, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.17861205915813425, |
| "grad_norm": 2.4331158536688067, |
| "learning_rate": 1.2460783446155802e-06, |
| "loss": 0.2145, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.178839590443686, |
| "grad_norm": 2.4301917574272234, |
| "learning_rate": 1.2460683445211596e-06, |
| "loss": 0.1826, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.17906712172923778, |
| "grad_norm": 3.191042960124482, |
| "learning_rate": 1.2460583317332395e-06, |
| "loss": 0.2224, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.17929465301478953, |
| "grad_norm": 1.9281932990563415, |
| "learning_rate": 1.2460483062520246e-06, |
| "loss": 0.1012, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.1795221843003413, |
| "grad_norm": 1.9401318974845003, |
| "learning_rate": 1.2460382680777196e-06, |
| "loss": 0.0761, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.17974971558589306, |
| "grad_norm": 13.086161362963225, |
| "learning_rate": 1.2460282172105298e-06, |
| "loss": 0.2088, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1799772468714448, |
| "grad_norm": 1.4783130702588718, |
| "learning_rate": 1.2460181536506608e-06, |
| "loss": 0.2126, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.1802047781569966, |
| "grad_norm": 2.4964786740518763, |
| "learning_rate": 1.2460080773983177e-06, |
| "loss": 0.1385, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.18043230944254834, |
| "grad_norm": 2.7778972521749545, |
| "learning_rate": 1.2459979884537072e-06, |
| "loss": 0.1448, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.18065984072810012, |
| "grad_norm": 2.167813491126184, |
| "learning_rate": 1.2459878868170348e-06, |
| "loss": 0.1379, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.18088737201365188, |
| "grad_norm": 1.9654699615947284, |
| "learning_rate": 1.2459777724885075e-06, |
| "loss": 0.1314, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.18111490329920363, |
| "grad_norm": 2.293952257528565, |
| "learning_rate": 1.2459676454683318e-06, |
| "loss": 0.1695, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.1813424345847554, |
| "grad_norm": 3.9215044200778144, |
| "learning_rate": 1.2459575057567144e-06, |
| "loss": 0.2204, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.18156996587030716, |
| "grad_norm": 2.8214133097210117, |
| "learning_rate": 1.245947353353863e-06, |
| "loss": 0.1558, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.18179749715585894, |
| "grad_norm": 5.317020653859289, |
| "learning_rate": 1.245937188259985e-06, |
| "loss": 0.2603, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.1820250284414107, |
| "grad_norm": 4.004955818619992, |
| "learning_rate": 1.245927010475288e-06, |
| "loss": 0.1196, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18225255972696247, |
| "grad_norm": 3.792524464667178, |
| "learning_rate": 1.24591681999998e-06, |
| "loss": 0.1821, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.18248009101251422, |
| "grad_norm": 2.813011742342484, |
| "learning_rate": 1.2459066168342693e-06, |
| "loss": 0.1513, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.18270762229806597, |
| "grad_norm": 3.511510747002315, |
| "learning_rate": 1.2458964009783646e-06, |
| "loss": 0.2163, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.18293515358361775, |
| "grad_norm": 2.802158661308834, |
| "learning_rate": 1.2458861724324745e-06, |
| "loss": 0.1963, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.1831626848691695, |
| "grad_norm": 3.64850186041969, |
| "learning_rate": 1.2458759311968084e-06, |
| "loss": 0.303, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.1833902161547213, |
| "grad_norm": 2.6182595326596725, |
| "learning_rate": 1.245865677271575e-06, |
| "loss": 0.1456, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.18361774744027304, |
| "grad_norm": 2.399741320725503, |
| "learning_rate": 1.2458554106569844e-06, |
| "loss": 0.2288, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.1838452787258248, |
| "grad_norm": 1.252106549654472, |
| "learning_rate": 1.2458451313532463e-06, |
| "loss": 0.0801, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.18407281001137657, |
| "grad_norm": 3.696224132577839, |
| "learning_rate": 1.2458348393605708e-06, |
| "loss": 0.2059, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.18430034129692832, |
| "grad_norm": 1.3783330613855644, |
| "learning_rate": 1.2458245346791678e-06, |
| "loss": 0.1164, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.1845278725824801, |
| "grad_norm": 1.5623432135982267, |
| "learning_rate": 1.2458142173092486e-06, |
| "loss": 0.176, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.18475540386803185, |
| "grad_norm": 6.552053967433837, |
| "learning_rate": 1.2458038872510237e-06, |
| "loss": 0.118, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.1849829351535836, |
| "grad_norm": 3.2237210845046964, |
| "learning_rate": 1.2457935445047042e-06, |
| "loss": 0.1875, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.18521046643913538, |
| "grad_norm": 1.7463109516387256, |
| "learning_rate": 1.2457831890705018e-06, |
| "loss": 0.1945, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.18543799772468714, |
| "grad_norm": 2.8292409598595953, |
| "learning_rate": 1.2457728209486279e-06, |
| "loss": 0.1711, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.18566552901023892, |
| "grad_norm": 3.198074487753419, |
| "learning_rate": 1.2457624401392943e-06, |
| "loss": 0.2552, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.18589306029579067, |
| "grad_norm": 3.2293783551138278, |
| "learning_rate": 1.2457520466427135e-06, |
| "loss": 0.1955, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.18612059158134245, |
| "grad_norm": 2.5604778410965383, |
| "learning_rate": 1.2457416404590974e-06, |
| "loss": 0.1689, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.1863481228668942, |
| "grad_norm": 2.4475267016374427, |
| "learning_rate": 1.2457312215886592e-06, |
| "loss": 0.1165, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.18657565415244595, |
| "grad_norm": 1.9856047790588058, |
| "learning_rate": 1.2457207900316115e-06, |
| "loss": 0.195, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.18680318543799773, |
| "grad_norm": 3.030251865029441, |
| "learning_rate": 1.245710345788168e-06, |
| "loss": 0.2233, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.18703071672354948, |
| "grad_norm": 6.914472069589314, |
| "learning_rate": 1.2456998888585414e-06, |
| "loss": 0.1294, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.18725824800910126, |
| "grad_norm": 1.5392801223632877, |
| "learning_rate": 1.245689419242946e-06, |
| "loss": 0.1031, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.18748577929465302, |
| "grad_norm": 1.5563008585328006, |
| "learning_rate": 1.2456789369415955e-06, |
| "loss": 0.1233, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.18771331058020477, |
| "grad_norm": 1.5005319006316646, |
| "learning_rate": 1.2456684419547044e-06, |
| "loss": 0.1698, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.18794084186575655, |
| "grad_norm": 2.5311436309198245, |
| "learning_rate": 1.245657934282487e-06, |
| "loss": 0.1242, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.1881683731513083, |
| "grad_norm": 1.3382771790085715, |
| "learning_rate": 1.245647413925158e-06, |
| "loss": 0.1173, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.18839590443686008, |
| "grad_norm": 2.455502403566395, |
| "learning_rate": 1.2456368808829327e-06, |
| "loss": 0.0912, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.18862343572241183, |
| "grad_norm": 2.9752303589937212, |
| "learning_rate": 1.2456263351560261e-06, |
| "loss": 0.2599, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.18885096700796358, |
| "grad_norm": 5.043835077918359, |
| "learning_rate": 1.2456157767446538e-06, |
| "loss": 0.1609, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.18907849829351536, |
| "grad_norm": 2.756359704558054, |
| "learning_rate": 1.245605205649032e-06, |
| "loss": 0.1323, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.18930602957906711, |
| "grad_norm": 1.835440265718024, |
| "learning_rate": 1.245594621869376e-06, |
| "loss": 0.2094, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.1895335608646189, |
| "grad_norm": 1.2880237601014817, |
| "learning_rate": 1.2455840254059026e-06, |
| "loss": 0.1085, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.18976109215017065, |
| "grad_norm": 1.4808086873300856, |
| "learning_rate": 1.2455734162588282e-06, |
| "loss": 0.1067, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.1899886234357224, |
| "grad_norm": 2.3351983872627597, |
| "learning_rate": 1.2455627944283697e-06, |
| "loss": 0.1493, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.19021615472127418, |
| "grad_norm": 2.422722379821762, |
| "learning_rate": 1.245552159914744e-06, |
| "loss": 0.1387, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.19044368600682593, |
| "grad_norm": 2.2005548282870477, |
| "learning_rate": 1.245541512718169e-06, |
| "loss": 0.1047, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.1906712172923777, |
| "grad_norm": 2.379475571028047, |
| "learning_rate": 1.245530852838862e-06, |
| "loss": 0.1524, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.19089874857792946, |
| "grad_norm": 1.669935289366072, |
| "learning_rate": 1.2455201802770405e-06, |
| "loss": 0.157, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.19112627986348124, |
| "grad_norm": 2.357020791051429, |
| "learning_rate": 1.245509495032923e-06, |
| "loss": 0.2156, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.191353811149033, |
| "grad_norm": 3.871602599108809, |
| "learning_rate": 1.2454987971067278e-06, |
| "loss": 0.1557, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.19158134243458474, |
| "grad_norm": 2.5332197020943887, |
| "learning_rate": 1.2454880864986737e-06, |
| "loss": 0.1644, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.19180887372013652, |
| "grad_norm": 3.1286962973408596, |
| "learning_rate": 1.2454773632089795e-06, |
| "loss": 0.0794, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.19203640500568828, |
| "grad_norm": 2.3210649274985666, |
| "learning_rate": 1.2454666272378644e-06, |
| "loss": 0.129, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.19226393629124006, |
| "grad_norm": 3.000200402253768, |
| "learning_rate": 1.2454558785855475e-06, |
| "loss": 0.1628, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.1924914675767918, |
| "grad_norm": 2.3643323080869902, |
| "learning_rate": 1.245445117252249e-06, |
| "loss": 0.1345, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.19271899886234356, |
| "grad_norm": 2.532625203594351, |
| "learning_rate": 1.2454343432381886e-06, |
| "loss": 0.2082, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.19294653014789534, |
| "grad_norm": 1.9628657145639428, |
| "learning_rate": 1.2454235565435862e-06, |
| "loss": 0.0782, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.1931740614334471, |
| "grad_norm": 1.609178421923729, |
| "learning_rate": 1.2454127571686629e-06, |
| "loss": 0.1405, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.19340159271899887, |
| "grad_norm": 1.7728115247069527, |
| "learning_rate": 1.245401945113639e-06, |
| "loss": 0.203, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.19362912400455062, |
| "grad_norm": 3.2450475274049118, |
| "learning_rate": 1.2453911203787355e-06, |
| "loss": 0.1524, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.19385665529010238, |
| "grad_norm": 22.097060091469434, |
| "learning_rate": 1.2453802829641736e-06, |
| "loss": 0.2636, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.19408418657565416, |
| "grad_norm": 2.5365065820289496, |
| "learning_rate": 1.2453694328701752e-06, |
| "loss": 0.1019, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.1943117178612059, |
| "grad_norm": 2.090322149834491, |
| "learning_rate": 1.2453585700969614e-06, |
| "loss": 0.1498, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.1945392491467577, |
| "grad_norm": 2.6606765925685787, |
| "learning_rate": 1.2453476946447547e-06, |
| "loss": 0.1398, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.19476678043230944, |
| "grad_norm": 3.56083888144899, |
| "learning_rate": 1.2453368065137772e-06, |
| "loss": 0.1463, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.19499431171786122, |
| "grad_norm": 2.1276836242796793, |
| "learning_rate": 1.2453259057042514e-06, |
| "loss": 0.1753, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.19522184300341297, |
| "grad_norm": 2.5690977004159805, |
| "learning_rate": 1.2453149922164003e-06, |
| "loss": 0.1292, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.19544937428896472, |
| "grad_norm": 4.345742784369693, |
| "learning_rate": 1.2453040660504468e-06, |
| "loss": 0.15, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.1956769055745165, |
| "grad_norm": 3.118246879884093, |
| "learning_rate": 1.2452931272066141e-06, |
| "loss": 0.169, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.19590443686006825, |
| "grad_norm": 2.68254786515319, |
| "learning_rate": 1.245282175685126e-06, |
| "loss": 0.157, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.19613196814562003, |
| "grad_norm": 2.088476673647213, |
| "learning_rate": 1.2452712114862063e-06, |
| "loss": 0.1782, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.19635949943117179, |
| "grad_norm": 1.568141769132608, |
| "learning_rate": 1.245260234610079e-06, |
| "loss": 0.1295, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.19658703071672354, |
| "grad_norm": 2.186319656948205, |
| "learning_rate": 1.2452492450569682e-06, |
| "loss": 0.1734, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.19681456200227532, |
| "grad_norm": 2.7655739546712135, |
| "learning_rate": 1.245238242827099e-06, |
| "loss": 0.1694, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.19704209328782707, |
| "grad_norm": 3.0373302408208196, |
| "learning_rate": 1.245227227920696e-06, |
| "loss": 0.1356, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.19726962457337885, |
| "grad_norm": 2.1820099415146914, |
| "learning_rate": 1.2452162003379842e-06, |
| "loss": 0.2082, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.1974971558589306, |
| "grad_norm": 3.6721625065681827, |
| "learning_rate": 1.2452051600791891e-06, |
| "loss": 0.1915, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.19772468714448235, |
| "grad_norm": 6.490462296454016, |
| "learning_rate": 1.2451941071445367e-06, |
| "loss": 0.1815, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.19795221843003413, |
| "grad_norm": 3.246518762107006, |
| "learning_rate": 1.2451830415342524e-06, |
| "loss": 0.137, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19817974971558588, |
| "grad_norm": 2.7033364330836873, |
| "learning_rate": 1.2451719632485627e-06, |
| "loss": 0.1317, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.19840728100113766, |
| "grad_norm": 3.30778551761739, |
| "learning_rate": 1.2451608722876938e-06, |
| "loss": 0.1099, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.19863481228668942, |
| "grad_norm": 2.2687509460631294, |
| "learning_rate": 1.2451497686518722e-06, |
| "loss": 0.1361, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.1988623435722412, |
| "grad_norm": 1.641721237453431, |
| "learning_rate": 1.2451386523413252e-06, |
| "loss": 0.1052, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.19908987485779295, |
| "grad_norm": 2.206444085506852, |
| "learning_rate": 1.24512752335628e-06, |
| "loss": 0.1018, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.1993174061433447, |
| "grad_norm": 2.210652731669232, |
| "learning_rate": 1.2451163816969639e-06, |
| "loss": 0.1879, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.19954493742889648, |
| "grad_norm": 2.085600222270482, |
| "learning_rate": 1.2451052273636045e-06, |
| "loss": 0.127, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.19977246871444823, |
| "grad_norm": 2.6309536592299705, |
| "learning_rate": 1.24509406035643e-06, |
| "loss": 0.1678, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 4.158698099165945, |
| "learning_rate": 1.2450828806756685e-06, |
| "loss": 0.2095, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.20022753128555176, |
| "grad_norm": 2.602198490586786, |
| "learning_rate": 1.245071688321549e-06, |
| "loss": 0.1436, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.20045506257110352, |
| "grad_norm": 2.252594865848713, |
| "learning_rate": 1.2450604832942991e-06, |
| "loss": 0.1231, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.2006825938566553, |
| "grad_norm": 1.912453352899942, |
| "learning_rate": 1.245049265594149e-06, |
| "loss": 0.1408, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.20091012514220705, |
| "grad_norm": 3.264942350461524, |
| "learning_rate": 1.2450380352213271e-06, |
| "loss": 0.1697, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.20113765642775883, |
| "grad_norm": 2.415399674888119, |
| "learning_rate": 1.2450267921760636e-06, |
| "loss": 0.1331, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.20136518771331058, |
| "grad_norm": 2.62867521080006, |
| "learning_rate": 1.2450155364585878e-06, |
| "loss": 0.1217, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.20159271899886233, |
| "grad_norm": 2.3552959017058477, |
| "learning_rate": 1.2450042680691301e-06, |
| "loss": 0.1216, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.2018202502844141, |
| "grad_norm": 1.4369969713280852, |
| "learning_rate": 1.2449929870079206e-06, |
| "loss": 0.1282, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.20204778156996586, |
| "grad_norm": 2.305787931213179, |
| "learning_rate": 1.24498169327519e-06, |
| "loss": 0.1076, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.20227531285551764, |
| "grad_norm": 1.7868835912702514, |
| "learning_rate": 1.2449703868711688e-06, |
| "loss": 0.1225, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.2025028441410694, |
| "grad_norm": 2.1124657583403494, |
| "learning_rate": 1.2449590677960886e-06, |
| "loss": 0.1765, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.20273037542662117, |
| "grad_norm": 1.6102832172606196, |
| "learning_rate": 1.2449477360501802e-06, |
| "loss": 0.0719, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.20295790671217293, |
| "grad_norm": 3.8988824882283843, |
| "learning_rate": 1.2449363916336756e-06, |
| "loss": 0.1854, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.20318543799772468, |
| "grad_norm": 3.2116126604298882, |
| "learning_rate": 1.2449250345468065e-06, |
| "loss": 0.2028, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.20341296928327646, |
| "grad_norm": 2.083882159988442, |
| "learning_rate": 1.244913664789805e-06, |
| "loss": 0.1337, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.2036405005688282, |
| "grad_norm": 1.8394649372022975, |
| "learning_rate": 1.2449022823629036e-06, |
| "loss": 0.1205, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.20386803185438, |
| "grad_norm": 2.6323013014057004, |
| "learning_rate": 1.2448908872663347e-06, |
| "loss": 0.1133, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.20409556313993174, |
| "grad_norm": 1.8291857038844686, |
| "learning_rate": 1.2448794795003313e-06, |
| "loss": 0.1142, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.2043230944254835, |
| "grad_norm": 1.7184606914815217, |
| "learning_rate": 1.2448680590651269e-06, |
| "loss": 0.1222, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.20455062571103527, |
| "grad_norm": 2.7034652156706716, |
| "learning_rate": 1.2448566259609543e-06, |
| "loss": 0.1991, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.20477815699658702, |
| "grad_norm": 2.5930455129642653, |
| "learning_rate": 1.2448451801880476e-06, |
| "loss": 0.1085, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2050056882821388, |
| "grad_norm": 2.44560677998223, |
| "learning_rate": 1.2448337217466404e-06, |
| "loss": 0.1735, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.20523321956769056, |
| "grad_norm": 2.257000828394708, |
| "learning_rate": 1.2448222506369675e-06, |
| "loss": 0.1118, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.2054607508532423, |
| "grad_norm": 2.5459054260546323, |
| "learning_rate": 1.2448107668592626e-06, |
| "loss": 0.1975, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.2056882821387941, |
| "grad_norm": 5.093888329917388, |
| "learning_rate": 1.244799270413761e-06, |
| "loss": 0.2277, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.20591581342434584, |
| "grad_norm": 4.116266489839909, |
| "learning_rate": 1.2447877613006972e-06, |
| "loss": 0.2004, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.20614334470989762, |
| "grad_norm": 1.8199951318249294, |
| "learning_rate": 1.244776239520307e-06, |
| "loss": 0.2131, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.20637087599544937, |
| "grad_norm": 2.7663340604707267, |
| "learning_rate": 1.244764705072825e-06, |
| "loss": 0.2145, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.20659840728100115, |
| "grad_norm": 1.8748872621346087, |
| "learning_rate": 1.2447531579584878e-06, |
| "loss": 0.1327, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.2068259385665529, |
| "grad_norm": 3.4272822632320237, |
| "learning_rate": 1.2447415981775312e-06, |
| "loss": 0.2198, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.20705346985210465, |
| "grad_norm": 3.1215491420073396, |
| "learning_rate": 1.2447300257301912e-06, |
| "loss": 0.1342, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.20728100113765643, |
| "grad_norm": 2.5239722345332396, |
| "learning_rate": 1.2447184406167045e-06, |
| "loss": 0.1868, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.2075085324232082, |
| "grad_norm": 1.9655955083845185, |
| "learning_rate": 1.2447068428373077e-06, |
| "loss": 0.1769, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.20773606370875997, |
| "grad_norm": 3.157478086474276, |
| "learning_rate": 1.244695232392238e-06, |
| "loss": 0.1824, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.20796359499431172, |
| "grad_norm": 1.9386984879122342, |
| "learning_rate": 1.2446836092817328e-06, |
| "loss": 0.1036, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.20819112627986347, |
| "grad_norm": 2.2587342441489997, |
| "learning_rate": 1.2446719735060293e-06, |
| "loss": 0.2175, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.20841865756541525, |
| "grad_norm": 2.3841098586953846, |
| "learning_rate": 1.2446603250653658e-06, |
| "loss": 0.1917, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.208646188850967, |
| "grad_norm": 2.0643080194861496, |
| "learning_rate": 1.24464866395998e-06, |
| "loss": 0.1276, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.20887372013651878, |
| "grad_norm": 1.1445975014034748, |
| "learning_rate": 1.2446369901901102e-06, |
| "loss": 0.0884, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.20910125142207053, |
| "grad_norm": 3.359267538919808, |
| "learning_rate": 1.2446253037559952e-06, |
| "loss": 0.1214, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.20932878270762229, |
| "grad_norm": 2.1583486474112927, |
| "learning_rate": 1.2446136046578739e-06, |
| "loss": 0.1093, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.20955631399317406, |
| "grad_norm": 2.692763960200507, |
| "learning_rate": 1.2446018928959853e-06, |
| "loss": 0.2289, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.20978384527872582, |
| "grad_norm": 2.356276890733175, |
| "learning_rate": 1.2445901684705685e-06, |
| "loss": 0.2222, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.2100113765642776, |
| "grad_norm": 2.596476104334523, |
| "learning_rate": 1.2445784313818638e-06, |
| "loss": 0.1574, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.21023890784982935, |
| "grad_norm": 2.788233818738729, |
| "learning_rate": 1.2445666816301102e-06, |
| "loss": 0.1303, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.21046643913538113, |
| "grad_norm": 2.3013258694625245, |
| "learning_rate": 1.2445549192155487e-06, |
| "loss": 0.2232, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.21069397042093288, |
| "grad_norm": 2.364410552617768, |
| "learning_rate": 1.244543144138419e-06, |
| "loss": 0.1967, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.21092150170648463, |
| "grad_norm": 1.4320620142185012, |
| "learning_rate": 1.2445313563989624e-06, |
| "loss": 0.1533, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.2111490329920364, |
| "grad_norm": 1.8979786639459473, |
| "learning_rate": 1.2445195559974194e-06, |
| "loss": 0.1494, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.21137656427758816, |
| "grad_norm": 2.1174466003626446, |
| "learning_rate": 1.244507742934031e-06, |
| "loss": 0.1973, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.21160409556313994, |
| "grad_norm": 2.164188059326067, |
| "learning_rate": 1.2444959172090393e-06, |
| "loss": 0.1336, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2118316268486917, |
| "grad_norm": 1.5503789009056947, |
| "learning_rate": 1.2444840788226854e-06, |
| "loss": 0.1948, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.21205915813424345, |
| "grad_norm": 1.8654319466920093, |
| "learning_rate": 1.2444722277752114e-06, |
| "loss": 0.2043, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.21228668941979523, |
| "grad_norm": 2.020474941013341, |
| "learning_rate": 1.2444603640668596e-06, |
| "loss": 0.2211, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.21251422070534698, |
| "grad_norm": 2.0138343922511206, |
| "learning_rate": 1.2444484876978725e-06, |
| "loss": 0.1402, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.21274175199089876, |
| "grad_norm": 1.5804379894073013, |
| "learning_rate": 1.2444365986684929e-06, |
| "loss": 0.1311, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.2129692832764505, |
| "grad_norm": 2.2151819679335367, |
| "learning_rate": 1.2444246969789633e-06, |
| "loss": 0.0884, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.21319681456200226, |
| "grad_norm": 2.4707341962723834, |
| "learning_rate": 1.2444127826295277e-06, |
| "loss": 0.1138, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.21342434584755404, |
| "grad_norm": 2.142646726979162, |
| "learning_rate": 1.244400855620429e-06, |
| "loss": 0.1234, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.2136518771331058, |
| "grad_norm": 1.3461044168942922, |
| "learning_rate": 1.2443889159519113e-06, |
| "loss": 0.0966, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.21387940841865757, |
| "grad_norm": 2.824705608850421, |
| "learning_rate": 1.2443769636242185e-06, |
| "loss": 0.1736, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.21410693970420933, |
| "grad_norm": 3.3926592270656526, |
| "learning_rate": 1.244364998637595e-06, |
| "loss": 0.102, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.2143344709897611, |
| "grad_norm": 2.1478829302272278, |
| "learning_rate": 1.2443530209922848e-06, |
| "loss": 0.0958, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.21456200227531286, |
| "grad_norm": 2.084791701381943, |
| "learning_rate": 1.2443410306885337e-06, |
| "loss": 0.128, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.2147895335608646, |
| "grad_norm": 2.667044034523646, |
| "learning_rate": 1.244329027726586e-06, |
| "loss": 0.2088, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.2150170648464164, |
| "grad_norm": 1.4354076627961647, |
| "learning_rate": 1.2443170121066872e-06, |
| "loss": 0.1295, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.21524459613196814, |
| "grad_norm": 3.608014557262876, |
| "learning_rate": 1.2443049838290827e-06, |
| "loss": 0.1479, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.21547212741751992, |
| "grad_norm": 2.4907426669888424, |
| "learning_rate": 1.2442929428940186e-06, |
| "loss": 0.2094, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.21569965870307167, |
| "grad_norm": 1.889292577370491, |
| "learning_rate": 1.2442808893017414e-06, |
| "loss": 0.1182, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.21592718998862342, |
| "grad_norm": 1.295703999044032, |
| "learning_rate": 1.2442688230524965e-06, |
| "loss": 0.1493, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.2161547212741752, |
| "grad_norm": 3.010053578949512, |
| "learning_rate": 1.244256744146531e-06, |
| "loss": 0.1837, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.21638225255972696, |
| "grad_norm": 2.2542440250817357, |
| "learning_rate": 1.244244652584092e-06, |
| "loss": 0.2011, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.21660978384527874, |
| "grad_norm": 1.8471360091007536, |
| "learning_rate": 1.2442325483654263e-06, |
| "loss": 0.1529, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.2168373151308305, |
| "grad_norm": 3.360264898638295, |
| "learning_rate": 1.2442204314907812e-06, |
| "loss": 0.1952, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.21706484641638224, |
| "grad_norm": 2.2836983418694308, |
| "learning_rate": 1.2442083019604047e-06, |
| "loss": 0.2068, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.21729237770193402, |
| "grad_norm": 2.534259478561885, |
| "learning_rate": 1.2441961597745447e-06, |
| "loss": 0.131, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.21751990898748577, |
| "grad_norm": 2.116332324988344, |
| "learning_rate": 1.244184004933449e-06, |
| "loss": 0.1433, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.21774744027303755, |
| "grad_norm": 1.9239447267712195, |
| "learning_rate": 1.2441718374373662e-06, |
| "loss": 0.1296, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.2179749715585893, |
| "grad_norm": 3.11283517907892, |
| "learning_rate": 1.244159657286545e-06, |
| "loss": 0.1556, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.21820250284414108, |
| "grad_norm": 2.1030310163998, |
| "learning_rate": 1.2441474644812345e-06, |
| "loss": 0.1398, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.21843003412969283, |
| "grad_norm": 2.6301386027385734, |
| "learning_rate": 1.2441352590216836e-06, |
| "loss": 0.1328, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2186575654152446, |
| "grad_norm": 1.6843043929069075, |
| "learning_rate": 1.244123040908142e-06, |
| "loss": 0.2169, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.21888509670079637, |
| "grad_norm": 2.021371056385805, |
| "learning_rate": 1.2441108101408592e-06, |
| "loss": 0.105, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.21911262798634812, |
| "grad_norm": 2.932640255317413, |
| "learning_rate": 1.2440985667200853e-06, |
| "loss": 0.1186, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.2193401592718999, |
| "grad_norm": 2.287879466073487, |
| "learning_rate": 1.2440863106460705e-06, |
| "loss": 0.1418, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.21956769055745165, |
| "grad_norm": 2.4323172112890807, |
| "learning_rate": 1.2440740419190655e-06, |
| "loss": 0.2116, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.2197952218430034, |
| "grad_norm": 2.906286752213052, |
| "learning_rate": 1.2440617605393208e-06, |
| "loss": 0.2029, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.22002275312855518, |
| "grad_norm": 2.420234503572233, |
| "learning_rate": 1.2440494665070874e-06, |
| "loss": 0.2227, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.22025028441410693, |
| "grad_norm": 2.1531642600457874, |
| "learning_rate": 1.2440371598226165e-06, |
| "loss": 0.1565, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.2204778156996587, |
| "grad_norm": 1.7851844835265829, |
| "learning_rate": 1.2440248404861598e-06, |
| "loss": 0.1132, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.22070534698521047, |
| "grad_norm": 2.2253443799094605, |
| "learning_rate": 1.2440125084979693e-06, |
| "loss": 0.1141, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.22093287827076222, |
| "grad_norm": 3.491367387042196, |
| "learning_rate": 1.2440001638582965e-06, |
| "loss": 0.1678, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.221160409556314, |
| "grad_norm": 2.6799332639547297, |
| "learning_rate": 1.2439878065673944e-06, |
| "loss": 0.1791, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.22138794084186575, |
| "grad_norm": 0.9028117739016462, |
| "learning_rate": 1.2439754366255149e-06, |
| "loss": 0.0794, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.22161547212741753, |
| "grad_norm": 1.6629358802939667, |
| "learning_rate": 1.2439630540329111e-06, |
| "loss": 0.1328, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.22184300341296928, |
| "grad_norm": 2.734953415687441, |
| "learning_rate": 1.2439506587898358e-06, |
| "loss": 0.1168, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.22207053469852106, |
| "grad_norm": 2.0986779517624745, |
| "learning_rate": 1.243938250896543e-06, |
| "loss": 0.1288, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.2222980659840728, |
| "grad_norm": 2.4554262769941766, |
| "learning_rate": 1.2439258303532858e-06, |
| "loss": 0.1545, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.22252559726962456, |
| "grad_norm": 1.7628888954012072, |
| "learning_rate": 1.243913397160318e-06, |
| "loss": 0.0967, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.22275312855517634, |
| "grad_norm": 1.8371409568342896, |
| "learning_rate": 1.2439009513178938e-06, |
| "loss": 0.1184, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.2229806598407281, |
| "grad_norm": 3.4838138279645103, |
| "learning_rate": 1.2438884928262678e-06, |
| "loss": 0.1686, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.22320819112627988, |
| "grad_norm": 1.743212643613601, |
| "learning_rate": 1.2438760216856944e-06, |
| "loss": 0.1005, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.22343572241183163, |
| "grad_norm": 2.2940811110233135, |
| "learning_rate": 1.2438635378964284e-06, |
| "loss": 0.1261, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.22366325369738338, |
| "grad_norm": 3.306786589733754, |
| "learning_rate": 1.2438510414587251e-06, |
| "loss": 0.1057, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.22389078498293516, |
| "grad_norm": 1.8312197926008273, |
| "learning_rate": 1.24383853237284e-06, |
| "loss": 0.1121, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.2241183162684869, |
| "grad_norm": 1.375951456745173, |
| "learning_rate": 1.2438260106390285e-06, |
| "loss": 0.1137, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.2243458475540387, |
| "grad_norm": 2.2850475547846507, |
| "learning_rate": 1.2438134762575467e-06, |
| "loss": 0.1528, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.22457337883959044, |
| "grad_norm": 1.7811601291763544, |
| "learning_rate": 1.243800929228651e-06, |
| "loss": 0.114, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.2248009101251422, |
| "grad_norm": 2.175503500486742, |
| "learning_rate": 1.2437883695525974e-06, |
| "loss": 0.2246, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.22502844141069397, |
| "grad_norm": 2.5853887611675375, |
| "learning_rate": 1.2437757972296427e-06, |
| "loss": 0.2126, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.22525597269624573, |
| "grad_norm": 2.4622729490723065, |
| "learning_rate": 1.2437632122600442e-06, |
| "loss": 0.1806, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2254835039817975, |
| "grad_norm": 2.2336859931017794, |
| "learning_rate": 1.2437506146440587e-06, |
| "loss": 0.1948, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.22571103526734926, |
| "grad_norm": 2.388802906376772, |
| "learning_rate": 1.243738004381944e-06, |
| "loss": 0.1028, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.225938566552901, |
| "grad_norm": 2.526457136508687, |
| "learning_rate": 1.2437253814739572e-06, |
| "loss": 0.1394, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.2261660978384528, |
| "grad_norm": 2.282347439516019, |
| "learning_rate": 1.2437127459203572e-06, |
| "loss": 0.1678, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.22639362912400454, |
| "grad_norm": 1.3050466119815518, |
| "learning_rate": 1.2437000977214015e-06, |
| "loss": 0.0753, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.22662116040955632, |
| "grad_norm": 2.159334429482828, |
| "learning_rate": 1.243687436877349e-06, |
| "loss": 0.2767, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.22684869169510807, |
| "grad_norm": 2.4741243617261617, |
| "learning_rate": 1.2436747633884583e-06, |
| "loss": 0.167, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.22707622298065985, |
| "grad_norm": 2.522130011756034, |
| "learning_rate": 1.2436620772549885e-06, |
| "loss": 0.2229, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.2273037542662116, |
| "grad_norm": 2.2654639871535873, |
| "learning_rate": 1.243649378477199e-06, |
| "loss": 0.1376, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.22753128555176336, |
| "grad_norm": 2.737389406083516, |
| "learning_rate": 1.2436366670553491e-06, |
| "loss": 0.1672, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.22775881683731514, |
| "grad_norm": 2.497999857751637, |
| "learning_rate": 1.2436239429896988e-06, |
| "loss": 0.2831, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.2279863481228669, |
| "grad_norm": 2.3986139069373125, |
| "learning_rate": 1.2436112062805081e-06, |
| "loss": 0.1413, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.22821387940841867, |
| "grad_norm": 1.63194618315687, |
| "learning_rate": 1.2435984569280372e-06, |
| "loss": 0.1509, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.22844141069397042, |
| "grad_norm": 1.9884735218546312, |
| "learning_rate": 1.2435856949325467e-06, |
| "loss": 0.0909, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.22866894197952217, |
| "grad_norm": 3.7364717574130877, |
| "learning_rate": 1.2435729202942972e-06, |
| "loss": 0.1362, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.22889647326507395, |
| "grad_norm": 4.3498400339740595, |
| "learning_rate": 1.2435601330135506e-06, |
| "loss": 0.1364, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.2291240045506257, |
| "grad_norm": 1.468486521047109, |
| "learning_rate": 1.2435473330905674e-06, |
| "loss": 0.1902, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.22935153583617748, |
| "grad_norm": 2.602985360302298, |
| "learning_rate": 1.2435345205256097e-06, |
| "loss": 0.0947, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.22957906712172924, |
| "grad_norm": 2.117002790495142, |
| "learning_rate": 1.243521695318939e-06, |
| "loss": 0.1228, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.229806598407281, |
| "grad_norm": 2.0012843231226034, |
| "learning_rate": 1.2435088574708178e-06, |
| "loss": 0.1156, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.23003412969283277, |
| "grad_norm": 2.490148339748286, |
| "learning_rate": 1.2434960069815083e-06, |
| "loss": 0.164, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.23026166097838452, |
| "grad_norm": 2.450730689081713, |
| "learning_rate": 1.243483143851273e-06, |
| "loss": 0.138, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.2304891922639363, |
| "grad_norm": 2.892744061430906, |
| "learning_rate": 1.2434702680803751e-06, |
| "loss": 0.1061, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.23071672354948805, |
| "grad_norm": 2.790226387512928, |
| "learning_rate": 1.2434573796690774e-06, |
| "loss": 0.1957, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.23094425483503983, |
| "grad_norm": 2.4036726186705972, |
| "learning_rate": 1.2434444786176435e-06, |
| "loss": 0.1544, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.23117178612059158, |
| "grad_norm": 1.3271746602955339, |
| "learning_rate": 1.2434315649263372e-06, |
| "loss": 0.061, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.23139931740614333, |
| "grad_norm": 1.4063593684445947, |
| "learning_rate": 1.2434186385954225e-06, |
| "loss": 0.1068, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.23162684869169511, |
| "grad_norm": 2.9525793198909724, |
| "learning_rate": 1.243405699625163e-06, |
| "loss": 0.1067, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.23185437997724687, |
| "grad_norm": 2.7846219600282747, |
| "learning_rate": 1.243392748015824e-06, |
| "loss": 0.1435, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.23208191126279865, |
| "grad_norm": 1.5658061687677385, |
| "learning_rate": 1.2433797837676694e-06, |
| "loss": 0.1492, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2323094425483504, |
| "grad_norm": 4.123388323133236, |
| "learning_rate": 1.2433668068809648e-06, |
| "loss": 0.1699, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.23253697383390215, |
| "grad_norm": 2.0976126762166403, |
| "learning_rate": 1.243353817355975e-06, |
| "loss": 0.1257, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.23276450511945393, |
| "grad_norm": 2.4116621601065296, |
| "learning_rate": 1.2433408151929655e-06, |
| "loss": 0.133, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.23299203640500568, |
| "grad_norm": 1.395623834578789, |
| "learning_rate": 1.2433278003922026e-06, |
| "loss": 0.0936, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.23321956769055746, |
| "grad_norm": 1.7768669244027402, |
| "learning_rate": 1.2433147729539514e-06, |
| "loss": 0.1264, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2334470989761092, |
| "grad_norm": 2.489847520949891, |
| "learning_rate": 1.2433017328784788e-06, |
| "loss": 0.1714, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.23367463026166096, |
| "grad_norm": 1.722648702759186, |
| "learning_rate": 1.2432886801660513e-06, |
| "loss": 0.122, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.23390216154721274, |
| "grad_norm": 1.3061284883014919, |
| "learning_rate": 1.2432756148169354e-06, |
| "loss": 0.0726, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.2341296928327645, |
| "grad_norm": 2.807955909764041, |
| "learning_rate": 1.2432625368313983e-06, |
| "loss": 0.1667, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.23435722411831628, |
| "grad_norm": 1.9724601313774524, |
| "learning_rate": 1.2432494462097072e-06, |
| "loss": 0.1995, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.23458475540386803, |
| "grad_norm": 2.3943947067430895, |
| "learning_rate": 1.2432363429521295e-06, |
| "loss": 0.1625, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.2348122866894198, |
| "grad_norm": 1.5436408096888365, |
| "learning_rate": 1.2432232270589335e-06, |
| "loss": 0.076, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.23503981797497156, |
| "grad_norm": 1.1938881747627557, |
| "learning_rate": 1.2432100985303868e-06, |
| "loss": 0.1002, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.2352673492605233, |
| "grad_norm": 2.0446974564823304, |
| "learning_rate": 1.243196957366758e-06, |
| "loss": 0.1721, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.2354948805460751, |
| "grad_norm": 1.079879180238331, |
| "learning_rate": 1.2431838035683155e-06, |
| "loss": 0.1257, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.23572241183162684, |
| "grad_norm": 1.8378535292320874, |
| "learning_rate": 1.2431706371353282e-06, |
| "loss": 0.1821, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.23594994311717862, |
| "grad_norm": 1.969855842746801, |
| "learning_rate": 1.2431574580680653e-06, |
| "loss": 0.1436, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.23617747440273038, |
| "grad_norm": 3.058757707801488, |
| "learning_rate": 1.2431442663667958e-06, |
| "loss": 0.1605, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.23640500568828213, |
| "grad_norm": 1.2648716547694445, |
| "learning_rate": 1.2431310620317898e-06, |
| "loss": 0.1614, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.2366325369738339, |
| "grad_norm": 1.9610877034271015, |
| "learning_rate": 1.2431178450633168e-06, |
| "loss": 0.139, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.23686006825938566, |
| "grad_norm": 1.5919631273318544, |
| "learning_rate": 1.2431046154616473e-06, |
| "loss": 0.0888, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.23708759954493744, |
| "grad_norm": 1.791707313865184, |
| "learning_rate": 1.2430913732270512e-06, |
| "loss": 0.1087, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.2373151308304892, |
| "grad_norm": 3.1377911678690666, |
| "learning_rate": 1.2430781183597995e-06, |
| "loss": 0.1565, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.23754266211604094, |
| "grad_norm": 2.2837991793589607, |
| "learning_rate": 1.243064850860163e-06, |
| "loss": 0.1126, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.23777019340159272, |
| "grad_norm": 2.6823412767535246, |
| "learning_rate": 1.243051570728413e-06, |
| "loss": 0.2083, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.23799772468714447, |
| "grad_norm": 4.365244516577561, |
| "learning_rate": 1.2430382779648208e-06, |
| "loss": 0.1904, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.23822525597269625, |
| "grad_norm": 2.434739692035364, |
| "learning_rate": 1.243024972569658e-06, |
| "loss": 0.1347, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.238452787258248, |
| "grad_norm": 2.1595986496307384, |
| "learning_rate": 1.2430116545431966e-06, |
| "loss": 0.1926, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.23868031854379979, |
| "grad_norm": 2.2542031412662573, |
| "learning_rate": 1.2429983238857088e-06, |
| "loss": 0.1667, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.23890784982935154, |
| "grad_norm": 2.0405926385207787, |
| "learning_rate": 1.2429849805974673e-06, |
| "loss": 0.1872, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2391353811149033, |
| "grad_norm": 2.2037085916589043, |
| "learning_rate": 1.2429716246787444e-06, |
| "loss": 0.0775, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.23936291240045507, |
| "grad_norm": 0.9628371959013814, |
| "learning_rate": 1.242958256129813e-06, |
| "loss": 0.1378, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.23959044368600682, |
| "grad_norm": 2.1187588487355424, |
| "learning_rate": 1.242944874950947e-06, |
| "loss": 0.159, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.2398179749715586, |
| "grad_norm": 1.9961766997876433, |
| "learning_rate": 1.2429314811424192e-06, |
| "loss": 0.1568, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.24004550625711035, |
| "grad_norm": 1.935471261024473, |
| "learning_rate": 1.242918074704504e-06, |
| "loss": 0.1596, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.2402730375426621, |
| "grad_norm": 1.4988665110908368, |
| "learning_rate": 1.2429046556374747e-06, |
| "loss": 0.0987, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.24050056882821388, |
| "grad_norm": 2.4283216098462015, |
| "learning_rate": 1.2428912239416057e-06, |
| "loss": 0.1127, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.24072810011376564, |
| "grad_norm": 2.3264824459084448, |
| "learning_rate": 1.242877779617172e-06, |
| "loss": 0.1274, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.24095563139931742, |
| "grad_norm": 2.159687331291489, |
| "learning_rate": 1.242864322664448e-06, |
| "loss": 0.1399, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.24118316268486917, |
| "grad_norm": 2.3632421336063087, |
| "learning_rate": 1.2428508530837088e-06, |
| "loss": 0.1751, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.24141069397042092, |
| "grad_norm": 4.564054038887482, |
| "learning_rate": 1.2428373708752298e-06, |
| "loss": 0.1623, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.2416382252559727, |
| "grad_norm": 2.913968751293169, |
| "learning_rate": 1.2428238760392862e-06, |
| "loss": 0.2404, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.24186575654152445, |
| "grad_norm": 2.375864551832549, |
| "learning_rate": 1.2428103685761543e-06, |
| "loss": 0.1551, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.24209328782707623, |
| "grad_norm": 2.773326434228427, |
| "learning_rate": 1.2427968484861097e-06, |
| "loss": 0.1129, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.24232081911262798, |
| "grad_norm": 3.440322207371564, |
| "learning_rate": 1.2427833157694292e-06, |
| "loss": 0.2312, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.24254835039817976, |
| "grad_norm": 2.09362609958651, |
| "learning_rate": 1.2427697704263892e-06, |
| "loss": 0.1047, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.24277588168373151, |
| "grad_norm": 2.0696892695320432, |
| "learning_rate": 1.2427562124572663e-06, |
| "loss": 0.1156, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.24300341296928327, |
| "grad_norm": 1.923568801452821, |
| "learning_rate": 1.2427426418623377e-06, |
| "loss": 0.1609, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.24323094425483505, |
| "grad_norm": 1.5158781630471698, |
| "learning_rate": 1.242729058641881e-06, |
| "loss": 0.094, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.2434584755403868, |
| "grad_norm": 2.2258107327352037, |
| "learning_rate": 1.2427154627961737e-06, |
| "loss": 0.2017, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.24368600682593858, |
| "grad_norm": 2.3481688305100645, |
| "learning_rate": 1.2427018543254935e-06, |
| "loss": 0.1535, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.24391353811149033, |
| "grad_norm": 2.148375299510445, |
| "learning_rate": 1.2426882332301187e-06, |
| "loss": 0.1812, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.24414106939704208, |
| "grad_norm": 1.6816805152718777, |
| "learning_rate": 1.2426745995103277e-06, |
| "loss": 0.1341, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.24436860068259386, |
| "grad_norm": 2.651811251817173, |
| "learning_rate": 1.242660953166399e-06, |
| "loss": 0.1318, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.2445961319681456, |
| "grad_norm": 2.473544844662378, |
| "learning_rate": 1.2426472941986117e-06, |
| "loss": 0.1972, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.2448236632536974, |
| "grad_norm": 1.3274925024741444, |
| "learning_rate": 1.2426336226072449e-06, |
| "loss": 0.1497, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.24505119453924915, |
| "grad_norm": 2.1014804926130277, |
| "learning_rate": 1.242619938392578e-06, |
| "loss": 0.1186, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.2452787258248009, |
| "grad_norm": 3.0260303106049973, |
| "learning_rate": 1.2426062415548907e-06, |
| "loss": 0.2506, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.24550625711035268, |
| "grad_norm": 1.2327761741993546, |
| "learning_rate": 1.2425925320944628e-06, |
| "loss": 0.117, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.24573378839590443, |
| "grad_norm": 3.2155457599215036, |
| "learning_rate": 1.2425788100115747e-06, |
| "loss": 0.1412, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2459613196814562, |
| "grad_norm": 1.6672046307721682, |
| "learning_rate": 1.2425650753065065e-06, |
| "loss": 0.148, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.24618885096700796, |
| "grad_norm": 4.323033908726176, |
| "learning_rate": 1.2425513279795395e-06, |
| "loss": 0.1685, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.24641638225255974, |
| "grad_norm": 2.4128743686143146, |
| "learning_rate": 1.2425375680309543e-06, |
| "loss": 0.0992, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.2466439135381115, |
| "grad_norm": 2.0582783253443497, |
| "learning_rate": 1.2425237954610322e-06, |
| "loss": 0.1263, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.24687144482366324, |
| "grad_norm": 2.5810033905990637, |
| "learning_rate": 1.2425100102700547e-06, |
| "loss": 0.2102, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.24709897610921502, |
| "grad_norm": 2.269665820869707, |
| "learning_rate": 1.2424962124583033e-06, |
| "loss": 0.105, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.24732650739476678, |
| "grad_norm": 2.706182109515585, |
| "learning_rate": 1.2424824020260603e-06, |
| "loss": 0.1596, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.24755403868031856, |
| "grad_norm": 3.0056026517839016, |
| "learning_rate": 1.2424685789736077e-06, |
| "loss": 0.1809, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.2477815699658703, |
| "grad_norm": 2.2230272708907513, |
| "learning_rate": 1.2424547433012284e-06, |
| "loss": 0.1187, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.24800910125142206, |
| "grad_norm": 2.271631978747539, |
| "learning_rate": 1.2424408950092049e-06, |
| "loss": 0.1478, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.24823663253697384, |
| "grad_norm": 2.485671272218175, |
| "learning_rate": 1.2424270340978204e-06, |
| "loss": 0.1595, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.2484641638225256, |
| "grad_norm": 2.5242524420773087, |
| "learning_rate": 1.2424131605673582e-06, |
| "loss": 0.2519, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.24869169510807737, |
| "grad_norm": 2.6439941529662025, |
| "learning_rate": 1.2423992744181015e-06, |
| "loss": 0.1389, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.24891922639362912, |
| "grad_norm": 2.1610086973465417, |
| "learning_rate": 1.2423853756503343e-06, |
| "loss": 0.1017, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.24914675767918087, |
| "grad_norm": 1.8954846688503157, |
| "learning_rate": 1.2423714642643408e-06, |
| "loss": 0.2796, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.24937428896473265, |
| "grad_norm": 1.3124277359799683, |
| "learning_rate": 1.2423575402604051e-06, |
| "loss": 0.12, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.2496018202502844, |
| "grad_norm": 2.5234695537617444, |
| "learning_rate": 1.2423436036388122e-06, |
| "loss": 0.1242, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.24982935153583619, |
| "grad_norm": 2.044792039361886, |
| "learning_rate": 1.2423296543998465e-06, |
| "loss": 0.1743, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.25005688282138794, |
| "grad_norm": 3.6767614291561492, |
| "learning_rate": 1.2423156925437932e-06, |
| "loss": 0.2584, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.2502844141069397, |
| "grad_norm": 2.1397151355216506, |
| "learning_rate": 1.2423017180709376e-06, |
| "loss": 0.1586, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.25051194539249144, |
| "grad_norm": 1.670738860931536, |
| "learning_rate": 1.2422877309815656e-06, |
| "loss": 0.0821, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.25073947667804325, |
| "grad_norm": 2.3733300367714185, |
| "learning_rate": 1.242273731275963e-06, |
| "loss": 0.1335, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.250967007963595, |
| "grad_norm": 2.6954093027320534, |
| "learning_rate": 1.2422597189544155e-06, |
| "loss": 0.1244, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.25119453924914675, |
| "grad_norm": 2.17330712431736, |
| "learning_rate": 1.2422456940172101e-06, |
| "loss": 0.1799, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.2514220705346985, |
| "grad_norm": 2.4883101223722397, |
| "learning_rate": 1.2422316564646331e-06, |
| "loss": 0.0881, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.25164960182025026, |
| "grad_norm": 2.4975644528149528, |
| "learning_rate": 1.2422176062969713e-06, |
| "loss": 0.2376, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.25187713310580206, |
| "grad_norm": 2.242874102497345, |
| "learning_rate": 1.2422035435145121e-06, |
| "loss": 0.1117, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.2521046643913538, |
| "grad_norm": 2.1430334401000994, |
| "learning_rate": 1.2421894681175428e-06, |
| "loss": 0.1937, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.25233219567690557, |
| "grad_norm": 2.8329522904929796, |
| "learning_rate": 1.2421753801063511e-06, |
| "loss": 0.2192, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.2525597269624573, |
| "grad_norm": 2.7185072984242016, |
| "learning_rate": 1.2421612794812248e-06, |
| "loss": 0.1612, |
| "step": 1110 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 21975, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 1110, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4896118628352.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
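
The block above is the raw checkpoint state written by the Hugging Face `Trainer` at step 1110 (of `max_steps` 21975, saved on the `save_steps` 1110 boundary). As a minimal sketch of how this file could be inspected, the snippet below loads it and smooths the per-step loss, which is noisy because `logging_steps` is 1. The filename and path follow the Trainer's usual checkpoint layout but are assumptions here; adjust them to wherever this checkpoint directory lives.

```python
import json

# Assumed path: the Trainer writes this file inside each checkpoint
# directory (e.g. checkpoint-1110/trainer_state.json); adjust as needed.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]              # one dict per logged step
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

# Smooth the raw loss with a trailing moving average, since the file
# logs every step (logging_steps = 1) and single-step losses are noisy.
window = 50
smoothed = []
for i in range(len(losses)):
    chunk = losses[max(0, i - window + 1): i + 1]
    smoothed.append(sum(chunk) / len(chunk))

print(f"steps logged: {len(steps)} (last step {steps[-1]} of {state['max_steps']})")
print(f"raw loss at step {steps[-1]}: {losses[-1]:.4f}")
print(f"{window}-step trailing average: {smoothed[-1]:.4f}")
```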