| { |
| "best_global_step": 5162, |
| "best_metric": 0.24373722, |
| "best_model_checkpoint": "/mmu_mllm_hdd_2/yifanzhang/models/tool_final/qwen_tool_all_data_180k_alldata_wpgemini/v0-20250616-170052/checkpoint-5162", |
| "epoch": 2.9990553927386343, |
| "eval_steps": 500, |
| "global_step": 7740, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00038753118414997455, |
| "grad_norm": 6.552547931671143, |
| "learning_rate": 2.583979328165375e-08, |
| "loss": 0.9296205043792725, |
| "memory(GiB)": 53.11, |
| "step": 1, |
| "token_acc": 0.7648440120764844, |
| "train_speed(iter/s)": 0.021396 |
| }, |
| { |
| "epoch": 0.0019376559207498728, |
| "grad_norm": 6.881824016571045, |
| "learning_rate": 1.2919896640826874e-07, |
| "loss": 0.9338862895965576, |
| "memory(GiB)": 68.26, |
| "step": 5, |
| "token_acc": 0.7612466124661247, |
| "train_speed(iter/s)": 0.049734 |
| }, |
| { |
| "epoch": 0.0038753118414997455, |
| "grad_norm": 7.085341930389404, |
| "learning_rate": 2.583979328165375e-07, |
| "loss": 0.9332740783691407, |
| "memory(GiB)": 68.26, |
| "step": 10, |
| "token_acc": 0.7584953537016083, |
| "train_speed(iter/s)": 0.062044 |
| }, |
| { |
| "epoch": 0.005812967762249619, |
| "grad_norm": 6.054533958435059, |
| "learning_rate": 3.8759689922480623e-07, |
| "loss": 0.9270101547241211, |
| "memory(GiB)": 68.26, |
| "step": 15, |
| "token_acc": 0.7512948517940717, |
| "train_speed(iter/s)": 0.066051 |
| }, |
| { |
| "epoch": 0.007750623682999491, |
| "grad_norm": 4.462226867675781, |
| "learning_rate": 5.16795865633075e-07, |
| "loss": 0.8828842163085937, |
| "memory(GiB)": 68.26, |
| "step": 20, |
| "token_acc": 0.758990211231324, |
| "train_speed(iter/s)": 0.069348 |
| }, |
| { |
| "epoch": 0.009688279603749364, |
| "grad_norm": 4.585198879241943, |
| "learning_rate": 6.459948320413437e-07, |
| "loss": 0.8611625671386719, |
| "memory(GiB)": 68.26, |
| "step": 25, |
| "token_acc": 0.7707904789891272, |
| "train_speed(iter/s)": 0.071211 |
| }, |
| { |
| "epoch": 0.011625935524499238, |
| "grad_norm": 2.7715461254119873, |
| "learning_rate": 7.751937984496125e-07, |
| "loss": 0.7636467933654785, |
| "memory(GiB)": 68.26, |
| "step": 30, |
| "token_acc": 0.7774806344522317, |
| "train_speed(iter/s)": 0.071926 |
| }, |
| { |
| "epoch": 0.01356359144524911, |
| "grad_norm": 2.4396770000457764, |
| "learning_rate": 9.043927648578812e-07, |
| "loss": 0.7435248374938965, |
| "memory(GiB)": 68.26, |
| "step": 35, |
| "token_acc": 0.8056009119196524, |
| "train_speed(iter/s)": 0.072532 |
| }, |
| { |
| "epoch": 0.015501247365998982, |
| "grad_norm": 2.1752095222473145, |
| "learning_rate": 1.03359173126615e-06, |
| "loss": 0.672543716430664, |
| "memory(GiB)": 68.26, |
| "step": 40, |
| "token_acc": 0.7990769230769231, |
| "train_speed(iter/s)": 0.073019 |
| }, |
| { |
| "epoch": 0.017438903286748856, |
| "grad_norm": 1.7170302867889404, |
| "learning_rate": 1.1627906976744188e-06, |
| "loss": 0.6109170913696289, |
| "memory(GiB)": 68.26, |
| "step": 45, |
| "token_acc": 0.81396280591633, |
| "train_speed(iter/s)": 0.074189 |
| }, |
| { |
| "epoch": 0.019376559207498728, |
| "grad_norm": 1.450340747833252, |
| "learning_rate": 1.2919896640826874e-06, |
| "loss": 0.590874195098877, |
| "memory(GiB)": 68.26, |
| "step": 50, |
| "token_acc": 0.8275686511598307, |
| "train_speed(iter/s)": 0.074984 |
| }, |
| { |
| "epoch": 0.0213142151282486, |
| "grad_norm": 1.4400850534439087, |
| "learning_rate": 1.421188630490956e-06, |
| "loss": 0.5400970458984375, |
| "memory(GiB)": 68.26, |
| "step": 55, |
| "token_acc": 0.8339371199378962, |
| "train_speed(iter/s)": 0.076011 |
| }, |
| { |
| "epoch": 0.023251871048998476, |
| "grad_norm": 1.2291407585144043, |
| "learning_rate": 1.550387596899225e-06, |
| "loss": 0.4900141716003418, |
| "memory(GiB)": 68.26, |
| "step": 60, |
| "token_acc": 0.8436885967884002, |
| "train_speed(iter/s)": 0.076361 |
| }, |
| { |
| "epoch": 0.025189526969748348, |
| "grad_norm": 0.973463237285614, |
| "learning_rate": 1.6795865633074938e-06, |
| "loss": 0.4778586387634277, |
| "memory(GiB)": 68.26, |
| "step": 65, |
| "token_acc": 0.8613077004330834, |
| "train_speed(iter/s)": 0.07651 |
| }, |
| { |
| "epoch": 0.02712718289049822, |
| "grad_norm": 0.999035656452179, |
| "learning_rate": 1.8087855297157624e-06, |
| "loss": 0.4614850997924805, |
| "memory(GiB)": 68.26, |
| "step": 70, |
| "token_acc": 0.8476737395366946, |
| "train_speed(iter/s)": 0.076462 |
| }, |
| { |
| "epoch": 0.029064838811248092, |
| "grad_norm": 0.9571713805198669, |
| "learning_rate": 1.9379844961240315e-06, |
| "loss": 0.44265012741088866, |
| "memory(GiB)": 68.26, |
| "step": 75, |
| "token_acc": 0.8589666423797003, |
| "train_speed(iter/s)": 0.076552 |
| }, |
| { |
| "epoch": 0.031002494731997964, |
| "grad_norm": 0.9145216941833496, |
| "learning_rate": 2.0671834625323e-06, |
| "loss": 0.42769956588745117, |
| "memory(GiB)": 68.26, |
| "step": 80, |
| "token_acc": 0.8603299712778038, |
| "train_speed(iter/s)": 0.07668 |
| }, |
| { |
| "epoch": 0.032940150652747836, |
| "grad_norm": 0.9121745824813843, |
| "learning_rate": 2.1963824289405687e-06, |
| "loss": 0.4430729866027832, |
| "memory(GiB)": 68.26, |
| "step": 85, |
| "token_acc": 0.8658448831461377, |
| "train_speed(iter/s)": 0.076904 |
| }, |
| { |
| "epoch": 0.03487780657349771, |
| "grad_norm": 0.9223958253860474, |
| "learning_rate": 2.3255813953488376e-06, |
| "loss": 0.41683158874511717, |
| "memory(GiB)": 68.26, |
| "step": 90, |
| "token_acc": 0.8670972793110198, |
| "train_speed(iter/s)": 0.076765 |
| }, |
| { |
| "epoch": 0.03681546249424758, |
| "grad_norm": 0.905558705329895, |
| "learning_rate": 2.454780361757106e-06, |
| "loss": 0.43251714706420896, |
| "memory(GiB)": 68.26, |
| "step": 95, |
| "token_acc": 0.8683047710037619, |
| "train_speed(iter/s)": 0.076628 |
| }, |
| { |
| "epoch": 0.038753118414997456, |
| "grad_norm": 0.8794025778770447, |
| "learning_rate": 2.583979328165375e-06, |
| "loss": 0.4090336799621582, |
| "memory(GiB)": 68.26, |
| "step": 100, |
| "token_acc": 0.8667196041902085, |
| "train_speed(iter/s)": 0.07673 |
| }, |
| { |
| "epoch": 0.04069077433574733, |
| "grad_norm": 0.8912697434425354, |
| "learning_rate": 2.7131782945736433e-06, |
| "loss": 0.399850869178772, |
| "memory(GiB)": 68.26, |
| "step": 105, |
| "token_acc": 0.8666218766996193, |
| "train_speed(iter/s)": 0.077113 |
| }, |
| { |
| "epoch": 0.0426284302564972, |
| "grad_norm": 0.8915030360221863, |
| "learning_rate": 2.842377260981912e-06, |
| "loss": 0.40947537422180175, |
| "memory(GiB)": 68.26, |
| "step": 110, |
| "token_acc": 0.8638117728372308, |
| "train_speed(iter/s)": 0.077292 |
| }, |
| { |
| "epoch": 0.044566086177247076, |
| "grad_norm": 0.8565830588340759, |
| "learning_rate": 2.971576227390181e-06, |
| "loss": 0.396804141998291, |
| "memory(GiB)": 68.26, |
| "step": 115, |
| "token_acc": 0.8661356714969378, |
| "train_speed(iter/s)": 0.07761 |
| }, |
| { |
| "epoch": 0.04650374209799695, |
| "grad_norm": 0.8623788952827454, |
| "learning_rate": 3.10077519379845e-06, |
| "loss": 0.3913600206375122, |
| "memory(GiB)": 68.26, |
| "step": 120, |
| "token_acc": 0.8754619622062617, |
| "train_speed(iter/s)": 0.078035 |
| }, |
| { |
| "epoch": 0.04844139801874682, |
| "grad_norm": 0.8666126728057861, |
| "learning_rate": 3.2299741602067187e-06, |
| "loss": 0.3875392436981201, |
| "memory(GiB)": 71.85, |
| "step": 125, |
| "token_acc": 0.8719657721636882, |
| "train_speed(iter/s)": 0.078025 |
| }, |
| { |
| "epoch": 0.050379053939496696, |
| "grad_norm": 0.9226394891738892, |
| "learning_rate": 3.3591731266149875e-06, |
| "loss": 0.3926274538040161, |
| "memory(GiB)": 71.85, |
| "step": 130, |
| "token_acc": 0.8687872763419483, |
| "train_speed(iter/s)": 0.078147 |
| }, |
| { |
| "epoch": 0.052316709860246564, |
| "grad_norm": 0.8856056928634644, |
| "learning_rate": 3.4883720930232564e-06, |
| "loss": 0.3912957668304443, |
| "memory(GiB)": 71.85, |
| "step": 135, |
| "token_acc": 0.8724723353638756, |
| "train_speed(iter/s)": 0.078214 |
| }, |
| { |
| "epoch": 0.05425436578099644, |
| "grad_norm": 0.8952415585517883, |
| "learning_rate": 3.617571059431525e-06, |
| "loss": 0.3882893085479736, |
| "memory(GiB)": 71.85, |
| "step": 140, |
| "token_acc": 0.863610550227158, |
| "train_speed(iter/s)": 0.078368 |
| }, |
| { |
| "epoch": 0.056192021701746316, |
| "grad_norm": 0.9110421538352966, |
| "learning_rate": 3.7467700258397936e-06, |
| "loss": 0.3694924831390381, |
| "memory(GiB)": 71.85, |
| "step": 145, |
| "token_acc": 0.883052064631957, |
| "train_speed(iter/s)": 0.078555 |
| }, |
| { |
| "epoch": 0.058129677622496184, |
| "grad_norm": 0.9361255168914795, |
| "learning_rate": 3.875968992248063e-06, |
| "loss": 0.3734897136688232, |
| "memory(GiB)": 71.85, |
| "step": 150, |
| "token_acc": 0.8814651629237498, |
| "train_speed(iter/s)": 0.078958 |
| }, |
| { |
| "epoch": 0.06006733354324606, |
| "grad_norm": 0.8287400007247925, |
| "learning_rate": 4.005167958656331e-06, |
| "loss": 0.3683502197265625, |
| "memory(GiB)": 71.85, |
| "step": 155, |
| "token_acc": 0.8783027148088738, |
| "train_speed(iter/s)": 0.079085 |
| }, |
| { |
| "epoch": 0.06200498946399593, |
| "grad_norm": 0.8841593265533447, |
| "learning_rate": 4.1343669250646e-06, |
| "loss": 0.38687331676483155, |
| "memory(GiB)": 71.85, |
| "step": 160, |
| "token_acc": 0.8696694133482158, |
| "train_speed(iter/s)": 0.079376 |
| }, |
| { |
| "epoch": 0.0639426453847458, |
| "grad_norm": 0.9208874702453613, |
| "learning_rate": 4.263565891472868e-06, |
| "loss": 0.37154593467712405, |
| "memory(GiB)": 71.85, |
| "step": 165, |
| "token_acc": 0.8725894872621734, |
| "train_speed(iter/s)": 0.079544 |
| }, |
| { |
| "epoch": 0.06588030130549567, |
| "grad_norm": 0.880796492099762, |
| "learning_rate": 4.3927648578811375e-06, |
| "loss": 0.3870300054550171, |
| "memory(GiB)": 71.85, |
| "step": 170, |
| "token_acc": 0.8695997091197567, |
| "train_speed(iter/s)": 0.079632 |
| }, |
| { |
| "epoch": 0.06781795722624555, |
| "grad_norm": 0.8492954969406128, |
| "learning_rate": 4.521963824289406e-06, |
| "loss": 0.37391808032989504, |
| "memory(GiB)": 71.85, |
| "step": 175, |
| "token_acc": 0.8902093180283592, |
| "train_speed(iter/s)": 0.07985 |
| }, |
| { |
| "epoch": 0.06975561314699542, |
| "grad_norm": 0.8536194562911987, |
| "learning_rate": 4.651162790697675e-06, |
| "loss": 0.3642561435699463, |
| "memory(GiB)": 71.85, |
| "step": 180, |
| "token_acc": 0.8766690784713543, |
| "train_speed(iter/s)": 0.079782 |
| }, |
| { |
| "epoch": 0.0716932690677453, |
| "grad_norm": 0.8752952218055725, |
| "learning_rate": 4.780361757105944e-06, |
| "loss": 0.37262544631958006, |
| "memory(GiB)": 71.85, |
| "step": 185, |
| "token_acc": 0.8835785829108446, |
| "train_speed(iter/s)": 0.079949 |
| }, |
| { |
| "epoch": 0.07363092498849516, |
| "grad_norm": 0.8449862003326416, |
| "learning_rate": 4.909560723514212e-06, |
| "loss": 0.36355421543121336, |
| "memory(GiB)": 71.85, |
| "step": 190, |
| "token_acc": 0.8750923624638947, |
| "train_speed(iter/s)": 0.079919 |
| }, |
| { |
| "epoch": 0.07556858090924504, |
| "grad_norm": 0.9465638399124146, |
| "learning_rate": 5.038759689922481e-06, |
| "loss": 0.371561074256897, |
| "memory(GiB)": 71.85, |
| "step": 195, |
| "token_acc": 0.876782571757995, |
| "train_speed(iter/s)": 0.080132 |
| }, |
| { |
| "epoch": 0.07750623682999491, |
| "grad_norm": 0.8754744529724121, |
| "learning_rate": 5.16795865633075e-06, |
| "loss": 0.37893788814544677, |
| "memory(GiB)": 71.85, |
| "step": 200, |
| "token_acc": 0.8805535324107793, |
| "train_speed(iter/s)": 0.08012 |
| }, |
| { |
| "epoch": 0.07944389275074479, |
| "grad_norm": 0.9141603112220764, |
| "learning_rate": 5.297157622739019e-06, |
| "loss": 0.35440912246704104, |
| "memory(GiB)": 71.85, |
| "step": 205, |
| "token_acc": 0.890790432880897, |
| "train_speed(iter/s)": 0.080087 |
| }, |
| { |
| "epoch": 0.08138154867149466, |
| "grad_norm": 0.8244524598121643, |
| "learning_rate": 5.4263565891472865e-06, |
| "loss": 0.3542884349822998, |
| "memory(GiB)": 71.85, |
| "step": 210, |
| "token_acc": 0.8815609234572851, |
| "train_speed(iter/s)": 0.080069 |
| }, |
| { |
| "epoch": 0.08331920459224454, |
| "grad_norm": 0.8364593982696533, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.355057692527771, |
| "memory(GiB)": 71.85, |
| "step": 215, |
| "token_acc": 0.876421973748177, |
| "train_speed(iter/s)": 0.079978 |
| }, |
| { |
| "epoch": 0.0852568605129944, |
| "grad_norm": 0.8796382546424866, |
| "learning_rate": 5.684754521963824e-06, |
| "loss": 0.3415433406829834, |
| "memory(GiB)": 71.85, |
| "step": 220, |
| "token_acc": 0.8817479925880173, |
| "train_speed(iter/s)": 0.080045 |
| }, |
| { |
| "epoch": 0.08719451643374428, |
| "grad_norm": 0.8751423954963684, |
| "learning_rate": 5.8139534883720935e-06, |
| "loss": 0.3774546146392822, |
| "memory(GiB)": 71.85, |
| "step": 225, |
| "token_acc": 0.8779613309653855, |
| "train_speed(iter/s)": 0.080169 |
| }, |
| { |
| "epoch": 0.08913217235449415, |
| "grad_norm": 0.7890948057174683, |
| "learning_rate": 5.943152454780362e-06, |
| "loss": 0.34930667877197263, |
| "memory(GiB)": 71.85, |
| "step": 230, |
| "token_acc": 0.8998645090369916, |
| "train_speed(iter/s)": 0.08015 |
| }, |
| { |
| "epoch": 0.09106982827524403, |
| "grad_norm": 0.8329840302467346, |
| "learning_rate": 6.072351421188631e-06, |
| "loss": 0.362669849395752, |
| "memory(GiB)": 71.85, |
| "step": 235, |
| "token_acc": 0.8783116299955096, |
| "train_speed(iter/s)": 0.08006 |
| }, |
| { |
| "epoch": 0.0930074841959939, |
| "grad_norm": 0.8648292422294617, |
| "learning_rate": 6.2015503875969e-06, |
| "loss": 0.37513184547424316, |
| "memory(GiB)": 71.85, |
| "step": 240, |
| "token_acc": 0.879857933201284, |
| "train_speed(iter/s)": 0.08001 |
| }, |
| { |
| "epoch": 0.09494514011674376, |
| "grad_norm": 0.833514392375946, |
| "learning_rate": 6.330749354005169e-06, |
| "loss": 0.35806612968444823, |
| "memory(GiB)": 71.85, |
| "step": 245, |
| "token_acc": 0.8780785479580661, |
| "train_speed(iter/s)": 0.080031 |
| }, |
| { |
| "epoch": 0.09688279603749364, |
| "grad_norm": 0.8195750713348389, |
| "learning_rate": 6.459948320413437e-06, |
| "loss": 0.3542565107345581, |
| "memory(GiB)": 71.85, |
| "step": 250, |
| "token_acc": 0.8780148722922728, |
| "train_speed(iter/s)": 0.080065 |
| }, |
| { |
| "epoch": 0.09882045195824352, |
| "grad_norm": 0.8255862593650818, |
| "learning_rate": 6.589147286821706e-06, |
| "loss": 0.35635693073272706, |
| "memory(GiB)": 71.85, |
| "step": 255, |
| "token_acc": 0.8740539677470861, |
| "train_speed(iter/s)": 0.08009 |
| }, |
| { |
| "epoch": 0.10075810787899339, |
| "grad_norm": 0.8040722012519836, |
| "learning_rate": 6.718346253229975e-06, |
| "loss": 0.3676167011260986, |
| "memory(GiB)": 71.85, |
| "step": 260, |
| "token_acc": 0.8800150109170306, |
| "train_speed(iter/s)": 0.080048 |
| }, |
| { |
| "epoch": 0.10269576379974327, |
| "grad_norm": 0.8556498289108276, |
| "learning_rate": 6.8475452196382435e-06, |
| "loss": 0.3633396148681641, |
| "memory(GiB)": 71.85, |
| "step": 265, |
| "token_acc": 0.8784773530356569, |
| "train_speed(iter/s)": 0.080154 |
| }, |
| { |
| "epoch": 0.10463341972049313, |
| "grad_norm": 0.8252087235450745, |
| "learning_rate": 6.976744186046513e-06, |
| "loss": 0.33879594802856444, |
| "memory(GiB)": 71.85, |
| "step": 270, |
| "token_acc": 0.8953123958124959, |
| "train_speed(iter/s)": 0.080137 |
| }, |
| { |
| "epoch": 0.106571075641243, |
| "grad_norm": 0.8291889429092407, |
| "learning_rate": 7.10594315245478e-06, |
| "loss": 0.3601172924041748, |
| "memory(GiB)": 71.85, |
| "step": 275, |
| "token_acc": 0.8832243378978936, |
| "train_speed(iter/s)": 0.08008 |
| }, |
| { |
| "epoch": 0.10850873156199288, |
| "grad_norm": 0.8094320893287659, |
| "learning_rate": 7.23514211886305e-06, |
| "loss": 0.35142252445220945, |
| "memory(GiB)": 71.85, |
| "step": 280, |
| "token_acc": 0.8835487626496313, |
| "train_speed(iter/s)": 0.080038 |
| }, |
| { |
| "epoch": 0.11044638748274276, |
| "grad_norm": 0.7662101984024048, |
| "learning_rate": 7.364341085271318e-06, |
| "loss": 0.3705580234527588, |
| "memory(GiB)": 71.85, |
| "step": 285, |
| "token_acc": 0.8818561396849497, |
| "train_speed(iter/s)": 0.080036 |
| }, |
| { |
| "epoch": 0.11238404340349263, |
| "grad_norm": 0.8766042590141296, |
| "learning_rate": 7.493540051679587e-06, |
| "loss": 0.3576506614685059, |
| "memory(GiB)": 71.85, |
| "step": 290, |
| "token_acc": 0.8764228750596414, |
| "train_speed(iter/s)": 0.080017 |
| }, |
| { |
| "epoch": 0.11432169932424249, |
| "grad_norm": 0.7793442010879517, |
| "learning_rate": 7.622739018087856e-06, |
| "loss": 0.36000614166259765, |
| "memory(GiB)": 71.85, |
| "step": 295, |
| "token_acc": 0.8948535291124519, |
| "train_speed(iter/s)": 0.079965 |
| }, |
| { |
| "epoch": 0.11625935524499237, |
| "grad_norm": 0.8231183886528015, |
| "learning_rate": 7.751937984496126e-06, |
| "loss": 0.33621535301208494, |
| "memory(GiB)": 71.85, |
| "step": 300, |
| "token_acc": 0.8816721700381496, |
| "train_speed(iter/s)": 0.079893 |
| }, |
| { |
| "epoch": 0.11819701116574224, |
| "grad_norm": 0.850242555141449, |
| "learning_rate": 7.881136950904393e-06, |
| "loss": 0.3525859355926514, |
| "memory(GiB)": 71.85, |
| "step": 305, |
| "token_acc": 0.8819446807062906, |
| "train_speed(iter/s)": 0.079931 |
| }, |
| { |
| "epoch": 0.12013466708649212, |
| "grad_norm": 0.8171371221542358, |
| "learning_rate": 8.010335917312663e-06, |
| "loss": 0.3546357870101929, |
| "memory(GiB)": 71.85, |
| "step": 310, |
| "token_acc": 0.8777248580326067, |
| "train_speed(iter/s)": 0.079884 |
| }, |
| { |
| "epoch": 0.122072323007242, |
| "grad_norm": 0.7793200016021729, |
| "learning_rate": 8.139534883720931e-06, |
| "loss": 0.3619884252548218, |
| "memory(GiB)": 71.85, |
| "step": 315, |
| "token_acc": 0.885740589198036, |
| "train_speed(iter/s)": 0.079851 |
| }, |
| { |
| "epoch": 0.12400997892799186, |
| "grad_norm": 0.8639906048774719, |
| "learning_rate": 8.2687338501292e-06, |
| "loss": 0.3500725269317627, |
| "memory(GiB)": 71.85, |
| "step": 320, |
| "token_acc": 0.8943805668016195, |
| "train_speed(iter/s)": 0.079868 |
| }, |
| { |
| "epoch": 0.12594763484874175, |
| "grad_norm": 0.7964992523193359, |
| "learning_rate": 8.397932816537468e-06, |
| "loss": 0.33359375, |
| "memory(GiB)": 71.85, |
| "step": 325, |
| "token_acc": 0.8848108035833667, |
| "train_speed(iter/s)": 0.079922 |
| }, |
| { |
| "epoch": 0.1278852907694916, |
| "grad_norm": 0.8312904834747314, |
| "learning_rate": 8.527131782945736e-06, |
| "loss": 0.3640293121337891, |
| "memory(GiB)": 71.85, |
| "step": 330, |
| "token_acc": 0.8779143037177064, |
| "train_speed(iter/s)": 0.079932 |
| }, |
| { |
| "epoch": 0.12982294669024147, |
| "grad_norm": 0.9487627744674683, |
| "learning_rate": 8.656330749354006e-06, |
| "loss": 0.3424405574798584, |
| "memory(GiB)": 71.85, |
| "step": 335, |
| "token_acc": 0.8947685759886504, |
| "train_speed(iter/s)": 0.080055 |
| }, |
| { |
| "epoch": 0.13176060261099135, |
| "grad_norm": 0.8095372319221497, |
| "learning_rate": 8.785529715762275e-06, |
| "loss": 0.3609053134918213, |
| "memory(GiB)": 71.85, |
| "step": 340, |
| "token_acc": 0.8851003253796096, |
| "train_speed(iter/s)": 0.080183 |
| }, |
| { |
| "epoch": 0.13369825853174122, |
| "grad_norm": 0.7795316576957703, |
| "learning_rate": 8.914728682170543e-06, |
| "loss": 0.34638004302978515, |
| "memory(GiB)": 71.85, |
| "step": 345, |
| "token_acc": 0.8741907781741314, |
| "train_speed(iter/s)": 0.08015 |
| }, |
| { |
| "epoch": 0.1356359144524911, |
| "grad_norm": 0.8028691411018372, |
| "learning_rate": 9.043927648578812e-06, |
| "loss": 0.3564423084259033, |
| "memory(GiB)": 71.85, |
| "step": 350, |
| "token_acc": 0.87438717787555, |
| "train_speed(iter/s)": 0.080072 |
| }, |
| { |
| "epoch": 0.13757357037324097, |
| "grad_norm": 0.8242841362953186, |
| "learning_rate": 9.173126614987082e-06, |
| "loss": 0.3585948944091797, |
| "memory(GiB)": 71.85, |
| "step": 355, |
| "token_acc": 0.8687102733506591, |
| "train_speed(iter/s)": 0.080091 |
| }, |
| { |
| "epoch": 0.13951122629399085, |
| "grad_norm": 0.8537614345550537, |
| "learning_rate": 9.30232558139535e-06, |
| "loss": 0.3516932487487793, |
| "memory(GiB)": 71.85, |
| "step": 360, |
| "token_acc": 0.8854277411581261, |
| "train_speed(iter/s)": 0.080165 |
| }, |
| { |
| "epoch": 0.14144888221474072, |
| "grad_norm": 0.8241102695465088, |
| "learning_rate": 9.431524547803619e-06, |
| "loss": 0.34127624034881593, |
| "memory(GiB)": 71.85, |
| "step": 365, |
| "token_acc": 0.8881654287864411, |
| "train_speed(iter/s)": 0.080126 |
| }, |
| { |
| "epoch": 0.1433865381354906, |
| "grad_norm": 0.7585867643356323, |
| "learning_rate": 9.560723514211887e-06, |
| "loss": 0.3570504903793335, |
| "memory(GiB)": 71.85, |
| "step": 370, |
| "token_acc": 0.8768577494692145, |
| "train_speed(iter/s)": 0.080189 |
| }, |
| { |
| "epoch": 0.14532419405624047, |
| "grad_norm": 0.7735254168510437, |
| "learning_rate": 9.689922480620156e-06, |
| "loss": 0.3525998592376709, |
| "memory(GiB)": 71.85, |
| "step": 375, |
| "token_acc": 0.8809278863704244, |
| "train_speed(iter/s)": 0.080227 |
| }, |
| { |
| "epoch": 0.14726184997699032, |
| "grad_norm": 0.8313542008399963, |
| "learning_rate": 9.819121447028424e-06, |
| "loss": 0.34625673294067383, |
| "memory(GiB)": 71.85, |
| "step": 380, |
| "token_acc": 0.8787977998952331, |
| "train_speed(iter/s)": 0.080142 |
| }, |
| { |
| "epoch": 0.1491995058977402, |
| "grad_norm": 0.7735046744346619, |
| "learning_rate": 9.948320413436692e-06, |
| "loss": 0.34496536254882815, |
| "memory(GiB)": 71.85, |
| "step": 385, |
| "token_acc": 0.8872030801357348, |
| "train_speed(iter/s)": 0.080142 |
| }, |
| { |
| "epoch": 0.15113716181849007, |
| "grad_norm": 0.7527068257331848, |
| "learning_rate": 9.999995892731712e-06, |
| "loss": 0.3598473072052002, |
| "memory(GiB)": 71.85, |
| "step": 390, |
| "token_acc": 0.8917708333333333, |
| "train_speed(iter/s)": 0.080105 |
| }, |
| { |
| "epoch": 0.15307481773923995, |
| "grad_norm": 0.8179726600646973, |
| "learning_rate": 9.999970792783274e-06, |
| "loss": 0.3587926387786865, |
| "memory(GiB)": 71.85, |
| "step": 395, |
| "token_acc": 0.8800157714355039, |
| "train_speed(iter/s)": 0.080088 |
| }, |
| { |
| "epoch": 0.15501247365998982, |
| "grad_norm": 0.8060595989227295, |
| "learning_rate": 9.999922874816521e-06, |
| "loss": 0.36094279289245607, |
| "memory(GiB)": 71.85, |
| "step": 400, |
| "token_acc": 0.8862200557996344, |
| "train_speed(iter/s)": 0.080129 |
| }, |
| { |
| "epoch": 0.1569501295807397, |
| "grad_norm": 0.7952097654342651, |
| "learning_rate": 9.999852139050132e-06, |
| "loss": 0.35231719017028806, |
| "memory(GiB)": 71.85, |
| "step": 405, |
| "token_acc": 0.8807519158733977, |
| "train_speed(iter/s)": 0.08013 |
| }, |
| { |
| "epoch": 0.15888778550148958, |
| "grad_norm": 0.7951153516769409, |
| "learning_rate": 9.999758585806923e-06, |
| "loss": 0.3466503620147705, |
| "memory(GiB)": 71.85, |
| "step": 410, |
| "token_acc": 0.8873098736787832, |
| "train_speed(iter/s)": 0.080147 |
| }, |
| { |
| "epoch": 0.16082544142223945, |
| "grad_norm": 0.793311595916748, |
| "learning_rate": 9.999642215513832e-06, |
| "loss": 0.3452963352203369, |
| "memory(GiB)": 71.85, |
| "step": 415, |
| "token_acc": 0.8855158793361577, |
| "train_speed(iter/s)": 0.080133 |
| }, |
| { |
| "epoch": 0.16276309734298933, |
| "grad_norm": 0.7786062359809875, |
| "learning_rate": 9.999503028701931e-06, |
| "loss": 0.3341179370880127, |
| "memory(GiB)": 71.85, |
| "step": 420, |
| "token_acc": 0.8807970661733684, |
| "train_speed(iter/s)": 0.08015 |
| }, |
| { |
| "epoch": 0.1647007532637392, |
| "grad_norm": 0.7555674910545349, |
| "learning_rate": 9.99934102600642e-06, |
| "loss": 0.343152379989624, |
| "memory(GiB)": 71.85, |
| "step": 425, |
| "token_acc": 0.8769051924567295, |
| "train_speed(iter/s)": 0.0801 |
| }, |
| { |
| "epoch": 0.16663840918448908, |
| "grad_norm": 0.7114547491073608, |
| "learning_rate": 9.999156208166614e-06, |
| "loss": 0.34557628631591797, |
| "memory(GiB)": 71.85, |
| "step": 430, |
| "token_acc": 0.8801453505688926, |
| "train_speed(iter/s)": 0.080148 |
| }, |
| { |
| "epoch": 0.16857606510523893, |
| "grad_norm": 0.7580552101135254, |
| "learning_rate": 9.99894857602596e-06, |
| "loss": 0.330863881111145, |
| "memory(GiB)": 71.85, |
| "step": 435, |
| "token_acc": 0.8863163953823031, |
| "train_speed(iter/s)": 0.080207 |
| }, |
| { |
| "epoch": 0.1705137210259888, |
| "grad_norm": 0.8019564747810364, |
| "learning_rate": 9.998718130532008e-06, |
| "loss": 0.3519331693649292, |
| "memory(GiB)": 71.85, |
| "step": 440, |
| "token_acc": 0.8858001022494888, |
| "train_speed(iter/s)": 0.080231 |
| }, |
| { |
| "epoch": 0.17245137694673868, |
| "grad_norm": 0.8301380276679993, |
| "learning_rate": 9.99846487273643e-06, |
| "loss": 0.3256737947463989, |
| "memory(GiB)": 71.85, |
| "step": 445, |
| "token_acc": 0.8935722938787145, |
| "train_speed(iter/s)": 0.080282 |
| }, |
| { |
| "epoch": 0.17438903286748855, |
| "grad_norm": 0.781501293182373, |
| "learning_rate": 9.998188803795e-06, |
| "loss": 0.35074899196624754, |
| "memory(GiB)": 71.85, |
| "step": 450, |
| "token_acc": 0.8913197897428612, |
| "train_speed(iter/s)": 0.080269 |
| }, |
| { |
| "epoch": 0.17632668878823843, |
| "grad_norm": 0.7460458278656006, |
| "learning_rate": 9.997889924967594e-06, |
| "loss": 0.3532873153686523, |
| "memory(GiB)": 71.85, |
| "step": 455, |
| "token_acc": 0.8787985639184511, |
| "train_speed(iter/s)": 0.08029 |
| }, |
| { |
| "epoch": 0.1782643447089883, |
| "grad_norm": 0.8098095655441284, |
| "learning_rate": 9.997568237618185e-06, |
| "loss": 0.3252811670303345, |
| "memory(GiB)": 71.85, |
| "step": 460, |
| "token_acc": 0.8971127170599719, |
| "train_speed(iter/s)": 0.080326 |
| }, |
| { |
| "epoch": 0.18020200062973818, |
| "grad_norm": 0.7576206922531128, |
| "learning_rate": 9.997223743214836e-06, |
| "loss": 0.3403348922729492, |
| "memory(GiB)": 71.85, |
| "step": 465, |
| "token_acc": 0.8788252334432494, |
| "train_speed(iter/s)": 0.080259 |
| }, |
| { |
| "epoch": 0.18213965655048805, |
| "grad_norm": 0.7641308903694153, |
| "learning_rate": 9.99685644332969e-06, |
| "loss": 0.33709111213684084, |
| "memory(GiB)": 71.85, |
| "step": 470, |
| "token_acc": 0.8855252274607114, |
| "train_speed(iter/s)": 0.080259 |
| }, |
| { |
| "epoch": 0.18407731247123793, |
| "grad_norm": 0.7454455494880676, |
| "learning_rate": 9.99646633963897e-06, |
| "loss": 0.3458225250244141, |
| "memory(GiB)": 71.85, |
| "step": 475, |
| "token_acc": 0.8953594473049599, |
| "train_speed(iter/s)": 0.080375 |
| }, |
| { |
| "epoch": 0.1860149683919878, |
| "grad_norm": 0.7686738967895508, |
| "learning_rate": 9.996053433922963e-06, |
| "loss": 0.3410240650177002, |
| "memory(GiB)": 71.85, |
| "step": 480, |
| "token_acc": 0.8842588257327465, |
| "train_speed(iter/s)": 0.080335 |
| }, |
| { |
| "epoch": 0.18795262431273765, |
| "grad_norm": 0.7207880020141602, |
| "learning_rate": 9.99561772806602e-06, |
| "loss": 0.33681282997131345, |
| "memory(GiB)": 71.85, |
| "step": 485, |
| "token_acc": 0.8913524098288169, |
| "train_speed(iter/s)": 0.080287 |
| }, |
| { |
| "epoch": 0.18989028023348753, |
| "grad_norm": 0.698858916759491, |
| "learning_rate": 9.99515922405654e-06, |
| "loss": 0.33275785446166994, |
| "memory(GiB)": 71.85, |
| "step": 490, |
| "token_acc": 0.8863521376169997, |
| "train_speed(iter/s)": 0.080363 |
| }, |
| { |
| "epoch": 0.1918279361542374, |
| "grad_norm": 0.717072069644928, |
| "learning_rate": 9.994677923986966e-06, |
| "loss": 0.3287045478820801, |
| "memory(GiB)": 71.85, |
| "step": 495, |
| "token_acc": 0.889167089249493, |
| "train_speed(iter/s)": 0.080389 |
| }, |
| { |
| "epoch": 0.19376559207498728, |
| "grad_norm": 0.8267790079116821, |
| "learning_rate": 9.994173830053775e-06, |
| "loss": 0.34030606746673586, |
| "memory(GiB)": 71.85, |
| "step": 500, |
| "token_acc": 0.8931808444019643, |
| "train_speed(iter/s)": 0.080364 |
| }, |
| { |
| "epoch": 0.19570324799573716, |
| "grad_norm": 0.8210350871086121, |
| "learning_rate": 9.993646944557464e-06, |
| "loss": 0.34555807113647463, |
| "memory(GiB)": 71.85, |
| "step": 505, |
| "token_acc": 0.8850965901724361, |
| "train_speed(iter/s)": 0.080382 |
| }, |
| { |
| "epoch": 0.19764090391648703, |
| "grad_norm": 0.7384742498397827, |
| "learning_rate": 9.993097269902543e-06, |
| "loss": 0.33759369850158694, |
| "memory(GiB)": 71.85, |
| "step": 510, |
| "token_acc": 0.8914952751528628, |
| "train_speed(iter/s)": 0.080379 |
| }, |
| { |
| "epoch": 0.1995785598372369, |
| "grad_norm": 0.7011024355888367, |
| "learning_rate": 9.992524808597527e-06, |
| "loss": 0.34852309226989747, |
| "memory(GiB)": 71.85, |
| "step": 515, |
| "token_acc": 0.8825714285714286, |
| "train_speed(iter/s)": 0.080335 |
| }, |
| { |
| "epoch": 0.20151621575798678, |
| "grad_norm": 0.7273775935173035, |
| "learning_rate": 9.991929563254913e-06, |
| "loss": 0.34394521713256837, |
| "memory(GiB)": 71.85, |
| "step": 520, |
| "token_acc": 0.893121525019857, |
| "train_speed(iter/s)": 0.080345 |
| }, |
| { |
| "epoch": 0.20345387167873666, |
| "grad_norm": 0.7170522212982178, |
| "learning_rate": 9.991311536591187e-06, |
| "loss": 0.3372217893600464, |
| "memory(GiB)": 71.85, |
| "step": 525, |
| "token_acc": 0.8847938227946663, |
| "train_speed(iter/s)": 0.080317 |
| }, |
| { |
| "epoch": 0.20539152759948653, |
| "grad_norm": 0.711494505405426, |
| "learning_rate": 9.990670731426787e-06, |
| "loss": 0.3441883087158203, |
| "memory(GiB)": 71.85, |
| "step": 530, |
| "token_acc": 0.879945936813651, |
| "train_speed(iter/s)": 0.080228 |
| }, |
| { |
| "epoch": 0.20732918352023638, |
| "grad_norm": 0.7713858485221863, |
| "learning_rate": 9.990007150686116e-06, |
| "loss": 0.33831114768981935, |
| "memory(GiB)": 71.85, |
| "step": 535, |
| "token_acc": 0.8745052315056304, |
| "train_speed(iter/s)": 0.080271 |
| }, |
| { |
| "epoch": 0.20926683944098626, |
| "grad_norm": 0.6852882504463196, |
| "learning_rate": 9.98932079739751e-06, |
| "loss": 0.34247727394104005, |
| "memory(GiB)": 71.85, |
| "step": 540, |
| "token_acc": 0.8979679758742012, |
| "train_speed(iter/s)": 0.080267 |
| }, |
| { |
| "epoch": 0.21120449536173613, |
| "grad_norm": 0.7397508025169373, |
| "learning_rate": 9.98861167469323e-06, |
| "loss": 0.31511332988739016, |
| "memory(GiB)": 71.85, |
| "step": 545, |
| "token_acc": 0.8939690385473518, |
| "train_speed(iter/s)": 0.080308 |
| }, |
| { |
| "epoch": 0.213142151282486, |
| "grad_norm": 0.7629010677337646, |
| "learning_rate": 9.987879785809452e-06, |
| "loss": 0.33156523704528806, |
| "memory(GiB)": 71.85, |
| "step": 550, |
| "token_acc": 0.8859730832235891, |
| "train_speed(iter/s)": 0.080275 |
| }, |
| { |
| "epoch": 0.21507980720323588, |
| "grad_norm": 0.7358865737915039, |
| "learning_rate": 9.987125134086247e-06, |
| "loss": 0.3441560506820679, |
| "memory(GiB)": 71.85, |
| "step": 555, |
| "token_acc": 0.8937451892506945, |
| "train_speed(iter/s)": 0.080387 |
| }, |
| { |
| "epoch": 0.21701746312398576, |
| "grad_norm": 0.7392430305480957, |
| "learning_rate": 9.986347722967562e-06, |
| "loss": 0.3216050863265991, |
| "memory(GiB)": 71.85, |
| "step": 560, |
| "token_acc": 0.8860679676091863, |
| "train_speed(iter/s)": 0.080374 |
| }, |
| { |
| "epoch": 0.21895511904473564, |
| "grad_norm": 0.7147052884101868, |
| "learning_rate": 9.985547556001219e-06, |
| "loss": 0.33604471683502196, |
| "memory(GiB)": 71.85, |
| "step": 565, |
| "token_acc": 0.8841051235074863, |
| "train_speed(iter/s)": 0.080384 |
| }, |
| { |
| "epoch": 0.2208927749654855, |
| "grad_norm": 0.7495723366737366, |
| "learning_rate": 9.98472463683888e-06, |
| "loss": 0.34450702667236327, |
| "memory(GiB)": 71.85, |
| "step": 570, |
| "token_acc": 0.8893646715513687, |
| "train_speed(iter/s)": 0.080369 |
| }, |
| { |
| "epoch": 0.2228304308862354, |
| "grad_norm": 0.6873824596405029, |
| "learning_rate": 9.98387896923605e-06, |
| "loss": 0.3255999326705933, |
| "memory(GiB)": 71.85, |
| "step": 575, |
| "token_acc": 0.884888862332696, |
| "train_speed(iter/s)": 0.080403 |
| }, |
| { |
| "epoch": 0.22476808680698526, |
| "grad_norm": 0.7272993326187134, |
| "learning_rate": 9.983010557052036e-06, |
| "loss": 0.34434938430786133, |
| "memory(GiB)": 71.85, |
| "step": 580, |
| "token_acc": 0.8903038878781753, |
| "train_speed(iter/s)": 0.080382 |
| }, |
| { |
| "epoch": 0.2267057427277351, |
| "grad_norm": 0.7470014095306396, |
| "learning_rate": 9.982119404249953e-06, |
| "loss": 0.33343305587768557, |
| "memory(GiB)": 71.85, |
| "step": 585, |
| "token_acc": 0.8992504684572142, |
| "train_speed(iter/s)": 0.080362 |
| }, |
| { |
| "epoch": 0.22864339864848499, |
| "grad_norm": 0.7505276203155518, |
| "learning_rate": 9.981205514896696e-06, |
| "loss": 0.3441640377044678, |
| "memory(GiB)": 71.85, |
| "step": 590, |
| "token_acc": 0.8857358193793623, |
| "train_speed(iter/s)": 0.080377 |
| }, |
| { |
| "epoch": 0.23058105456923486, |
| "grad_norm": 0.7288657426834106, |
| "learning_rate": 9.980268893162915e-06, |
| "loss": 0.3250357866287231, |
| "memory(GiB)": 71.85, |
| "step": 595, |
| "token_acc": 0.8958659141905215, |
| "train_speed(iter/s)": 0.080393 |
| }, |
| { |
| "epoch": 0.23251871048998474, |
| "grad_norm": 0.6993427276611328, |
| "learning_rate": 9.979309543323003e-06, |
| "loss": 0.3328931570053101, |
| "memory(GiB)": 71.85, |
| "step": 600, |
| "token_acc": 0.8799145041844783, |
| "train_speed(iter/s)": 0.080448 |
| }, |
| { |
| "epoch": 0.2344563664107346, |
| "grad_norm": 0.7342857718467712, |
| "learning_rate": 9.978327469755085e-06, |
| "loss": 0.3401214599609375, |
| "memory(GiB)": 71.85, |
| "step": 605, |
| "token_acc": 0.8813226985709538, |
| "train_speed(iter/s)": 0.080407 |
| }, |
| { |
| "epoch": 0.2363940223314845, |
| "grad_norm": 0.7363426685333252, |
| "learning_rate": 9.977322676940975e-06, |
| "loss": 0.3349423885345459, |
| "memory(GiB)": 71.85, |
| "step": 610, |
| "token_acc": 0.87650187118377, |
| "train_speed(iter/s)": 0.080462 |
| }, |
| { |
| "epoch": 0.23833167825223436, |
| "grad_norm": 0.7320806980133057, |
| "learning_rate": 9.97629516946618e-06, |
| "loss": 0.332952880859375, |
| "memory(GiB)": 71.85, |
| "step": 615, |
| "token_acc": 0.9026784355501446, |
| "train_speed(iter/s)": 0.080475 |
| }, |
| { |
| "epoch": 0.24026933417298424, |
| "grad_norm": 0.702238917350769, |
| "learning_rate": 9.975244952019863e-06, |
| "loss": 0.3283294677734375, |
| "memory(GiB)": 71.85, |
| "step": 620, |
| "token_acc": 0.8897130860741778, |
| "train_speed(iter/s)": 0.080534 |
| }, |
| { |
| "epoch": 0.24220699009373411, |
| "grad_norm": 0.6831479072570801, |
| "learning_rate": 9.974172029394827e-06, |
| "loss": 0.3218890428543091, |
| "memory(GiB)": 71.85, |
| "step": 625, |
| "token_acc": 0.902763491736954, |
| "train_speed(iter/s)": 0.080587 |
| }, |
| { |
| "epoch": 0.244144646014484, |
| "grad_norm": 0.7455374002456665, |
| "learning_rate": 9.973076406487497e-06, |
| "loss": 0.3427925109863281, |
| "memory(GiB)": 71.85, |
| "step": 630, |
| "token_acc": 0.8771017735513803, |
| "train_speed(iter/s)": 0.080633 |
| }, |
| { |
| "epoch": 0.24608230193523384, |
| "grad_norm": 0.7146010398864746, |
| "learning_rate": 9.971958088297886e-06, |
| "loss": 0.35007200241088865, |
| "memory(GiB)": 71.85, |
| "step": 635, |
| "token_acc": 0.8881121350139463, |
| "train_speed(iter/s)": 0.080632 |
| }, |
| { |
| "epoch": 0.2480199578559837, |
| "grad_norm": 0.6996679306030273, |
| "learning_rate": 9.97081707992959e-06, |
| "loss": 0.3419856071472168, |
| "memory(GiB)": 71.85, |
| "step": 640, |
| "token_acc": 0.8812891400641515, |
| "train_speed(iter/s)": 0.08065 |
| }, |
| { |
| "epoch": 0.2499576137767336, |
| "grad_norm": 0.7165552377700806, |
| "learning_rate": 9.969653386589749e-06, |
| "loss": 0.33933205604553224, |
| "memory(GiB)": 71.85, |
| "step": 645, |
| "token_acc": 0.8926531416190664, |
| "train_speed(iter/s)": 0.080663 |
| }, |
| { |
| "epoch": 0.2518952696974835, |
| "grad_norm": 0.6688835620880127, |
| "learning_rate": 9.968467013589025e-06, |
| "loss": 0.3391030550003052, |
| "memory(GiB)": 71.85, |
| "step": 650, |
| "token_acc": 0.8828354743847702, |
| "train_speed(iter/s)": 0.080687 |
| }, |
| { |
| "epoch": 0.25383292561823334, |
| "grad_norm": 0.6886083483695984, |
| "learning_rate": 9.967257966341591e-06, |
| "loss": 0.318090558052063, |
| "memory(GiB)": 71.85, |
| "step": 655, |
| "token_acc": 0.9001209101234748, |
| "train_speed(iter/s)": 0.080727 |
| }, |
| { |
| "epoch": 0.2557705815389832, |
| "grad_norm": 0.7216477394104004, |
| "learning_rate": 9.966026250365086e-06, |
| "loss": 0.3310126781463623, |
| "memory(GiB)": 71.85, |
| "step": 660, |
| "token_acc": 0.8833163784333672, |
| "train_speed(iter/s)": 0.080719 |
| }, |
| { |
| "epoch": 0.2577082374597331, |
| "grad_norm": 0.7138867974281311, |
| "learning_rate": 9.964771871280611e-06, |
| "loss": 0.30642869472503664, |
| "memory(GiB)": 71.85, |
| "step": 665, |
| "token_acc": 0.8931192070030896, |
| "train_speed(iter/s)": 0.080693 |
| }, |
| { |
| "epoch": 0.25964589338048294, |
| "grad_norm": 0.7110100984573364, |
| "learning_rate": 9.963494834812688e-06, |
| "loss": 0.33192169666290283, |
| "memory(GiB)": 71.85, |
| "step": 670, |
| "token_acc": 0.889419329696025, |
| "train_speed(iter/s)": 0.080681 |
| }, |
| { |
| "epoch": 0.26158354930123284, |
| "grad_norm": 0.6674191951751709, |
| "learning_rate": 9.962195146789237e-06, |
| "loss": 0.3481155872344971, |
| "memory(GiB)": 71.85, |
| "step": 675, |
| "token_acc": 0.8790135092679862, |
| "train_speed(iter/s)": 0.080646 |
| }, |
| { |
| "epoch": 0.2635212052219827, |
| "grad_norm": 0.7230032086372375, |
| "learning_rate": 9.960872813141555e-06, |
| "loss": 0.31385188102722167, |
| "memory(GiB)": 71.85, |
| "step": 680, |
| "token_acc": 0.8956414978514426, |
| "train_speed(iter/s)": 0.080653 |
| }, |
| { |
| "epoch": 0.2654588611427326, |
| "grad_norm": 0.754707396030426, |
| "learning_rate": 9.959527839904283e-06, |
| "loss": 0.33814034461975095, |
| "memory(GiB)": 71.85, |
| "step": 685, |
| "token_acc": 0.8922617070740618, |
| "train_speed(iter/s)": 0.080635 |
| }, |
| { |
| "epoch": 0.26739651706348244, |
| "grad_norm": 0.6921854615211487, |
| "learning_rate": 9.958160233215383e-06, |
| "loss": 0.3266612529754639, |
| "memory(GiB)": 71.85, |
| "step": 690, |
| "token_acc": 0.8988545975182947, |
| "train_speed(iter/s)": 0.08066 |
| }, |
| { |
| "epoch": 0.26933417298423235, |
| "grad_norm": 0.7002295851707458, |
| "learning_rate": 9.956769999316108e-06, |
| "loss": 0.34218902587890626, |
| "memory(GiB)": 71.85, |
| "step": 695, |
| "token_acc": 0.9018419058288576, |
| "train_speed(iter/s)": 0.080623 |
| }, |
| { |
| "epoch": 0.2712718289049822, |
| "grad_norm": 0.686951756477356, |
| "learning_rate": 9.955357144550967e-06, |
| "loss": 0.32504887580871583, |
| "memory(GiB)": 71.85, |
| "step": 700, |
| "token_acc": 0.8831498186254053, |
| "train_speed(iter/s)": 0.080585 |
| }, |
| { |
| "epoch": 0.2732094848257321, |
| "grad_norm": 0.635576069355011, |
| "learning_rate": 9.953921675367711e-06, |
| "loss": 0.3315123558044434, |
| "memory(GiB)": 71.85, |
| "step": 705, |
| "token_acc": 0.8815539513677811, |
| "train_speed(iter/s)": 0.080527 |
| }, |
| { |
| "epoch": 0.27514714074648194, |
| "grad_norm": 0.6723300218582153, |
| "learning_rate": 9.952463598317286e-06, |
| "loss": 0.31166939735412597, |
| "memory(GiB)": 71.85, |
| "step": 710, |
| "token_acc": 0.8939772037499157, |
| "train_speed(iter/s)": 0.080549 |
| }, |
| { |
| "epoch": 0.2770847966672318, |
| "grad_norm": 0.7758930921554565, |
| "learning_rate": 9.950982920053822e-06, |
| "loss": 0.33371968269348146, |
| "memory(GiB)": 71.85, |
| "step": 715, |
| "token_acc": 0.8760346611484738, |
| "train_speed(iter/s)": 0.080596 |
| }, |
| { |
| "epoch": 0.2790224525879817, |
| "grad_norm": 0.7074525952339172, |
| "learning_rate": 9.949479647334584e-06, |
| "loss": 0.33358490467071533, |
| "memory(GiB)": 71.85, |
| "step": 720, |
| "token_acc": 0.8858836043154535, |
| "train_speed(iter/s)": 0.080652 |
| }, |
| { |
| "epoch": 0.28096010850873154, |
| "grad_norm": 0.7324233055114746, |
| "learning_rate": 9.947953787019955e-06, |
| "loss": 0.3313758134841919, |
| "memory(GiB)": 71.85, |
| "step": 725, |
| "token_acc": 0.8816807191136506, |
| "train_speed(iter/s)": 0.080632 |
| }, |
| { |
| "epoch": 0.28289776442948145, |
| "grad_norm": 0.6607609391212463, |
| "learning_rate": 9.946405346073395e-06, |
| "loss": 0.343143105506897, |
| "memory(GiB)": 71.85, |
| "step": 730, |
| "token_acc": 0.8806101783497746, |
| "train_speed(iter/s)": 0.080642 |
| }, |
| { |
| "epoch": 0.2848354203502313, |
| "grad_norm": 0.6818123459815979, |
| "learning_rate": 9.944834331561418e-06, |
| "loss": 0.3311434507369995, |
| "memory(GiB)": 71.85, |
| "step": 735, |
| "token_acc": 0.8792830615210722, |
| "train_speed(iter/s)": 0.080645 |
| }, |
| { |
| "epoch": 0.2867730762709812, |
| "grad_norm": 0.6960747241973877, |
| "learning_rate": 9.943240750653552e-06, |
| "loss": 0.3408792972564697, |
| "memory(GiB)": 71.85, |
| "step": 740, |
| "token_acc": 0.8895531360630552, |
| "train_speed(iter/s)": 0.080666 |
| }, |
| { |
| "epoch": 0.28871073219173105, |
| "grad_norm": 0.6795424818992615, |
| "learning_rate": 9.941624610622312e-06, |
| "loss": 0.32071099281311033, |
| "memory(GiB)": 71.85, |
| "step": 745, |
| "token_acc": 0.8923172567240364, |
| "train_speed(iter/s)": 0.080651 |
| }, |
| { |
| "epoch": 0.29064838811248095, |
| "grad_norm": 0.7239606976509094, |
| "learning_rate": 9.939985918843163e-06, |
| "loss": 0.3250409126281738, |
| "memory(GiB)": 71.85, |
| "step": 750, |
| "token_acc": 0.8985261336793425, |
| "train_speed(iter/s)": 0.080637 |
| }, |
| { |
| "epoch": 0.2925860440332308, |
| "grad_norm": 0.6698892116546631, |
| "learning_rate": 9.938324682794489e-06, |
| "loss": 0.3211568832397461, |
| "memory(GiB)": 71.85, |
| "step": 755, |
| "token_acc": 0.8952513503661729, |
| "train_speed(iter/s)": 0.08066 |
| }, |
| { |
| "epoch": 0.29452369995398064, |
| "grad_norm": 0.6671373844146729, |
| "learning_rate": 9.936640910057557e-06, |
| "loss": 0.33314924240112304, |
| "memory(GiB)": 71.85, |
| "step": 760, |
| "token_acc": 0.8977999531192445, |
| "train_speed(iter/s)": 0.080628 |
| }, |
| { |
| "epoch": 0.29646135587473055, |
| "grad_norm": 0.6509130001068115, |
| "learning_rate": 9.934934608316484e-06, |
| "loss": 0.3198971748352051, |
| "memory(GiB)": 71.85, |
| "step": 765, |
| "token_acc": 0.9052997393570807, |
| "train_speed(iter/s)": 0.080619 |
| }, |
| { |
| "epoch": 0.2983990117954804, |
| "grad_norm": 0.7284981608390808, |
| "learning_rate": 9.9332057853582e-06, |
| "loss": 0.32789950370788573, |
| "memory(GiB)": 71.85, |
| "step": 770, |
| "token_acc": 0.8824503819135556, |
| "train_speed(iter/s)": 0.080637 |
| }, |
| { |
| "epoch": 0.3003366677162303, |
| "grad_norm": 0.6976826786994934, |
| "learning_rate": 9.931454449072414e-06, |
| "loss": 0.3451281309127808, |
| "memory(GiB)": 71.85, |
| "step": 775, |
| "token_acc": 0.8759064042410578, |
| "train_speed(iter/s)": 0.080614 |
| }, |
| { |
| "epoch": 0.30227432363698015, |
| "grad_norm": 0.7098056077957153, |
| "learning_rate": 9.929680607451577e-06, |
| "loss": 0.3235619306564331, |
| "memory(GiB)": 71.85, |
| "step": 780, |
| "token_acc": 0.8838213673119334, |
| "train_speed(iter/s)": 0.080685 |
| }, |
| { |
| "epoch": 0.30421197955773005, |
| "grad_norm": 0.7323970198631287, |
| "learning_rate": 9.927884268590846e-06, |
| "loss": 0.32435629367828367, |
| "memory(GiB)": 71.85, |
| "step": 785, |
| "token_acc": 0.901668191736468, |
| "train_speed(iter/s)": 0.080703 |
| }, |
| { |
| "epoch": 0.3061496354784799, |
| "grad_norm": 0.6804087162017822, |
| "learning_rate": 9.926065440688048e-06, |
| "loss": 0.33081588745117185, |
| "memory(GiB)": 71.85, |
| "step": 790, |
| "token_acc": 0.8955827220863896, |
| "train_speed(iter/s)": 0.080696 |
| }, |
| { |
| "epoch": 0.3080872913992298, |
| "grad_norm": 0.6831809282302856, |
| "learning_rate": 9.92422413204364e-06, |
| "loss": 0.3195831775665283, |
| "memory(GiB)": 71.85, |
| "step": 795, |
| "token_acc": 0.8932602703766899, |
| "train_speed(iter/s)": 0.080702 |
| }, |
| { |
| "epoch": 0.31002494731997965, |
| "grad_norm": 0.7360027432441711, |
| "learning_rate": 9.922360351060678e-06, |
| "loss": 0.3314258098602295, |
| "memory(GiB)": 71.85, |
| "step": 800, |
| "token_acc": 0.880964780938741, |
| "train_speed(iter/s)": 0.080737 |
| }, |
| { |
| "epoch": 0.31196260324072955, |
| "grad_norm": 0.6755993366241455, |
| "learning_rate": 9.920474106244764e-06, |
| "loss": 0.3221295833587646, |
| "memory(GiB)": 71.85, |
| "step": 805, |
| "token_acc": 0.9010371500919213, |
| "train_speed(iter/s)": 0.080777 |
| }, |
| { |
| "epoch": 0.3139002591614794, |
| "grad_norm": 0.6507264971733093, |
| "learning_rate": 9.918565406204026e-06, |
| "loss": 0.3143099546432495, |
| "memory(GiB)": 71.85, |
| "step": 810, |
| "token_acc": 0.8967003567181926, |
| "train_speed(iter/s)": 0.080768 |
| }, |
| { |
| "epoch": 0.31583791508222925, |
| "grad_norm": 0.6312580108642578, |
| "learning_rate": 9.916634259649063e-06, |
| "loss": 0.32530817985534666, |
| "memory(GiB)": 71.85, |
| "step": 815, |
| "token_acc": 0.8985116339529442, |
| "train_speed(iter/s)": 0.080764 |
| }, |
| { |
| "epoch": 0.31777557100297915, |
| "grad_norm": 0.6676200032234192, |
| "learning_rate": 9.914680675392915e-06, |
| "loss": 0.33031282424926756, |
| "memory(GiB)": 71.85, |
| "step": 820, |
| "token_acc": 0.8891376185907686, |
| "train_speed(iter/s)": 0.080751 |
| }, |
| { |
| "epoch": 0.319713226923729, |
| "grad_norm": 0.6487600803375244, |
| "learning_rate": 9.91270466235102e-06, |
| "loss": 0.3286482334136963, |
| "memory(GiB)": 71.85, |
| "step": 825, |
| "token_acc": 0.8954001460271103, |
| "train_speed(iter/s)": 0.080787 |
| }, |
| { |
| "epoch": 0.3216508828444789, |
| "grad_norm": 0.6774858236312866, |
| "learning_rate": 9.910706229541168e-06, |
| "loss": 0.33796694278717043, |
| "memory(GiB)": 71.85, |
| "step": 830, |
| "token_acc": 0.8842560947824105, |
| "train_speed(iter/s)": 0.080774 |
| }, |
| { |
| "epoch": 0.32358853876522875, |
| "grad_norm": 0.6695789098739624, |
| "learning_rate": 9.90868538608347e-06, |
| "loss": 0.3218432903289795, |
| "memory(GiB)": 71.85, |
| "step": 835, |
| "token_acc": 0.8819998762452819, |
| "train_speed(iter/s)": 0.080748 |
| }, |
| { |
| "epoch": 0.32552619468597865, |
| "grad_norm": 0.6637648940086365, |
| "learning_rate": 9.906642141200305e-06, |
| "loss": 0.3061497688293457, |
| "memory(GiB)": 71.85, |
| "step": 840, |
| "token_acc": 0.8962320773591197, |
| "train_speed(iter/s)": 0.080761 |
| }, |
| { |
| "epoch": 0.3274638506067285, |
| "grad_norm": 0.6814354658126831, |
| "learning_rate": 9.904576504216292e-06, |
| "loss": 0.3214125156402588, |
| "memory(GiB)": 71.85, |
| "step": 845, |
| "token_acc": 0.8940032614381096, |
| "train_speed(iter/s)": 0.080715 |
| }, |
| { |
| "epoch": 0.3294015065274784, |
| "grad_norm": 0.687163770198822, |
| "learning_rate": 9.902488484558231e-06, |
| "loss": 0.3220739603042603, |
| "memory(GiB)": 71.85, |
| "step": 850, |
| "token_acc": 0.8982021789725774, |
| "train_speed(iter/s)": 0.080746 |
| }, |
| { |
| "epoch": 0.33133916244822825, |
| "grad_norm": 0.6442270874977112, |
| "learning_rate": 9.900378091755072e-06, |
| "loss": 0.3244103670120239, |
| "memory(GiB)": 71.85, |
| "step": 855, |
| "token_acc": 0.8932569296375267, |
| "train_speed(iter/s)": 0.080751 |
| }, |
| { |
| "epoch": 0.33327681836897816, |
| "grad_norm": 0.6879537105560303, |
| "learning_rate": 9.89824533543787e-06, |
| "loss": 0.33673839569091796, |
| "memory(GiB)": 71.85, |
| "step": 860, |
| "token_acc": 0.8844792633061883, |
| "train_speed(iter/s)": 0.080723 |
| }, |
| { |
| "epoch": 0.335214474289728, |
| "grad_norm": 0.6312484741210938, |
| "learning_rate": 9.896090225339735e-06, |
| "loss": 0.3281097412109375, |
| "memory(GiB)": 71.85, |
| "step": 865, |
| "token_acc": 0.8877129503995176, |
| "train_speed(iter/s)": 0.080719 |
| }, |
| { |
| "epoch": 0.33715213021047785, |
| "grad_norm": 0.6339064836502075, |
| "learning_rate": 9.893912771295792e-06, |
| "loss": 0.33004236221313477, |
| "memory(GiB)": 71.85, |
| "step": 870, |
| "token_acc": 0.8873361187148826, |
| "train_speed(iter/s)": 0.080697 |
| }, |
| { |
| "epoch": 0.33908978613122776, |
| "grad_norm": 0.6488205790519714, |
| "learning_rate": 9.891712983243138e-06, |
| "loss": 0.31695027351379396, |
| "memory(GiB)": 71.85, |
| "step": 875, |
| "token_acc": 0.8903181427343079, |
| "train_speed(iter/s)": 0.080716 |
| }, |
| { |
| "epoch": 0.3410274420519776, |
| "grad_norm": 0.6404213905334473, |
| "learning_rate": 9.889490871220791e-06, |
| "loss": 0.3214251041412354, |
| "memory(GiB)": 71.85, |
| "step": 880, |
| "token_acc": 0.8919099073814202, |
| "train_speed(iter/s)": 0.080726 |
| }, |
| { |
| "epoch": 0.3429650979727275, |
| "grad_norm": 0.6886140704154968, |
| "learning_rate": 9.887246445369651e-06, |
| "loss": 0.32711448669433596, |
| "memory(GiB)": 71.85, |
| "step": 885, |
| "token_acc": 0.8934789023310711, |
| "train_speed(iter/s)": 0.080709 |
| }, |
| { |
| "epoch": 0.34490275389347735, |
| "grad_norm": 0.6515597701072693, |
| "learning_rate": 9.884979715932444e-06, |
| "loss": 0.3249198436737061, |
| "memory(GiB)": 71.85, |
| "step": 890, |
| "token_acc": 0.8946919213472081, |
| "train_speed(iter/s)": 0.080729 |
| }, |
| { |
| "epoch": 0.34684040981422726, |
| "grad_norm": 0.6933703422546387, |
| "learning_rate": 9.88269069325369e-06, |
| "loss": 0.3156230926513672, |
| "memory(GiB)": 71.85, |
| "step": 895, |
| "token_acc": 0.8905154706400182, |
| "train_speed(iter/s)": 0.080718 |
| }, |
| { |
| "epoch": 0.3487780657349771, |
| "grad_norm": 0.6851153373718262, |
| "learning_rate": 9.880379387779637e-06, |
| "loss": 0.330825662612915, |
| "memory(GiB)": 71.85, |
| "step": 900, |
| "token_acc": 0.8904936548635227, |
| "train_speed(iter/s)": 0.080739 |
| }, |
| { |
| "epoch": 0.350715721655727, |
| "grad_norm": 0.6466790437698364, |
| "learning_rate": 9.878045810058232e-06, |
| "loss": 0.3320996999740601, |
| "memory(GiB)": 71.85, |
| "step": 905, |
| "token_acc": 0.8887969258054981, |
| "train_speed(iter/s)": 0.08071 |
| }, |
| { |
| "epoch": 0.35265337757647686, |
| "grad_norm": 0.6607591509819031, |
| "learning_rate": 9.875689970739062e-06, |
| "loss": 0.3408550500869751, |
| "memory(GiB)": 71.85, |
| "step": 910, |
| "token_acc": 0.8919545885909107, |
| "train_speed(iter/s)": 0.080729 |
| }, |
| { |
| "epoch": 0.3545910334972267, |
| "grad_norm": 0.6846933960914612, |
| "learning_rate": 9.873311880573305e-06, |
| "loss": 0.33896684646606445, |
| "memory(GiB)": 71.85, |
| "step": 915, |
| "token_acc": 0.8795058992243009, |
| "train_speed(iter/s)": 0.08072 |
| }, |
| { |
| "epoch": 0.3565286894179766, |
| "grad_norm": 0.690334677696228, |
| "learning_rate": 9.870911550413684e-06, |
| "loss": 0.3155628204345703, |
| "memory(GiB)": 71.85, |
| "step": 920, |
| "token_acc": 0.885660422526782, |
| "train_speed(iter/s)": 0.080721 |
| }, |
| { |
| "epoch": 0.35846634533872646, |
| "grad_norm": 0.695077121257782, |
| "learning_rate": 9.86848899121442e-06, |
| "loss": 0.32448444366455076, |
| "memory(GiB)": 71.85, |
| "step": 925, |
| "token_acc": 0.8889850612297236, |
| "train_speed(iter/s)": 0.08077 |
| }, |
| { |
| "epoch": 0.36040400125947636, |
| "grad_norm": 0.6861225962638855, |
| "learning_rate": 9.866044214031179e-06, |
| "loss": 0.31257429122924807, |
| "memory(GiB)": 71.85, |
| "step": 930, |
| "token_acc": 0.8953256377369246, |
| "train_speed(iter/s)": 0.080788 |
| }, |
| { |
| "epoch": 0.3623416571802262, |
| "grad_norm": 0.6602544784545898, |
| "learning_rate": 9.86357723002102e-06, |
| "loss": 0.3284167766571045, |
| "memory(GiB)": 71.85, |
| "step": 935, |
| "token_acc": 0.8837062915053662, |
| "train_speed(iter/s)": 0.08078 |
| }, |
| { |
| "epoch": 0.3642793131009761, |
| "grad_norm": 0.641711950302124, |
| "learning_rate": 9.861088050442342e-06, |
| "loss": 0.3246732234954834, |
| "memory(GiB)": 71.85, |
| "step": 940, |
| "token_acc": 0.9021792751226023, |
| "train_speed(iter/s)": 0.080795 |
| }, |
| { |
| "epoch": 0.36621696902172596, |
| "grad_norm": 0.666860044002533, |
| "learning_rate": 9.858576686654847e-06, |
| "loss": 0.3272420406341553, |
| "memory(GiB)": 71.86, |
| "step": 945, |
| "token_acc": 0.9028790786948176, |
| "train_speed(iter/s)": 0.080725 |
| }, |
| { |
| "epoch": 0.36815462494247586, |
| "grad_norm": 0.6561484932899475, |
| "learning_rate": 9.856043150119466e-06, |
| "loss": 0.3124321460723877, |
| "memory(GiB)": 71.86, |
| "step": 950, |
| "token_acc": 0.9018067556952082, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 0.3700922808632257, |
| "grad_norm": 0.6260762214660645, |
| "learning_rate": 9.853487452398324e-06, |
| "loss": 0.3247486114501953, |
| "memory(GiB)": 71.86, |
| "step": 955, |
| "token_acc": 0.8947277680079997, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 0.3720299367839756, |
| "grad_norm": 0.6457754373550415, |
| "learning_rate": 9.850909605154682e-06, |
| "loss": 0.3376758575439453, |
| "memory(GiB)": 71.86, |
| "step": 960, |
| "token_acc": 0.8909614421632449, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 0.37396759270472546, |
| "grad_norm": 0.6789243817329407, |
| "learning_rate": 9.84830962015288e-06, |
| "loss": 0.3184741258621216, |
| "memory(GiB)": 71.86, |
| "step": 965, |
| "token_acc": 0.8856595547922584, |
| "train_speed(iter/s)": 0.080768 |
| }, |
| { |
| "epoch": 0.3759052486254753, |
| "grad_norm": 0.6199044585227966, |
| "learning_rate": 9.84568750925829e-06, |
| "loss": 0.3230047941207886, |
| "memory(GiB)": 71.86, |
| "step": 970, |
| "token_acc": 0.8937249758387409, |
| "train_speed(iter/s)": 0.080769 |
| }, |
| { |
| "epoch": 0.3778429045462252, |
| "grad_norm": 0.630508542060852, |
| "learning_rate": 9.843043284437257e-06, |
| "loss": 0.33438754081726074, |
| "memory(GiB)": 71.86, |
| "step": 975, |
| "token_acc": 0.8890298436459357, |
| "train_speed(iter/s)": 0.080752 |
| }, |
| { |
| "epoch": 0.37978056046697506, |
| "grad_norm": 0.6191177368164062, |
| "learning_rate": 9.840376957757042e-06, |
| "loss": 0.32352337837219236, |
| "memory(GiB)": 71.86, |
| "step": 980, |
| "token_acc": 0.8885422380691168, |
| "train_speed(iter/s)": 0.080724 |
| }, |
| { |
| "epoch": 0.38171821638772496, |
| "grad_norm": 0.6409081816673279, |
| "learning_rate": 9.83768854138578e-06, |
| "loss": 0.33620543479919435, |
| "memory(GiB)": 71.86, |
| "step": 985, |
| "token_acc": 0.8729323535637606, |
| "train_speed(iter/s)": 0.080742 |
| }, |
| { |
| "epoch": 0.3836558723084748, |
| "grad_norm": 0.6569280028343201, |
| "learning_rate": 9.834978047592404e-06, |
| "loss": 0.3181809425354004, |
| "memory(GiB)": 71.86, |
| "step": 990, |
| "token_acc": 0.8895862964566199, |
| "train_speed(iter/s)": 0.08074 |
| }, |
| { |
| "epoch": 0.3855935282292247, |
| "grad_norm": 0.637712836265564, |
| "learning_rate": 9.832245488746612e-06, |
| "loss": 0.3220236301422119, |
| "memory(GiB)": 71.86, |
| "step": 995, |
| "token_acc": 0.9014977162482739, |
| "train_speed(iter/s)": 0.080745 |
| }, |
| { |
| "epoch": 0.38753118414997456, |
| "grad_norm": 0.6087589263916016, |
| "learning_rate": 9.829490877318785e-06, |
| "loss": 0.30138335227966306, |
| "memory(GiB)": 71.86, |
| "step": 1000, |
| "token_acc": 0.9027007152884551, |
| "train_speed(iter/s)": 0.080744 |
| }, |
| { |
| "epoch": 0.38946884007072446, |
| "grad_norm": 0.6550173759460449, |
| "learning_rate": 9.826714225879957e-06, |
| "loss": 0.3302351236343384, |
| "memory(GiB)": 71.86, |
| "step": 1005, |
| "token_acc": 0.884102234367465, |
| "train_speed(iter/s)": 0.080755 |
| }, |
| { |
| "epoch": 0.3914064959914743, |
| "grad_norm": 0.6532196998596191, |
| "learning_rate": 9.823915547101735e-06, |
| "loss": 0.33039035797119143, |
| "memory(GiB)": 71.86, |
| "step": 1010, |
| "token_acc": 0.8898355520751762, |
| "train_speed(iter/s)": 0.080755 |
| }, |
| { |
| "epoch": 0.39334415191222416, |
| "grad_norm": 0.6979678869247437, |
| "learning_rate": 9.821094853756256e-06, |
| "loss": 0.32817845344543456, |
| "memory(GiB)": 71.86, |
| "step": 1015, |
| "token_acc": 0.8947746637922185, |
| "train_speed(iter/s)": 0.080747 |
| }, |
| { |
| "epoch": 0.39528180783297406, |
| "grad_norm": 0.6975110173225403, |
| "learning_rate": 9.818252158716121e-06, |
| "loss": 0.33469138145446775, |
| "memory(GiB)": 71.86, |
| "step": 1020, |
| "token_acc": 0.8819404784567548, |
| "train_speed(iter/s)": 0.080768 |
| }, |
| { |
| "epoch": 0.3972194637537239, |
| "grad_norm": 0.6733399033546448, |
| "learning_rate": 9.81538747495434e-06, |
| "loss": 0.3251255989074707, |
| "memory(GiB)": 71.86, |
| "step": 1025, |
| "token_acc": 0.8900912769361061, |
| "train_speed(iter/s)": 0.080814 |
| }, |
| { |
| "epoch": 0.3991571196744738, |
| "grad_norm": 0.6425248980522156, |
| "learning_rate": 9.812500815544272e-06, |
| "loss": 0.314360523223877, |
| "memory(GiB)": 71.86, |
| "step": 1030, |
| "token_acc": 0.8923047727488377, |
| "train_speed(iter/s)": 0.080807 |
| }, |
| { |
| "epoch": 0.40109477559522366, |
| "grad_norm": 0.6609647274017334, |
| "learning_rate": 9.809592193659562e-06, |
| "loss": 0.31197681427001955, |
| "memory(GiB)": 71.86, |
| "step": 1035, |
| "token_acc": 0.8963696597418731, |
| "train_speed(iter/s)": 0.080824 |
| }, |
| { |
| "epoch": 0.40303243151597357, |
| "grad_norm": 0.7050966024398804, |
| "learning_rate": 9.806661622574084e-06, |
| "loss": 0.34067888259887696, |
| "memory(GiB)": 71.86, |
| "step": 1040, |
| "token_acc": 0.889449232838193, |
| "train_speed(iter/s)": 0.080806 |
| }, |
| { |
| "epoch": 0.4049700874367234, |
| "grad_norm": 0.6383348703384399, |
| "learning_rate": 9.803709115661882e-06, |
| "loss": 0.32077836990356445, |
| "memory(GiB)": 71.86, |
| "step": 1045, |
| "token_acc": 0.8989039372164641, |
| "train_speed(iter/s)": 0.080784 |
| }, |
| { |
| "epoch": 0.4069077433574733, |
| "grad_norm": 0.6369540095329285, |
| "learning_rate": 9.800734686397105e-06, |
| "loss": 0.3368854999542236, |
| "memory(GiB)": 71.86, |
| "step": 1050, |
| "token_acc": 0.8808575338300136, |
| "train_speed(iter/s)": 0.080786 |
| }, |
| { |
| "epoch": 0.40884539927822316, |
| "grad_norm": 0.6770035028457642, |
| "learning_rate": 9.797738348353951e-06, |
| "loss": 0.3146085023880005, |
| "memory(GiB)": 71.86, |
| "step": 1055, |
| "token_acc": 0.9046233656980617, |
| "train_speed(iter/s)": 0.080782 |
| }, |
| { |
| "epoch": 0.41078305519897307, |
| "grad_norm": 0.6430585980415344, |
| "learning_rate": 9.794720115206597e-06, |
| "loss": 0.3340238332748413, |
| "memory(GiB)": 71.86, |
| "step": 1060, |
| "token_acc": 0.8887318968562345, |
| "train_speed(iter/s)": 0.080765 |
| }, |
| { |
| "epoch": 0.4127207111197229, |
| "grad_norm": 0.6269405484199524, |
| "learning_rate": 9.791680000729145e-06, |
| "loss": 0.3351738452911377, |
| "memory(GiB)": 71.86, |
| "step": 1065, |
| "token_acc": 0.8993369707852752, |
| "train_speed(iter/s)": 0.080785 |
| }, |
| { |
| "epoch": 0.41465836704047276, |
| "grad_norm": 0.617246150970459, |
| "learning_rate": 9.788618018795552e-06, |
| "loss": 0.3090823650360107, |
| "memory(GiB)": 71.86, |
| "step": 1070, |
| "token_acc": 0.8943518151613985, |
| "train_speed(iter/s)": 0.080806 |
| }, |
| { |
| "epoch": 0.41659602296122267, |
| "grad_norm": 0.6660940647125244, |
| "learning_rate": 9.785534183379571e-06, |
| "loss": 0.3186957597732544, |
| "memory(GiB)": 71.86, |
| "step": 1075, |
| "token_acc": 0.8957356770833333, |
| "train_speed(iter/s)": 0.080805 |
| }, |
| { |
| "epoch": 0.4185336788819725, |
| "grad_norm": 0.6464110016822815, |
| "learning_rate": 9.78242850855469e-06, |
| "loss": 0.30962181091308594, |
| "memory(GiB)": 71.86, |
| "step": 1080, |
| "token_acc": 0.8997256004607203, |
| "train_speed(iter/s)": 0.080829 |
| }, |
| { |
| "epoch": 0.4204713348027224, |
| "grad_norm": 0.6295762062072754, |
| "learning_rate": 9.779301008494057e-06, |
| "loss": 0.32428104877471925, |
| "memory(GiB)": 71.86, |
| "step": 1085, |
| "token_acc": 0.8935828705338512, |
| "train_speed(iter/s)": 0.080829 |
| }, |
| { |
| "epoch": 0.42240899072347227, |
| "grad_norm": 0.6192321181297302, |
| "learning_rate": 9.776151697470431e-06, |
| "loss": 0.3047311305999756, |
| "memory(GiB)": 71.86, |
| "step": 1090, |
| "token_acc": 0.8998032786885246, |
| "train_speed(iter/s)": 0.080814 |
| }, |
| { |
| "epoch": 0.42434664664422217, |
| "grad_norm": 0.6715239882469177, |
| "learning_rate": 9.772980589856099e-06, |
| "loss": 0.3298771381378174, |
| "memory(GiB)": 71.86, |
| "step": 1095, |
| "token_acc": 0.8884317290211463, |
| "train_speed(iter/s)": 0.080817 |
| }, |
| { |
| "epoch": 0.426284302564972, |
| "grad_norm": 0.6691494584083557, |
| "learning_rate": 9.769787700122823e-06, |
| "loss": 0.32638509273529054, |
| "memory(GiB)": 71.86, |
| "step": 1100, |
| "token_acc": 0.9007710475131981, |
| "train_speed(iter/s)": 0.080813 |
| }, |
| { |
| "epoch": 0.4282219584857219, |
| "grad_norm": 0.6489860415458679, |
| "learning_rate": 9.766573042841776e-06, |
| "loss": 0.3245856761932373, |
| "memory(GiB)": 71.86, |
| "step": 1105, |
| "token_acc": 0.8856663638269621, |
| "train_speed(iter/s)": 0.080796 |
| }, |
| { |
| "epoch": 0.43015961440647177, |
| "grad_norm": 0.6144282817840576, |
| "learning_rate": 9.763336632683463e-06, |
| "loss": 0.3188477039337158, |
| "memory(GiB)": 71.86, |
| "step": 1110, |
| "token_acc": 0.8965635738831615, |
| "train_speed(iter/s)": 0.080789 |
| }, |
| { |
| "epoch": 0.4320972703272216, |
| "grad_norm": 0.6301048398017883, |
| "learning_rate": 9.760078484417661e-06, |
| "loss": 0.33411540985107424, |
| "memory(GiB)": 71.86, |
| "step": 1115, |
| "token_acc": 0.8902315885476043, |
| "train_speed(iter/s)": 0.080787 |
| }, |
| { |
| "epoch": 0.4340349262479715, |
| "grad_norm": 0.6585410237312317, |
| "learning_rate": 9.756798612913358e-06, |
| "loss": 0.3051680326461792, |
| "memory(GiB)": 71.86, |
| "step": 1120, |
| "token_acc": 0.8869892784700087, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 0.43597258216872137, |
| "grad_norm": 0.5961290597915649, |
| "learning_rate": 9.753497033138674e-06, |
| "loss": 0.3312673091888428, |
| "memory(GiB)": 71.86, |
| "step": 1125, |
| "token_acc": 0.9026795875435389, |
| "train_speed(iter/s)": 0.080816 |
| }, |
| { |
| "epoch": 0.43791023808947127, |
| "grad_norm": 0.6419817209243774, |
| "learning_rate": 9.750173760160799e-06, |
| "loss": 0.3214743614196777, |
| "memory(GiB)": 71.86, |
| "step": 1130, |
| "token_acc": 0.8987567162314132, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 0.4398478940102211, |
| "grad_norm": 0.6663675308227539, |
| "learning_rate": 9.74682880914592e-06, |
| "loss": 0.31963672637939455, |
| "memory(GiB)": 71.86, |
| "step": 1135, |
| "token_acc": 0.8844882486732373, |
| "train_speed(iter/s)": 0.080854 |
| }, |
| { |
| "epoch": 0.441785549930971, |
| "grad_norm": 0.6634994745254517, |
| "learning_rate": 9.74346219535916e-06, |
| "loss": 0.3009810447692871, |
| "memory(GiB)": 71.86, |
| "step": 1140, |
| "token_acc": 0.9029835103409726, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 0.44372320585172087, |
| "grad_norm": 0.6361936926841736, |
| "learning_rate": 9.740073934164499e-06, |
| "loss": 0.30399558544158933, |
| "memory(GiB)": 71.86, |
| "step": 1145, |
| "token_acc": 0.8956846262545745, |
| "train_speed(iter/s)": 0.080856 |
| }, |
| { |
| "epoch": 0.4456608617724708, |
| "grad_norm": 0.635513961315155, |
| "learning_rate": 9.736664041024705e-06, |
| "loss": 0.3424172639846802, |
| "memory(GiB)": 71.86, |
| "step": 1150, |
| "token_acc": 0.8794452236247468, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 0.4475985176932206, |
| "grad_norm": 0.6696315407752991, |
| "learning_rate": 9.733232531501275e-06, |
| "loss": 0.3195925235748291, |
| "memory(GiB)": 71.86, |
| "step": 1155, |
| "token_acc": 0.8961313012895662, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 0.4495361736139705, |
| "grad_norm": 0.6544925570487976, |
| "learning_rate": 9.729779421254346e-06, |
| "loss": 0.30799403190612795, |
| "memory(GiB)": 71.86, |
| "step": 1160, |
| "token_acc": 0.8943392211108746, |
| "train_speed(iter/s)": 0.080858 |
| }, |
| { |
| "epoch": 0.4514738295347204, |
| "grad_norm": 0.6342989206314087, |
| "learning_rate": 9.726304726042639e-06, |
| "loss": 0.31985857486724856, |
| "memory(GiB)": 71.86, |
| "step": 1165, |
| "token_acc": 0.8814540558734433, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 0.4534114854554702, |
| "grad_norm": 0.6403650641441345, |
| "learning_rate": 9.722808461723377e-06, |
| "loss": 0.31233992576599123, |
| "memory(GiB)": 71.86, |
| "step": 1170, |
| "token_acc": 0.8884251530328325, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 0.4553491413762201, |
| "grad_norm": 0.6330046653747559, |
| "learning_rate": 9.719290644252219e-06, |
| "loss": 0.315544581413269, |
| "memory(GiB)": 71.86, |
| "step": 1175, |
| "token_acc": 0.9076247317596566, |
| "train_speed(iter/s)": 0.080863 |
| }, |
| { |
| "epoch": 0.45728679729696997, |
| "grad_norm": 0.5967057943344116, |
| "learning_rate": 9.715751289683181e-06, |
| "loss": 0.3158837080001831, |
| "memory(GiB)": 71.86, |
| "step": 1180, |
| "token_acc": 0.8883155397390273, |
| "train_speed(iter/s)": 0.08083 |
| }, |
| { |
| "epoch": 0.4592244532177199, |
| "grad_norm": 0.6456601619720459, |
| "learning_rate": 9.712190414168573e-06, |
| "loss": 0.31205410957336427, |
| "memory(GiB)": 71.86, |
| "step": 1185, |
| "token_acc": 0.895832576390655, |
| "train_speed(iter/s)": 0.080848 |
| }, |
| { |
| "epoch": 0.4611621091384697, |
| "grad_norm": 0.6398383378982544, |
| "learning_rate": 9.70860803395891e-06, |
| "loss": 0.32424077987670896, |
| "memory(GiB)": 71.86, |
| "step": 1190, |
| "token_acc": 0.8800454803865833, |
| "train_speed(iter/s)": 0.080842 |
| }, |
| { |
| "epoch": 0.4630997650592196, |
| "grad_norm": 0.620103657245636, |
| "learning_rate": 9.705004165402855e-06, |
| "loss": 0.30479159355163576, |
| "memory(GiB)": 71.86, |
| "step": 1195, |
| "token_acc": 0.9015556387650202, |
| "train_speed(iter/s)": 0.080832 |
| }, |
| { |
| "epoch": 0.4650374209799695, |
| "grad_norm": 0.6715317964553833, |
| "learning_rate": 9.70137882494713e-06, |
| "loss": 0.3113380432128906, |
| "memory(GiB)": 71.86, |
| "step": 1200, |
| "token_acc": 0.9031839539939687, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 0.4669750769007194, |
| "grad_norm": 0.6765401363372803, |
| "learning_rate": 9.697732029136446e-06, |
| "loss": 0.3275330066680908, |
| "memory(GiB)": 71.86, |
| "step": 1205, |
| "token_acc": 0.8961176583924729, |
| "train_speed(iter/s)": 0.08084 |
| }, |
| { |
| "epoch": 0.4689127328214692, |
| "grad_norm": 0.6207024455070496, |
| "learning_rate": 9.694063794613435e-06, |
| "loss": 0.31103494167327883, |
| "memory(GiB)": 71.86, |
| "step": 1210, |
| "token_acc": 0.9014660276289822, |
| "train_speed(iter/s)": 0.080834 |
| }, |
| { |
| "epoch": 0.47085038874221913, |
| "grad_norm": 0.6183663606643677, |
| "learning_rate": 9.690374138118563e-06, |
| "loss": 0.31775264739990233, |
| "memory(GiB)": 71.86, |
| "step": 1215, |
| "token_acc": 0.8885089322563529, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 0.472788044662969, |
| "grad_norm": 0.6315492391586304, |
| "learning_rate": 9.686663076490055e-06, |
| "loss": 0.3269613265991211, |
| "memory(GiB)": 71.86, |
| "step": 1220, |
| "token_acc": 0.8918183467376187, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 0.4747257005837188, |
| "grad_norm": 0.6290526390075684, |
| "learning_rate": 9.682930626663826e-06, |
| "loss": 0.32108583450317385, |
| "memory(GiB)": 71.86, |
| "step": 1225, |
| "token_acc": 0.8929259130654377, |
| "train_speed(iter/s)": 0.080818 |
| }, |
| { |
| "epoch": 0.4766633565044687, |
| "grad_norm": 0.6333230137825012, |
| "learning_rate": 9.679176805673399e-06, |
| "loss": 0.3004627227783203, |
| "memory(GiB)": 71.86, |
| "step": 1230, |
| "token_acc": 0.8975369131960538, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 0.4786010124252186, |
| "grad_norm": 0.6388303637504578, |
| "learning_rate": 9.675401630649824e-06, |
| "loss": 0.321624755859375, |
| "memory(GiB)": 71.86, |
| "step": 1235, |
| "token_acc": 0.8988916178600668, |
| "train_speed(iter/s)": 0.080824 |
| }, |
| { |
| "epoch": 0.4805386683459685, |
| "grad_norm": 0.6355609893798828, |
| "learning_rate": 9.671605118821603e-06, |
| "loss": 0.3184902906417847, |
| "memory(GiB)": 71.86, |
| "step": 1240, |
| "token_acc": 0.8958213888233112, |
| "train_speed(iter/s)": 0.080815 |
| }, |
| { |
| "epoch": 0.4824763242667183, |
| "grad_norm": 0.6359343528747559, |
| "learning_rate": 9.667787287514614e-06, |
| "loss": 0.3213565587997437, |
| "memory(GiB)": 71.86, |
| "step": 1245, |
| "token_acc": 0.8997242062088961, |
| "train_speed(iter/s)": 0.080816 |
| }, |
| { |
| "epoch": 0.48441398018746823, |
| "grad_norm": 0.595271646976471, |
| "learning_rate": 9.663948154152028e-06, |
| "loss": 0.3277468204498291, |
| "memory(GiB)": 71.86, |
| "step": 1250, |
| "token_acc": 0.8972503128487841, |
| "train_speed(iter/s)": 0.080817 |
| }, |
| { |
| "epoch": 0.4863516361082181, |
| "grad_norm": 0.5990849137306213, |
| "learning_rate": 9.660087736254228e-06, |
| "loss": 0.3170775890350342, |
| "memory(GiB)": 71.86, |
| "step": 1255, |
| "token_acc": 0.8847311019180143, |
| "train_speed(iter/s)": 0.080811 |
| }, |
| { |
| "epoch": 0.488289292028968, |
| "grad_norm": 0.6386568546295166, |
| "learning_rate": 9.656206051438736e-06, |
| "loss": 0.3276305913925171, |
| "memory(GiB)": 71.86, |
| "step": 1260, |
| "token_acc": 0.8964000938086304, |
| "train_speed(iter/s)": 0.080816 |
| }, |
| { |
| "epoch": 0.49022694794971783, |
| "grad_norm": 0.6302090287208557, |
| "learning_rate": 9.65230311742013e-06, |
| "loss": 0.32054777145385743, |
| "memory(GiB)": 71.86, |
| "step": 1265, |
| "token_acc": 0.8926590538336052, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 0.4921646038704677, |
| "grad_norm": 0.6372174620628357, |
| "learning_rate": 9.64837895200995e-06, |
| "loss": 0.3121542692184448, |
| "memory(GiB)": 71.86, |
| "step": 1270, |
| "token_acc": 0.8987489689304372, |
| "train_speed(iter/s)": 0.080852 |
| }, |
| { |
| "epoch": 0.4941022597912176, |
| "grad_norm": 0.5766242146492004, |
| "learning_rate": 9.644433573116643e-06, |
| "loss": 0.31241488456726074, |
| "memory(GiB)": 71.86, |
| "step": 1275, |
| "token_acc": 0.9012521632902372, |
| "train_speed(iter/s)": 0.080856 |
| }, |
| { |
| "epoch": 0.4960399157119674, |
| "grad_norm": 0.6373428106307983, |
| "learning_rate": 9.640466998745456e-06, |
| "loss": 0.32321221828460694, |
| "memory(GiB)": 71.86, |
| "step": 1280, |
| "token_acc": 0.8911403689227182, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 0.49797757163271733, |
| "grad_norm": 0.6617798209190369, |
| "learning_rate": 9.636479246998371e-06, |
| "loss": 0.3195833444595337, |
| "memory(GiB)": 71.86, |
| "step": 1285, |
| "token_acc": 0.8979424138837759, |
| "train_speed(iter/s)": 0.080879 |
| }, |
| { |
| "epoch": 0.4999152275534672, |
| "grad_norm": 0.6529935598373413, |
| "learning_rate": 9.632470336074009e-06, |
| "loss": 0.3121352672576904, |
| "memory(GiB)": 71.86, |
| "step": 1290, |
| "token_acc": 0.8983967935871744, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 0.5018528834742171, |
| "grad_norm": 0.687832236289978, |
| "learning_rate": 9.628440284267562e-06, |
| "loss": 0.31133959293365476, |
| "memory(GiB)": 71.86, |
| "step": 1295, |
| "token_acc": 0.9002139001120429, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 0.503790539394967, |
| "grad_norm": 0.6272649765014648, |
| "learning_rate": 9.624389109970693e-06, |
| "loss": 0.31259541511535643, |
| "memory(GiB)": 71.86, |
| "step": 1300, |
| "token_acc": 0.893979721166033, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 0.5057281953157168, |
| "grad_norm": 0.663655698299408, |
| "learning_rate": 9.620316831671467e-06, |
| "loss": 0.3107600688934326, |
| "memory(GiB)": 71.86, |
| "step": 1305, |
| "token_acc": 0.8993554263294332, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 0.5076658512364667, |
| "grad_norm": 0.6684431433677673, |
| "learning_rate": 9.616223467954254e-06, |
| "loss": 0.3163787364959717, |
| "memory(GiB)": 71.86, |
| "step": 1310, |
| "token_acc": 0.9010745466756213, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 0.5096035071572166, |
| "grad_norm": 0.6781684160232544, |
| "learning_rate": 9.612109037499652e-06, |
| "loss": 0.3251797199249268, |
| "memory(GiB)": 71.86, |
| "step": 1315, |
| "token_acc": 0.8915072012982622, |
| "train_speed(iter/s)": 0.080986 |
| }, |
| { |
| "epoch": 0.5115411630779664, |
| "grad_norm": 0.6502076387405396, |
| "learning_rate": 9.607973559084403e-06, |
| "loss": 0.32048649787902833, |
| "memory(GiB)": 71.86, |
| "step": 1320, |
| "token_acc": 0.899069038621071, |
| "train_speed(iter/s)": 0.080974 |
| }, |
| { |
| "epoch": 0.5134788189987163, |
| "grad_norm": 0.6232612133026123, |
| "learning_rate": 9.6038170515813e-06, |
| "loss": 0.3221144437789917, |
| "memory(GiB)": 71.86, |
| "step": 1325, |
| "token_acc": 0.9006666433981362, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 0.5154164749194662, |
| "grad_norm": 0.6397128701210022, |
| "learning_rate": 9.599639533959111e-06, |
| "loss": 0.3275588512420654, |
| "memory(GiB)": 71.86, |
| "step": 1330, |
| "token_acc": 0.8982322652586009, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 0.5173541308402161, |
| "grad_norm": 0.6316452622413635, |
| "learning_rate": 9.595441025282477e-06, |
| "loss": 0.32077484130859374, |
| "memory(GiB)": 71.86, |
| "step": 1335, |
| "token_acc": 0.9006897522522522, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 0.5192917867609659, |
| "grad_norm": 0.6124240159988403, |
| "learning_rate": 9.59122154471184e-06, |
| "loss": 0.304377007484436, |
| "memory(GiB)": 71.86, |
| "step": 1340, |
| "token_acc": 0.9003951367781156, |
| "train_speed(iter/s)": 0.080934 |
| }, |
| { |
| "epoch": 0.5212294426817158, |
| "grad_norm": 0.6277111768722534, |
| "learning_rate": 9.586981111503352e-06, |
| "loss": 0.31348342895507814, |
| "memory(GiB)": 71.86, |
| "step": 1345, |
| "token_acc": 0.8995058831893641, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 0.5231670986024657, |
| "grad_norm": 0.6599859595298767, |
| "learning_rate": 9.58271974500878e-06, |
| "loss": 0.30618677139282224, |
| "memory(GiB)": 71.86, |
| "step": 1350, |
| "token_acc": 0.8968546885717086, |
| "train_speed(iter/s)": 0.080888 |
| }, |
| { |
| "epoch": 0.5251047545232156, |
| "grad_norm": 0.6723609566688538, |
| "learning_rate": 9.578437464675427e-06, |
| "loss": 0.31934945583343505, |
| "memory(GiB)": 71.86, |
| "step": 1355, |
| "token_acc": 0.9059823179331934, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 0.5270424104439654, |
| "grad_norm": 0.6316993236541748, |
| "learning_rate": 9.574134290046038e-06, |
| "loss": 0.3180837631225586, |
| "memory(GiB)": 71.86, |
| "step": 1360, |
| "token_acc": 0.8795793439020562, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 0.5289800663647153, |
| "grad_norm": 0.6185073852539062, |
| "learning_rate": 9.56981024075871e-06, |
| "loss": 0.2968299865722656, |
| "memory(GiB)": 71.86, |
| "step": 1365, |
| "token_acc": 0.8910199696663297, |
| "train_speed(iter/s)": 0.080903 |
| }, |
| { |
| "epoch": 0.5309177222854652, |
| "grad_norm": 0.6216744184494019, |
| "learning_rate": 9.565465336546806e-06, |
| "loss": 0.3100132942199707, |
| "memory(GiB)": 71.86, |
| "step": 1370, |
| "token_acc": 0.8934845760132798, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 0.532855378206215, |
| "grad_norm": 0.6380418539047241, |
| "learning_rate": 9.561099597238862e-06, |
| "loss": 0.3299269199371338, |
| "memory(GiB)": 71.86, |
| "step": 1375, |
| "token_acc": 0.9037136496943571, |
| "train_speed(iter/s)": 0.080876 |
| }, |
| { |
| "epoch": 0.5347930341269649, |
| "grad_norm": 0.6195020079612732, |
| "learning_rate": 9.556713042758496e-06, |
| "loss": 0.32070245742797854, |
| "memory(GiB)": 71.86, |
| "step": 1380, |
| "token_acc": 0.8869779636681441, |
| "train_speed(iter/s)": 0.080852 |
| }, |
| { |
| "epoch": 0.5367306900477148, |
| "grad_norm": 0.6413518190383911, |
| "learning_rate": 9.552305693124327e-06, |
| "loss": 0.3137282609939575, |
| "memory(GiB)": 71.86, |
| "step": 1385, |
| "token_acc": 0.8823070702402958, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 0.5386683459684647, |
| "grad_norm": 0.6277745366096497, |
| "learning_rate": 9.547877568449863e-06, |
| "loss": 0.3198817253112793, |
| "memory(GiB)": 71.86, |
| "step": 1390, |
| "token_acc": 0.8930678944296738, |
| "train_speed(iter/s)": 0.080875 |
| }, |
| { |
| "epoch": 0.5406060018892145, |
| "grad_norm": 0.6188220381736755, |
| "learning_rate": 9.543428688943432e-06, |
| "loss": 0.3162529468536377, |
| "memory(GiB)": 71.86, |
| "step": 1395, |
| "token_acc": 0.8938566098081023, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 0.5425436578099644, |
| "grad_norm": 0.6183713674545288, |
| "learning_rate": 9.538959074908076e-06, |
| "loss": 0.3134912014007568, |
| "memory(GiB)": 71.86, |
| "step": 1400, |
| "token_acc": 0.8904494382022472, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 0.5444813137307143, |
| "grad_norm": 0.6285673379898071, |
| "learning_rate": 9.534468746741459e-06, |
| "loss": 0.31511313915252687, |
| "memory(GiB)": 71.86, |
| "step": 1405, |
| "token_acc": 0.8941470378301214, |
| "train_speed(iter/s)": 0.080887 |
| }, |
| { |
| "epoch": 0.5464189696514642, |
| "grad_norm": 0.6295690536499023, |
| "learning_rate": 9.529957724935778e-06, |
| "loss": 0.30770626068115237, |
| "memory(GiB)": 71.86, |
| "step": 1410, |
| "token_acc": 0.8990203466465713, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 0.548356625572214, |
| "grad_norm": 0.5778564810752869, |
| "learning_rate": 9.52542603007767e-06, |
| "loss": 0.31245245933532717, |
| "memory(GiB)": 71.86, |
| "step": 1415, |
| "token_acc": 0.8955087884310152, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 0.5502942814929639, |
| "grad_norm": 0.6101093292236328, |
| "learning_rate": 9.520873682848116e-06, |
| "loss": 0.31894540786743164, |
| "memory(GiB)": 71.86, |
| "step": 1420, |
| "token_acc": 0.897682608558941, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 0.5522319374137138, |
| "grad_norm": 0.6187080144882202, |
| "learning_rate": 9.516300704022345e-06, |
| "loss": 0.30878329277038574, |
| "memory(GiB)": 71.86, |
| "step": 1425, |
| "token_acc": 0.9063188446340525, |
| "train_speed(iter/s)": 0.080887 |
| }, |
| { |
| "epoch": 0.5541695933344636, |
| "grad_norm": 0.6207754611968994, |
| "learning_rate": 9.511707114469743e-06, |
| "loss": 0.31332030296325686, |
| "memory(GiB)": 71.86, |
| "step": 1430, |
| "token_acc": 0.8907917355908015, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 0.5561072492552135, |
| "grad_norm": 0.6338498592376709, |
| "learning_rate": 9.507092935153753e-06, |
| "loss": 0.29761881828308107, |
| "memory(GiB)": 71.86, |
| "step": 1435, |
| "token_acc": 0.8983898330307449, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 0.5580449051759634, |
| "grad_norm": 0.5981598496437073, |
| "learning_rate": 9.502458187131782e-06, |
| "loss": 0.3157939434051514, |
| "memory(GiB)": 71.86, |
| "step": 1440, |
| "token_acc": 0.8913943563284286, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 0.5599825610967133, |
| "grad_norm": 0.5984019637107849, |
| "learning_rate": 9.497802891555111e-06, |
| "loss": 0.30947513580322267, |
| "memory(GiB)": 71.86, |
| "step": 1445, |
| "token_acc": 0.8913408571812441, |
| "train_speed(iter/s)": 0.080891 |
| }, |
| { |
| "epoch": 0.5619202170174631, |
| "grad_norm": 0.6401349902153015, |
| "learning_rate": 9.493127069668786e-06, |
| "loss": 0.32453346252441406, |
| "memory(GiB)": 71.86, |
| "step": 1450, |
| "token_acc": 0.8906738363300286, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 0.563857872938213, |
| "grad_norm": 0.6067079901695251, |
| "learning_rate": 9.488430742811527e-06, |
| "loss": 0.31854901313781736, |
| "memory(GiB)": 71.86, |
| "step": 1455, |
| "token_acc": 0.899044467704359, |
| "train_speed(iter/s)": 0.080887 |
| }, |
| { |
| "epoch": 0.5657955288589629, |
| "grad_norm": 0.6511966586112976, |
| "learning_rate": 9.48371393241564e-06, |
| "loss": 0.31812138557434083, |
| "memory(GiB)": 71.86, |
| "step": 1460, |
| "token_acc": 0.8997221683788847, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 0.5677331847797128, |
| "grad_norm": 0.6218332648277283, |
| "learning_rate": 9.478976660006896e-06, |
| "loss": 0.3028250694274902, |
| "memory(GiB)": 71.86, |
| "step": 1465, |
| "token_acc": 0.8977089383672152, |
| "train_speed(iter/s)": 0.080891 |
| }, |
| { |
| "epoch": 0.5696708407004626, |
| "grad_norm": 0.6328048706054688, |
| "learning_rate": 9.47421894720446e-06, |
| "loss": 0.3045586109161377, |
| "memory(GiB)": 71.86, |
| "step": 1470, |
| "token_acc": 0.9078662334145681, |
| "train_speed(iter/s)": 0.0809 |
| }, |
| { |
| "epoch": 0.5716084966212125, |
| "grad_norm": 0.6221967339515686, |
| "learning_rate": 9.469440815720774e-06, |
| "loss": 0.29703731536865235, |
| "memory(GiB)": 71.86, |
| "step": 1475, |
| "token_acc": 0.911037834085387, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 0.5735461525419624, |
| "grad_norm": 0.5945653319358826, |
| "learning_rate": 9.464642287361463e-06, |
| "loss": 0.30239651203155515, |
| "memory(GiB)": 71.86, |
| "step": 1480, |
| "token_acc": 0.8957724980090257, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 0.5754838084627122, |
| "grad_norm": 0.6818097829818726, |
| "learning_rate": 9.459823384025235e-06, |
| "loss": 0.32621264457702637, |
| "memory(GiB)": 71.86, |
| "step": 1485, |
| "token_acc": 0.8870914053658212, |
| "train_speed(iter/s)": 0.080899 |
| }, |
| { |
| "epoch": 0.5774214643834621, |
| "grad_norm": 0.6261887550354004, |
| "learning_rate": 9.454984127703788e-06, |
| "loss": 0.31356096267700195, |
| "memory(GiB)": 71.86, |
| "step": 1490, |
| "token_acc": 0.8967860682429563, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 0.579359120304212, |
| "grad_norm": 0.6280757784843445, |
| "learning_rate": 9.450124540481693e-06, |
| "loss": 0.3232735633850098, |
| "memory(GiB)": 71.86, |
| "step": 1495, |
| "token_acc": 0.8853289947123056, |
| "train_speed(iter/s)": 0.080916 |
| }, |
| { |
| "epoch": 0.5812967762249619, |
| "grad_norm": 0.6291863322257996, |
| "learning_rate": 9.445244644536314e-06, |
| "loss": 0.31281461715698244, |
| "memory(GiB)": 71.86, |
| "step": 1500, |
| "token_acc": 0.8985899401701692, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 0.5832344321457117, |
| "grad_norm": 0.6008995175361633, |
| "learning_rate": 9.44034446213769e-06, |
| "loss": 0.31398735046386717, |
| "memory(GiB)": 71.86, |
| "step": 1505, |
| "token_acc": 0.903951683605546, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 0.5851720880664616, |
| "grad_norm": 0.6257957220077515, |
| "learning_rate": 9.43542401564844e-06, |
| "loss": 0.30848581790924073, |
| "memory(GiB)": 71.86, |
| "step": 1510, |
| "token_acc": 0.9153630929670609, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 0.5871097439872115, |
| "grad_norm": 0.6130756139755249, |
| "learning_rate": 9.430483327523667e-06, |
| "loss": 0.3132195949554443, |
| "memory(GiB)": 71.86, |
| "step": 1515, |
| "token_acc": 0.8899323208445322, |
| "train_speed(iter/s)": 0.080934 |
| }, |
| { |
| "epoch": 0.5890473999079613, |
| "grad_norm": 0.5911505222320557, |
| "learning_rate": 9.425522420310845e-06, |
| "loss": 0.31288986206054686, |
| "memory(GiB)": 71.86, |
| "step": 1520, |
| "token_acc": 0.8933943534004105, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 0.5909850558287112, |
| "grad_norm": 0.586607813835144, |
| "learning_rate": 9.420541316649718e-06, |
| "loss": 0.3002540111541748, |
| "memory(GiB)": 71.86, |
| "step": 1525, |
| "token_acc": 0.9010152284263959, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 0.5929227117494611, |
| "grad_norm": 0.6183869242668152, |
| "learning_rate": 9.415540039272202e-06, |
| "loss": 0.3158272266387939, |
| "memory(GiB)": 71.86, |
| "step": 1530, |
| "token_acc": 0.89158118146445, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 0.594860367670211, |
| "grad_norm": 0.6333942413330078, |
| "learning_rate": 9.41051861100228e-06, |
| "loss": 0.32763597965240476, |
| "memory(GiB)": 71.86, |
| "step": 1535, |
| "token_acc": 0.9017759605480821, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 0.5967980235909608, |
| "grad_norm": 0.6147089004516602, |
| "learning_rate": 9.405477054755891e-06, |
| "loss": 0.2997840404510498, |
| "memory(GiB)": 71.86, |
| "step": 1540, |
| "token_acc": 0.8861569340776516, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 0.5987356795117107, |
| "grad_norm": 0.6072155833244324, |
| "learning_rate": 9.400415393540838e-06, |
| "loss": 0.3148937225341797, |
| "memory(GiB)": 71.86, |
| "step": 1545, |
| "token_acc": 0.8906319892644897, |
| "train_speed(iter/s)": 0.080897 |
| }, |
| { |
| "epoch": 0.6006733354324606, |
| "grad_norm": 0.6179304122924805, |
| "learning_rate": 9.39533365045667e-06, |
| "loss": 0.3015735149383545, |
| "memory(GiB)": 71.86, |
| "step": 1550, |
| "token_acc": 0.8949518913931725, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 0.6026109913532105, |
| "grad_norm": 0.6203258037567139, |
| "learning_rate": 9.390231848694582e-06, |
| "loss": 0.29992084503173827, |
| "memory(GiB)": 71.86, |
| "step": 1555, |
| "token_acc": 0.8858452722063037, |
| "train_speed(iter/s)": 0.080887 |
| }, |
| { |
| "epoch": 0.6045486472739603, |
| "grad_norm": 0.6550023555755615, |
| "learning_rate": 9.385110011537312e-06, |
| "loss": 0.3103346347808838, |
| "memory(GiB)": 71.86, |
| "step": 1560, |
| "token_acc": 0.898361172392058, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 0.6064863031947102, |
| "grad_norm": 0.6364780068397522, |
| "learning_rate": 9.379968162359034e-06, |
| "loss": 0.3126693248748779, |
| "memory(GiB)": 71.86, |
| "step": 1565, |
| "token_acc": 0.894732145512954, |
| "train_speed(iter/s)": 0.080924 |
| }, |
| { |
| "epoch": 0.6084239591154601, |
| "grad_norm": 0.66599041223526, |
| "learning_rate": 9.374806324625243e-06, |
| "loss": 0.3166584253311157, |
| "memory(GiB)": 71.86, |
| "step": 1570, |
| "token_acc": 0.8910642910371926, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 0.6103616150362099, |
| "grad_norm": 0.6209014654159546, |
| "learning_rate": 9.369624521892662e-06, |
| "loss": 0.31363322734832766, |
| "memory(GiB)": 71.86, |
| "step": 1575, |
| "token_acc": 0.8906361686919227, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 0.6122992709569598, |
| "grad_norm": 0.6511316299438477, |
| "learning_rate": 9.36442277780912e-06, |
| "loss": 0.31300342082977295, |
| "memory(GiB)": 71.86, |
| "step": 1580, |
| "token_acc": 0.8923055565196265, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 0.6142369268777097, |
| "grad_norm": 0.5959946513175964, |
| "learning_rate": 9.359201116113454e-06, |
| "loss": 0.3122777700424194, |
| "memory(GiB)": 71.86, |
| "step": 1585, |
| "token_acc": 0.8964503956990684, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 0.6161745827984596, |
| "grad_norm": 0.6623050570487976, |
| "learning_rate": 9.353959560635402e-06, |
| "loss": 0.3189687252044678, |
| "memory(GiB)": 71.86, |
| "step": 1590, |
| "token_acc": 0.9036677273488528, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 0.6181122387192094, |
| "grad_norm": 0.620650589466095, |
| "learning_rate": 9.34869813529548e-06, |
| "loss": 0.3007480621337891, |
| "memory(GiB)": 71.86, |
| "step": 1595, |
| "token_acc": 0.9134024910661624, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 0.6200498946399593, |
| "grad_norm": 0.5833150744438171, |
| "learning_rate": 9.34341686410489e-06, |
| "loss": 0.32265076637268064, |
| "memory(GiB)": 71.86, |
| "step": 1600, |
| "token_acc": 0.8827635466380962, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 0.6219875505607092, |
| "grad_norm": 0.6048651337623596, |
| "learning_rate": 9.338115771165401e-06, |
| "loss": 0.29888324737548827, |
| "memory(GiB)": 71.86, |
| "step": 1605, |
| "token_acc": 0.9089085583290881, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 0.6239252064814591, |
| "grad_norm": 0.5740781426429749, |
| "learning_rate": 9.332794880669244e-06, |
| "loss": 0.2918308019638062, |
| "memory(GiB)": 71.86, |
| "step": 1610, |
| "token_acc": 0.89769777194534, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 0.6258628624022089, |
| "grad_norm": 0.6081269979476929, |
| "learning_rate": 9.327454216898994e-06, |
| "loss": 0.3145033597946167, |
| "memory(GiB)": 71.86, |
| "step": 1615, |
| "token_acc": 0.8979991021612261, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 0.6278005183229588, |
| "grad_norm": 0.6427978277206421, |
| "learning_rate": 9.322093804227467e-06, |
| "loss": 0.30745735168457033, |
| "memory(GiB)": 71.86, |
| "step": 1620, |
| "token_acc": 0.8982848422519325, |
| "train_speed(iter/s)": 0.080968 |
| }, |
| { |
| "epoch": 0.6297381742437087, |
| "grad_norm": 0.6230420470237732, |
| "learning_rate": 9.316713667117605e-06, |
| "loss": 0.3176234245300293, |
| "memory(GiB)": 71.86, |
| "step": 1625, |
| "token_acc": 0.8878736748272898, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 0.6316758301644585, |
| "grad_norm": 0.6435239911079407, |
| "learning_rate": 9.311313830122364e-06, |
| "loss": 0.3190913200378418, |
| "memory(GiB)": 71.86, |
| "step": 1630, |
| "token_acc": 0.8949252474205096, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 0.6336134860852084, |
| "grad_norm": 0.5868645310401917, |
| "learning_rate": 9.305894317884603e-06, |
| "loss": 0.2969323396682739, |
| "memory(GiB)": 71.86, |
| "step": 1635, |
| "token_acc": 0.8913538481556177, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 0.6355511420059583, |
| "grad_norm": 0.5792374610900879, |
| "learning_rate": 9.300455155136973e-06, |
| "loss": 0.3106818199157715, |
| "memory(GiB)": 71.86, |
| "step": 1640, |
| "token_acc": 0.907856750068738, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 0.6374887979267082, |
| "grad_norm": 0.6041505932807922, |
| "learning_rate": 9.294996366701804e-06, |
| "loss": 0.3139790534973145, |
| "memory(GiB)": 71.86, |
| "step": 1645, |
| "token_acc": 0.8883584511217198, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 0.639426453847458, |
| "grad_norm": 0.6277373433113098, |
| "learning_rate": 9.289517977490986e-06, |
| "loss": 0.32369270324707033, |
| "memory(GiB)": 71.86, |
| "step": 1650, |
| "token_acc": 0.8990907470137279, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 0.6413641097682079, |
| "grad_norm": 0.6074517369270325, |
| "learning_rate": 9.284020012505863e-06, |
| "loss": 0.317038369178772, |
| "memory(GiB)": 71.86, |
| "step": 1655, |
| "token_acc": 0.895113307539747, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 0.6433017656889578, |
| "grad_norm": 0.6136306524276733, |
| "learning_rate": 9.278502496837116e-06, |
| "loss": 0.30502321720123293, |
| "memory(GiB)": 71.86, |
| "step": 1660, |
| "token_acc": 0.8915775068424981, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 0.6452394216097077, |
| "grad_norm": 0.5837361812591553, |
| "learning_rate": 9.272965455664644e-06, |
| "loss": 0.31087141036987304, |
| "memory(GiB)": 71.86, |
| "step": 1665, |
| "token_acc": 0.8925676335579646, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 0.6471770775304575, |
| "grad_norm": 0.6269435882568359, |
| "learning_rate": 9.267408914257459e-06, |
| "loss": 0.31297247409820556, |
| "memory(GiB)": 71.86, |
| "step": 1670, |
| "token_acc": 0.8990828714700448, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 0.6491147334512074, |
| "grad_norm": 0.6197941899299622, |
| "learning_rate": 9.261832897973559e-06, |
| "loss": 0.3180734157562256, |
| "memory(GiB)": 71.86, |
| "step": 1675, |
| "token_acc": 0.892425031396453, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 0.6510523893719573, |
| "grad_norm": 0.6286627650260925, |
| "learning_rate": 9.256237432259823e-06, |
| "loss": 0.3299598217010498, |
| "memory(GiB)": 71.86, |
| "step": 1680, |
| "token_acc": 0.8858767912542473, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 0.6529900452927071, |
| "grad_norm": 0.5860146284103394, |
| "learning_rate": 9.250622542651887e-06, |
| "loss": 0.31193127632141116, |
| "memory(GiB)": 71.86, |
| "step": 1685, |
| "token_acc": 0.886069178652086, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 0.654927701213457, |
| "grad_norm": 0.6271302103996277, |
| "learning_rate": 9.244988254774032e-06, |
| "loss": 0.3232892513275146, |
| "memory(GiB)": 71.86, |
| "step": 1690, |
| "token_acc": 0.8902681549910187, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 0.6568653571342069, |
| "grad_norm": 0.6138412952423096, |
| "learning_rate": 9.239334594339064e-06, |
| "loss": 0.3187739849090576, |
| "memory(GiB)": 71.86, |
| "step": 1695, |
| "token_acc": 0.8890871342373321, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 0.6588030130549568, |
| "grad_norm": 0.5884256362915039, |
| "learning_rate": 9.233661587148202e-06, |
| "loss": 0.3060622215270996, |
| "memory(GiB)": 71.86, |
| "step": 1700, |
| "token_acc": 0.8999258710155671, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 0.6607406689757066, |
| "grad_norm": 0.5839608311653137, |
| "learning_rate": 9.22796925909095e-06, |
| "loss": 0.312208890914917, |
| "memory(GiB)": 71.86, |
| "step": 1705, |
| "token_acc": 0.8906457570553737, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 0.6626783248964565, |
| "grad_norm": 0.657823383808136, |
| "learning_rate": 9.222257636144992e-06, |
| "loss": 0.2995912075042725, |
| "memory(GiB)": 71.86, |
| "step": 1710, |
| "token_acc": 0.913647851727043, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 0.6646159808172064, |
| "grad_norm": 0.6004208922386169, |
| "learning_rate": 9.216526744376059e-06, |
| "loss": 0.30365538597106934, |
| "memory(GiB)": 71.86, |
| "step": 1715, |
| "token_acc": 0.9049971046087816, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 0.6665536367379563, |
| "grad_norm": 0.6169917583465576, |
| "learning_rate": 9.21077660993783e-06, |
| "loss": 0.3203572273254395, |
| "memory(GiB)": 71.86, |
| "step": 1720, |
| "token_acc": 0.896225152265578, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 0.6684912926587061, |
| "grad_norm": 0.5970929265022278, |
| "learning_rate": 9.205007259071786e-06, |
| "loss": 0.29948410987854, |
| "memory(GiB)": 71.86, |
| "step": 1725, |
| "token_acc": 0.9039809714571858, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 0.670428948579456, |
| "grad_norm": 0.6267915368080139, |
| "learning_rate": 9.199218718107115e-06, |
| "loss": 0.311102294921875, |
| "memory(GiB)": 71.86, |
| "step": 1730, |
| "token_acc": 0.8852009147337472, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 0.6723666045002059, |
| "grad_norm": 0.5979821085929871, |
| "learning_rate": 9.193411013460576e-06, |
| "loss": 0.30515599250793457, |
| "memory(GiB)": 71.86, |
| "step": 1735, |
| "token_acc": 0.8829092393522928, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 0.6743042604209557, |
| "grad_norm": 0.6205952763557434, |
| "learning_rate": 9.187584171636388e-06, |
| "loss": 0.3166038990020752, |
| "memory(GiB)": 71.86, |
| "step": 1740, |
| "token_acc": 0.889463922460213, |
| "train_speed(iter/s)": 0.080916 |
| }, |
| { |
| "epoch": 0.6762419163417056, |
| "grad_norm": 0.598731279373169, |
| "learning_rate": 9.181738219226102e-06, |
| "loss": 0.29600186347961427, |
| "memory(GiB)": 71.86, |
| "step": 1745, |
| "token_acc": 0.8940675369246798, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 0.6781795722624555, |
| "grad_norm": 0.6215488314628601, |
| "learning_rate": 9.175873182908484e-06, |
| "loss": 0.3002432107925415, |
| "memory(GiB)": 71.86, |
| "step": 1750, |
| "token_acc": 0.9014647976059222, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 0.6801172281832054, |
| "grad_norm": 0.5754505395889282, |
| "learning_rate": 9.16998908944939e-06, |
| "loss": 0.3116154670715332, |
| "memory(GiB)": 71.86, |
| "step": 1755, |
| "token_acc": 0.8995418630901003, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 0.6820548841039552, |
| "grad_norm": 0.640917956829071, |
| "learning_rate": 9.16408596570165e-06, |
| "loss": 0.31115570068359377, |
| "memory(GiB)": 71.86, |
| "step": 1760, |
| "token_acc": 0.8795623533403071, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 0.6839925400247051, |
| "grad_norm": 0.6518266201019287, |
| "learning_rate": 9.158163838604938e-06, |
| "loss": 0.31464262008666993, |
| "memory(GiB)": 71.86, |
| "step": 1765, |
| "token_acc": 0.9010270455323519, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 0.685930195945455, |
| "grad_norm": 0.6728587746620178, |
| "learning_rate": 9.15222273518565e-06, |
| "loss": 0.2999051570892334, |
| "memory(GiB)": 71.86, |
| "step": 1770, |
| "token_acc": 0.889860038329752, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 0.6878678518662048, |
| "grad_norm": 0.6566827297210693, |
| "learning_rate": 9.14626268255679e-06, |
| "loss": 0.30396265983581544, |
| "memory(GiB)": 71.86, |
| "step": 1775, |
| "token_acc": 0.893242859172943, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 0.6898055077869547, |
| "grad_norm": 0.6036619544029236, |
| "learning_rate": 9.140283707917831e-06, |
| "loss": 0.3114708423614502, |
| "memory(GiB)": 71.86, |
| "step": 1780, |
| "token_acc": 0.8952726572034171, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 0.6917431637077046, |
| "grad_norm": 0.6068353652954102, |
| "learning_rate": 9.134285838554605e-06, |
| "loss": 0.30329604148864747, |
| "memory(GiB)": 71.86, |
| "step": 1785, |
| "token_acc": 0.8964941042940917, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 0.6936808196284545, |
| "grad_norm": 0.5659881234169006, |
| "learning_rate": 9.128269101839172e-06, |
| "loss": 0.31119503974914553, |
| "memory(GiB)": 71.86, |
| "step": 1790, |
| "token_acc": 0.8999454743729552, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 0.6956184755492043, |
| "grad_norm": 0.644329309463501, |
| "learning_rate": 9.122233525229688e-06, |
| "loss": 0.32088627815246584, |
| "memory(GiB)": 71.86, |
| "step": 1795, |
| "token_acc": 0.8879273955255382, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 0.6975561314699542, |
| "grad_norm": 0.592437744140625, |
| "learning_rate": 9.116179136270302e-06, |
| "loss": 0.2998102903366089, |
| "memory(GiB)": 71.86, |
| "step": 1800, |
| "token_acc": 0.9004172371080253, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 0.6994937873907041, |
| "grad_norm": 0.6425099968910217, |
| "learning_rate": 9.110105962591e-06, |
| "loss": 0.30757966041564944, |
| "memory(GiB)": 71.86, |
| "step": 1805, |
| "token_acc": 0.9009111237192537, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 0.701431443311454, |
| "grad_norm": 0.5910810232162476, |
| "learning_rate": 9.104014031907505e-06, |
| "loss": 0.31116063594818116, |
| "memory(GiB)": 71.86, |
| "step": 1810, |
| "token_acc": 0.8962950765684176, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 0.7033690992322038, |
| "grad_norm": 0.563534140586853, |
| "learning_rate": 9.097903372021136e-06, |
| "loss": 0.30581443309783934, |
| "memory(GiB)": 71.86, |
| "step": 1815, |
| "token_acc": 0.8948527387833746, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 0.7053067551529537, |
| "grad_norm": 0.5983726382255554, |
| "learning_rate": 9.091774010818686e-06, |
| "loss": 0.3007395029067993, |
| "memory(GiB)": 71.86, |
| "step": 1820, |
| "token_acc": 0.8795069692431747, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 0.7072444110737036, |
| "grad_norm": 0.5900224447250366, |
| "learning_rate": 9.085625976272292e-06, |
| "loss": 0.31903905868530275, |
| "memory(GiB)": 71.86, |
| "step": 1825, |
| "token_acc": 0.8930551027062276, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 0.7091820669944534, |
| "grad_norm": 0.6485769152641296, |
| "learning_rate": 9.079459296439312e-06, |
| "loss": 0.3097678184509277, |
| "memory(GiB)": 71.86, |
| "step": 1830, |
| "token_acc": 0.8928302360622803, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 0.7111197229152033, |
| "grad_norm": 0.6100226044654846, |
| "learning_rate": 9.073273999462194e-06, |
| "loss": 0.3076323986053467, |
| "memory(GiB)": 71.86, |
| "step": 1835, |
| "token_acc": 0.8870309813572465, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 0.7130573788359532, |
| "grad_norm": 0.6233454346656799, |
| "learning_rate": 9.067070113568346e-06, |
| "loss": 0.3112640857696533, |
| "memory(GiB)": 71.86, |
| "step": 1840, |
| "token_acc": 0.9014221634966445, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 0.7149950347567031, |
| "grad_norm": 0.5628553032875061, |
| "learning_rate": 9.060847667070008e-06, |
| "loss": 0.3148493766784668, |
| "memory(GiB)": 71.86, |
| "step": 1845, |
| "token_acc": 0.902200413541956, |
| "train_speed(iter/s)": 0.080974 |
| }, |
| { |
| "epoch": 0.7169326906774529, |
| "grad_norm": 0.5869155526161194, |
| "learning_rate": 9.05460668836413e-06, |
| "loss": 0.31223297119140625, |
| "memory(GiB)": 71.86, |
| "step": 1850, |
| "token_acc": 0.9005300474977628, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 0.7188703465982028, |
| "grad_norm": 0.5835767388343811, |
| "learning_rate": 9.048347205932227e-06, |
| "loss": 0.3029788494110107, |
| "memory(GiB)": 71.86, |
| "step": 1855, |
| "token_acc": 0.9045944838885854, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 0.7208080025189527, |
| "grad_norm": 0.5862783193588257, |
| "learning_rate": 9.042069248340265e-06, |
| "loss": 0.30313446521759035, |
| "memory(GiB)": 71.86, |
| "step": 1860, |
| "token_acc": 0.9043334435990737, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 0.7227456584397026, |
| "grad_norm": 0.650233268737793, |
| "learning_rate": 9.03577284423852e-06, |
| "loss": 0.32649707794189453, |
| "memory(GiB)": 71.86, |
| "step": 1865, |
| "token_acc": 0.8913325107958051, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 0.7246833143604524, |
| "grad_norm": 0.6166769862174988, |
| "learning_rate": 9.029458022361455e-06, |
| "loss": 0.3023579835891724, |
| "memory(GiB)": 71.86, |
| "step": 1870, |
| "token_acc": 0.9074721468945013, |
| "train_speed(iter/s)": 0.080987 |
| }, |
| { |
| "epoch": 0.7266209702812023, |
| "grad_norm": 0.5706512331962585, |
| "learning_rate": 9.023124811527582e-06, |
| "loss": 0.3054157257080078, |
| "memory(GiB)": 71.86, |
| "step": 1875, |
| "token_acc": 0.8847754152606231, |
| "train_speed(iter/s)": 0.080984 |
| }, |
| { |
| "epoch": 0.7285586262019522, |
| "grad_norm": 0.6064714789390564, |
| "learning_rate": 9.016773240639334e-06, |
| "loss": 0.3063014507293701, |
| "memory(GiB)": 71.86, |
| "step": 1880, |
| "token_acc": 0.908094012236301, |
| "train_speed(iter/s)": 0.080989 |
| }, |
| { |
| "epoch": 0.730496282122702, |
| "grad_norm": 0.6083559393882751, |
| "learning_rate": 9.01040333868293e-06, |
| "loss": 0.30855863094329833, |
| "memory(GiB)": 71.86, |
| "step": 1885, |
| "token_acc": 0.9015966845503903, |
| "train_speed(iter/s)": 0.081004 |
| }, |
| { |
| "epoch": 0.7324339380434519, |
| "grad_norm": 0.595201849937439, |
| "learning_rate": 9.004015134728252e-06, |
| "loss": 0.30145509243011476, |
| "memory(GiB)": 71.86, |
| "step": 1890, |
| "token_acc": 0.8991807826626196, |
| "train_speed(iter/s)": 0.081022 |
| }, |
| { |
| "epoch": 0.7343715939642018, |
| "grad_norm": 0.6078715920448303, |
| "learning_rate": 8.997608657928698e-06, |
| "loss": 0.30652527809143065, |
| "memory(GiB)": 71.86, |
| "step": 1895, |
| "token_acc": 0.8993735064264031, |
| "train_speed(iter/s)": 0.081038 |
| }, |
| { |
| "epoch": 0.7363092498849517, |
| "grad_norm": 0.6498925685882568, |
| "learning_rate": 8.99118393752106e-06, |
| "loss": 0.3331868648529053, |
| "memory(GiB)": 71.86, |
| "step": 1900, |
| "token_acc": 0.8941127377681956, |
| "train_speed(iter/s)": 0.081032 |
| }, |
| { |
| "epoch": 0.7382469058057015, |
| "grad_norm": 0.6449311971664429, |
| "learning_rate": 8.98474100282539e-06, |
| "loss": 0.3127185344696045, |
| "memory(GiB)": 71.86, |
| "step": 1905, |
| "token_acc": 0.8932032412766661, |
| "train_speed(iter/s)": 0.08104 |
| }, |
| { |
| "epoch": 0.7401845617264514, |
| "grad_norm": 0.5712651014328003, |
| "learning_rate": 8.978279883244855e-06, |
| "loss": 0.3024605274200439, |
| "memory(GiB)": 71.86, |
| "step": 1910, |
| "token_acc": 0.8927019331634947, |
| "train_speed(iter/s)": 0.081039 |
| }, |
| { |
| "epoch": 0.7421222176472013, |
| "grad_norm": 0.636428713798523, |
| "learning_rate": 8.971800608265621e-06, |
| "loss": 0.3047468662261963, |
| "memory(GiB)": 71.86, |
| "step": 1915, |
| "token_acc": 0.895410208444682, |
| "train_speed(iter/s)": 0.081041 |
| }, |
| { |
| "epoch": 0.7440598735679512, |
| "grad_norm": 0.6133522987365723, |
| "learning_rate": 8.965303207456702e-06, |
| "loss": 0.28756425380706785, |
| "memory(GiB)": 71.86, |
| "step": 1920, |
| "token_acc": 0.9078662897056378, |
| "train_speed(iter/s)": 0.081048 |
| }, |
| { |
| "epoch": 0.745997529488701, |
| "grad_norm": 0.581291913986206, |
| "learning_rate": 8.958787710469832e-06, |
| "loss": 0.3084988832473755, |
| "memory(GiB)": 71.86, |
| "step": 1925, |
| "token_acc": 0.9017774896796424, |
| "train_speed(iter/s)": 0.081057 |
| }, |
| { |
| "epoch": 0.7479351854094509, |
| "grad_norm": 0.6467372179031372, |
| "learning_rate": 8.95225414703933e-06, |
| "loss": 0.3201260805130005, |
| "memory(GiB)": 71.86, |
| "step": 1930, |
| "token_acc": 0.8919000632163373, |
| "train_speed(iter/s)": 0.081043 |
| }, |
| { |
| "epoch": 0.7498728413302008, |
| "grad_norm": 0.6468227505683899, |
| "learning_rate": 8.94570254698197e-06, |
| "loss": 0.31164610385894775, |
| "memory(GiB)": 71.86, |
| "step": 1935, |
| "token_acc": 0.900514465156678, |
| "train_speed(iter/s)": 0.081051 |
| }, |
| { |
| "epoch": 0.7518104972509506, |
| "grad_norm": 0.6398622989654541, |
| "learning_rate": 8.939132940196825e-06, |
| "loss": 0.32195866107940674, |
| "memory(GiB)": 71.86, |
| "step": 1940, |
| "token_acc": 0.8985073645449764, |
| "train_speed(iter/s)": 0.081062 |
| }, |
| { |
| "epoch": 0.7537481531717005, |
| "grad_norm": 0.5821054577827454, |
| "learning_rate": 8.932545356665157e-06, |
| "loss": 0.2912397861480713, |
| "memory(GiB)": 71.86, |
| "step": 1945, |
| "token_acc": 0.8943398638499153, |
| "train_speed(iter/s)": 0.081049 |
| }, |
| { |
| "epoch": 0.7556858090924504, |
| "grad_norm": 0.6212338209152222, |
| "learning_rate": 8.925939826450259e-06, |
| "loss": 0.2999946117401123, |
| "memory(GiB)": 71.86, |
| "step": 1950, |
| "token_acc": 0.9014343865090134, |
| "train_speed(iter/s)": 0.081047 |
| }, |
| { |
| "epoch": 0.7576234650132003, |
| "grad_norm": 0.5945454239845276, |
| "learning_rate": 8.919316379697331e-06, |
| "loss": 0.3051981687545776, |
| "memory(GiB)": 71.86, |
| "step": 1955, |
| "token_acc": 0.9032103497843795, |
| "train_speed(iter/s)": 0.08103 |
| }, |
| { |
| "epoch": 0.7595611209339501, |
| "grad_norm": 0.6175397038459778, |
| "learning_rate": 8.912675046633336e-06, |
| "loss": 0.3095961093902588, |
| "memory(GiB)": 71.86, |
| "step": 1960, |
| "token_acc": 0.9021332892343311, |
| "train_speed(iter/s)": 0.08103 |
| }, |
| { |
| "epoch": 0.7614987768547, |
| "grad_norm": 0.5822819471359253, |
| "learning_rate": 8.90601585756686e-06, |
| "loss": 0.302655553817749, |
| "memory(GiB)": 71.86, |
| "step": 1965, |
| "token_acc": 0.8917866492146597, |
| "train_speed(iter/s)": 0.081012 |
| }, |
| { |
| "epoch": 0.7634364327754499, |
| "grad_norm": 0.5745783448219299, |
| "learning_rate": 8.899338842887982e-06, |
| "loss": 0.31560654640197755, |
| "memory(GiB)": 71.86, |
| "step": 1970, |
| "token_acc": 0.8880744017825427, |
| "train_speed(iter/s)": 0.081022 |
| }, |
| { |
| "epoch": 0.7653740886961997, |
| "grad_norm": 0.5756574273109436, |
| "learning_rate": 8.892644033068128e-06, |
| "loss": 0.29446749687194823, |
| "memory(GiB)": 71.86, |
| "step": 1975, |
| "token_acc": 0.9025089605734767, |
| "train_speed(iter/s)": 0.081021 |
| }, |
| { |
| "epoch": 0.7673117446169496, |
| "grad_norm": 0.602070689201355, |
| "learning_rate": 8.885931458659936e-06, |
| "loss": 0.28977217674255373, |
| "memory(GiB)": 71.86, |
| "step": 1980, |
| "token_acc": 0.9017447199265382, |
| "train_speed(iter/s)": 0.081044 |
| }, |
| { |
| "epoch": 0.7692494005376995, |
| "grad_norm": 0.6217750310897827, |
| "learning_rate": 8.879201150297114e-06, |
| "loss": 0.3090545654296875, |
| "memory(GiB)": 71.86, |
| "step": 1985, |
| "token_acc": 0.8897201527577787, |
| "train_speed(iter/s)": 0.081059 |
| }, |
| { |
| "epoch": 0.7711870564584494, |
| "grad_norm": 0.6141068935394287, |
| "learning_rate": 8.8724531386943e-06, |
| "loss": 0.316222620010376, |
| "memory(GiB)": 71.86, |
| "step": 1990, |
| "token_acc": 0.9057566110064323, |
| "train_speed(iter/s)": 0.081058 |
| }, |
| { |
| "epoch": 0.7731247123791992, |
| "grad_norm": 0.6178670525550842, |
| "learning_rate": 8.865687454646925e-06, |
| "loss": 0.30695629119873047, |
| "memory(GiB)": 71.86, |
| "step": 1995, |
| "token_acc": 0.902152258401646, |
| "train_speed(iter/s)": 0.081049 |
| }, |
| { |
| "epoch": 0.7750623682999491, |
| "grad_norm": 0.5628589987754822, |
| "learning_rate": 8.858904129031072e-06, |
| "loss": 0.30267930030822754, |
| "memory(GiB)": 71.86, |
| "step": 2000, |
| "token_acc": 0.889784859529723, |
| "train_speed(iter/s)": 0.081069 |
| }, |
| { |
| "epoch": 0.777000024220699, |
| "grad_norm": 0.610127329826355, |
| "learning_rate": 8.852103192803328e-06, |
| "loss": 0.3077036142349243, |
| "memory(GiB)": 71.86, |
| "step": 2005, |
| "token_acc": 0.9019208964183285, |
| "train_speed(iter/s)": 0.081069 |
| }, |
| { |
| "epoch": 0.7789376801414489, |
| "grad_norm": 0.6629402041435242, |
| "learning_rate": 8.845284677000652e-06, |
| "loss": 0.3216689586639404, |
| "memory(GiB)": 71.86, |
| "step": 2010, |
| "token_acc": 0.902394461005866, |
| "train_speed(iter/s)": 0.081073 |
| }, |
| { |
| "epoch": 0.7808753360621987, |
| "grad_norm": 0.6259828209877014, |
| "learning_rate": 8.838448612740232e-06, |
| "loss": 0.30659077167510984, |
| "memory(GiB)": 71.86, |
| "step": 2015, |
| "token_acc": 0.8962287480680062, |
| "train_speed(iter/s)": 0.081043 |
| }, |
| { |
| "epoch": 0.7828129919829486, |
| "grad_norm": 0.6070216298103333, |
| "learning_rate": 8.831595031219337e-06, |
| "loss": 0.31620666980743406, |
| "memory(GiB)": 71.86, |
| "step": 2020, |
| "token_acc": 0.8867298426980632, |
| "train_speed(iter/s)": 0.081035 |
| }, |
| { |
| "epoch": 0.7847506479036985, |
| "grad_norm": 0.5795909762382507, |
| "learning_rate": 8.82472396371518e-06, |
| "loss": 0.30554752349853515, |
| "memory(GiB)": 71.86, |
| "step": 2025, |
| "token_acc": 0.8927852041750595, |
| "train_speed(iter/s)": 0.08103 |
| }, |
| { |
| "epoch": 0.7866883038244483, |
| "grad_norm": 0.5747736096382141, |
| "learning_rate": 8.817835441584772e-06, |
| "loss": 0.30044281482696533, |
| "memory(GiB)": 71.86, |
| "step": 2030, |
| "token_acc": 0.9013870751420605, |
| "train_speed(iter/s)": 0.081052 |
| }, |
| { |
| "epoch": 0.7886259597451982, |
| "grad_norm": 0.621019721031189, |
| "learning_rate": 8.810929496264783e-06, |
| "loss": 0.2906287670135498, |
| "memory(GiB)": 71.86, |
| "step": 2035, |
| "token_acc": 0.8963844797178131, |
| "train_speed(iter/s)": 0.081046 |
| }, |
| { |
| "epoch": 0.7905636156659481, |
| "grad_norm": 0.6499260663986206, |
| "learning_rate": 8.80400615927139e-06, |
| "loss": 0.3061370372772217, |
| "memory(GiB)": 71.86, |
| "step": 2040, |
| "token_acc": 0.9048953721693156, |
| "train_speed(iter/s)": 0.081038 |
| }, |
| { |
| "epoch": 0.792501271586698, |
| "grad_norm": 0.6083590388298035, |
| "learning_rate": 8.79706546220015e-06, |
| "loss": 0.2965734004974365, |
| "memory(GiB)": 71.86, |
| "step": 2045, |
| "token_acc": 0.9033915001407261, |
| "train_speed(iter/s)": 0.081037 |
| }, |
| { |
| "epoch": 0.7944389275074478, |
| "grad_norm": 0.6360229849815369, |
| "learning_rate": 8.790107436725834e-06, |
| "loss": 0.3123593807220459, |
| "memory(GiB)": 71.86, |
| "step": 2050, |
| "token_acc": 0.9031104403176061, |
| "train_speed(iter/s)": 0.081028 |
| }, |
| { |
| "epoch": 0.7963765834281977, |
| "grad_norm": 0.5798035860061646, |
| "learning_rate": 8.783132114602299e-06, |
| "loss": 0.3108199596405029, |
| "memory(GiB)": 71.86, |
| "step": 2055, |
| "token_acc": 0.8884368108830015, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 0.7983142393489476, |
| "grad_norm": 0.6013064980506897, |
| "learning_rate": 8.77613952766234e-06, |
| "loss": 0.30339345932006834, |
| "memory(GiB)": 71.86, |
| "step": 2060, |
| "token_acc": 0.9030202185106115, |
| "train_speed(iter/s)": 0.08099 |
| }, |
| { |
| "epoch": 0.8002518952696975, |
| "grad_norm": 0.6271098256111145, |
| "learning_rate": 8.769129707817532e-06, |
| "loss": 0.3082975149154663, |
| "memory(GiB)": 71.86, |
| "step": 2065, |
| "token_acc": 0.8864762668652783, |
| "train_speed(iter/s)": 0.080983 |
| }, |
| { |
| "epoch": 0.8021895511904473, |
| "grad_norm": 0.5560224652290344, |
| "learning_rate": 8.762102687058108e-06, |
| "loss": 0.29483623504638673, |
| "memory(GiB)": 71.86, |
| "step": 2070, |
| "token_acc": 0.9096249912052347, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 0.8041272071111972, |
| "grad_norm": 0.5846743583679199, |
| "learning_rate": 8.755058497452789e-06, |
| "loss": 0.30487353801727296, |
| "memory(GiB)": 71.86, |
| "step": 2075, |
| "token_acc": 0.9014453781512605, |
| "train_speed(iter/s)": 0.08099 |
| }, |
| { |
| "epoch": 0.8060648630319471, |
| "grad_norm": 0.6149039268493652, |
| "learning_rate": 8.747997171148655e-06, |
| "loss": 0.3211073398590088, |
| "memory(GiB)": 71.86, |
| "step": 2080, |
| "token_acc": 0.8910502310689582, |
| "train_speed(iter/s)": 0.080987 |
| }, |
| { |
| "epoch": 0.8080025189526969, |
| "grad_norm": 0.5836396217346191, |
| "learning_rate": 8.740918740370985e-06, |
| "loss": 0.2896585464477539, |
| "memory(GiB)": 71.86, |
| "step": 2085, |
| "token_acc": 0.8984591194968553, |
| "train_speed(iter/s)": 0.081002 |
| }, |
| { |
| "epoch": 0.8099401748734468, |
| "grad_norm": 0.5869709849357605, |
| "learning_rate": 8.733823237423124e-06, |
| "loss": 0.31051561832427976, |
| "memory(GiB)": 71.86, |
| "step": 2090, |
| "token_acc": 0.9037916430453197, |
| "train_speed(iter/s)": 0.08102 |
| }, |
| { |
| "epoch": 0.8118778307941967, |
| "grad_norm": 0.5999536514282227, |
| "learning_rate": 8.726710694686324e-06, |
| "loss": 0.3082785129547119, |
| "memory(GiB)": 71.86, |
| "step": 2095, |
| "token_acc": 0.8980618292487587, |
| "train_speed(iter/s)": 0.081019 |
| }, |
| { |
| "epoch": 0.8138154867149466, |
| "grad_norm": 0.6493249535560608, |
| "learning_rate": 8.719581144619598e-06, |
| "loss": 0.31282968521118165, |
| "memory(GiB)": 71.86, |
| "step": 2100, |
| "token_acc": 0.898483664217602, |
| "train_speed(iter/s)": 0.081029 |
| }, |
| { |
| "epoch": 0.8157531426356964, |
| "grad_norm": 0.5943895578384399, |
| "learning_rate": 8.71243461975958e-06, |
| "loss": 0.32207574844360354, |
| "memory(GiB)": 71.86, |
| "step": 2105, |
| "token_acc": 0.9011841841172141, |
| "train_speed(iter/s)": 0.08103 |
| }, |
| { |
| "epoch": 0.8176907985564463, |
| "grad_norm": 0.60904461145401, |
| "learning_rate": 8.705271152720364e-06, |
| "loss": 0.31167449951171877, |
| "memory(GiB)": 71.86, |
| "step": 2110, |
| "token_acc": 0.890208345987973, |
| "train_speed(iter/s)": 0.081006 |
| }, |
| { |
| "epoch": 0.8196284544771962, |
| "grad_norm": 0.6434164643287659, |
| "learning_rate": 8.698090776193371e-06, |
| "loss": 0.3102255821228027, |
| "memory(GiB)": 71.86, |
| "step": 2115, |
| "token_acc": 0.903289090518959, |
| "train_speed(iter/s)": 0.081001 |
| }, |
| { |
| "epoch": 0.8215661103979461, |
| "grad_norm": 0.6066402792930603, |
| "learning_rate": 8.690893522947179e-06, |
| "loss": 0.3043588876724243, |
| "memory(GiB)": 71.86, |
| "step": 2120, |
| "token_acc": 0.8837515642494895, |
| "train_speed(iter/s)": 0.081012 |
| }, |
| { |
| "epoch": 0.8235037663186959, |
| "grad_norm": 0.6076641082763672, |
| "learning_rate": 8.683679425827395e-06, |
| "loss": 0.30590732097625734, |
| "memory(GiB)": 71.86, |
| "step": 2125, |
| "token_acc": 0.9053521668108004, |
| "train_speed(iter/s)": 0.081016 |
| }, |
| { |
| "epoch": 0.8254414222394458, |
| "grad_norm": 0.6392712593078613, |
| "learning_rate": 8.676448517756489e-06, |
| "loss": 0.3209038019180298, |
| "memory(GiB)": 71.86, |
| "step": 2130, |
| "token_acc": 0.8917396542180835, |
| "train_speed(iter/s)": 0.081003 |
| }, |
| { |
| "epoch": 0.8273790781601957, |
| "grad_norm": 0.6290078163146973, |
| "learning_rate": 8.669200831733655e-06, |
| "loss": 0.3171893835067749, |
| "memory(GiB)": 71.86, |
| "step": 2135, |
| "token_acc": 0.8987797088782062, |
| "train_speed(iter/s)": 0.081001 |
| }, |
| { |
| "epoch": 0.8293167340809455, |
| "grad_norm": 0.6287849545478821, |
| "learning_rate": 8.66193640083465e-06, |
| "loss": 0.30498471260070803, |
| "memory(GiB)": 71.86, |
| "step": 2140, |
| "token_acc": 0.893487338362069, |
| "train_speed(iter/s)": 0.08099 |
| }, |
| { |
| "epoch": 0.8312543900016954, |
| "grad_norm": 0.6079146265983582, |
| "learning_rate": 8.654655258211652e-06, |
| "loss": 0.31498687267303466, |
| "memory(GiB)": 71.86, |
| "step": 2145, |
| "token_acc": 0.8881385789782736, |
| "train_speed(iter/s)": 0.080992 |
| }, |
| { |
| "epoch": 0.8331920459224453, |
| "grad_norm": 0.5923950672149658, |
| "learning_rate": 8.647357437093104e-06, |
| "loss": 0.29389705657958987, |
| "memory(GiB)": 71.86, |
| "step": 2150, |
| "token_acc": 0.9051536737901742, |
| "train_speed(iter/s)": 0.081008 |
| }, |
| { |
| "epoch": 0.8351297018431952, |
| "grad_norm": 0.5749879479408264, |
| "learning_rate": 8.640042970783567e-06, |
| "loss": 0.31409273147583006, |
| "memory(GiB)": 71.86, |
| "step": 2155, |
| "token_acc": 0.902754901010884, |
| "train_speed(iter/s)": 0.081029 |
| }, |
| { |
| "epoch": 0.837067357763945, |
| "grad_norm": 0.6237610578536987, |
| "learning_rate": 8.632711892663554e-06, |
| "loss": 0.3192549705505371, |
| "memory(GiB)": 71.86, |
| "step": 2160, |
| "token_acc": 0.8993284397938466, |
| "train_speed(iter/s)": 0.081045 |
| }, |
| { |
| "epoch": 0.8390050136846949, |
| "grad_norm": 0.6099810600280762, |
| "learning_rate": 8.625364236189405e-06, |
| "loss": 0.32326383590698243, |
| "memory(GiB)": 71.86, |
| "step": 2165, |
| "token_acc": 0.8934105339342698, |
| "train_speed(iter/s)": 0.081057 |
| }, |
| { |
| "epoch": 0.8409426696054448, |
| "grad_norm": 0.6087759733200073, |
| "learning_rate": 8.6180000348931e-06, |
| "loss": 0.3070503234863281, |
| "memory(GiB)": 71.86, |
| "step": 2170, |
| "token_acc": 0.8968854051054383, |
| "train_speed(iter/s)": 0.08106 |
| }, |
| { |
| "epoch": 0.8428803255261947, |
| "grad_norm": 0.588354766368866, |
| "learning_rate": 8.610619322382133e-06, |
| "loss": 0.2942917823791504, |
| "memory(GiB)": 71.86, |
| "step": 2175, |
| "token_acc": 0.9020420697067404, |
| "train_speed(iter/s)": 0.081055 |
| }, |
| { |
| "epoch": 0.8448179814469445, |
| "grad_norm": 0.5909959673881531, |
| "learning_rate": 8.603222132339348e-06, |
| "loss": 0.3080431461334229, |
| "memory(GiB)": 71.86, |
| "step": 2180, |
| "token_acc": 0.9125050477857046, |
| "train_speed(iter/s)": 0.081058 |
| }, |
| { |
| "epoch": 0.8467556373676944, |
| "grad_norm": 0.5789511203765869, |
| "learning_rate": 8.595808498522788e-06, |
| "loss": 0.3041903257369995, |
| "memory(GiB)": 71.86, |
| "step": 2185, |
| "token_acc": 0.9008222686526925, |
| "train_speed(iter/s)": 0.081068 |
| }, |
| { |
| "epoch": 0.8486932932884443, |
| "grad_norm": 0.6096296906471252, |
| "learning_rate": 8.588378454765535e-06, |
| "loss": 0.30947365760803225, |
| "memory(GiB)": 71.86, |
| "step": 2190, |
| "token_acc": 0.8935091277890467, |
| "train_speed(iter/s)": 0.08106 |
| }, |
| { |
| "epoch": 0.8506309492091941, |
| "grad_norm": 0.6188230514526367, |
| "learning_rate": 8.580932034975563e-06, |
| "loss": 0.31347320079803465, |
| "memory(GiB)": 71.86, |
| "step": 2195, |
| "token_acc": 0.8942014242115972, |
| "train_speed(iter/s)": 0.081054 |
| }, |
| { |
| "epoch": 0.852568605129944, |
| "grad_norm": 0.590366005897522, |
| "learning_rate": 8.573469273135578e-06, |
| "loss": 0.3038686752319336, |
| "memory(GiB)": 71.86, |
| "step": 2200, |
| "token_acc": 0.8942797934278478, |
| "train_speed(iter/s)": 0.081049 |
| }, |
| { |
| "epoch": 0.8545062610506939, |
| "grad_norm": 0.5962342023849487, |
| "learning_rate": 8.565990203302866e-06, |
| "loss": 0.3185643196105957, |
| "memory(GiB)": 71.86, |
| "step": 2205, |
| "token_acc": 0.8901260899567154, |
| "train_speed(iter/s)": 0.081039 |
| }, |
| { |
| "epoch": 0.8564439169714438, |
| "grad_norm": 0.6078639030456543, |
| "learning_rate": 8.558494859609137e-06, |
| "loss": 0.3044931650161743, |
| "memory(GiB)": 71.86, |
| "step": 2210, |
| "token_acc": 0.8968926976897492, |
| "train_speed(iter/s)": 0.081023 |
| }, |
| { |
| "epoch": 0.8583815728921936, |
| "grad_norm": 0.5709052681922913, |
| "learning_rate": 8.55098327626037e-06, |
| "loss": 0.29850192070007325, |
| "memory(GiB)": 71.86, |
| "step": 2215, |
| "token_acc": 0.8929022936608272, |
| "train_speed(iter/s)": 0.081006 |
| }, |
| { |
| "epoch": 0.8603192288129435, |
| "grad_norm": 0.5804421305656433, |
| "learning_rate": 8.543455487536654e-06, |
| "loss": 0.3016526222229004, |
| "memory(GiB)": 71.86, |
| "step": 2220, |
| "token_acc": 0.8973657201505303, |
| "train_speed(iter/s)": 0.081011 |
| }, |
| { |
| "epoch": 0.8622568847336934, |
| "grad_norm": 0.5794805884361267, |
| "learning_rate": 8.535911527792032e-06, |
| "loss": 0.29973387718200684, |
| "memory(GiB)": 71.86, |
| "step": 2225, |
| "token_acc": 0.8946569324202094, |
| "train_speed(iter/s)": 0.081003 |
| }, |
| { |
| "epoch": 0.8641945406544432, |
| "grad_norm": 0.5984667539596558, |
| "learning_rate": 8.528351431454352e-06, |
| "loss": 0.31471891403198243, |
| "memory(GiB)": 71.86, |
| "step": 2230, |
| "token_acc": 0.9035908316456103, |
| "train_speed(iter/s)": 0.080987 |
| }, |
| { |
| "epoch": 0.8661321965751931, |
| "grad_norm": 0.599163830280304, |
| "learning_rate": 8.520775233025094e-06, |
| "loss": 0.28800148963928224, |
| "memory(GiB)": 71.86, |
| "step": 2235, |
| "token_acc": 0.9002882990356894, |
| "train_speed(iter/s)": 0.080985 |
| }, |
| { |
| "epoch": 0.868069852495943, |
| "grad_norm": 0.5712947249412537, |
| "learning_rate": 8.513182967079228e-06, |
| "loss": 0.31661105155944824, |
| "memory(GiB)": 71.86, |
| "step": 2240, |
| "token_acc": 0.890927432066074, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 0.8700075084166929, |
| "grad_norm": 0.6933742165565491, |
| "learning_rate": 8.50557466826505e-06, |
| "loss": 0.2999523878097534, |
| "memory(GiB)": 71.86, |
| "step": 2245, |
| "token_acc": 0.8946842250413679, |
| "train_speed(iter/s)": 0.081019 |
| }, |
| { |
| "epoch": 0.8719451643374427, |
| "grad_norm": 0.5995941162109375, |
| "learning_rate": 8.497950371304025e-06, |
| "loss": 0.3046144962310791, |
| "memory(GiB)": 71.86, |
| "step": 2250, |
| "token_acc": 0.8875266849649284, |
| "train_speed(iter/s)": 0.081015 |
| }, |
| { |
| "epoch": 0.8738828202581926, |
| "grad_norm": 0.6179806590080261, |
| "learning_rate": 8.490310110990623e-06, |
| "loss": 0.30069704055786134, |
| "memory(GiB)": 71.86, |
| "step": 2255, |
| "token_acc": 0.8802156301394896, |
| "train_speed(iter/s)": 0.081008 |
| }, |
| { |
| "epoch": 0.8758204761789425, |
| "grad_norm": 0.6140835881233215, |
| "learning_rate": 8.482653922192169e-06, |
| "loss": 0.3018641948699951, |
| "memory(GiB)": 71.86, |
| "step": 2260, |
| "token_acc": 0.908906160867085, |
| "train_speed(iter/s)": 0.080993 |
| }, |
| { |
| "epoch": 0.8777581320996924, |
| "grad_norm": 0.5812036991119385, |
| "learning_rate": 8.474981839848675e-06, |
| "loss": 0.29200072288513185, |
| "memory(GiB)": 71.86, |
| "step": 2265, |
| "token_acc": 0.9004617925641285, |
| "train_speed(iter/s)": 0.080992 |
| }, |
| { |
| "epoch": 0.8796957880204422, |
| "grad_norm": 0.6047909259796143, |
| "learning_rate": 8.467293898972694e-06, |
| "loss": 0.29525227546691896, |
| "memory(GiB)": 71.86, |
| "step": 2270, |
| "token_acc": 0.903614068010483, |
| "train_speed(iter/s)": 0.080993 |
| }, |
| { |
| "epoch": 0.8816334439411921, |
| "grad_norm": 0.6456109881401062, |
| "learning_rate": 8.459590134649143e-06, |
| "loss": 0.3068693161010742, |
| "memory(GiB)": 71.86, |
| "step": 2275, |
| "token_acc": 0.89543441552235, |
| "train_speed(iter/s)": 0.080997 |
| }, |
| { |
| "epoch": 0.883571099861942, |
| "grad_norm": 0.6033593416213989, |
| "learning_rate": 8.451870582035155e-06, |
| "loss": 0.3062614917755127, |
| "memory(GiB)": 71.86, |
| "step": 2280, |
| "token_acc": 0.8919612576002933, |
| "train_speed(iter/s)": 0.08101 |
| }, |
| { |
| "epoch": 0.8855087557826918, |
| "grad_norm": 0.6340951323509216, |
| "learning_rate": 8.444135276359913e-06, |
| "loss": 0.2958319902420044, |
| "memory(GiB)": 71.86, |
| "step": 2285, |
| "token_acc": 0.9055605173497403, |
| "train_speed(iter/s)": 0.080999 |
| }, |
| { |
| "epoch": 0.8874464117034417, |
| "grad_norm": 0.5683568120002747, |
| "learning_rate": 8.436384252924496e-06, |
| "loss": 0.2960463047027588, |
| "memory(GiB)": 71.86, |
| "step": 2290, |
| "token_acc": 0.8960641209435684, |
| "train_speed(iter/s)": 0.081004 |
| }, |
| { |
| "epoch": 0.8893840676241916, |
| "grad_norm": 0.6018267273902893, |
| "learning_rate": 8.428617547101707e-06, |
| "loss": 0.3079716682434082, |
| "memory(GiB)": 71.86, |
| "step": 2295, |
| "token_acc": 0.8969420906320126, |
| "train_speed(iter/s)": 0.081009 |
| }, |
| { |
| "epoch": 0.8913217235449415, |
| "grad_norm": 0.5824874639511108, |
| "learning_rate": 8.42083519433592e-06, |
| "loss": 0.308961820602417, |
| "memory(GiB)": 71.86, |
| "step": 2300, |
| "token_acc": 0.9047845105286358, |
| "train_speed(iter/s)": 0.081008 |
| }, |
| { |
| "epoch": 0.8932593794656913, |
| "grad_norm": 0.6830450892448425, |
| "learning_rate": 8.413037230142916e-06, |
| "loss": 0.3022101402282715, |
| "memory(GiB)": 71.86, |
| "step": 2305, |
| "token_acc": 0.8928808351663698, |
| "train_speed(iter/s)": 0.08102 |
| }, |
| { |
| "epoch": 0.8951970353864412, |
| "grad_norm": 0.607520580291748, |
| "learning_rate": 8.405223690109723e-06, |
| "loss": 0.3095353126525879, |
| "memory(GiB)": 71.86, |
| "step": 2310, |
| "token_acc": 0.8990780317213607, |
| "train_speed(iter/s)": 0.081012 |
| }, |
| { |
| "epoch": 0.8971346913071911, |
| "grad_norm": 0.5952607989311218, |
| "learning_rate": 8.397394609894446e-06, |
| "loss": 0.2872850656509399, |
| "memory(GiB)": 71.86, |
| "step": 2315, |
| "token_acc": 0.9125979306469207, |
| "train_speed(iter/s)": 0.08101 |
| }, |
| { |
| "epoch": 0.899072347227941, |
| "grad_norm": 0.586456298828125, |
| "learning_rate": 8.389550025226117e-06, |
| "loss": 0.30147950649261473, |
| "memory(GiB)": 71.86, |
| "step": 2320, |
| "token_acc": 0.8989215036124394, |
| "train_speed(iter/s)": 0.081001 |
| }, |
| { |
| "epoch": 0.9010100031486908, |
| "grad_norm": 0.5853320956230164, |
| "learning_rate": 8.381689971904514e-06, |
| "loss": 0.30711946487426756, |
| "memory(GiB)": 71.86, |
| "step": 2325, |
| "token_acc": 0.8965238741358145, |
| "train_speed(iter/s)": 0.081013 |
| }, |
| { |
| "epoch": 0.9029476590694407, |
| "grad_norm": 0.5991949439048767, |
| "learning_rate": 8.373814485800022e-06, |
| "loss": 0.301871132850647, |
| "memory(GiB)": 71.86, |
| "step": 2330, |
| "token_acc": 0.9006086506086506, |
| "train_speed(iter/s)": 0.081022 |
| }, |
| { |
| "epoch": 0.9048853149901906, |
| "grad_norm": 0.5821011066436768, |
| "learning_rate": 8.365923602853444e-06, |
| "loss": 0.3028329133987427, |
| "memory(GiB)": 71.86, |
| "step": 2335, |
| "token_acc": 0.8980891719745223, |
| "train_speed(iter/s)": 0.081012 |
| }, |
| { |
| "epoch": 0.9068229709109404, |
| "grad_norm": 0.5990322232246399, |
| "learning_rate": 8.358017359075854e-06, |
| "loss": 0.2973571062088013, |
| "memory(GiB)": 71.86, |
| "step": 2340, |
| "token_acc": 0.8983305073692448, |
| "train_speed(iter/s)": 0.081021 |
| }, |
| { |
| "epoch": 0.9087606268316903, |
| "grad_norm": 0.6502439379692078, |
| "learning_rate": 8.350095790548424e-06, |
| "loss": 0.30336899757385255, |
| "memory(GiB)": 71.86, |
| "step": 2345, |
| "token_acc": 0.8919290834613415, |
| "train_speed(iter/s)": 0.081011 |
| }, |
| { |
| "epoch": 0.9106982827524402, |
| "grad_norm": 0.587325930595398, |
| "learning_rate": 8.342158933422266e-06, |
| "loss": 0.29255731105804444, |
| "memory(GiB)": 71.86, |
| "step": 2350, |
| "token_acc": 0.9055942855180467, |
| "train_speed(iter/s)": 0.081013 |
| }, |
| { |
| "epoch": 0.9126359386731902, |
| "grad_norm": 0.5987265706062317, |
| "learning_rate": 8.334206823918262e-06, |
| "loss": 0.31260898113250735, |
| "memory(GiB)": 71.86, |
| "step": 2355, |
| "token_acc": 0.8882655446470313, |
| "train_speed(iter/s)": 0.081013 |
| }, |
| { |
| "epoch": 0.9145735945939399, |
| "grad_norm": 0.5997234582901001, |
| "learning_rate": 8.3262394983269e-06, |
| "loss": 0.3038615703582764, |
| "memory(GiB)": 71.86, |
| "step": 2360, |
| "token_acc": 0.8856326635424022, |
| "train_speed(iter/s)": 0.081018 |
| }, |
| { |
| "epoch": 0.9165112505146898, |
| "grad_norm": 0.6047623157501221, |
| "learning_rate": 8.318256993008108e-06, |
| "loss": 0.2977303981781006, |
| "memory(GiB)": 71.86, |
| "step": 2365, |
| "token_acc": 0.8961500928636635, |
| "train_speed(iter/s)": 0.081026 |
| }, |
| { |
| "epoch": 0.9184489064354397, |
| "grad_norm": 0.605626106262207, |
| "learning_rate": 8.31025934439109e-06, |
| "loss": 0.312529468536377, |
| "memory(GiB)": 71.86, |
| "step": 2370, |
| "token_acc": 0.9005831363278172, |
| "train_speed(iter/s)": 0.081024 |
| }, |
| { |
| "epoch": 0.9203865623561897, |
| "grad_norm": 0.597262978553772, |
| "learning_rate": 8.302246588974156e-06, |
| "loss": 0.28765239715576174, |
| "memory(GiB)": 71.86, |
| "step": 2375, |
| "token_acc": 0.9029015993856222, |
| "train_speed(iter/s)": 0.081019 |
| }, |
| { |
| "epoch": 0.9223242182769394, |
| "grad_norm": 0.563490629196167, |
| "learning_rate": 8.29421876332456e-06, |
| "loss": 0.2911080837249756, |
| "memory(GiB)": 71.86, |
| "step": 2380, |
| "token_acc": 0.9119191065074164, |
| "train_speed(iter/s)": 0.081032 |
| }, |
| { |
| "epoch": 0.9242618741976893, |
| "grad_norm": 0.6514439582824707, |
| "learning_rate": 8.286175904078333e-06, |
| "loss": 0.31125853061676023, |
| "memory(GiB)": 71.86, |
| "step": 2385, |
| "token_acc": 0.9083231470163351, |
| "train_speed(iter/s)": 0.081045 |
| }, |
| { |
| "epoch": 0.9261995301184393, |
| "grad_norm": 0.58599454164505, |
| "learning_rate": 8.27811804794011e-06, |
| "loss": 0.30330696105957033, |
| "memory(GiB)": 71.86, |
| "step": 2390, |
| "token_acc": 0.894368, |
| "train_speed(iter/s)": 0.08105 |
| }, |
| { |
| "epoch": 0.928137186039189, |
| "grad_norm": 0.5928026437759399, |
| "learning_rate": 8.270045231682966e-06, |
| "loss": 0.3130389928817749, |
| "memory(GiB)": 71.86, |
| "step": 2395, |
| "token_acc": 0.902063969382176, |
| "train_speed(iter/s)": 0.081042 |
| }, |
| { |
| "epoch": 0.930074841959939, |
| "grad_norm": 0.5734291076660156, |
| "learning_rate": 8.261957492148252e-06, |
| "loss": 0.2945571422576904, |
| "memory(GiB)": 71.86, |
| "step": 2400, |
| "token_acc": 0.9001185351550315, |
| "train_speed(iter/s)": 0.081039 |
| }, |
| { |
| "epoch": 0.9320124978806889, |
| "grad_norm": 0.5834051966667175, |
| "learning_rate": 8.253854866245421e-06, |
| "loss": 0.30569703578948976, |
| "memory(GiB)": 71.86, |
| "step": 2405, |
| "token_acc": 0.9009355543240865, |
| "train_speed(iter/s)": 0.081039 |
| }, |
| { |
| "epoch": 0.9339501538014388, |
| "grad_norm": 0.5792734026908875, |
| "learning_rate": 8.245737390951861e-06, |
| "loss": 0.29303114414215087, |
| "memory(GiB)": 71.86, |
| "step": 2410, |
| "token_acc": 0.9045537095580817, |
| "train_speed(iter/s)": 0.081046 |
| }, |
| { |
| "epoch": 0.9358878097221885, |
| "grad_norm": 0.6011708974838257, |
| "learning_rate": 8.23760510331273e-06, |
| "loss": 0.2884019136428833, |
| "memory(GiB)": 71.86, |
| "step": 2415, |
| "token_acc": 0.8996518944972554, |
| "train_speed(iter/s)": 0.08106 |
| }, |
| { |
| "epoch": 0.9378254656429384, |
| "grad_norm": 0.6015334725379944, |
| "learning_rate": 8.229458040440783e-06, |
| "loss": 0.2984572172164917, |
| "memory(GiB)": 71.86, |
| "step": 2420, |
| "token_acc": 0.9039551835853131, |
| "train_speed(iter/s)": 0.08106 |
| }, |
| { |
| "epoch": 0.9397631215636884, |
| "grad_norm": 0.5682195425033569, |
| "learning_rate": 8.2212962395162e-06, |
| "loss": 0.30713858604431155, |
| "memory(GiB)": 71.86, |
| "step": 2425, |
| "token_acc": 0.8901319652625398, |
| "train_speed(iter/s)": 0.081067 |
| }, |
| { |
| "epoch": 0.9417007774844383, |
| "grad_norm": 0.5934369564056396, |
| "learning_rate": 8.213119737786425e-06, |
| "loss": 0.29988932609558105, |
| "memory(GiB)": 71.86, |
| "step": 2430, |
| "token_acc": 0.9072377226397611, |
| "train_speed(iter/s)": 0.081072 |
| }, |
| { |
| "epoch": 0.943638433405188, |
| "grad_norm": 0.5825701355934143, |
| "learning_rate": 8.204928572565992e-06, |
| "loss": 0.2891493320465088, |
| "memory(GiB)": 71.86, |
| "step": 2435, |
| "token_acc": 0.9002493765586035, |
| "train_speed(iter/s)": 0.081076 |
| }, |
| { |
| "epoch": 0.945576089325938, |
| "grad_norm": 0.6257404088973999, |
| "learning_rate": 8.196722781236345e-06, |
| "loss": 0.2987071514129639, |
| "memory(GiB)": 71.86, |
| "step": 2440, |
| "token_acc": 0.8886912499144831, |
| "train_speed(iter/s)": 0.081082 |
| }, |
| { |
| "epoch": 0.9475137452466879, |
| "grad_norm": 0.5801171660423279, |
| "learning_rate": 8.188502401245685e-06, |
| "loss": 0.294779896736145, |
| "memory(GiB)": 71.86, |
| "step": 2445, |
| "token_acc": 0.914494488962574, |
| "train_speed(iter/s)": 0.081085 |
| }, |
| { |
| "epoch": 0.9494514011674376, |
| "grad_norm": 0.6023902893066406, |
| "learning_rate": 8.180267470108791e-06, |
| "loss": 0.3050975799560547, |
| "memory(GiB)": 71.86, |
| "step": 2450, |
| "token_acc": 0.8998041334594084, |
| "train_speed(iter/s)": 0.081082 |
| }, |
| { |
| "epoch": 0.9513890570881876, |
| "grad_norm": 0.5659891366958618, |
| "learning_rate": 8.17201802540684e-06, |
| "loss": 0.291573166847229, |
| "memory(GiB)": 71.86, |
| "step": 2455, |
| "token_acc": 0.898186114392939, |
| "train_speed(iter/s)": 0.081089 |
| }, |
| { |
| "epoch": 0.9533267130089375, |
| "grad_norm": 0.5715075731277466, |
| "learning_rate": 8.16375410478725e-06, |
| "loss": 0.31129865646362304, |
| "memory(GiB)": 71.86, |
| "step": 2460, |
| "token_acc": 0.8948479093183805, |
| "train_speed(iter/s)": 0.081082 |
| }, |
| { |
| "epoch": 0.9552643689296874, |
| "grad_norm": 0.5733673572540283, |
| "learning_rate": 8.155475745963497e-06, |
| "loss": 0.30196385383605956, |
| "memory(GiB)": 71.86, |
| "step": 2465, |
| "token_acc": 0.8970524127292847, |
| "train_speed(iter/s)": 0.081079 |
| }, |
| { |
| "epoch": 0.9572020248504371, |
| "grad_norm": 0.6193950176239014, |
| "learning_rate": 8.147182986714951e-06, |
| "loss": 0.3040598392486572, |
| "memory(GiB)": 71.86, |
| "step": 2470, |
| "token_acc": 0.899796048012304, |
| "train_speed(iter/s)": 0.081069 |
| }, |
| { |
| "epoch": 0.959139680771187, |
| "grad_norm": 0.5847228765487671, |
| "learning_rate": 8.138875864886704e-06, |
| "loss": 0.2934266567230225, |
| "memory(GiB)": 71.86, |
| "step": 2475, |
| "token_acc": 0.8982365532941456, |
| "train_speed(iter/s)": 0.081082 |
| }, |
| { |
| "epoch": 0.961077336691937, |
| "grad_norm": 0.646843671798706, |
| "learning_rate": 8.130554418389385e-06, |
| "loss": 0.3016360759735107, |
| "memory(GiB)": 71.86, |
| "step": 2480, |
| "token_acc": 0.896435309303942, |
| "train_speed(iter/s)": 0.081069 |
| }, |
| { |
| "epoch": 0.9630149926126867, |
| "grad_norm": 0.6069235801696777, |
| "learning_rate": 8.122218685199001e-06, |
| "loss": 0.29787559509277345, |
| "memory(GiB)": 71.86, |
| "step": 2485, |
| "token_acc": 0.8999183673469388, |
| "train_speed(iter/s)": 0.081064 |
| }, |
| { |
| "epoch": 0.9649526485334367, |
| "grad_norm": 0.5728825926780701, |
| "learning_rate": 8.113868703356755e-06, |
| "loss": 0.3042722702026367, |
| "memory(GiB)": 71.86, |
| "step": 2490, |
| "token_acc": 0.8947862704111, |
| "train_speed(iter/s)": 0.08107 |
| }, |
| { |
| "epoch": 0.9668903044541866, |
| "grad_norm": 0.5724794864654541, |
| "learning_rate": 8.105504510968878e-06, |
| "loss": 0.29997859001159666, |
| "memory(GiB)": 71.86, |
| "step": 2495, |
| "token_acc": 0.9035289881166727, |
| "train_speed(iter/s)": 0.081054 |
| }, |
| { |
| "epoch": 0.9688279603749365, |
| "grad_norm": 0.6254956722259521, |
| "learning_rate": 8.097126146206454e-06, |
| "loss": 0.3147443771362305, |
| "memory(GiB)": 71.86, |
| "step": 2500, |
| "token_acc": 0.8975554174435467, |
| "train_speed(iter/s)": 0.081062 |
| }, |
| { |
| "epoch": 0.9707656162956863, |
| "grad_norm": 0.6081948280334473, |
| "learning_rate": 8.08873364730524e-06, |
| "loss": 0.29856858253479, |
| "memory(GiB)": 71.86, |
| "step": 2505, |
| "token_acc": 0.8946579947383806, |
| "train_speed(iter/s)": 0.081059 |
| }, |
| { |
| "epoch": 0.9727032722164362, |
| "grad_norm": 0.592427670955658, |
| "learning_rate": 8.080327052565498e-06, |
| "loss": 0.28695039749145507, |
| "memory(GiB)": 71.86, |
| "step": 2510, |
| "token_acc": 0.8975024846910968, |
| "train_speed(iter/s)": 0.081067 |
| }, |
| { |
| "epoch": 0.9746409281371861, |
| "grad_norm": 0.585587203502655, |
| "learning_rate": 8.071906400351823e-06, |
| "loss": 0.3133570194244385, |
| "memory(GiB)": 71.86, |
| "step": 2515, |
| "token_acc": 0.891758151728658, |
| "train_speed(iter/s)": 0.081079 |
| }, |
| { |
| "epoch": 0.976578584057936, |
| "grad_norm": 0.5797784328460693, |
| "learning_rate": 8.063471729092953e-06, |
| "loss": 0.29287357330322267, |
| "memory(GiB)": 71.86, |
| "step": 2520, |
| "token_acc": 0.9092919601238474, |
| "train_speed(iter/s)": 0.081073 |
| }, |
| { |
| "epoch": 0.9785162399786858, |
| "grad_norm": 0.6151092052459717, |
| "learning_rate": 8.055023077281614e-06, |
| "loss": 0.29386229515075685, |
| "memory(GiB)": 71.86, |
| "step": 2525, |
| "token_acc": 0.9003792401151767, |
| "train_speed(iter/s)": 0.081085 |
| }, |
| { |
| "epoch": 0.9804538958994357, |
| "grad_norm": 0.5305030345916748, |
| "learning_rate": 8.046560483474327e-06, |
| "loss": 0.27774505615234374, |
| "memory(GiB)": 71.86, |
| "step": 2530, |
| "token_acc": 0.9066745422327229, |
| "train_speed(iter/s)": 0.081101 |
| }, |
| { |
| "epoch": 0.9823915518201856, |
| "grad_norm": 0.569251298904419, |
| "learning_rate": 8.038083986291242e-06, |
| "loss": 0.29792633056640627, |
| "memory(GiB)": 71.86, |
| "step": 2535, |
| "token_acc": 0.894457876139111, |
| "train_speed(iter/s)": 0.081103 |
| }, |
| { |
| "epoch": 0.9843292077409354, |
| "grad_norm": 0.5375701785087585, |
| "learning_rate": 8.029593624415961e-06, |
| "loss": 0.30445160865783694, |
| "memory(GiB)": 71.86, |
| "step": 2540, |
| "token_acc": 0.9060257773053415, |
| "train_speed(iter/s)": 0.081113 |
| }, |
| { |
| "epoch": 0.9862668636616853, |
| "grad_norm": 0.5565457344055176, |
| "learning_rate": 8.021089436595354e-06, |
| "loss": 0.288785719871521, |
| "memory(GiB)": 71.86, |
| "step": 2545, |
| "token_acc": 0.9060629628377236, |
| "train_speed(iter/s)": 0.081105 |
| }, |
| { |
| "epoch": 0.9882045195824352, |
| "grad_norm": 0.5765477418899536, |
| "learning_rate": 8.012571461639391e-06, |
| "loss": 0.29372076988220214, |
| "memory(GiB)": 71.86, |
| "step": 2550, |
| "token_acc": 0.8993327432423232, |
| "train_speed(iter/s)": 0.081094 |
| }, |
| { |
| "epoch": 0.9901421755031851, |
| "grad_norm": 0.5807486772537231, |
| "learning_rate": 8.004039738420962e-06, |
| "loss": 0.3115732192993164, |
| "memory(GiB)": 71.86, |
| "step": 2555, |
| "token_acc": 0.898284978308026, |
| "train_speed(iter/s)": 0.081087 |
| }, |
| { |
| "epoch": 0.9920798314239349, |
| "grad_norm": 0.5933189988136292, |
| "learning_rate": 7.995494305875696e-06, |
| "loss": 0.2891175031661987, |
| "memory(GiB)": 71.86, |
| "step": 2560, |
| "token_acc": 0.9098856523411547, |
| "train_speed(iter/s)": 0.081099 |
| }, |
| { |
| "epoch": 0.9940174873446848, |
| "grad_norm": 0.5942724943161011, |
| "learning_rate": 7.98693520300179e-06, |
| "loss": 0.3004627704620361, |
| "memory(GiB)": 71.86, |
| "step": 2565, |
| "token_acc": 0.8957946815089672, |
| "train_speed(iter/s)": 0.081106 |
| }, |
| { |
| "epoch": 0.9959551432654347, |
| "grad_norm": 0.609277606010437, |
| "learning_rate": 7.978362468859824e-06, |
| "loss": 0.30392889976501464, |
| "memory(GiB)": 71.86, |
| "step": 2570, |
| "token_acc": 0.904025201260063, |
| "train_speed(iter/s)": 0.081112 |
| }, |
| { |
| "epoch": 0.9978927991861846, |
| "grad_norm": 0.5895731449127197, |
| "learning_rate": 7.969776142572588e-06, |
| "loss": 0.31368122100830076, |
| "memory(GiB)": 71.86, |
| "step": 2575, |
| "token_acc": 0.9018180772107474, |
| "train_speed(iter/s)": 0.081108 |
| }, |
| { |
| "epoch": 0.9998304551069344, |
| "grad_norm": 0.601768970489502, |
| "learning_rate": 7.961176263324902e-06, |
| "loss": 0.3015678882598877, |
| "memory(GiB)": 71.86, |
| "step": 2580, |
| "token_acc": 0.9027051901521875, |
| "train_speed(iter/s)": 0.081115 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.2590896189212799, |
| "eval_runtime": 104.8434, |
| "eval_samples_per_second": 31.8, |
| "eval_steps_per_second": 3.977, |
| "eval_token_acc": 0.9006845098082552, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.0015501247365999, |
| "grad_norm": 0.5477020144462585, |
| "learning_rate": 7.952562870363431e-06, |
| "loss": 0.25441799163818357, |
| "memory(GiB)": 71.86, |
| "step": 2585, |
| "token_acc": 0.9042220212839953, |
| "train_speed(iter/s)": 0.080632 |
| }, |
| { |
| "epoch": 1.0034877806573497, |
| "grad_norm": 0.5873720049858093, |
| "learning_rate": 7.943936002996523e-06, |
| "loss": 0.24828603267669677, |
| "memory(GiB)": 72.48, |
| "step": 2590, |
| "token_acc": 0.9172822882928782, |
| "train_speed(iter/s)": 0.080629 |
| }, |
| { |
| "epoch": 1.0054254365780997, |
| "grad_norm": 0.6077190637588501, |
| "learning_rate": 7.935295700594008e-06, |
| "loss": 0.25363209247589114, |
| "memory(GiB)": 72.48, |
| "step": 2595, |
| "token_acc": 0.9170952689124289, |
| "train_speed(iter/s)": 0.08063 |
| }, |
| { |
| "epoch": 1.0073630924988495, |
| "grad_norm": 0.5879862904548645, |
| "learning_rate": 7.926642002587031e-06, |
| "loss": 0.24791350364685058, |
| "memory(GiB)": 72.48, |
| "step": 2600, |
| "token_acc": 0.912384696809315, |
| "train_speed(iter/s)": 0.080644 |
| }, |
| { |
| "epoch": 1.0093007484195995, |
| "grad_norm": 0.5582185387611389, |
| "learning_rate": 7.917974948467875e-06, |
| "loss": 0.23642451763153077, |
| "memory(GiB)": 72.48, |
| "step": 2605, |
| "token_acc": 0.921409292741678, |
| "train_speed(iter/s)": 0.080649 |
| }, |
| { |
| "epoch": 1.0112384043403493, |
| "grad_norm": 0.6070327758789062, |
| "learning_rate": 7.909294577789765e-06, |
| "loss": 0.23681924343109131, |
| "memory(GiB)": 72.48, |
| "step": 2610, |
| "token_acc": 0.9224056603773585, |
| "train_speed(iter/s)": 0.080642 |
| }, |
| { |
| "epoch": 1.013176060261099, |
| "grad_norm": 0.5828495025634766, |
| "learning_rate": 7.900600930166709e-06, |
| "loss": 0.2503397464752197, |
| "memory(GiB)": 72.48, |
| "step": 2615, |
| "token_acc": 0.9147136677642198, |
| "train_speed(iter/s)": 0.080619 |
| }, |
| { |
| "epoch": 1.015113716181849, |
| "grad_norm": 0.5445525646209717, |
| "learning_rate": 7.891894045273296e-06, |
| "loss": 0.24323725700378418, |
| "memory(GiB)": 72.48, |
| "step": 2620, |
| "token_acc": 0.9177933840903124, |
| "train_speed(iter/s)": 0.080613 |
| }, |
| { |
| "epoch": 1.0170513721025989, |
| "grad_norm": 0.5520332455635071, |
| "learning_rate": 7.883173962844535e-06, |
| "loss": 0.23910813331604003, |
| "memory(GiB)": 72.48, |
| "step": 2625, |
| "token_acc": 0.9152115156852623, |
| "train_speed(iter/s)": 0.080612 |
| }, |
| { |
| "epoch": 1.0189890280233487, |
| "grad_norm": 0.5695937871932983, |
| "learning_rate": 7.874440722675654e-06, |
| "loss": 0.2562225580215454, |
| "memory(GiB)": 72.48, |
| "step": 2630, |
| "token_acc": 0.911888513298042, |
| "train_speed(iter/s)": 0.080611 |
| }, |
| { |
| "epoch": 1.0209266839440987, |
| "grad_norm": 0.5355059504508972, |
| "learning_rate": 7.865694364621936e-06, |
| "loss": 0.25381102561950686, |
| "memory(GiB)": 72.48, |
| "step": 2635, |
| "token_acc": 0.9101550981895382, |
| "train_speed(iter/s)": 0.080613 |
| }, |
| { |
| "epoch": 1.0228643398648485, |
| "grad_norm": 0.603877067565918, |
| "learning_rate": 7.856934928598526e-06, |
| "loss": 0.24031267166137696, |
| "memory(GiB)": 72.48, |
| "step": 2640, |
| "token_acc": 0.9104291735853877, |
| "train_speed(iter/s)": 0.080612 |
| }, |
| { |
| "epoch": 1.0248019957855983, |
| "grad_norm": 0.5709960460662842, |
| "learning_rate": 7.848162454580248e-06, |
| "loss": 0.23710267543792723, |
| "memory(GiB)": 72.48, |
| "step": 2645, |
| "token_acc": 0.9217326459812599, |
| "train_speed(iter/s)": 0.080619 |
| }, |
| { |
| "epoch": 1.0267396517063483, |
| "grad_norm": 0.6112598776817322, |
| "learning_rate": 7.839376982601434e-06, |
| "loss": 0.2632643938064575, |
| "memory(GiB)": 72.48, |
| "step": 2650, |
| "token_acc": 0.9160356212515902, |
| "train_speed(iter/s)": 0.08062 |
| }, |
| { |
| "epoch": 1.028677307627098, |
| "grad_norm": 0.5586497187614441, |
| "learning_rate": 7.830578552755728e-06, |
| "loss": 0.2568197250366211, |
| "memory(GiB)": 72.48, |
| "step": 2655, |
| "token_acc": 0.9143515554986558, |
| "train_speed(iter/s)": 0.08061 |
| }, |
| { |
| "epoch": 1.030614963547848, |
| "grad_norm": 0.5832489728927612, |
| "learning_rate": 7.821767205195913e-06, |
| "loss": 0.24051623344421386, |
| "memory(GiB)": 72.48, |
| "step": 2660, |
| "token_acc": 0.9099613992040456, |
| "train_speed(iter/s)": 0.080604 |
| }, |
| { |
| "epoch": 1.0325526194685979, |
| "grad_norm": 0.564221203327179, |
| "learning_rate": 7.812942980133723e-06, |
| "loss": 0.2458951473236084, |
| "memory(GiB)": 72.48, |
| "step": 2665, |
| "token_acc": 0.918996300466463, |
| "train_speed(iter/s)": 0.080602 |
| }, |
| { |
| "epoch": 1.0344902753893477, |
| "grad_norm": 0.5216150283813477, |
| "learning_rate": 7.804105917839658e-06, |
| "loss": 0.24463191032409667, |
| "memory(GiB)": 72.48, |
| "step": 2670, |
| "token_acc": 0.9253029928271086, |
| "train_speed(iter/s)": 0.080602 |
| }, |
| { |
| "epoch": 1.0364279313100977, |
| "grad_norm": 0.5396441221237183, |
| "learning_rate": 7.795256058642799e-06, |
| "loss": 0.24592304229736328, |
| "memory(GiB)": 72.48, |
| "step": 2675, |
| "token_acc": 0.9139424329252409, |
| "train_speed(iter/s)": 0.080603 |
| }, |
| { |
| "epoch": 1.0383655872308475, |
| "grad_norm": 0.5646589398384094, |
| "learning_rate": 7.786393442930638e-06, |
| "loss": 0.2515740394592285, |
| "memory(GiB)": 72.48, |
| "step": 2680, |
| "token_acc": 0.9193938660698082, |
| "train_speed(iter/s)": 0.08061 |
| }, |
| { |
| "epoch": 1.0403032431515973, |
| "grad_norm": 0.5464441180229187, |
| "learning_rate": 7.777518111148873e-06, |
| "loss": 0.25590317249298095, |
| "memory(GiB)": 72.48, |
| "step": 2685, |
| "token_acc": 0.9124738451633672, |
| "train_speed(iter/s)": 0.080607 |
| }, |
| { |
| "epoch": 1.0422408990723473, |
| "grad_norm": 0.6054975986480713, |
| "learning_rate": 7.768630103801239e-06, |
| "loss": 0.24789299964904785, |
| "memory(GiB)": 72.48, |
| "step": 2690, |
| "token_acc": 0.9170572651314449, |
| "train_speed(iter/s)": 0.080614 |
| }, |
| { |
| "epoch": 1.044178554993097, |
| "grad_norm": 0.6122474670410156, |
| "learning_rate": 7.759729461449317e-06, |
| "loss": 0.2429750919342041, |
| "memory(GiB)": 72.48, |
| "step": 2695, |
| "token_acc": 0.9201933542480313, |
| "train_speed(iter/s)": 0.080616 |
| }, |
| { |
| "epoch": 1.0461162109138469, |
| "grad_norm": 0.5993043780326843, |
| "learning_rate": 7.750816224712345e-06, |
| "loss": 0.2456050395965576, |
| "memory(GiB)": 72.48, |
| "step": 2700, |
| "token_acc": 0.9081399895724713, |
| "train_speed(iter/s)": 0.080608 |
| }, |
| { |
| "epoch": 1.048053866834597, |
| "grad_norm": 0.5747437477111816, |
| "learning_rate": 7.741890434267043e-06, |
| "loss": 0.2360602378845215, |
| "memory(GiB)": 72.48, |
| "step": 2705, |
| "token_acc": 0.9099276579253326, |
| "train_speed(iter/s)": 0.080614 |
| }, |
| { |
| "epoch": 1.0499915227553467, |
| "grad_norm": 0.6145790815353394, |
| "learning_rate": 7.732952130847418e-06, |
| "loss": 0.23937726020812988, |
| "memory(GiB)": 72.48, |
| "step": 2710, |
| "token_acc": 0.9228708069056745, |
| "train_speed(iter/s)": 0.080619 |
| }, |
| { |
| "epoch": 1.0519291786760967, |
| "grad_norm": 0.5605219006538391, |
| "learning_rate": 7.724001355244582e-06, |
| "loss": 0.24741392135620116, |
| "memory(GiB)": 72.48, |
| "step": 2715, |
| "token_acc": 0.9123340546230774, |
| "train_speed(iter/s)": 0.080622 |
| }, |
| { |
| "epoch": 1.0538668345968465, |
| "grad_norm": 0.573218822479248, |
| "learning_rate": 7.715038148306566e-06, |
| "loss": 0.2538093090057373, |
| "memory(GiB)": 72.48, |
| "step": 2720, |
| "token_acc": 0.9092292928290149, |
| "train_speed(iter/s)": 0.080625 |
| }, |
| { |
| "epoch": 1.0558044905175963, |
| "grad_norm": 0.5715059041976929, |
| "learning_rate": 7.706062550938134e-06, |
| "loss": 0.25236949920654295, |
| "memory(GiB)": 72.48, |
| "step": 2725, |
| "token_acc": 0.9107142857142857, |
| "train_speed(iter/s)": 0.080622 |
| }, |
| { |
| "epoch": 1.0577421464383463, |
| "grad_norm": 0.5391227602958679, |
| "learning_rate": 7.697074604100595e-06, |
| "loss": 0.23396830558776854, |
| "memory(GiB)": 72.48, |
| "step": 2730, |
| "token_acc": 0.9122044684709344, |
| "train_speed(iter/s)": 0.08064 |
| }, |
| { |
| "epoch": 1.059679802359096, |
| "grad_norm": 0.5576350688934326, |
| "learning_rate": 7.688074348811612e-06, |
| "loss": 0.24273238182067872, |
| "memory(GiB)": 72.48, |
| "step": 2735, |
| "token_acc": 0.9194692680264683, |
| "train_speed(iter/s)": 0.08064 |
| }, |
| { |
| "epoch": 1.0616174582798459, |
| "grad_norm": 0.5859219431877136, |
| "learning_rate": 7.679061826145027e-06, |
| "loss": 0.2543762683868408, |
| "memory(GiB)": 72.48, |
| "step": 2740, |
| "token_acc": 0.9142654460696551, |
| "train_speed(iter/s)": 0.080643 |
| }, |
| { |
| "epoch": 1.063555114200596, |
| "grad_norm": 0.5633178949356079, |
| "learning_rate": 7.670037077230659e-06, |
| "loss": 0.23209047317504883, |
| "memory(GiB)": 72.48, |
| "step": 2745, |
| "token_acc": 0.931188868990219, |
| "train_speed(iter/s)": 0.080652 |
| }, |
| { |
| "epoch": 1.0654927701213457, |
| "grad_norm": 0.5863512754440308, |
| "learning_rate": 7.661000143254129e-06, |
| "loss": 0.2488046646118164, |
| "memory(GiB)": 72.48, |
| "step": 2750, |
| "token_acc": 0.9081606630538731, |
| "train_speed(iter/s)": 0.080661 |
| }, |
| { |
| "epoch": 1.0674304260420955, |
| "grad_norm": 0.5774025917053223, |
| "learning_rate": 7.651951065456658e-06, |
| "loss": 0.23735444545745848, |
| "memory(GiB)": 72.48, |
| "step": 2755, |
| "token_acc": 0.9132469507736406, |
| "train_speed(iter/s)": 0.080665 |
| }, |
| { |
| "epoch": 1.0693680819628455, |
| "grad_norm": 0.6099271178245544, |
| "learning_rate": 7.642889885134897e-06, |
| "loss": 0.2543477058410645, |
| "memory(GiB)": 72.48, |
| "step": 2760, |
| "token_acc": 0.9162998624484182, |
| "train_speed(iter/s)": 0.080679 |
| }, |
| { |
| "epoch": 1.0713057378835953, |
| "grad_norm": 0.5847581624984741, |
| "learning_rate": 7.63381664364072e-06, |
| "loss": 0.24816784858703614, |
| "memory(GiB)": 72.48, |
| "step": 2765, |
| "token_acc": 0.9119142811450504, |
| "train_speed(iter/s)": 0.080672 |
| }, |
| { |
| "epoch": 1.0732433938043453, |
| "grad_norm": 0.5548186898231506, |
| "learning_rate": 7.624731382381048e-06, |
| "loss": 0.25109171867370605, |
| "memory(GiB)": 72.48, |
| "step": 2770, |
| "token_acc": 0.9072886841777668, |
| "train_speed(iter/s)": 0.080683 |
| }, |
| { |
| "epoch": 1.075181049725095, |
| "grad_norm": 0.5436238050460815, |
| "learning_rate": 7.6156341428176536e-06, |
| "loss": 0.24203226566314698, |
| "memory(GiB)": 72.48, |
| "step": 2775, |
| "token_acc": 0.914740183562211, |
| "train_speed(iter/s)": 0.080688 |
| }, |
| { |
| "epoch": 1.0771187056458449, |
| "grad_norm": 0.5817808508872986, |
| "learning_rate": 7.606524966466979e-06, |
| "loss": 0.23057384490966798, |
| "memory(GiB)": 72.48, |
| "step": 2780, |
| "token_acc": 0.9219449364031557, |
| "train_speed(iter/s)": 0.080686 |
| }, |
| { |
| "epoch": 1.079056361566595, |
| "grad_norm": 0.5919598937034607, |
| "learning_rate": 7.597403894899932e-06, |
| "loss": 0.259158730506897, |
| "memory(GiB)": 72.48, |
| "step": 2785, |
| "token_acc": 0.9131787972118658, |
| "train_speed(iter/s)": 0.080686 |
| }, |
| { |
| "epoch": 1.0809940174873447, |
| "grad_norm": 0.5995221138000488, |
| "learning_rate": 7.588270969741715e-06, |
| "loss": 0.24664411544799805, |
| "memory(GiB)": 72.48, |
| "step": 2790, |
| "token_acc": 0.9199007507316452, |
| "train_speed(iter/s)": 0.080685 |
| }, |
| { |
| "epoch": 1.0829316734080945, |
| "grad_norm": 0.5648677349090576, |
| "learning_rate": 7.579126232671621e-06, |
| "loss": 0.24066920280456544, |
| "memory(GiB)": 72.48, |
| "step": 2795, |
| "token_acc": 0.9095726198749131, |
| "train_speed(iter/s)": 0.080696 |
| }, |
| { |
| "epoch": 1.0848693293288445, |
| "grad_norm": 0.5983613729476929, |
| "learning_rate": 7.5699697254228496e-06, |
| "loss": 0.2533870697021484, |
| "memory(GiB)": 72.48, |
| "step": 2800, |
| "token_acc": 0.9211015879343565, |
| "train_speed(iter/s)": 0.080698 |
| }, |
| { |
| "epoch": 1.0868069852495943, |
| "grad_norm": 0.5517467260360718, |
| "learning_rate": 7.560801489782315e-06, |
| "loss": 0.24492838382720947, |
| "memory(GiB)": 72.48, |
| "step": 2805, |
| "token_acc": 0.9226202126965486, |
| "train_speed(iter/s)": 0.080705 |
| }, |
| { |
| "epoch": 1.088744641170344, |
| "grad_norm": 0.5659517049789429, |
| "learning_rate": 7.5516215675904555e-06, |
| "loss": 0.2431710481643677, |
| "memory(GiB)": 72.48, |
| "step": 2810, |
| "token_acc": 0.9124055690828494, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 1.090682297091094, |
| "grad_norm": 0.5607476234436035, |
| "learning_rate": 7.542430000741042e-06, |
| "loss": 0.25229265689849856, |
| "memory(GiB)": 72.48, |
| "step": 2815, |
| "token_acc": 0.9133296939725764, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 1.092619953011844, |
| "grad_norm": 0.5616917014122009, |
| "learning_rate": 7.533226831180988e-06, |
| "loss": 0.24518215656280518, |
| "memory(GiB)": 72.48, |
| "step": 2820, |
| "token_acc": 0.9195824853935535, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 1.0945576089325937, |
| "grad_norm": 0.5850186347961426, |
| "learning_rate": 7.524012100910158e-06, |
| "loss": 0.2529883861541748, |
| "memory(GiB)": 72.48, |
| "step": 2825, |
| "token_acc": 0.9079726351795816, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 1.0964952648533437, |
| "grad_norm": 0.5485085844993591, |
| "learning_rate": 7.5147858519811725e-06, |
| "loss": 0.2463089942932129, |
| "memory(GiB)": 72.48, |
| "step": 2830, |
| "token_acc": 0.9261502906365991, |
| "train_speed(iter/s)": 0.080741 |
| }, |
| { |
| "epoch": 1.0984329207740935, |
| "grad_norm": 0.5665585994720459, |
| "learning_rate": 7.50554812649922e-06, |
| "loss": 0.24828519821166992, |
| "memory(GiB)": 72.48, |
| "step": 2835, |
| "token_acc": 0.912733688460625, |
| "train_speed(iter/s)": 0.080748 |
| }, |
| { |
| "epoch": 1.1003705766948435, |
| "grad_norm": 0.5671249032020569, |
| "learning_rate": 7.496298966621869e-06, |
| "loss": 0.2461564540863037, |
| "memory(GiB)": 72.48, |
| "step": 2840, |
| "token_acc": 0.9144147028688525, |
| "train_speed(iter/s)": 0.080745 |
| }, |
| { |
| "epoch": 1.1023082326155933, |
| "grad_norm": 0.5896663069725037, |
| "learning_rate": 7.4870384145588625e-06, |
| "loss": 0.24836764335632325, |
| "memory(GiB)": 72.48, |
| "step": 2845, |
| "token_acc": 0.924081992805267, |
| "train_speed(iter/s)": 0.080735 |
| }, |
| { |
| "epoch": 1.104245888536343, |
| "grad_norm": 0.5926647782325745, |
| "learning_rate": 7.477766512571938e-06, |
| "loss": 0.2438123941421509, |
| "memory(GiB)": 72.48, |
| "step": 2850, |
| "token_acc": 0.9217040298905791, |
| "train_speed(iter/s)": 0.080743 |
| }, |
| { |
| "epoch": 1.106183544457093, |
| "grad_norm": 0.5987430810928345, |
| "learning_rate": 7.468483302974629e-06, |
| "loss": 0.24998788833618163, |
| "memory(GiB)": 72.48, |
| "step": 2855, |
| "token_acc": 0.9153590774069544, |
| "train_speed(iter/s)": 0.080732 |
| }, |
| { |
| "epoch": 1.108121200377843, |
| "grad_norm": 0.606680691242218, |
| "learning_rate": 7.459188828132069e-06, |
| "loss": 0.2444852828979492, |
| "memory(GiB)": 72.48, |
| "step": 2860, |
| "token_acc": 0.9168447750135362, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 1.1100588562985927, |
| "grad_norm": 0.6073605418205261, |
| "learning_rate": 7.449883130460809e-06, |
| "loss": 0.2551856517791748, |
| "memory(GiB)": 72.48, |
| "step": 2865, |
| "token_acc": 0.9164820180671552, |
| "train_speed(iter/s)": 0.080735 |
| }, |
| { |
| "epoch": 1.1119965122193427, |
| "grad_norm": 0.5303031206130981, |
| "learning_rate": 7.440566252428612e-06, |
| "loss": 0.2426982879638672, |
| "memory(GiB)": 72.48, |
| "step": 2870, |
| "token_acc": 0.921721788266578, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 1.1139341681400925, |
| "grad_norm": 0.6147097945213318, |
| "learning_rate": 7.431238236554263e-06, |
| "loss": 0.24436643123626708, |
| "memory(GiB)": 72.48, |
| "step": 2875, |
| "token_acc": 0.9231390279347651, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 1.1158718240608425, |
| "grad_norm": 0.5928937196731567, |
| "learning_rate": 7.4218991254073815e-06, |
| "loss": 0.24488534927368164, |
| "memory(GiB)": 72.48, |
| "step": 2880, |
| "token_acc": 0.922065383847127, |
| "train_speed(iter/s)": 0.08074 |
| }, |
| { |
| "epoch": 1.1178094799815923, |
| "grad_norm": 0.5589123964309692, |
| "learning_rate": 7.412548961608217e-06, |
| "loss": 0.23948745727539061, |
| "memory(GiB)": 72.48, |
| "step": 2885, |
| "token_acc": 0.917822325964575, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 1.119747135902342, |
| "grad_norm": 0.5714585781097412, |
| "learning_rate": 7.403187787827459e-06, |
| "loss": 0.24300243854522705, |
| "memory(GiB)": 72.48, |
| "step": 2890, |
| "token_acc": 0.9179357311522535, |
| "train_speed(iter/s)": 0.080747 |
| }, |
| { |
| "epoch": 1.121684791823092, |
| "grad_norm": 0.5701161026954651, |
| "learning_rate": 7.393815646786047e-06, |
| "loss": 0.24715614318847656, |
| "memory(GiB)": 72.48, |
| "step": 2895, |
| "token_acc": 0.9169183809735779, |
| "train_speed(iter/s)": 0.080739 |
| }, |
| { |
| "epoch": 1.123622447743842, |
| "grad_norm": 0.541515588760376, |
| "learning_rate": 7.384432581254963e-06, |
| "loss": 0.23558435440063477, |
| "memory(GiB)": 72.48, |
| "step": 2900, |
| "token_acc": 0.9188448238262874, |
| "train_speed(iter/s)": 0.080735 |
| }, |
| { |
| "epoch": 1.1255601036645917, |
| "grad_norm": 0.5822292566299438, |
| "learning_rate": 7.375038634055056e-06, |
| "loss": 0.2428572654724121, |
| "memory(GiB)": 72.48, |
| "step": 2905, |
| "token_acc": 0.9137654495707143, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 1.1274977595853417, |
| "grad_norm": 0.6057553291320801, |
| "learning_rate": 7.3656338480568234e-06, |
| "loss": 0.23881745338439941, |
| "memory(GiB)": 72.48, |
| "step": 2910, |
| "token_acc": 0.9118832437713728, |
| "train_speed(iter/s)": 0.080724 |
| }, |
| { |
| "epoch": 1.1294354155060915, |
| "grad_norm": 0.6620405912399292, |
| "learning_rate": 7.3562182661802325e-06, |
| "loss": 0.23701815605163573, |
| "memory(GiB)": 72.48, |
| "step": 2915, |
| "token_acc": 0.926943768632423, |
| "train_speed(iter/s)": 0.080722 |
| }, |
| { |
| "epoch": 1.1313730714268413, |
| "grad_norm": 0.5819861888885498, |
| "learning_rate": 7.34679193139452e-06, |
| "loss": 0.2385103225708008, |
| "memory(GiB)": 72.48, |
| "step": 2920, |
| "token_acc": 0.9193125252253465, |
| "train_speed(iter/s)": 0.08072 |
| }, |
| { |
| "epoch": 1.1333107273475913, |
| "grad_norm": 0.548102080821991, |
| "learning_rate": 7.337354886717991e-06, |
| "loss": 0.2568079471588135, |
| "memory(GiB)": 72.48, |
| "step": 2925, |
| "token_acc": 0.9147654320987655, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 1.135248383268341, |
| "grad_norm": 0.572535514831543, |
| "learning_rate": 7.32790717521783e-06, |
| "loss": 0.24390482902526855, |
| "memory(GiB)": 72.48, |
| "step": 2930, |
| "token_acc": 0.9171663495027108, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 1.137186039189091, |
| "grad_norm": 0.5705650448799133, |
| "learning_rate": 7.3184488400099e-06, |
| "loss": 0.2597285270690918, |
| "memory(GiB)": 72.48, |
| "step": 2935, |
| "token_acc": 0.912927241962775, |
| "train_speed(iter/s)": 0.08073 |
| }, |
| { |
| "epoch": 1.139123695109841, |
| "grad_norm": 0.5626857280731201, |
| "learning_rate": 7.308979924258547e-06, |
| "loss": 0.2346130132675171, |
| "memory(GiB)": 72.48, |
| "step": 2940, |
| "token_acc": 0.9203948469131671, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 1.1410613510305907, |
| "grad_norm": 0.5514906048774719, |
| "learning_rate": 7.2995004711763996e-06, |
| "loss": 0.23178393840789796, |
| "memory(GiB)": 72.48, |
| "step": 2945, |
| "token_acc": 0.926688815060908, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 1.1429990069513407, |
| "grad_norm": 0.5872433185577393, |
| "learning_rate": 7.290010524024178e-06, |
| "loss": 0.2397766590118408, |
| "memory(GiB)": 72.48, |
| "step": 2950, |
| "token_acc": 0.9042819620059862, |
| "train_speed(iter/s)": 0.080738 |
| }, |
| { |
| "epoch": 1.1449366628720905, |
| "grad_norm": 0.5944450497627258, |
| "learning_rate": 7.2805101261104934e-06, |
| "loss": 0.24837064743041992, |
| "memory(GiB)": 72.48, |
| "step": 2955, |
| "token_acc": 0.9123956365409227, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 1.1468743187928403, |
| "grad_norm": 0.5810188055038452, |
| "learning_rate": 7.270999320791651e-06, |
| "loss": 0.2451401948928833, |
| "memory(GiB)": 72.48, |
| "step": 2960, |
| "token_acc": 0.9096873667289965, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 1.1488119747135903, |
| "grad_norm": 0.5396209955215454, |
| "learning_rate": 7.261478151471448e-06, |
| "loss": 0.24097609519958496, |
| "memory(GiB)": 72.48, |
| "step": 2965, |
| "token_acc": 0.9194308013187109, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 1.15074963063434, |
| "grad_norm": 0.5635169148445129, |
| "learning_rate": 7.251946661600982e-06, |
| "loss": 0.22938740253448486, |
| "memory(GiB)": 72.48, |
| "step": 2970, |
| "token_acc": 0.9203730023919299, |
| "train_speed(iter/s)": 0.080728 |
| }, |
| { |
| "epoch": 1.15268728655509, |
| "grad_norm": 0.5849376320838928, |
| "learning_rate": 7.242404894678452e-06, |
| "loss": 0.2458806037902832, |
| "memory(GiB)": 72.48, |
| "step": 2975, |
| "token_acc": 0.9125376851487744, |
| "train_speed(iter/s)": 0.080737 |
| }, |
| { |
| "epoch": 1.15462494247584, |
| "grad_norm": 0.5923815965652466, |
| "learning_rate": 7.232852894248951e-06, |
| "loss": 0.2423017978668213, |
| "memory(GiB)": 72.48, |
| "step": 2980, |
| "token_acc": 0.9189056677779956, |
| "train_speed(iter/s)": 0.080741 |
| }, |
| { |
| "epoch": 1.1565625983965897, |
| "grad_norm": 0.5999000072479248, |
| "learning_rate": 7.223290703904278e-06, |
| "loss": 0.24765682220458984, |
| "memory(GiB)": 72.48, |
| "step": 2985, |
| "token_acc": 0.9232919480142308, |
| "train_speed(iter/s)": 0.080744 |
| }, |
| { |
| "epoch": 1.1585002543173397, |
| "grad_norm": 0.5837448239326477, |
| "learning_rate": 7.213718367282737e-06, |
| "loss": 0.24890732765197754, |
| "memory(GiB)": 72.48, |
| "step": 2990, |
| "token_acc": 0.9119633463431189, |
| "train_speed(iter/s)": 0.080737 |
| }, |
| { |
| "epoch": 1.1604379102380895, |
| "grad_norm": 0.5687659382820129, |
| "learning_rate": 7.204135928068934e-06, |
| "loss": 0.24406375885009765, |
| "memory(GiB)": 72.48, |
| "step": 2995, |
| "token_acc": 0.9261226980651183, |
| "train_speed(iter/s)": 0.080741 |
| }, |
| { |
| "epoch": 1.1623755661588393, |
| "grad_norm": 0.5431941151618958, |
| "learning_rate": 7.194543429993576e-06, |
| "loss": 0.23955135345458983, |
| "memory(GiB)": 72.48, |
| "step": 3000, |
| "token_acc": 0.920778318276581, |
| "train_speed(iter/s)": 0.080744 |
| }, |
| { |
| "epoch": 1.1643132220795893, |
| "grad_norm": 0.6085385084152222, |
| "learning_rate": 7.18494091683328e-06, |
| "loss": 0.24273269176483153, |
| "memory(GiB)": 72.48, |
| "step": 3005, |
| "token_acc": 0.9147768308536761, |
| "train_speed(iter/s)": 0.080754 |
| }, |
| { |
| "epoch": 1.166250878000339, |
| "grad_norm": 0.5439560413360596, |
| "learning_rate": 7.175328432410367e-06, |
| "loss": 0.2393632411956787, |
| "memory(GiB)": 72.48, |
| "step": 3010, |
| "token_acc": 0.9225050983487928, |
| "train_speed(iter/s)": 0.080748 |
| }, |
| { |
| "epoch": 1.168188533921089, |
| "grad_norm": 0.5491480231285095, |
| "learning_rate": 7.1657060205926606e-06, |
| "loss": 0.24930839538574218, |
| "memory(GiB)": 72.48, |
| "step": 3015, |
| "token_acc": 0.9135148013580321, |
| "train_speed(iter/s)": 0.080759 |
| }, |
| { |
| "epoch": 1.170126189841839, |
| "grad_norm": 0.5586943626403809, |
| "learning_rate": 7.156073725293293e-06, |
| "loss": 0.2412470817565918, |
| "memory(GiB)": 72.48, |
| "step": 3020, |
| "token_acc": 0.9073272300624649, |
| "train_speed(iter/s)": 0.080762 |
| }, |
| { |
| "epoch": 1.1720638457625887, |
| "grad_norm": 0.5404574871063232, |
| "learning_rate": 7.146431590470498e-06, |
| "loss": 0.24754083156585693, |
| "memory(GiB)": 72.48, |
| "step": 3025, |
| "token_acc": 0.9088854223042883, |
| "train_speed(iter/s)": 0.080763 |
| }, |
| { |
| "epoch": 1.1740015016833385, |
| "grad_norm": 0.5775381922721863, |
| "learning_rate": 7.1367796601274144e-06, |
| "loss": 0.23798027038574218, |
| "memory(GiB)": 72.48, |
| "step": 3030, |
| "token_acc": 0.9125572996249479, |
| "train_speed(iter/s)": 0.080769 |
| }, |
| { |
| "epoch": 1.1759391576040885, |
| "grad_norm": 0.5799135565757751, |
| "learning_rate": 7.127117978311884e-06, |
| "loss": 0.24995725154876708, |
| "memory(GiB)": 72.48, |
| "step": 3035, |
| "token_acc": 0.9050905683947533, |
| "train_speed(iter/s)": 0.080775 |
| }, |
| { |
| "epoch": 1.1778768135248383, |
| "grad_norm": 0.5825245380401611, |
| "learning_rate": 7.117446589116253e-06, |
| "loss": 0.25867457389831544, |
| "memory(GiB)": 72.48, |
| "step": 3040, |
| "token_acc": 0.9115309110803591, |
| "train_speed(iter/s)": 0.080772 |
| }, |
| { |
| "epoch": 1.179814469445588, |
| "grad_norm": 0.5762625932693481, |
| "learning_rate": 7.107765536677162e-06, |
| "loss": 0.23916106224060057, |
| "memory(GiB)": 72.48, |
| "step": 3045, |
| "token_acc": 0.9156965587821415, |
| "train_speed(iter/s)": 0.080771 |
| }, |
| { |
| "epoch": 1.1817521253663381, |
| "grad_norm": 0.5372615456581116, |
| "learning_rate": 7.098074865175358e-06, |
| "loss": 0.2506131649017334, |
| "memory(GiB)": 72.48, |
| "step": 3050, |
| "token_acc": 0.9142211471131199, |
| "train_speed(iter/s)": 0.080783 |
| }, |
| { |
| "epoch": 1.183689781287088, |
| "grad_norm": 0.6118589043617249, |
| "learning_rate": 7.088374618835485e-06, |
| "loss": 0.24305667877197265, |
| "memory(GiB)": 72.48, |
| "step": 3055, |
| "token_acc": 0.9190882087046365, |
| "train_speed(iter/s)": 0.080785 |
| }, |
| { |
| "epoch": 1.1856274372078377, |
| "grad_norm": 0.6260302066802979, |
| "learning_rate": 7.078664841925879e-06, |
| "loss": 0.2404834270477295, |
| "memory(GiB)": 72.48, |
| "step": 3060, |
| "token_acc": 0.9191426510493594, |
| "train_speed(iter/s)": 0.08079 |
| }, |
| { |
| "epoch": 1.1875650931285877, |
| "grad_norm": 0.561583936214447, |
| "learning_rate": 7.0689455787583725e-06, |
| "loss": 0.24459214210510255, |
| "memory(GiB)": 72.48, |
| "step": 3065, |
| "token_acc": 0.920421984891899, |
| "train_speed(iter/s)": 0.080784 |
| }, |
| { |
| "epoch": 1.1895027490493375, |
| "grad_norm": 0.57597416639328, |
| "learning_rate": 7.059216873688093e-06, |
| "loss": 0.23862845897674562, |
| "memory(GiB)": 72.48, |
| "step": 3070, |
| "token_acc": 0.916607294317218, |
| "train_speed(iter/s)": 0.08078 |
| }, |
| { |
| "epoch": 1.1914404049700875, |
| "grad_norm": 0.5645927786827087, |
| "learning_rate": 7.049478771113248e-06, |
| "loss": 0.24027485847473146, |
| "memory(GiB)": 72.48, |
| "step": 3075, |
| "token_acc": 0.9250272257010618, |
| "train_speed(iter/s)": 0.080775 |
| }, |
| { |
| "epoch": 1.1933780608908373, |
| "grad_norm": 0.6004230380058289, |
| "learning_rate": 7.039731315474941e-06, |
| "loss": 0.252230167388916, |
| "memory(GiB)": 72.48, |
| "step": 3080, |
| "token_acc": 0.9052574971529799, |
| "train_speed(iter/s)": 0.080784 |
| }, |
| { |
| "epoch": 1.195315716811587, |
| "grad_norm": 0.5419390201568604, |
| "learning_rate": 7.029974551256957e-06, |
| "loss": 0.2478321075439453, |
| "memory(GiB)": 72.48, |
| "step": 3085, |
| "token_acc": 0.9099167547568711, |
| "train_speed(iter/s)": 0.080779 |
| }, |
| { |
| "epoch": 1.1972533727323371, |
| "grad_norm": 0.5636507272720337, |
| "learning_rate": 7.020208522985559e-06, |
| "loss": 0.24982898235321044, |
| "memory(GiB)": 72.48, |
| "step": 3090, |
| "token_acc": 0.9075738529226901, |
| "train_speed(iter/s)": 0.080783 |
| }, |
| { |
| "epoch": 1.199191028653087, |
| "grad_norm": 0.5782200694084167, |
| "learning_rate": 7.010433275229289e-06, |
| "loss": 0.22980303764343263, |
| "memory(GiB)": 72.48, |
| "step": 3095, |
| "token_acc": 0.9177668261448253, |
| "train_speed(iter/s)": 0.080792 |
| }, |
| { |
| "epoch": 1.201128684573837, |
| "grad_norm": 0.5414060354232788, |
| "learning_rate": 7.0006488525987686e-06, |
| "loss": 0.2309312105178833, |
| "memory(GiB)": 72.48, |
| "step": 3100, |
| "token_acc": 0.9217795870892701, |
| "train_speed(iter/s)": 0.080785 |
| }, |
| { |
| "epoch": 1.2030663404945867, |
| "grad_norm": 0.5498025417327881, |
| "learning_rate": 6.990855299746482e-06, |
| "loss": 0.2509820222854614, |
| "memory(GiB)": 72.48, |
| "step": 3105, |
| "token_acc": 0.9060274263846072, |
| "train_speed(iter/s)": 0.080787 |
| }, |
| { |
| "epoch": 1.2050039964153365, |
| "grad_norm": 0.5826764106750488, |
| "learning_rate": 6.981052661366583e-06, |
| "loss": 0.2404792547225952, |
| "memory(GiB)": 72.48, |
| "step": 3110, |
| "token_acc": 0.9211193348879652, |
| "train_speed(iter/s)": 0.080782 |
| }, |
| { |
| "epoch": 1.2069416523360865, |
| "grad_norm": 0.6133027076721191, |
| "learning_rate": 6.971240982194692e-06, |
| "loss": 0.24429504871368407, |
| "memory(GiB)": 72.48, |
| "step": 3115, |
| "token_acc": 0.9174181613979454, |
| "train_speed(iter/s)": 0.080798 |
| }, |
| { |
| "epoch": 1.2088793082568363, |
| "grad_norm": 0.571624755859375, |
| "learning_rate": 6.961420307007684e-06, |
| "loss": 0.24460911750793457, |
| "memory(GiB)": 72.48, |
| "step": 3120, |
| "token_acc": 0.9124199343852523, |
| "train_speed(iter/s)": 0.080817 |
| }, |
| { |
| "epoch": 1.210816964177586, |
| "grad_norm": 0.5924415588378906, |
| "learning_rate": 6.95159068062349e-06, |
| "loss": 0.23861315250396728, |
| "memory(GiB)": 72.48, |
| "step": 3125, |
| "token_acc": 0.9094428978272695, |
| "train_speed(iter/s)": 0.080814 |
| }, |
| { |
| "epoch": 1.2127546200983361, |
| "grad_norm": 0.5533959269523621, |
| "learning_rate": 6.941752147900893e-06, |
| "loss": 0.23748021125793456, |
| "memory(GiB)": 72.48, |
| "step": 3130, |
| "token_acc": 0.9162806088682991, |
| "train_speed(iter/s)": 0.080819 |
| }, |
| { |
| "epoch": 1.214692276019086, |
| "grad_norm": 0.597855269908905, |
| "learning_rate": 6.931904753739317e-06, |
| "loss": 0.249676513671875, |
| "memory(GiB)": 72.48, |
| "step": 3135, |
| "token_acc": 0.9153109572247441, |
| "train_speed(iter/s)": 0.080817 |
| }, |
| { |
| "epoch": 1.2166299319398357, |
| "grad_norm": 0.5988847613334656, |
| "learning_rate": 6.922048543078629e-06, |
| "loss": 0.23966631889343262, |
| "memory(GiB)": 72.48, |
| "step": 3140, |
| "token_acc": 0.9228861802814934, |
| "train_speed(iter/s)": 0.080823 |
| }, |
| { |
| "epoch": 1.2185675878605857, |
| "grad_norm": 0.57000732421875, |
| "learning_rate": 6.912183560898933e-06, |
| "loss": 0.24102869033813476, |
| "memory(GiB)": 72.48, |
| "step": 3145, |
| "token_acc": 0.9046189600077467, |
| "train_speed(iter/s)": 0.08083 |
| }, |
| { |
| "epoch": 1.2205052437813355, |
| "grad_norm": 0.5944874286651611, |
| "learning_rate": 6.902309852220357e-06, |
| "loss": 0.24906721115112304, |
| "memory(GiB)": 72.48, |
| "step": 3150, |
| "token_acc": 0.9149213846249457, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 1.2224428997020853, |
| "grad_norm": 0.5449170470237732, |
| "learning_rate": 6.89242746210286e-06, |
| "loss": 0.2430576801300049, |
| "memory(GiB)": 72.48, |
| "step": 3155, |
| "token_acc": 0.9243398027362393, |
| "train_speed(iter/s)": 0.080847 |
| }, |
| { |
| "epoch": 1.2243805556228353, |
| "grad_norm": 0.5581479072570801, |
| "learning_rate": 6.882536435646017e-06, |
| "loss": 0.23644862174987794, |
| "memory(GiB)": 72.48, |
| "step": 3160, |
| "token_acc": 0.9143110192634143, |
| "train_speed(iter/s)": 0.080855 |
| }, |
| { |
| "epoch": 1.2263182115435851, |
| "grad_norm": 0.5619523525238037, |
| "learning_rate": 6.872636817988814e-06, |
| "loss": 0.24310529232025146, |
| "memory(GiB)": 72.48, |
| "step": 3165, |
| "token_acc": 0.9242279311287237, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 1.228255867464335, |
| "grad_norm": 0.5570391416549683, |
| "learning_rate": 6.862728654309449e-06, |
| "loss": 0.23729300498962402, |
| "memory(GiB)": 72.48, |
| "step": 3170, |
| "token_acc": 0.9165948350170672, |
| "train_speed(iter/s)": 0.080872 |
| }, |
| { |
| "epoch": 1.230193523385085, |
| "grad_norm": 0.608040988445282, |
| "learning_rate": 6.852811989825118e-06, |
| "loss": 0.24692845344543457, |
| "memory(GiB)": 72.48, |
| "step": 3175, |
| "token_acc": 0.9088531576908175, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 1.2321311793058347, |
| "grad_norm": 0.6009603142738342, |
| "learning_rate": 6.84288686979181e-06, |
| "loss": 0.243471360206604, |
| "memory(GiB)": 72.48, |
| "step": 3180, |
| "token_acc": 0.9175115352019837, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 1.2340688352265847, |
| "grad_norm": 0.564767599105835, |
| "learning_rate": 6.832953339504105e-06, |
| "loss": 0.25086545944213867, |
| "memory(GiB)": 72.48, |
| "step": 3185, |
| "token_acc": 0.9183437357284213, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 1.2360064911473345, |
| "grad_norm": 0.5559407472610474, |
| "learning_rate": 6.823011444294962e-06, |
| "loss": 0.25046041011810305, |
| "memory(GiB)": 72.48, |
| "step": 3190, |
| "token_acc": 0.9121144863683043, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 1.2379441470680843, |
| "grad_norm": 0.5758140087127686, |
| "learning_rate": 6.813061229535517e-06, |
| "loss": 0.24177942276000977, |
| "memory(GiB)": 72.48, |
| "step": 3195, |
| "token_acc": 0.9083065626434144, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 1.2398818029888343, |
| "grad_norm": 0.6040492653846741, |
| "learning_rate": 6.80310274063487e-06, |
| "loss": 0.2439272880554199, |
| "memory(GiB)": 72.48, |
| "step": 3200, |
| "token_acc": 0.9128642730639188, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 1.2418194589095841, |
| "grad_norm": 0.5259760022163391, |
| "learning_rate": 6.7931360230398835e-06, |
| "loss": 0.2501484155654907, |
| "memory(GiB)": 72.48, |
| "step": 3205, |
| "token_acc": 0.9080067044868488, |
| "train_speed(iter/s)": 0.08088 |
| }, |
| { |
| "epoch": 1.243757114830334, |
| "grad_norm": 0.5799282789230347, |
| "learning_rate": 6.7831611222349745e-06, |
| "loss": 0.23147008419036866, |
| "memory(GiB)": 72.48, |
| "step": 3210, |
| "token_acc": 0.9207616921842848, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 1.245694770751084, |
| "grad_norm": 0.5783043503761292, |
| "learning_rate": 6.773178083741899e-06, |
| "loss": 0.2400331974029541, |
| "memory(GiB)": 72.48, |
| "step": 3215, |
| "token_acc": 0.9239953407105417, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 1.2476324266718337, |
| "grad_norm": 0.6135613322257996, |
| "learning_rate": 6.763186953119556e-06, |
| "loss": 0.25634021759033204, |
| "memory(GiB)": 72.48, |
| "step": 3220, |
| "token_acc": 0.9119443126087644, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 1.2495700825925837, |
| "grad_norm": 0.5815515518188477, |
| "learning_rate": 6.753187775963773e-06, |
| "loss": 0.2590181350708008, |
| "memory(GiB)": 72.48, |
| "step": 3225, |
| "token_acc": 0.906501256281407, |
| "train_speed(iter/s)": 0.080852 |
| }, |
| { |
| "epoch": 1.2515077385133335, |
| "grad_norm": 0.5874833464622498, |
| "learning_rate": 6.743180597907095e-06, |
| "loss": 0.24145932197570802, |
| "memory(GiB)": 72.48, |
| "step": 3230, |
| "token_acc": 0.9158553754905191, |
| "train_speed(iter/s)": 0.080847 |
| }, |
| { |
| "epoch": 1.2534453944340833, |
| "grad_norm": 0.5646886229515076, |
| "learning_rate": 6.7331654646185876e-06, |
| "loss": 0.25124948024749755, |
| "memory(GiB)": 72.48, |
| "step": 3235, |
| "token_acc": 0.913716884521197, |
| "train_speed(iter/s)": 0.080845 |
| }, |
| { |
| "epoch": 1.2553830503548333, |
| "grad_norm": 0.5993587970733643, |
| "learning_rate": 6.723142421803614e-06, |
| "loss": 0.24982619285583496, |
| "memory(GiB)": 72.48, |
| "step": 3240, |
| "token_acc": 0.9041410309541267, |
| "train_speed(iter/s)": 0.080833 |
| }, |
| { |
| "epoch": 1.2573207062755831, |
| "grad_norm": 0.5263670086860657, |
| "learning_rate": 6.713111515203635e-06, |
| "loss": 0.2320237398147583, |
| "memory(GiB)": 72.48, |
| "step": 3245, |
| "token_acc": 0.9194774030129316, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 1.259258362196333, |
| "grad_norm": 0.5713490843772888, |
| "learning_rate": 6.703072790596003e-06, |
| "loss": 0.245804500579834, |
| "memory(GiB)": 72.48, |
| "step": 3250, |
| "token_acc": 0.9159877625382421, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 1.261196018117083, |
| "grad_norm": 0.5493902564048767, |
| "learning_rate": 6.693026293793745e-06, |
| "loss": 0.2505074977874756, |
| "memory(GiB)": 72.48, |
| "step": 3255, |
| "token_acc": 0.9134997827467496, |
| "train_speed(iter/s)": 0.080834 |
| }, |
| { |
| "epoch": 1.2631336740378327, |
| "grad_norm": 0.5481184124946594, |
| "learning_rate": 6.682972070645357e-06, |
| "loss": 0.2504453182220459, |
| "memory(GiB)": 72.48, |
| "step": 3260, |
| "token_acc": 0.9140858147091983, |
| "train_speed(iter/s)": 0.080833 |
| }, |
| { |
| "epoch": 1.2650713299585825, |
| "grad_norm": 0.5771926045417786, |
| "learning_rate": 6.672910167034599e-06, |
| "loss": 0.24117374420166016, |
| "memory(GiB)": 72.48, |
| "step": 3265, |
| "token_acc": 0.9149578195976639, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 1.2670089858793325, |
| "grad_norm": 0.5835690498352051, |
| "learning_rate": 6.6628406288802785e-06, |
| "loss": 0.2438589096069336, |
| "memory(GiB)": 72.48, |
| "step": 3270, |
| "token_acc": 0.9113916349809886, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 1.2689466418000823, |
| "grad_norm": 0.5542348027229309, |
| "learning_rate": 6.652763502136044e-06, |
| "loss": 0.24294943809509278, |
| "memory(GiB)": 72.48, |
| "step": 3275, |
| "token_acc": 0.9156563907170073, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 1.2708842977208321, |
| "grad_norm": 0.6041167378425598, |
| "learning_rate": 6.642678832790177e-06, |
| "loss": 0.2392800807952881, |
| "memory(GiB)": 72.48, |
| "step": 3280, |
| "token_acc": 0.9145334292861793, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 1.2728219536415821, |
| "grad_norm": 0.5393861532211304, |
| "learning_rate": 6.632586666865383e-06, |
| "loss": 0.2392728567123413, |
| "memory(GiB)": 72.48, |
| "step": 3285, |
| "token_acc": 0.9183891314895681, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 1.274759609562332, |
| "grad_norm": 0.584801971912384, |
| "learning_rate": 6.622487050418572e-06, |
| "loss": 0.24840068817138672, |
| "memory(GiB)": 72.48, |
| "step": 3290, |
| "token_acc": 0.9125019857029388, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 1.2766972654830817, |
| "grad_norm": 0.549976110458374, |
| "learning_rate": 6.612380029540663e-06, |
| "loss": 0.24426255226135254, |
| "memory(GiB)": 72.48, |
| "step": 3295, |
| "token_acc": 0.911495008293431, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 1.2786349214038317, |
| "grad_norm": 0.6103242039680481, |
| "learning_rate": 6.602265650356363e-06, |
| "loss": 0.2518136501312256, |
| "memory(GiB)": 72.48, |
| "step": 3300, |
| "token_acc": 0.9153556827473426, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 1.2805725773245815, |
| "grad_norm": 0.5820600390434265, |
| "learning_rate": 6.5921439590239565e-06, |
| "loss": 0.23869671821594238, |
| "memory(GiB)": 72.48, |
| "step": 3305, |
| "token_acc": 0.9189243027888446, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 1.2825102332453315, |
| "grad_norm": 0.6232005953788757, |
| "learning_rate": 6.582015001735105e-06, |
| "loss": 0.2620884656906128, |
| "memory(GiB)": 72.48, |
| "step": 3310, |
| "token_acc": 0.9090964460821634, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 1.2844478891660813, |
| "grad_norm": 0.5614112019538879, |
| "learning_rate": 6.571878824714622e-06, |
| "loss": 0.24253828525543214, |
| "memory(GiB)": 72.48, |
| "step": 3315, |
| "token_acc": 0.926297150111924, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 1.2863855450868313, |
| "grad_norm": 0.5985612869262695, |
| "learning_rate": 6.561735474220274e-06, |
| "loss": 0.24328384399414063, |
| "memory(GiB)": 72.48, |
| "step": 3320, |
| "token_acc": 0.9195212546430045, |
| "train_speed(iter/s)": 0.080829 |
| }, |
| { |
| "epoch": 1.2883232010075811, |
| "grad_norm": 0.5797421336174011, |
| "learning_rate": 6.551584996542561e-06, |
| "loss": 0.2494278907775879, |
| "memory(GiB)": 72.48, |
| "step": 3325, |
| "token_acc": 0.9162843641130006, |
| "train_speed(iter/s)": 0.080833 |
| }, |
| { |
| "epoch": 1.290260856928331, |
| "grad_norm": 0.5532296895980835, |
| "learning_rate": 6.541427438004515e-06, |
| "loss": 0.24654242992401124, |
| "memory(GiB)": 72.48, |
| "step": 3330, |
| "token_acc": 0.9212665664708294, |
| "train_speed(iter/s)": 0.080832 |
| }, |
| { |
| "epoch": 1.292198512849081, |
| "grad_norm": 0.5895270705223083, |
| "learning_rate": 6.531262844961472e-06, |
| "loss": 0.23927061557769774, |
| "memory(GiB)": 72.48, |
| "step": 3335, |
| "token_acc": 0.9222903885480572, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 1.2941361687698307, |
| "grad_norm": 0.6126574873924255, |
| "learning_rate": 6.521091263800882e-06, |
| "loss": 0.24304308891296386, |
| "memory(GiB)": 72.48, |
| "step": 3340, |
| "token_acc": 0.9151754441826403, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 1.2960738246905805, |
| "grad_norm": 0.5514196753501892, |
| "learning_rate": 6.510912740942079e-06, |
| "loss": 0.23981564044952391, |
| "memory(GiB)": 72.48, |
| "step": 3345, |
| "token_acc": 0.9065907584794265, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 1.2980114806113305, |
| "grad_norm": 0.563449501991272, |
| "learning_rate": 6.500727322836079e-06, |
| "loss": 0.2316061496734619, |
| "memory(GiB)": 72.48, |
| "step": 3350, |
| "token_acc": 0.9239852975678423, |
| "train_speed(iter/s)": 0.08084 |
| }, |
| { |
| "epoch": 1.2999491365320803, |
| "grad_norm": 0.5519877076148987, |
| "learning_rate": 6.490535055965365e-06, |
| "loss": 0.2478248119354248, |
| "memory(GiB)": 72.48, |
| "step": 3355, |
| "token_acc": 0.9153843855508083, |
| "train_speed(iter/s)": 0.080847 |
| }, |
| { |
| "epoch": 1.3018867924528301, |
| "grad_norm": 0.5955557227134705, |
| "learning_rate": 6.480335986843675e-06, |
| "loss": 0.22984936237335205, |
| "memory(GiB)": 72.48, |
| "step": 3360, |
| "token_acc": 0.9195110142822561, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 1.3038244483735801, |
| "grad_norm": 0.5684180855751038, |
| "learning_rate": 6.470130162015789e-06, |
| "loss": 0.24722616672515868, |
| "memory(GiB)": 72.48, |
| "step": 3365, |
| "token_acc": 0.9208458149779736, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 1.30576210429433, |
| "grad_norm": 0.5446616411209106, |
| "learning_rate": 6.459917628057319e-06, |
| "loss": 0.23405933380126953, |
| "memory(GiB)": 72.48, |
| "step": 3370, |
| "token_acc": 0.919267457705986, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 1.3076997602150797, |
| "grad_norm": 0.5617885589599609, |
| "learning_rate": 6.449698431574497e-06, |
| "loss": 0.24888741970062256, |
| "memory(GiB)": 72.48, |
| "step": 3375, |
| "token_acc": 0.9263560451898113, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 1.3096374161358297, |
| "grad_norm": 0.6414940357208252, |
| "learning_rate": 6.439472619203956e-06, |
| "loss": 0.24251883029937743, |
| "memory(GiB)": 72.48, |
| "step": 3380, |
| "token_acc": 0.9109069886947585, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 1.3115750720565795, |
| "grad_norm": 0.5910158157348633, |
| "learning_rate": 6.429240237612523e-06, |
| "loss": 0.24664535522460937, |
| "memory(GiB)": 72.48, |
| "step": 3385, |
| "token_acc": 0.9196039465885716, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 1.3135127279773293, |
| "grad_norm": 0.5916693806648254, |
| "learning_rate": 6.419001333497007e-06, |
| "loss": 0.24849185943603516, |
| "memory(GiB)": 72.48, |
| "step": 3390, |
| "token_acc": 0.9203673689897353, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3154503838980793, |
| "grad_norm": 0.5980777740478516, |
| "learning_rate": 6.4087559535839785e-06, |
| "loss": 0.24993062019348145, |
| "memory(GiB)": 72.48, |
| "step": 3395, |
| "token_acc": 0.904221611997948, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3173880398188291, |
| "grad_norm": 0.5520764589309692, |
| "learning_rate": 6.3985041446295645e-06, |
| "loss": 0.25055632591247556, |
| "memory(GiB)": 72.48, |
| "step": 3400, |
| "token_acc": 0.913022677255765, |
| "train_speed(iter/s)": 0.080871 |
| }, |
| { |
| "epoch": 1.319325695739579, |
| "grad_norm": 0.5889905095100403, |
| "learning_rate": 6.388245953419232e-06, |
| "loss": 0.24010930061340333, |
| "memory(GiB)": 72.48, |
| "step": 3405, |
| "token_acc": 0.9154612325344033, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 1.321263351660329, |
| "grad_norm": 0.5444064736366272, |
| "learning_rate": 6.377981426767574e-06, |
| "loss": 0.24458551406860352, |
| "memory(GiB)": 72.48, |
| "step": 3410, |
| "token_acc": 0.914785553047404, |
| "train_speed(iter/s)": 0.080871 |
| }, |
| { |
| "epoch": 1.3232010075810787, |
| "grad_norm": 0.6147655844688416, |
| "learning_rate": 6.367710611518095e-06, |
| "loss": 0.24504415988922118, |
| "memory(GiB)": 72.48, |
| "step": 3415, |
| "token_acc": 0.9134879163945134, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 1.3251386635018287, |
| "grad_norm": 0.5855667591094971, |
| "learning_rate": 6.357433554543e-06, |
| "loss": 0.24513816833496094, |
| "memory(GiB)": 72.48, |
| "step": 3420, |
| "token_acc": 0.9188609715242881, |
| "train_speed(iter/s)": 0.080875 |
| }, |
| { |
| "epoch": 1.3270763194225785, |
| "grad_norm": 0.5783650875091553, |
| "learning_rate": 6.3471503027429744e-06, |
| "loss": 0.24665217399597167, |
| "memory(GiB)": 72.48, |
| "step": 3425, |
| "token_acc": 0.920804794520548, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3290139753433285, |
| "grad_norm": 0.5873458981513977, |
| "learning_rate": 6.336860903046982e-06, |
| "loss": 0.25883505344390867, |
| "memory(GiB)": 72.48, |
| "step": 3430, |
| "token_acc": 0.9149731218041169, |
| "train_speed(iter/s)": 0.080883 |
| }, |
| { |
| "epoch": 1.3309516312640783, |
| "grad_norm": 0.5453392267227173, |
| "learning_rate": 6.326565402412035e-06, |
| "loss": 0.24571163654327394, |
| "memory(GiB)": 72.48, |
| "step": 3435, |
| "token_acc": 0.9219789446489698, |
| "train_speed(iter/s)": 0.080871 |
| }, |
| { |
| "epoch": 1.3328892871848281, |
| "grad_norm": 0.6027646064758301, |
| "learning_rate": 6.3162638478229965e-06, |
| "loss": 0.24323391914367676, |
| "memory(GiB)": 72.48, |
| "step": 3440, |
| "token_acc": 0.9171201888400761, |
| "train_speed(iter/s)": 0.080879 |
| }, |
| { |
| "epoch": 1.3348269431055781, |
| "grad_norm": 0.6076679825782776, |
| "learning_rate": 6.305956286292352e-06, |
| "loss": 0.2376950979232788, |
| "memory(GiB)": 72.48, |
| "step": 3445, |
| "token_acc": 0.912549107285445, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 1.336764599026328, |
| "grad_norm": 0.6235714554786682, |
| "learning_rate": 6.29564276486e-06, |
| "loss": 0.25636212825775145, |
| "memory(GiB)": 72.48, |
| "step": 3450, |
| "token_acc": 0.9180037284924055, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 1.3387022549470777, |
| "grad_norm": 0.6193718910217285, |
| "learning_rate": 6.285323330593042e-06, |
| "loss": 0.247752046585083, |
| "memory(GiB)": 72.48, |
| "step": 3455, |
| "token_acc": 0.9203055292686224, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3406399108678277, |
| "grad_norm": 0.5753944516181946, |
| "learning_rate": 6.274998030585559e-06, |
| "loss": 0.24330606460571289, |
| "memory(GiB)": 72.48, |
| "step": 3460, |
| "token_acc": 0.9098618296116823, |
| "train_speed(iter/s)": 0.080872 |
| }, |
| { |
| "epoch": 1.3425775667885775, |
| "grad_norm": 0.5862641334533691, |
| "learning_rate": 6.264666911958404e-06, |
| "loss": 0.23821985721588135, |
| "memory(GiB)": 72.48, |
| "step": 3465, |
| "token_acc": 0.9153970303421562, |
| "train_speed(iter/s)": 0.080872 |
| }, |
| { |
| "epoch": 1.3445152227093273, |
| "grad_norm": 0.602203369140625, |
| "learning_rate": 6.254330021858985e-06, |
| "loss": 0.24624221324920653, |
| "memory(GiB)": 72.48, |
| "step": 3470, |
| "token_acc": 0.9169319741799807, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3464528786300773, |
| "grad_norm": 0.5835914611816406, |
| "learning_rate": 6.243987407461044e-06, |
| "loss": 0.24328317642211914, |
| "memory(GiB)": 72.48, |
| "step": 3475, |
| "token_acc": 0.9154203718674212, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 1.3483905345508271, |
| "grad_norm": 0.5612725019454956, |
| "learning_rate": 6.233639115964454e-06, |
| "loss": 0.23664026260375975, |
| "memory(GiB)": 72.48, |
| "step": 3480, |
| "token_acc": 0.9206032300616778, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 1.350328190471577, |
| "grad_norm": 0.6184651851654053, |
| "learning_rate": 6.223285194594986e-06, |
| "loss": 0.2517274856567383, |
| "memory(GiB)": 72.48, |
| "step": 3485, |
| "token_acc": 0.9179711323439723, |
| "train_speed(iter/s)": 0.080875 |
| }, |
| { |
| "epoch": 1.352265846392327, |
| "grad_norm": 0.5319633483886719, |
| "learning_rate": 6.212925690604113e-06, |
| "loss": 0.25687355995178224, |
| "memory(GiB)": 72.48, |
| "step": 3490, |
| "token_acc": 0.9110232762406676, |
| "train_speed(iter/s)": 0.080874 |
| }, |
| { |
| "epoch": 1.3542035023130767, |
| "grad_norm": 0.6207578182220459, |
| "learning_rate": 6.2025606512687816e-06, |
| "loss": 0.23406295776367186, |
| "memory(GiB)": 72.48, |
| "step": 3495, |
| "token_acc": 0.9191729323308271, |
| "train_speed(iter/s)": 0.080872 |
| }, |
| { |
| "epoch": 1.3561411582338265, |
| "grad_norm": 0.5518024563789368, |
| "learning_rate": 6.192190123891201e-06, |
| "loss": 0.2398101806640625, |
| "memory(GiB)": 72.48, |
| "step": 3500, |
| "token_acc": 0.9174518777762821, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 1.3580788141545765, |
| "grad_norm": 0.5900284051895142, |
| "learning_rate": 6.18181415579862e-06, |
| "loss": 0.22111029624938966, |
| "memory(GiB)": 72.48, |
| "step": 3505, |
| "token_acc": 0.9129341398762999, |
| "train_speed(iter/s)": 0.080889 |
| }, |
| { |
| "epoch": 1.3600164700753263, |
| "grad_norm": 0.5826399922370911, |
| "learning_rate": 6.1714327943431255e-06, |
| "loss": 0.24394874572753905, |
| "memory(GiB)": 72.48, |
| "step": 3510, |
| "token_acc": 0.9186270406027627, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 1.3619541259960761, |
| "grad_norm": 0.5645210146903992, |
| "learning_rate": 6.1610460869014096e-06, |
| "loss": 0.23350658416748046, |
| "memory(GiB)": 72.48, |
| "step": 3515, |
| "token_acc": 0.9214305633017289, |
| "train_speed(iter/s)": 0.080888 |
| }, |
| { |
| "epoch": 1.3638917819168261, |
| "grad_norm": 0.5921911597251892, |
| "learning_rate": 6.150654080874569e-06, |
| "loss": 0.23872621059417726, |
| "memory(GiB)": 72.48, |
| "step": 3520, |
| "token_acc": 0.9154886606325638, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 1.365829437837576, |
| "grad_norm": 0.6218807697296143, |
| "learning_rate": 6.140256823687875e-06, |
| "loss": 0.24797072410583496, |
| "memory(GiB)": 72.48, |
| "step": 3525, |
| "token_acc": 0.9094979818365287, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 1.367767093758326, |
| "grad_norm": 0.5517158508300781, |
| "learning_rate": 6.129854362790567e-06, |
| "loss": 0.23862147331237793, |
| "memory(GiB)": 72.48, |
| "step": 3530, |
| "token_acc": 0.9226441179307462, |
| "train_speed(iter/s)": 0.080898 |
| }, |
| { |
| "epoch": 1.3697047496790757, |
| "grad_norm": 0.5724954009056091, |
| "learning_rate": 6.1194467456556305e-06, |
| "loss": 0.22112350463867186, |
| "memory(GiB)": 72.48, |
| "step": 3535, |
| "token_acc": 0.917589736399327, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 1.3716424055998255, |
| "grad_norm": 0.5898615121841431, |
| "learning_rate": 6.109034019779583e-06, |
| "loss": 0.2419571876525879, |
| "memory(GiB)": 72.48, |
| "step": 3540, |
| "token_acc": 0.9103599797194524, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 1.3735800615205755, |
| "grad_norm": 0.5712639689445496, |
| "learning_rate": 6.098616232682255e-06, |
| "loss": 0.24022350311279297, |
| "memory(GiB)": 72.48, |
| "step": 3545, |
| "token_acc": 0.9193883397794013, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 1.3755177174413253, |
| "grad_norm": 0.6036831736564636, |
| "learning_rate": 6.088193431906576e-06, |
| "loss": 0.2510042428970337, |
| "memory(GiB)": 72.48, |
| "step": 3550, |
| "token_acc": 0.9242049814658256, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 1.3774553733620754, |
| "grad_norm": 0.5672377347946167, |
| "learning_rate": 6.077765665018356e-06, |
| "loss": 0.24461116790771484, |
| "memory(GiB)": 72.48, |
| "step": 3555, |
| "token_acc": 0.9190039318479686, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 1.3793930292828251, |
| "grad_norm": 0.6056391000747681, |
| "learning_rate": 6.067332979606069e-06, |
| "loss": 0.24074273109436034, |
| "memory(GiB)": 72.48, |
| "step": 3560, |
| "token_acc": 0.9218976658314652, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 1.381330685203575, |
| "grad_norm": 0.5587301850318909, |
| "learning_rate": 6.0568954232806335e-06, |
| "loss": 0.24055736064910888, |
| "memory(GiB)": 72.48, |
| "step": 3565, |
| "token_acc": 0.9175392670157068, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 1.383268341124325, |
| "grad_norm": 0.5654682517051697, |
| "learning_rate": 6.046453043675197e-06, |
| "loss": 0.2507538557052612, |
| "memory(GiB)": 72.48, |
| "step": 3570, |
| "token_acc": 0.9204025918494886, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 1.3852059970450747, |
| "grad_norm": 0.5490247011184692, |
| "learning_rate": 6.036005888444922e-06, |
| "loss": 0.2445591926574707, |
| "memory(GiB)": 72.48, |
| "step": 3575, |
| "token_acc": 0.9217004355235751, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 1.3871436529658245, |
| "grad_norm": 0.6079689264297485, |
| "learning_rate": 6.025554005266761e-06, |
| "loss": 0.24872751235961915, |
| "memory(GiB)": 72.48, |
| "step": 3580, |
| "token_acc": 0.9157847533632287, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 1.3890813088865746, |
| "grad_norm": 0.5383176207542419, |
| "learning_rate": 6.015097441839246e-06, |
| "loss": 0.24193205833435058, |
| "memory(GiB)": 72.48, |
| "step": 3585, |
| "token_acc": 0.9197718753003749, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 1.3910189648073243, |
| "grad_norm": 0.5508478283882141, |
| "learning_rate": 6.004636245882265e-06, |
| "loss": 0.23985228538513184, |
| "memory(GiB)": 72.48, |
| "step": 3590, |
| "token_acc": 0.915475677288855, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 1.3929566207280741, |
| "grad_norm": 0.5500128865242004, |
| "learning_rate": 5.994170465136853e-06, |
| "loss": 0.23378024101257325, |
| "memory(GiB)": 72.48, |
| "step": 3595, |
| "token_acc": 0.9231037610773847, |
| "train_speed(iter/s)": 0.080899 |
| }, |
| { |
| "epoch": 1.3948942766488241, |
| "grad_norm": 0.564172089099884, |
| "learning_rate": 5.98370014736496e-06, |
| "loss": 0.24442434310913086, |
| "memory(GiB)": 72.48, |
| "step": 3600, |
| "token_acc": 0.9116323797094481, |
| "train_speed(iter/s)": 0.080899 |
| }, |
| { |
| "epoch": 1.396831932569574, |
| "grad_norm": 0.6100159287452698, |
| "learning_rate": 5.97322534034925e-06, |
| "loss": 0.2467043399810791, |
| "memory(GiB)": 72.48, |
| "step": 3605, |
| "token_acc": 0.913891419509397, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.3987695884903237, |
| "grad_norm": 0.5676229596138, |
| "learning_rate": 5.962746091892866e-06, |
| "loss": 0.23828344345092772, |
| "memory(GiB)": 72.48, |
| "step": 3610, |
| "token_acc": 0.9075207695671185, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 1.4007072444110737, |
| "grad_norm": 0.5872523784637451, |
| "learning_rate": 5.952262449819225e-06, |
| "loss": 0.25522215366363527, |
| "memory(GiB)": 72.48, |
| "step": 3615, |
| "token_acc": 0.9063737962575285, |
| "train_speed(iter/s)": 0.080877 |
| }, |
| { |
| "epoch": 1.4026449003318235, |
| "grad_norm": 0.593393862247467, |
| "learning_rate": 5.941774461971794e-06, |
| "loss": 0.2511239290237427, |
| "memory(GiB)": 72.48, |
| "step": 3620, |
| "token_acc": 0.9169553533849192, |
| "train_speed(iter/s)": 0.080877 |
| }, |
| { |
| "epoch": 1.4045825562525733, |
| "grad_norm": 0.543071448802948, |
| "learning_rate": 5.931282176213875e-06, |
| "loss": 0.24414536952972413, |
| "memory(GiB)": 72.48, |
| "step": 3625, |
| "token_acc": 0.9186960812402508, |
| "train_speed(iter/s)": 0.080872 |
| }, |
| { |
| "epoch": 1.4065202121733233, |
| "grad_norm": 0.5903857946395874, |
| "learning_rate": 5.920785640428377e-06, |
| "loss": 0.23714954853057862, |
| "memory(GiB)": 72.48, |
| "step": 3630, |
| "token_acc": 0.9076739427012278, |
| "train_speed(iter/s)": 0.080883 |
| }, |
| { |
| "epoch": 1.4084578680940731, |
| "grad_norm": 0.5597670078277588, |
| "learning_rate": 5.910284902517614e-06, |
| "loss": 0.24124536514282227, |
| "memory(GiB)": 72.48, |
| "step": 3635, |
| "token_acc": 0.9245324857713061, |
| "train_speed(iter/s)": 0.080888 |
| }, |
| { |
| "epoch": 1.410395524014823, |
| "grad_norm": 0.56638503074646, |
| "learning_rate": 5.899780010403066e-06, |
| "loss": 0.23916149139404297, |
| "memory(GiB)": 72.48, |
| "step": 3640, |
| "token_acc": 0.9108338966386038, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 1.412333179935573, |
| "grad_norm": 0.5485346913337708, |
| "learning_rate": 5.8892710120251806e-06, |
| "loss": 0.23300790786743164, |
| "memory(GiB)": 72.48, |
| "step": 3645, |
| "token_acc": 0.9207074721780604, |
| "train_speed(iter/s)": 0.080893 |
| }, |
| { |
| "epoch": 1.4142708358563227, |
| "grad_norm": 0.5811129808425903, |
| "learning_rate": 5.87875795534314e-06, |
| "loss": 0.23677351474761962, |
| "memory(GiB)": 72.48, |
| "step": 3650, |
| "token_acc": 0.9236622904740684, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 1.4162084917770728, |
| "grad_norm": 0.5233686566352844, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.24698786735534667, |
| "memory(GiB)": 72.48, |
| "step": 3655, |
| "token_acc": 0.909466098919096, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.4181461476978225, |
| "grad_norm": 0.5673002004623413, |
| "learning_rate": 5.85771985899572e-06, |
| "loss": 0.24160895347595215, |
| "memory(GiB)": 72.48, |
| "step": 3660, |
| "token_acc": 0.9140604973083825, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.4200838036185726, |
| "grad_norm": 0.520982563495636, |
| "learning_rate": 5.847194915340432e-06, |
| "loss": 0.2456125497817993, |
| "memory(GiB)": 72.48, |
| "step": 3665, |
| "token_acc": 0.9184846596024655, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.4220214595393224, |
| "grad_norm": 0.5889917612075806, |
| "learning_rate": 5.836666105400739e-06, |
| "loss": 0.2436441659927368, |
| "memory(GiB)": 72.48, |
| "step": 3670, |
| "token_acc": 0.9159888877732034, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 1.4239591154600721, |
| "grad_norm": 0.5952669382095337, |
| "learning_rate": 5.826133477226239e-06, |
| "loss": 0.24678261280059816, |
| "memory(GiB)": 72.48, |
| "step": 3675, |
| "token_acc": 0.9162605668821482, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.4258967713808222, |
| "grad_norm": 0.5867316126823425, |
| "learning_rate": 5.815597078883955e-06, |
| "loss": 0.24308767318725585, |
| "memory(GiB)": 72.48, |
| "step": 3680, |
| "token_acc": 0.9156236666192575, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 1.427834427301572, |
| "grad_norm": 0.5791524648666382, |
| "learning_rate": 5.805056958458111e-06, |
| "loss": 0.23985023498535157, |
| "memory(GiB)": 72.48, |
| "step": 3685, |
| "token_acc": 0.9216729778596527, |
| "train_speed(iter/s)": 0.080893 |
| }, |
| { |
| "epoch": 1.4297720832223217, |
| "grad_norm": 0.6174024343490601, |
| "learning_rate": 5.79451316404992e-06, |
| "loss": 0.24676356315612794, |
| "memory(GiB)": 72.48, |
| "step": 3690, |
| "token_acc": 0.915408156476615, |
| "train_speed(iter/s)": 0.080903 |
| }, |
| { |
| "epoch": 1.4317097391430718, |
| "grad_norm": 0.563883364200592, |
| "learning_rate": 5.7839657437773644e-06, |
| "loss": 0.23523716926574706, |
| "memory(GiB)": 72.48, |
| "step": 3695, |
| "token_acc": 0.917732751608371, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 1.4336473950638216, |
| "grad_norm": 0.5705844759941101, |
| "learning_rate": 5.77341474577497e-06, |
| "loss": 0.23880462646484374, |
| "memory(GiB)": 72.48, |
| "step": 3700, |
| "token_acc": 0.9202369333288459, |
| "train_speed(iter/s)": 0.080898 |
| }, |
| { |
| "epoch": 1.4355850509845713, |
| "grad_norm": 0.5820704698562622, |
| "learning_rate": 5.76286021819359e-06, |
| "loss": 0.23488302230834962, |
| "memory(GiB)": 72.48, |
| "step": 3705, |
| "token_acc": 0.9148230088495575, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 1.4375227069053214, |
| "grad_norm": 0.5883727073669434, |
| "learning_rate": 5.752302209200187e-06, |
| "loss": 0.2560927391052246, |
| "memory(GiB)": 72.48, |
| "step": 3710, |
| "token_acc": 0.9100109639875179, |
| "train_speed(iter/s)": 0.080901 |
| }, |
| { |
| "epoch": 1.4394603628260711, |
| "grad_norm": 0.5630972385406494, |
| "learning_rate": 5.7417407669776135e-06, |
| "loss": 0.23473753929138183, |
| "memory(GiB)": 72.48, |
| "step": 3715, |
| "token_acc": 0.9217704758933832, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 1.441398018746821, |
| "grad_norm": 0.5839196443557739, |
| "learning_rate": 5.731175939724384e-06, |
| "loss": 0.24403223991394044, |
| "memory(GiB)": 72.48, |
| "step": 3720, |
| "token_acc": 0.922809604043808, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 1.443335674667571, |
| "grad_norm": 0.5882570147514343, |
| "learning_rate": 5.720607775654467e-06, |
| "loss": 0.23943960666656494, |
| "memory(GiB)": 72.48, |
| "step": 3725, |
| "token_acc": 0.9180094480036239, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 1.4452733305883207, |
| "grad_norm": 0.6032676100730896, |
| "learning_rate": 5.710036322997055e-06, |
| "loss": 0.24937214851379394, |
| "memory(GiB)": 72.48, |
| "step": 3730, |
| "token_acc": 0.9147170043348603, |
| "train_speed(iter/s)": 0.080893 |
| }, |
| { |
| "epoch": 1.4472109865090705, |
| "grad_norm": 0.5497545599937439, |
| "learning_rate": 5.699461629996349e-06, |
| "loss": 0.2398749828338623, |
| "memory(GiB)": 72.48, |
| "step": 3735, |
| "token_acc": 0.9171464422793032, |
| "train_speed(iter/s)": 0.080894 |
| }, |
| { |
| "epoch": 1.4491486424298206, |
| "grad_norm": 0.5942593216896057, |
| "learning_rate": 5.68888374491134e-06, |
| "loss": 0.2407346248626709, |
| "memory(GiB)": 72.48, |
| "step": 3740, |
| "token_acc": 0.9223353884933747, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 1.4510862983505703, |
| "grad_norm": 0.5797078013420105, |
| "learning_rate": 5.678302716015586e-06, |
| "loss": 0.25144505500793457, |
| "memory(GiB)": 72.48, |
| "step": 3745, |
| "token_acc": 0.9161485407535759, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 1.4530239542713201, |
| "grad_norm": 0.6159892678260803, |
| "learning_rate": 5.66771859159699e-06, |
| "loss": 0.24453871250152587, |
| "memory(GiB)": 72.48, |
| "step": 3750, |
| "token_acc": 0.9172922514524446, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 1.4549616101920702, |
| "grad_norm": 0.6003066897392273, |
| "learning_rate": 5.6571314199575845e-06, |
| "loss": 0.2406759262084961, |
| "memory(GiB)": 72.48, |
| "step": 3755, |
| "token_acc": 0.9244022744399534, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 1.45689926611282, |
| "grad_norm": 0.5446083545684814, |
| "learning_rate": 5.646541249413304e-06, |
| "loss": 0.24054651260375975, |
| "memory(GiB)": 72.48, |
| "step": 3760, |
| "token_acc": 0.9230030296312717, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 1.45883692203357, |
| "grad_norm": 0.5630480647087097, |
| "learning_rate": 5.635948128293775e-06, |
| "loss": 0.25225830078125, |
| "memory(GiB)": 72.48, |
| "step": 3765, |
| "token_acc": 0.9266996208017335, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 1.4607745779543198, |
| "grad_norm": 0.6038119792938232, |
| "learning_rate": 5.625352104942085e-06, |
| "loss": 0.24250342845916747, |
| "memory(GiB)": 72.48, |
| "step": 3770, |
| "token_acc": 0.9214726151614073, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 1.4627122338750698, |
| "grad_norm": 0.5660669803619385, |
| "learning_rate": 5.614753227714567e-06, |
| "loss": 0.23111968040466307, |
| "memory(GiB)": 72.48, |
| "step": 3775, |
| "token_acc": 0.9160903040584627, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 1.4646498897958196, |
| "grad_norm": 0.6035475730895996, |
| "learning_rate": 5.6041515449805804e-06, |
| "loss": 0.24287838935852052, |
| "memory(GiB)": 72.48, |
| "step": 3780, |
| "token_acc": 0.9190956330224715, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 1.4665875457165694, |
| "grad_norm": 0.5913589596748352, |
| "learning_rate": 5.5935471051222844e-06, |
| "loss": 0.24291999340057374, |
| "memory(GiB)": 72.48, |
| "step": 3785, |
| "token_acc": 0.9209064512056558, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 1.4685252016373194, |
| "grad_norm": 0.6146913766860962, |
| "learning_rate": 5.582939956534421e-06, |
| "loss": 0.2419736385345459, |
| "memory(GiB)": 72.48, |
| "step": 3790, |
| "token_acc": 0.9155126180068325, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 1.4704628575580692, |
| "grad_norm": 0.5897228121757507, |
| "learning_rate": 5.572330147624097e-06, |
| "loss": 0.23940186500549315, |
| "memory(GiB)": 72.48, |
| "step": 3795, |
| "token_acc": 0.9194137976870929, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.472400513478819, |
| "grad_norm": 0.5833806991577148, |
| "learning_rate": 5.561717726810557e-06, |
| "loss": 0.2410250186920166, |
| "memory(GiB)": 72.48, |
| "step": 3800, |
| "token_acc": 0.9106603227673118, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 1.474338169399569, |
| "grad_norm": 0.5665867924690247, |
| "learning_rate": 5.551102742524967e-06, |
| "loss": 0.23539307117462158, |
| "memory(GiB)": 72.48, |
| "step": 3805, |
| "token_acc": 0.9179119150609516, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 1.4762758253203188, |
| "grad_norm": 0.5832652449607849, |
| "learning_rate": 5.540485243210194e-06, |
| "loss": 0.23438220024108886, |
| "memory(GiB)": 72.48, |
| "step": 3810, |
| "token_acc": 0.9260935143288085, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 1.4782134812410685, |
| "grad_norm": 0.5857250690460205, |
| "learning_rate": 5.529865277320575e-06, |
| "loss": 0.2527660846710205, |
| "memory(GiB)": 72.48, |
| "step": 3815, |
| "token_acc": 0.9062848751835536, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 1.4801511371618186, |
| "grad_norm": 0.5673555135726929, |
| "learning_rate": 5.51924289332171e-06, |
| "loss": 0.24681317806243896, |
| "memory(GiB)": 72.48, |
| "step": 3820, |
| "token_acc": 0.9058411144350769, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 1.4820887930825684, |
| "grad_norm": 0.5933298468589783, |
| "learning_rate": 5.5086181396902335e-06, |
| "loss": 0.2507610321044922, |
| "memory(GiB)": 72.48, |
| "step": 3825, |
| "token_acc": 0.9154622988259644, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.4840264490033181, |
| "grad_norm": 0.5970496535301208, |
| "learning_rate": 5.497991064913592e-06, |
| "loss": 0.23358287811279296, |
| "memory(GiB)": 72.48, |
| "step": 3830, |
| "token_acc": 0.9168089297439265, |
| "train_speed(iter/s)": 0.080929 |
| }, |
| { |
| "epoch": 1.4859641049240682, |
| "grad_norm": 0.5848801732063293, |
| "learning_rate": 5.487361717489828e-06, |
| "loss": 0.24907338619232178, |
| "memory(GiB)": 72.48, |
| "step": 3835, |
| "token_acc": 0.9162323031913104, |
| "train_speed(iter/s)": 0.080934 |
| }, |
| { |
| "epoch": 1.487901760844818, |
| "grad_norm": 0.5591345429420471, |
| "learning_rate": 5.476730145927354e-06, |
| "loss": 0.24457969665527343, |
| "memory(GiB)": 72.48, |
| "step": 3840, |
| "token_acc": 0.9261170157878444, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 1.4898394167655677, |
| "grad_norm": 0.5818787217140198, |
| "learning_rate": 5.46609639874473e-06, |
| "loss": 0.2569440841674805, |
| "memory(GiB)": 72.48, |
| "step": 3845, |
| "token_acc": 0.9123161416577237, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.4917770726863178, |
| "grad_norm": 0.5587847828865051, |
| "learning_rate": 5.455460524470447e-06, |
| "loss": 0.23373932838439943, |
| "memory(GiB)": 72.48, |
| "step": 3850, |
| "token_acc": 0.9148546222664016, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 1.4937147286070676, |
| "grad_norm": 0.6207402944564819, |
| "learning_rate": 5.444822571642705e-06, |
| "loss": 0.24684290885925292, |
| "memory(GiB)": 72.48, |
| "step": 3855, |
| "token_acc": 0.9198185556244493, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 1.4956523845278173, |
| "grad_norm": 0.5635160207748413, |
| "learning_rate": 5.434182588809187e-06, |
| "loss": 0.23250012397766112, |
| "memory(GiB)": 72.48, |
| "step": 3860, |
| "token_acc": 0.9091337892928136, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 1.4975900404485674, |
| "grad_norm": 0.5980255603790283, |
| "learning_rate": 5.423540624526843e-06, |
| "loss": 0.23713982105255127, |
| "memory(GiB)": 72.48, |
| "step": 3865, |
| "token_acc": 0.9194705073806251, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.4995276963693172, |
| "grad_norm": 0.6113118529319763, |
| "learning_rate": 5.412896727361663e-06, |
| "loss": 0.24681100845336915, |
| "memory(GiB)": 72.48, |
| "step": 3870, |
| "token_acc": 0.9220164887146912, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 1.501465352290067, |
| "grad_norm": 0.5499523878097534, |
| "learning_rate": 5.402250945888457e-06, |
| "loss": 0.2583901882171631, |
| "memory(GiB)": 72.48, |
| "step": 3875, |
| "token_acc": 0.9158980665436907, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 1.503403008210817, |
| "grad_norm": 0.5971123576164246, |
| "learning_rate": 5.391603328690639e-06, |
| "loss": 0.23753485679626465, |
| "memory(GiB)": 72.48, |
| "step": 3880, |
| "token_acc": 0.9205142428296408, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 1.505340664131567, |
| "grad_norm": 0.5578671097755432, |
| "learning_rate": 5.380953924359995e-06, |
| "loss": 0.24026894569396973, |
| "memory(GiB)": 72.48, |
| "step": 3885, |
| "token_acc": 0.9254251914710007, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.5072783200523165, |
| "grad_norm": 0.6061117053031921, |
| "learning_rate": 5.370302781496471e-06, |
| "loss": 0.24199223518371582, |
| "memory(GiB)": 72.48, |
| "step": 3890, |
| "token_acc": 0.9121084317954992, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.5092159759730666, |
| "grad_norm": 0.5695658922195435, |
| "learning_rate": 5.3596499487079466e-06, |
| "loss": 0.24618167877197267, |
| "memory(GiB)": 72.48, |
| "step": 3895, |
| "token_acc": 0.9203536977491962, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 1.5111536318938166, |
| "grad_norm": 0.6070546507835388, |
| "learning_rate": 5.348995474610011e-06, |
| "loss": 0.23130958080291747, |
| "memory(GiB)": 72.48, |
| "step": 3900, |
| "token_acc": 0.9138054830287207, |
| "train_speed(iter/s)": 0.080925 |
| }, |
| { |
| "epoch": 1.5130912878145664, |
| "grad_norm": 0.5819863677024841, |
| "learning_rate": 5.338339407825746e-06, |
| "loss": 0.2405244827270508, |
| "memory(GiB)": 72.48, |
| "step": 3905, |
| "token_acc": 0.9126821826208329, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 1.5150289437353162, |
| "grad_norm": 0.5835677981376648, |
| "learning_rate": 5.3276817969855e-06, |
| "loss": 0.24713582992553712, |
| "memory(GiB)": 72.48, |
| "step": 3910, |
| "token_acc": 0.9214152938914172, |
| "train_speed(iter/s)": 0.080929 |
| }, |
| { |
| "epoch": 1.5169665996560662, |
| "grad_norm": 0.5336800217628479, |
| "learning_rate": 5.317022690726669e-06, |
| "loss": 0.2448024034500122, |
| "memory(GiB)": 72.48, |
| "step": 3915, |
| "token_acc": 0.9104163794292017, |
| "train_speed(iter/s)": 0.080936 |
| }, |
| { |
| "epoch": 1.518904255576816, |
| "grad_norm": 0.5947490930557251, |
| "learning_rate": 5.306362137693473e-06, |
| "loss": 0.23219313621520996, |
| "memory(GiB)": 72.48, |
| "step": 3920, |
| "token_acc": 0.9211144568346514, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 1.5208419114975658, |
| "grad_norm": 0.6150482296943665, |
| "learning_rate": 5.295700186536739e-06, |
| "loss": 0.24512362480163574, |
| "memory(GiB)": 72.48, |
| "step": 3925, |
| "token_acc": 0.9168956760705465, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.5227795674183158, |
| "grad_norm": 0.5676717758178711, |
| "learning_rate": 5.2850368859136666e-06, |
| "loss": 0.23571786880493165, |
| "memory(GiB)": 72.48, |
| "step": 3930, |
| "token_acc": 0.9132034377207441, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.5247172233390656, |
| "grad_norm": 0.6027458906173706, |
| "learning_rate": 5.274372284487619e-06, |
| "loss": 0.23536810874938965, |
| "memory(GiB)": 72.48, |
| "step": 3935, |
| "token_acc": 0.9179603899853925, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.5266548792598154, |
| "grad_norm": 0.6239861249923706, |
| "learning_rate": 5.263706430927895e-06, |
| "loss": 0.22824177742004395, |
| "memory(GiB)": 72.48, |
| "step": 3940, |
| "token_acc": 0.9203759531831885, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.5285925351805654, |
| "grad_norm": 0.6263760924339294, |
| "learning_rate": 5.253039373909507e-06, |
| "loss": 0.2348174571990967, |
| "memory(GiB)": 72.48, |
| "step": 3945, |
| "token_acc": 0.9155720509924157, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 1.5305301911013152, |
| "grad_norm": 0.546149492263794, |
| "learning_rate": 5.242371162112958e-06, |
| "loss": 0.22561445236206054, |
| "memory(GiB)": 72.48, |
| "step": 3950, |
| "token_acc": 0.9153545911616829, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 1.532467847022065, |
| "grad_norm": 0.5849190950393677, |
| "learning_rate": 5.2317018442240255e-06, |
| "loss": 0.23886852264404296, |
| "memory(GiB)": 72.48, |
| "step": 3955, |
| "token_acc": 0.9273893393592981, |
| "train_speed(iter/s)": 0.080925 |
| }, |
| { |
| "epoch": 1.534405502942815, |
| "grad_norm": 0.5801311731338501, |
| "learning_rate": 5.221031468933532e-06, |
| "loss": 0.24041290283203126, |
| "memory(GiB)": 72.48, |
| "step": 3960, |
| "token_acc": 0.9221912783589187, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 1.5363431588635648, |
| "grad_norm": 0.5716031193733215, |
| "learning_rate": 5.210360084937125e-06, |
| "loss": 0.21993811130523683, |
| "memory(GiB)": 72.48, |
| "step": 3965, |
| "token_acc": 0.9108157099697886, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 1.5382808147843146, |
| "grad_norm": 0.5501062870025635, |
| "learning_rate": 5.199687740935057e-06, |
| "loss": 0.23140015602111816, |
| "memory(GiB)": 72.48, |
| "step": 3970, |
| "token_acc": 0.9091976870256595, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.5402184707050646, |
| "grad_norm": 0.5458846092224121, |
| "learning_rate": 5.189014485631957e-06, |
| "loss": 0.2395151138305664, |
| "memory(GiB)": 72.48, |
| "step": 3975, |
| "token_acc": 0.9074286778629745, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 1.5421561266258146, |
| "grad_norm": 0.5497922301292419, |
| "learning_rate": 5.178340367736621e-06, |
| "loss": 0.24570670127868652, |
| "memory(GiB)": 72.48, |
| "step": 3980, |
| "token_acc": 0.9208731241473397, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 1.5440937825465642, |
| "grad_norm": 0.5686230659484863, |
| "learning_rate": 5.167665435961774e-06, |
| "loss": 0.23753948211669923, |
| "memory(GiB)": 72.48, |
| "step": 3985, |
| "token_acc": 0.9165367101241345, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 1.5460314384673142, |
| "grad_norm": 0.5919835567474365, |
| "learning_rate": 5.156989739023861e-06, |
| "loss": 0.2217550754547119, |
| "memory(GiB)": 72.48, |
| "step": 3990, |
| "token_acc": 0.9181872307638427, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 1.5479690943880642, |
| "grad_norm": 0.5866130590438843, |
| "learning_rate": 5.146313325642814e-06, |
| "loss": 0.24096102714538575, |
| "memory(GiB)": 72.48, |
| "step": 3995, |
| "token_acc": 0.9132077771682818, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 1.5499067503088138, |
| "grad_norm": 0.5747529864311218, |
| "learning_rate": 5.1356362445418395e-06, |
| "loss": 0.23785798549652098, |
| "memory(GiB)": 72.48, |
| "step": 4000, |
| "token_acc": 0.9139216157730223, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.5518444062295638, |
| "grad_norm": 0.5678396821022034, |
| "learning_rate": 5.124958544447185e-06, |
| "loss": 0.2483994483947754, |
| "memory(GiB)": 72.48, |
| "step": 4005, |
| "token_acc": 0.9185373042256417, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.5537820621503138, |
| "grad_norm": 0.5727476477622986, |
| "learning_rate": 5.1142802740879285e-06, |
| "loss": 0.2249526262283325, |
| "memory(GiB)": 72.48, |
| "step": 4010, |
| "token_acc": 0.9195504694031384, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.5557197180710636, |
| "grad_norm": 0.5712579488754272, |
| "learning_rate": 5.103601482195748e-06, |
| "loss": 0.2431882858276367, |
| "memory(GiB)": 72.48, |
| "step": 4015, |
| "token_acc": 0.912086790912801, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.5576573739918134, |
| "grad_norm": 0.5597922205924988, |
| "learning_rate": 5.0929222175047025e-06, |
| "loss": 0.23807837963104247, |
| "memory(GiB)": 72.48, |
| "step": 4020, |
| "token_acc": 0.9209720305610717, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 1.5595950299125634, |
| "grad_norm": 0.6336641311645508, |
| "learning_rate": 5.082242528751008e-06, |
| "loss": 0.23821985721588135, |
| "memory(GiB)": 72.48, |
| "step": 4025, |
| "token_acc": 0.9157320502829261, |
| "train_speed(iter/s)": 0.080972 |
| }, |
| { |
| "epoch": 1.5615326858333132, |
| "grad_norm": 0.6377075910568237, |
| "learning_rate": 5.071562464672815e-06, |
| "loss": 0.24726173877716065, |
| "memory(GiB)": 72.48, |
| "step": 4030, |
| "token_acc": 0.9214040816326531, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.563470341754063, |
| "grad_norm": 0.554710865020752, |
| "learning_rate": 5.060882074009988e-06, |
| "loss": 0.24027609825134277, |
| "memory(GiB)": 72.48, |
| "step": 4035, |
| "token_acc": 0.9306184489692517, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.565407997674813, |
| "grad_norm": 0.5918537378311157, |
| "learning_rate": 5.050201405503883e-06, |
| "loss": 0.2343815803527832, |
| "memory(GiB)": 72.48, |
| "step": 4040, |
| "token_acc": 0.9282309322033898, |
| "train_speed(iter/s)": 0.080959 |
| }, |
| { |
| "epoch": 1.5673456535955628, |
| "grad_norm": 0.56691575050354, |
| "learning_rate": 5.039520507897121e-06, |
| "loss": 0.23046693801879883, |
| "memory(GiB)": 72.48, |
| "step": 4045, |
| "token_acc": 0.9246655031995347, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 1.5692833095163126, |
| "grad_norm": 0.5462111234664917, |
| "learning_rate": 5.02883942993337e-06, |
| "loss": 0.23171257972717285, |
| "memory(GiB)": 72.48, |
| "step": 4050, |
| "token_acc": 0.9142313359528488, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.5712209654370626, |
| "grad_norm": 0.6200662851333618, |
| "learning_rate": 5.0181582203571245e-06, |
| "loss": 0.24694461822509767, |
| "memory(GiB)": 72.48, |
| "step": 4055, |
| "token_acc": 0.9075337364830647, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.5731586213578124, |
| "grad_norm": 0.5647085309028625, |
| "learning_rate": 5.007476927913473e-06, |
| "loss": 0.23658227920532227, |
| "memory(GiB)": 72.48, |
| "step": 4060, |
| "token_acc": 0.9197628187551412, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 1.5750962772785622, |
| "grad_norm": 0.5532594919204712, |
| "learning_rate": 4.996795601347885e-06, |
| "loss": 0.2439354658126831, |
| "memory(GiB)": 72.48, |
| "step": 4065, |
| "token_acc": 0.9192622409924772, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.5770339331993122, |
| "grad_norm": 0.5520694255828857, |
| "learning_rate": 4.9861142894059906e-06, |
| "loss": 0.23830606937408447, |
| "memory(GiB)": 72.48, |
| "step": 4070, |
| "token_acc": 0.923468251021166, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.578971589120062, |
| "grad_norm": 0.5831983089447021, |
| "learning_rate": 4.975433040833344e-06, |
| "loss": 0.24065241813659669, |
| "memory(GiB)": 72.48, |
| "step": 4075, |
| "token_acc": 0.9181812322763598, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 1.5809092450408118, |
| "grad_norm": 0.6240581274032593, |
| "learning_rate": 4.964751904375217e-06, |
| "loss": 0.24529509544372557, |
| "memory(GiB)": 72.48, |
| "step": 4080, |
| "token_acc": 0.9190409564996184, |
| "train_speed(iter/s)": 0.080944 |
| }, |
| { |
| "epoch": 1.5828469009615618, |
| "grad_norm": 0.5469648241996765, |
| "learning_rate": 4.9540709287763685e-06, |
| "loss": 0.23772037029266357, |
| "memory(GiB)": 72.48, |
| "step": 4085, |
| "token_acc": 0.9185966835685898, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 1.5847845568823116, |
| "grad_norm": 0.5892153978347778, |
| "learning_rate": 4.943390162780823e-06, |
| "loss": 0.25028512477874754, |
| "memory(GiB)": 72.48, |
| "step": 4090, |
| "token_acc": 0.9149353318786959, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 1.5867222128030614, |
| "grad_norm": 0.6053619384765625, |
| "learning_rate": 4.932709655131646e-06, |
| "loss": 0.2440941572189331, |
| "memory(GiB)": 72.48, |
| "step": 4095, |
| "token_acc": 0.9105009493208704, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 1.5886598687238114, |
| "grad_norm": 0.566652238368988, |
| "learning_rate": 4.922029454570727e-06, |
| "loss": 0.2509720802307129, |
| "memory(GiB)": 72.48, |
| "step": 4100, |
| "token_acc": 0.9231726836208429, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 1.5905975246445614, |
| "grad_norm": 0.5329635739326477, |
| "learning_rate": 4.911349609838554e-06, |
| "loss": 0.23644027709960938, |
| "memory(GiB)": 72.48, |
| "step": 4105, |
| "token_acc": 0.9231957270182438, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 1.592535180565311, |
| "grad_norm": 0.6006894111633301, |
| "learning_rate": 4.900670169673989e-06, |
| "loss": 0.24374380111694335, |
| "memory(GiB)": 72.48, |
| "step": 4110, |
| "token_acc": 0.919171473872415, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 1.594472836486061, |
| "grad_norm": 0.6029812097549438, |
| "learning_rate": 4.88999118281405e-06, |
| "loss": 0.23951337337493897, |
| "memory(GiB)": 72.48, |
| "step": 4115, |
| "token_acc": 0.9179036655558922, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 1.596410492406811, |
| "grad_norm": 0.5867647528648376, |
| "learning_rate": 4.879312697993685e-06, |
| "loss": 0.24152259826660155, |
| "memory(GiB)": 72.48, |
| "step": 4120, |
| "token_acc": 0.915404528818401, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 1.5983481483275608, |
| "grad_norm": 0.5779484510421753, |
| "learning_rate": 4.868634763945549e-06, |
| "loss": 0.2460160255432129, |
| "memory(GiB)": 72.48, |
| "step": 4125, |
| "token_acc": 0.9114298024250631, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 1.6002858042483106, |
| "grad_norm": 0.5900539755821228, |
| "learning_rate": 4.857957429399788e-06, |
| "loss": 0.2371826171875, |
| "memory(GiB)": 72.48, |
| "step": 4130, |
| "token_acc": 0.917403008709422, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 1.6022234601690606, |
| "grad_norm": 0.6000462770462036, |
| "learning_rate": 4.847280743083812e-06, |
| "loss": 0.2404171943664551, |
| "memory(GiB)": 72.48, |
| "step": 4135, |
| "token_acc": 0.9210667922243896, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.6041611160898104, |
| "grad_norm": 0.5760965347290039, |
| "learning_rate": 4.836604753722065e-06, |
| "loss": 0.23797254562377929, |
| "memory(GiB)": 72.48, |
| "step": 4140, |
| "token_acc": 0.9124895437874011, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 1.6060987720105602, |
| "grad_norm": 0.5743480920791626, |
| "learning_rate": 4.825929510035818e-06, |
| "loss": 0.23742265701293946, |
| "memory(GiB)": 72.48, |
| "step": 4145, |
| "token_acc": 0.9258983303417211, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 1.6080364279313102, |
| "grad_norm": 0.5980106592178345, |
| "learning_rate": 4.815255060742938e-06, |
| "loss": 0.23633360862731934, |
| "memory(GiB)": 72.48, |
| "step": 4150, |
| "token_acc": 0.9170678477437576, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 1.60997408385206, |
| "grad_norm": 0.5759811997413635, |
| "learning_rate": 4.804581454557663e-06, |
| "loss": 0.24039463996887206, |
| "memory(GiB)": 72.48, |
| "step": 4155, |
| "token_acc": 0.9208314753198642, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 1.6119117397728098, |
| "grad_norm": 0.5623453259468079, |
| "learning_rate": 4.793908740190388e-06, |
| "loss": 0.2390963077545166, |
| "memory(GiB)": 72.48, |
| "step": 4160, |
| "token_acc": 0.9164110429447853, |
| "train_speed(iter/s)": 0.080919 |
| }, |
| { |
| "epoch": 1.6138493956935598, |
| "grad_norm": 0.5763586759567261, |
| "learning_rate": 4.783236966347436e-06, |
| "loss": 0.2290616512298584, |
| "memory(GiB)": 72.48, |
| "step": 4165, |
| "token_acc": 0.9196206818694965, |
| "train_speed(iter/s)": 0.080925 |
| }, |
| { |
| "epoch": 1.6157870516143096, |
| "grad_norm": 0.5638933777809143, |
| "learning_rate": 4.772566181730835e-06, |
| "loss": 0.23402900695800782, |
| "memory(GiB)": 72.48, |
| "step": 4170, |
| "token_acc": 0.9164476459327039, |
| "train_speed(iter/s)": 0.080925 |
| }, |
| { |
| "epoch": 1.6177247075350594, |
| "grad_norm": 0.5336311459541321, |
| "learning_rate": 4.7618964350381054e-06, |
| "loss": 0.2433910369873047, |
| "memory(GiB)": 72.48, |
| "step": 4175, |
| "token_acc": 0.9133335336719055, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.6196623634558094, |
| "grad_norm": 0.5347714424133301, |
| "learning_rate": 4.751227774962022e-06, |
| "loss": 0.2435020923614502, |
| "memory(GiB)": 72.48, |
| "step": 4180, |
| "token_acc": 0.9224319358610322, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 1.6216000193765592, |
| "grad_norm": 0.5854753255844116, |
| "learning_rate": 4.74056025019041e-06, |
| "loss": 0.23801109790802003, |
| "memory(GiB)": 72.48, |
| "step": 4185, |
| "token_acc": 0.9016558105557922, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 1.623537675297309, |
| "grad_norm": 0.5887041687965393, |
| "learning_rate": 4.729893909405905e-06, |
| "loss": 0.23980984687805176, |
| "memory(GiB)": 72.48, |
| "step": 4190, |
| "token_acc": 0.9203709837694601, |
| "train_speed(iter/s)": 0.080931 |
| }, |
| { |
| "epoch": 1.625475331218059, |
| "grad_norm": 0.5586225986480713, |
| "learning_rate": 4.719228801285748e-06, |
| "loss": 0.23947548866271973, |
| "memory(GiB)": 72.48, |
| "step": 4195, |
| "token_acc": 0.9210292268531297, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 1.6274129871388088, |
| "grad_norm": 0.5300182700157166, |
| "learning_rate": 4.708564974501545e-06, |
| "loss": 0.23387060165405274, |
| "memory(GiB)": 72.48, |
| "step": 4200, |
| "token_acc": 0.9218061674008811, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 1.6293506430595586, |
| "grad_norm": 0.5876345634460449, |
| "learning_rate": 4.69790247771906e-06, |
| "loss": 0.22723779678344727, |
| "memory(GiB)": 72.48, |
| "step": 4205, |
| "token_acc": 0.9254710351059201, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 1.6312882989803086, |
| "grad_norm": 0.5677121877670288, |
| "learning_rate": 4.687241359597988e-06, |
| "loss": 0.23792381286621095, |
| "memory(GiB)": 72.48, |
| "step": 4210, |
| "token_acc": 0.9219298563103985, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 1.6332259549010586, |
| "grad_norm": 0.6215115189552307, |
| "learning_rate": 4.676581668791731e-06, |
| "loss": 0.23932018280029296, |
| "memory(GiB)": 72.48, |
| "step": 4215, |
| "token_acc": 0.9241675418545746, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 1.6351636108218082, |
| "grad_norm": 0.590103268623352, |
| "learning_rate": 4.665923453947176e-06, |
| "loss": 0.22915961742401122, |
| "memory(GiB)": 72.48, |
| "step": 4220, |
| "token_acc": 0.9112952111537684, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.6371012667425582, |
| "grad_norm": 0.585893452167511, |
| "learning_rate": 4.655266763704476e-06, |
| "loss": 0.2472785472869873, |
| "memory(GiB)": 72.48, |
| "step": 4225, |
| "token_acc": 0.9150163544454356, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 1.6390389226633082, |
| "grad_norm": 0.5503528714179993, |
| "learning_rate": 4.644611646696826e-06, |
| "loss": 0.25024340152740476, |
| "memory(GiB)": 72.48, |
| "step": 4230, |
| "token_acc": 0.9093188977582041, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 1.640976578584058, |
| "grad_norm": 0.5937880873680115, |
| "learning_rate": 4.633958151550242e-06, |
| "loss": 0.24387547969818116, |
| "memory(GiB)": 72.48, |
| "step": 4235, |
| "token_acc": 0.9265440293318161, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 1.6429142345048078, |
| "grad_norm": 0.5671422481536865, |
| "learning_rate": 4.623306326883336e-06, |
| "loss": 0.23435800075531005, |
| "memory(GiB)": 72.48, |
| "step": 4240, |
| "token_acc": 0.9159615384615385, |
| "train_speed(iter/s)": 0.080924 |
| }, |
| { |
| "epoch": 1.6448518904255578, |
| "grad_norm": 0.6274139285087585, |
| "learning_rate": 4.612656221307097e-06, |
| "loss": 0.2379068374633789, |
| "memory(GiB)": 72.48, |
| "step": 4245, |
| "token_acc": 0.9165053473390244, |
| "train_speed(iter/s)": 0.080925 |
| }, |
| { |
| "epoch": 1.6467895463463076, |
| "grad_norm": 0.5559883713722229, |
| "learning_rate": 4.602007883424673e-06, |
| "loss": 0.24304821491241455, |
| "memory(GiB)": 72.48, |
| "step": 4250, |
| "token_acc": 0.9144308877309946, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 1.6487272022670574, |
| "grad_norm": 0.6241846084594727, |
| "learning_rate": 4.59136136183114e-06, |
| "loss": 0.2329557418823242, |
| "memory(GiB)": 72.48, |
| "step": 4255, |
| "token_acc": 0.920522930068479, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 1.6506648581878074, |
| "grad_norm": 0.5585425496101379, |
| "learning_rate": 4.580716705113285e-06, |
| "loss": 0.2362123489379883, |
| "memory(GiB)": 72.48, |
| "step": 4260, |
| "token_acc": 0.920124514617631, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 1.6526025141085572, |
| "grad_norm": 0.5571277737617493, |
| "learning_rate": 4.570073961849388e-06, |
| "loss": 0.24444966316223143, |
| "memory(GiB)": 72.48, |
| "step": 4265, |
| "token_acc": 0.9185006185092489, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 1.654540170029307, |
| "grad_norm": 0.5879797339439392, |
| "learning_rate": 4.559433180608994e-06, |
| "loss": 0.23404364585876464, |
| "memory(GiB)": 72.48, |
| "step": 4270, |
| "token_acc": 0.9163342705813117, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.656477825950057, |
| "grad_norm": 0.5947754979133606, |
| "learning_rate": 4.548794409952697e-06, |
| "loss": 0.24070556163787843, |
| "memory(GiB)": 72.48, |
| "step": 4275, |
| "token_acc": 0.9180384736676127, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 1.6584154818708068, |
| "grad_norm": 0.553914487361908, |
| "learning_rate": 4.538157698431911e-06, |
| "loss": 0.23187220096588135, |
| "memory(GiB)": 72.48, |
| "step": 4280, |
| "token_acc": 0.9252291530776889, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.6603531377915566, |
| "grad_norm": 0.5857269763946533, |
| "learning_rate": 4.527523094588655e-06, |
| "loss": 0.24459095001220704, |
| "memory(GiB)": 72.48, |
| "step": 4285, |
| "token_acc": 0.911944202266783, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 1.6622907937123066, |
| "grad_norm": 0.6173809766769409, |
| "learning_rate": 4.516890646955331e-06, |
| "loss": 0.23617539405822754, |
| "memory(GiB)": 72.48, |
| "step": 4290, |
| "token_acc": 0.9265784625690839, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 1.6642284496330564, |
| "grad_norm": 0.5930407047271729, |
| "learning_rate": 4.506260404054499e-06, |
| "loss": 0.23567602634429932, |
| "memory(GiB)": 72.48, |
| "step": 4295, |
| "token_acc": 0.9236125473974491, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 1.6661661055538062, |
| "grad_norm": 0.536730945110321, |
| "learning_rate": 4.495632414398659e-06, |
| "loss": 0.23399744033813477, |
| "memory(GiB)": 72.48, |
| "step": 4300, |
| "token_acc": 0.917054956277526, |
| "train_speed(iter/s)": 0.080931 |
| }, |
| { |
| "epoch": 1.6681037614745562, |
| "grad_norm": 0.554707944393158, |
| "learning_rate": 4.485006726490025e-06, |
| "loss": 0.2406824827194214, |
| "memory(GiB)": 72.48, |
| "step": 4305, |
| "token_acc": 0.915921518173046, |
| "train_speed(iter/s)": 0.080929 |
| }, |
| { |
| "epoch": 1.670041417395306, |
| "grad_norm": 0.5852320194244385, |
| "learning_rate": 4.474383388820308e-06, |
| "loss": 0.23379006385803222, |
| "memory(GiB)": 72.48, |
| "step": 4310, |
| "token_acc": 0.9103054104281356, |
| "train_speed(iter/s)": 0.080927 |
| }, |
| { |
| "epoch": 1.6719790733160558, |
| "grad_norm": 0.6317037343978882, |
| "learning_rate": 4.463762449870497e-06, |
| "loss": 0.23294405937194823, |
| "memory(GiB)": 72.48, |
| "step": 4315, |
| "token_acc": 0.9220221695393093, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 1.6739167292368058, |
| "grad_norm": 0.5779034495353699, |
| "learning_rate": 4.4531439581106295e-06, |
| "loss": 0.24557096958160402, |
| "memory(GiB)": 72.48, |
| "step": 4320, |
| "token_acc": 0.9234873129472999, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 1.6758543851575558, |
| "grad_norm": 0.593512237071991, |
| "learning_rate": 4.442527961999575e-06, |
| "loss": 0.23549408912658693, |
| "memory(GiB)": 72.48, |
| "step": 4325, |
| "token_acc": 0.9139355455414447, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 1.6777920410783054, |
| "grad_norm": 0.6004024147987366, |
| "learning_rate": 4.431914509984815e-06, |
| "loss": 0.24675235748291016, |
| "memory(GiB)": 72.48, |
| "step": 4330, |
| "token_acc": 0.9207930790722725, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 1.6797296969990554, |
| "grad_norm": 0.6113540530204773, |
| "learning_rate": 4.421303650502224e-06, |
| "loss": 0.24028847217559815, |
| "memory(GiB)": 72.48, |
| "step": 4335, |
| "token_acc": 0.9078976291411525, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 1.6816673529198054, |
| "grad_norm": 0.5542721152305603, |
| "learning_rate": 4.410695431975839e-06, |
| "loss": 0.23191099166870116, |
| "memory(GiB)": 72.48, |
| "step": 4340, |
| "token_acc": 0.9157742044998611, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.683605008840555, |
| "grad_norm": 0.5804362297058105, |
| "learning_rate": 4.400089902817649e-06, |
| "loss": 0.23258790969848633, |
| "memory(GiB)": 72.48, |
| "step": 4345, |
| "token_acc": 0.9184183581280492, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.685542664761305, |
| "grad_norm": 0.6177292466163635, |
| "learning_rate": 4.389487111427368e-06, |
| "loss": 0.2363499164581299, |
| "memory(GiB)": 72.48, |
| "step": 4350, |
| "token_acc": 0.9231600270087779, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.687480320682055, |
| "grad_norm": 0.5826561450958252, |
| "learning_rate": 4.378887106192218e-06, |
| "loss": 0.2349027156829834, |
| "memory(GiB)": 72.48, |
| "step": 4355, |
| "token_acc": 0.9184171214292247, |
| "train_speed(iter/s)": 0.080969 |
| }, |
| { |
| "epoch": 1.6894179766028048, |
| "grad_norm": 0.5665357112884521, |
| "learning_rate": 4.368289935486703e-06, |
| "loss": 0.23099970817565918, |
| "memory(GiB)": 72.48, |
| "step": 4360, |
| "token_acc": 0.9163845394272817, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.6913556325235546, |
| "grad_norm": 0.6047654747962952, |
| "learning_rate": 4.357695647672392e-06, |
| "loss": 0.23423008918762206, |
| "memory(GiB)": 72.48, |
| "step": 4365, |
| "token_acc": 0.9242815165197449, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.6932932884443046, |
| "grad_norm": 0.5313150882720947, |
| "learning_rate": 4.347104291097698e-06, |
| "loss": 0.241463041305542, |
| "memory(GiB)": 72.48, |
| "step": 4370, |
| "token_acc": 0.9196163157733444, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.6952309443650544, |
| "grad_norm": 0.5334797501564026, |
| "learning_rate": 4.3365159140976585e-06, |
| "loss": 0.2367786645889282, |
| "memory(GiB)": 72.48, |
| "step": 4375, |
| "token_acc": 0.9231794649223425, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.6971686002858042, |
| "grad_norm": 0.5730844736099243, |
| "learning_rate": 4.325930564993713e-06, |
| "loss": 0.24104855060577393, |
| "memory(GiB)": 72.48, |
| "step": 4380, |
| "token_acc": 0.9082375734901122, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.6991062562065542, |
| "grad_norm": 0.550591766834259, |
| "learning_rate": 4.315348292093477e-06, |
| "loss": 0.23299641609191896, |
| "memory(GiB)": 72.48, |
| "step": 4385, |
| "token_acc": 0.9203085632523562, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.701043912127304, |
| "grad_norm": 0.5867196321487427, |
| "learning_rate": 4.3047691436905345e-06, |
| "loss": 0.2483926773071289, |
| "memory(GiB)": 72.48, |
| "step": 4390, |
| "token_acc": 0.9120561220799884, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 1.7029815680480538, |
| "grad_norm": 0.5573843121528625, |
| "learning_rate": 4.29419316806421e-06, |
| "loss": 0.23110456466674806, |
| "memory(GiB)": 72.48, |
| "step": 4395, |
| "token_acc": 0.9250759034964579, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.7049192239688038, |
| "grad_norm": 0.6066656708717346, |
| "learning_rate": 4.283620413479343e-06, |
| "loss": 0.23136162757873535, |
| "memory(GiB)": 72.48, |
| "step": 4400, |
| "token_acc": 0.9227839111627607, |
| "train_speed(iter/s)": 0.080971 |
| }, |
| { |
| "epoch": 1.7068568798895536, |
| "grad_norm": 0.6089814901351929, |
| "learning_rate": 4.273050928186078e-06, |
| "loss": 0.24389944076538086, |
| "memory(GiB)": 72.48, |
| "step": 4405, |
| "token_acc": 0.9158227439850352, |
| "train_speed(iter/s)": 0.080971 |
| }, |
| { |
| "epoch": 1.7087945358103034, |
| "grad_norm": 0.5605995655059814, |
| "learning_rate": 4.26248476041964e-06, |
| "loss": 0.2332322120666504, |
| "memory(GiB)": 72.48, |
| "step": 4410, |
| "token_acc": 0.9247135842880524, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.7107321917310534, |
| "grad_norm": 0.5949825644493103, |
| "learning_rate": 4.2519219584001106e-06, |
| "loss": 0.24070992469787597, |
| "memory(GiB)": 72.48, |
| "step": 4415, |
| "token_acc": 0.9084620570390128, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.7126698476518032, |
| "grad_norm": 0.609623908996582, |
| "learning_rate": 4.241362570332216e-06, |
| "loss": 0.23741843700408935, |
| "memory(GiB)": 72.48, |
| "step": 4420, |
| "token_acc": 0.9277577279146398, |
| "train_speed(iter/s)": 0.080957 |
| }, |
| { |
| "epoch": 1.714607503572553, |
| "grad_norm": 0.5393386483192444, |
| "learning_rate": 4.230806644405096e-06, |
| "loss": 0.22986218929290772, |
| "memory(GiB)": 72.48, |
| "step": 4425, |
| "token_acc": 0.916211918051701, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.716545159493303, |
| "grad_norm": 0.5796214938163757, |
| "learning_rate": 4.220254228792098e-06, |
| "loss": 0.23478846549987792, |
| "memory(GiB)": 72.48, |
| "step": 4430, |
| "token_acc": 0.9145027540705768, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.718482815414053, |
| "grad_norm": 0.5430638790130615, |
| "learning_rate": 4.209705371650544e-06, |
| "loss": 0.23895716667175293, |
| "memory(GiB)": 72.48, |
| "step": 4435, |
| "token_acc": 0.9104987196683332, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.7204204713348026, |
| "grad_norm": 0.5694401264190674, |
| "learning_rate": 4.19916012112152e-06, |
| "loss": 0.21948375701904296, |
| "memory(GiB)": 72.48, |
| "step": 4440, |
| "token_acc": 0.9201905284079227, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.7223581272555526, |
| "grad_norm": 0.5556661486625671, |
| "learning_rate": 4.188618525329648e-06, |
| "loss": 0.2423006057739258, |
| "memory(GiB)": 72.48, |
| "step": 4445, |
| "token_acc": 0.9202016516865112, |
| "train_speed(iter/s)": 0.080973 |
| }, |
| { |
| "epoch": 1.7242957831763026, |
| "grad_norm": 0.5384371876716614, |
| "learning_rate": 4.178080632382875e-06, |
| "loss": 0.2399946928024292, |
| "memory(GiB)": 72.48, |
| "step": 4450, |
| "token_acc": 0.9170499120789937, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.7262334390970522, |
| "grad_norm": 0.5373743772506714, |
| "learning_rate": 4.167546490372251e-06, |
| "loss": 0.2229823350906372, |
| "memory(GiB)": 72.48, |
| "step": 4455, |
| "token_acc": 0.9276191068154271, |
| "train_speed(iter/s)": 0.080973 |
| }, |
| { |
| "epoch": 1.7281710950178022, |
| "grad_norm": 0.599141538143158, |
| "learning_rate": 4.157016147371704e-06, |
| "loss": 0.23781347274780273, |
| "memory(GiB)": 72.48, |
| "step": 4460, |
| "token_acc": 0.9221821756225426, |
| "train_speed(iter/s)": 0.080972 |
| }, |
| { |
| "epoch": 1.7301087509385522, |
| "grad_norm": 0.6435913443565369, |
| "learning_rate": 4.146489651437826e-06, |
| "loss": 0.23757214546203614, |
| "memory(GiB)": 72.48, |
| "step": 4465, |
| "token_acc": 0.9272176631496678, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 1.732046406859302, |
| "grad_norm": 0.586665689945221, |
| "learning_rate": 4.135967050609655e-06, |
| "loss": 0.25291759967803956, |
| "memory(GiB)": 72.48, |
| "step": 4470, |
| "token_acc": 0.9171209800918836, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.7339840627800518, |
| "grad_norm": 0.556600034236908, |
| "learning_rate": 4.12544839290845e-06, |
| "loss": 0.24632768630981444, |
| "memory(GiB)": 72.48, |
| "step": 4475, |
| "token_acc": 0.9216685429874978, |
| "train_speed(iter/s)": 0.080958 |
| }, |
| { |
| "epoch": 1.7359217187008018, |
| "grad_norm": 0.5605632066726685, |
| "learning_rate": 4.114933726337477e-06, |
| "loss": 0.23179025650024415, |
| "memory(GiB)": 72.48, |
| "step": 4480, |
| "token_acc": 0.9092191080557025, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.7378593746215516, |
| "grad_norm": 0.6133179068565369, |
| "learning_rate": 4.1044230988817865e-06, |
| "loss": 0.23435473442077637, |
| "memory(GiB)": 72.48, |
| "step": 4485, |
| "token_acc": 0.9098795180722892, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.7397970305423014, |
| "grad_norm": 0.5663028359413147, |
| "learning_rate": 4.093916558507996e-06, |
| "loss": 0.23801469802856445, |
| "memory(GiB)": 72.48, |
| "step": 4490, |
| "token_acc": 0.9059203301274585, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 1.7417346864630514, |
| "grad_norm": 0.5228857398033142, |
| "learning_rate": 4.083414153164073e-06, |
| "loss": 0.23023405075073242, |
| "memory(GiB)": 72.48, |
| "step": 4495, |
| "token_acc": 0.9179573892458572, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.7436723423838012, |
| "grad_norm": 0.5712965726852417, |
| "learning_rate": 4.072915930779112e-06, |
| "loss": 0.24636218547821045, |
| "memory(GiB)": 72.48, |
| "step": 4500, |
| "token_acc": 0.9129208140493248, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 1.745609998304551, |
| "grad_norm": 0.6002662181854248, |
| "learning_rate": 4.062421939263123e-06, |
| "loss": 0.23319830894470214, |
| "memory(GiB)": 72.48, |
| "step": 4505, |
| "token_acc": 0.9240683564299562, |
| "train_speed(iter/s)": 0.080957 |
| }, |
| { |
| "epoch": 1.747547654225301, |
| "grad_norm": 0.5801786780357361, |
| "learning_rate": 4.051932226506797e-06, |
| "loss": 0.23109970092773438, |
| "memory(GiB)": 72.48, |
| "step": 4510, |
| "token_acc": 0.9139643057434104, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.7494853101460508, |
| "grad_norm": 0.5877240300178528, |
| "learning_rate": 4.041446840381309e-06, |
| "loss": 0.23986215591430665, |
| "memory(GiB)": 72.48, |
| "step": 4515, |
| "token_acc": 0.9096103267934339, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 1.7514229660668006, |
| "grad_norm": 0.6062881946563721, |
| "learning_rate": 4.030965828738085e-06, |
| "loss": 0.23394384384155273, |
| "memory(GiB)": 72.48, |
| "step": 4520, |
| "token_acc": 0.9261261870163225, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 1.7533606219875506, |
| "grad_norm": 0.5838972330093384, |
| "learning_rate": 4.020489239408586e-06, |
| "loss": 0.23096508979797364, |
| "memory(GiB)": 72.48, |
| "step": 4525, |
| "token_acc": 0.9149048625792812, |
| "train_speed(iter/s)": 0.08095 |
| }, |
| { |
| "epoch": 1.7552982779083004, |
| "grad_norm": 0.5489374995231628, |
| "learning_rate": 4.010017120204095e-06, |
| "loss": 0.23239753246307374, |
| "memory(GiB)": 72.48, |
| "step": 4530, |
| "token_acc": 0.91888136800855, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 1.7572359338290502, |
| "grad_norm": 0.5938902497291565, |
| "learning_rate": 3.999549518915491e-06, |
| "loss": 0.23465304374694823, |
| "memory(GiB)": 72.48, |
| "step": 4535, |
| "token_acc": 0.9183352306971138, |
| "train_speed(iter/s)": 0.08096 |
| }, |
| { |
| "epoch": 1.7591735897498002, |
| "grad_norm": 0.6340152025222778, |
| "learning_rate": 3.989086483313039e-06, |
| "loss": 0.24967737197875978, |
| "memory(GiB)": 72.48, |
| "step": 4540, |
| "token_acc": 0.9101321585903084, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.7611112456705502, |
| "grad_norm": 0.5557016134262085, |
| "learning_rate": 3.978628061146161e-06, |
| "loss": 0.2475881338119507, |
| "memory(GiB)": 72.48, |
| "step": 4545, |
| "token_acc": 0.9139274296179036, |
| "train_speed(iter/s)": 0.080972 |
| }, |
| { |
| "epoch": 1.7630489015912998, |
| "grad_norm": 0.5651369690895081, |
| "learning_rate": 3.968174300143234e-06, |
| "loss": 0.23720135688781738, |
| "memory(GiB)": 72.48, |
| "step": 4550, |
| "token_acc": 0.9207509948641203, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 1.7649865575120498, |
| "grad_norm": 0.5669804811477661, |
| "learning_rate": 3.957725248011356e-06, |
| "loss": 0.24074983596801758, |
| "memory(GiB)": 72.48, |
| "step": 4555, |
| "token_acc": 0.925565985643291, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 1.7669242134327998, |
| "grad_norm": 0.538011372089386, |
| "learning_rate": 3.94728095243614e-06, |
| "loss": 0.22811007499694824, |
| "memory(GiB)": 72.48, |
| "step": 4560, |
| "token_acc": 0.9120666521047212, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.7688618693535494, |
| "grad_norm": 0.6247624754905701, |
| "learning_rate": 3.93684146108149e-06, |
| "loss": 0.24950284957885743, |
| "memory(GiB)": 72.48, |
| "step": 4565, |
| "token_acc": 0.9151660309293592, |
| "train_speed(iter/s)": 0.080968 |
| }, |
| { |
| "epoch": 1.7707995252742994, |
| "grad_norm": 0.5420736074447632, |
| "learning_rate": 3.926406821589383e-06, |
| "loss": 0.2388762950897217, |
| "memory(GiB)": 72.48, |
| "step": 4570, |
| "token_acc": 0.9174054909198774, |
| "train_speed(iter/s)": 0.08097 |
| }, |
| { |
| "epoch": 1.7727371811950494, |
| "grad_norm": 0.5523030757904053, |
| "learning_rate": 3.915977081579658e-06, |
| "loss": 0.2391650438308716, |
| "memory(GiB)": 72.48, |
| "step": 4575, |
| "token_acc": 0.9292837420821829, |
| "train_speed(iter/s)": 0.080971 |
| }, |
| { |
| "epoch": 1.7746748371157992, |
| "grad_norm": 0.6097285151481628, |
| "learning_rate": 3.905552288649792e-06, |
| "loss": 0.23768877983093262, |
| "memory(GiB)": 72.48, |
| "step": 4580, |
| "token_acc": 0.9200933400605449, |
| "train_speed(iter/s)": 0.080976 |
| }, |
| { |
| "epoch": 1.776612493036549, |
| "grad_norm": 0.6126767992973328, |
| "learning_rate": 3.895132490374686e-06, |
| "loss": 0.2507157802581787, |
| "memory(GiB)": 72.48, |
| "step": 4585, |
| "token_acc": 0.921820542609171, |
| "train_speed(iter/s)": 0.08097 |
| }, |
| { |
| "epoch": 1.778550148957299, |
| "grad_norm": 0.5675840973854065, |
| "learning_rate": 3.884717734306448e-06, |
| "loss": 0.22835259437561034, |
| "memory(GiB)": 72.48, |
| "step": 4590, |
| "token_acc": 0.9248906174039864, |
| "train_speed(iter/s)": 0.080974 |
| }, |
| { |
| "epoch": 1.7804878048780488, |
| "grad_norm": 0.5561676621437073, |
| "learning_rate": 3.8743080679741735e-06, |
| "loss": 0.23788776397705078, |
| "memory(GiB)": 72.48, |
| "step": 4595, |
| "token_acc": 0.9150849505935879, |
| "train_speed(iter/s)": 0.080976 |
| }, |
| { |
| "epoch": 1.7824254607987986, |
| "grad_norm": 0.5253614187240601, |
| "learning_rate": 3.8639035388837335e-06, |
| "loss": 0.22025654315948487, |
| "memory(GiB)": 72.48, |
| "step": 4600, |
| "token_acc": 0.917173455160343, |
| "train_speed(iter/s)": 0.080977 |
| }, |
| { |
| "epoch": 1.7843631167195486, |
| "grad_norm": 0.5718342661857605, |
| "learning_rate": 3.853504194517551e-06, |
| "loss": 0.24571945667266845, |
| "memory(GiB)": 72.48, |
| "step": 4605, |
| "token_acc": 0.9146592199343296, |
| "train_speed(iter/s)": 0.08098 |
| }, |
| { |
| "epoch": 1.7863007726402984, |
| "grad_norm": 0.5782023072242737, |
| "learning_rate": 3.843110082334388e-06, |
| "loss": 0.2297668933868408, |
| "memory(GiB)": 72.48, |
| "step": 4610, |
| "token_acc": 0.9213476319227147, |
| "train_speed(iter/s)": 0.080991 |
| }, |
| { |
| "epoch": 1.7882384285610482, |
| "grad_norm": 0.6311032772064209, |
| "learning_rate": 3.832721249769132e-06, |
| "loss": 0.237768816947937, |
| "memory(GiB)": 72.48, |
| "step": 4615, |
| "token_acc": 0.9166639987193853, |
| "train_speed(iter/s)": 0.080997 |
| }, |
| { |
| "epoch": 1.7901760844817982, |
| "grad_norm": 0.5533443093299866, |
| "learning_rate": 3.8223377442325744e-06, |
| "loss": 0.2349649429321289, |
| "memory(GiB)": 72.48, |
| "step": 4620, |
| "token_acc": 0.9282687009959737, |
| "train_speed(iter/s)": 0.080996 |
| }, |
| { |
| "epoch": 1.792113740402548, |
| "grad_norm": 0.5883183479309082, |
| "learning_rate": 3.811959613111197e-06, |
| "loss": 0.23518748283386232, |
| "memory(GiB)": 72.48, |
| "step": 4625, |
| "token_acc": 0.9147151559677963, |
| "train_speed(iter/s)": 0.080996 |
| }, |
| { |
| "epoch": 1.7940513963232978, |
| "grad_norm": 0.5875689387321472, |
| "learning_rate": 3.8015869037669496e-06, |
| "loss": 0.23918418884277343, |
| "memory(GiB)": 72.48, |
| "step": 4630, |
| "token_acc": 0.9236037186856859, |
| "train_speed(iter/s)": 0.080993 |
| }, |
| { |
| "epoch": 1.7959890522440478, |
| "grad_norm": 0.5876352190971375, |
| "learning_rate": 3.7912196635370452e-06, |
| "loss": 0.2464871883392334, |
| "memory(GiB)": 72.48, |
| "step": 4635, |
| "token_acc": 0.9047433144641811, |
| "train_speed(iter/s)": 0.080991 |
| }, |
| { |
| "epoch": 1.7979267081647976, |
| "grad_norm": 0.5628306865692139, |
| "learning_rate": 3.7808579397337373e-06, |
| "loss": 0.23177266120910645, |
| "memory(GiB)": 72.48, |
| "step": 4640, |
| "token_acc": 0.9217531942422772, |
| "train_speed(iter/s)": 0.080996 |
| }, |
| { |
| "epoch": 1.7998643640855474, |
| "grad_norm": 0.5328852534294128, |
| "learning_rate": 3.7705017796441003e-06, |
| "loss": 0.2317207098007202, |
| "memory(GiB)": 72.48, |
| "step": 4645, |
| "token_acc": 0.9266968638705438, |
| "train_speed(iter/s)": 0.080997 |
| }, |
| { |
| "epoch": 1.8018020200062974, |
| "grad_norm": 0.5785929560661316, |
| "learning_rate": 3.760151230529821e-06, |
| "loss": 0.23354239463806153, |
| "memory(GiB)": 72.48, |
| "step": 4650, |
| "token_acc": 0.9157065070244578, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 1.8037396759270472, |
| "grad_norm": 0.5678642392158508, |
| "learning_rate": 3.74980633962698e-06, |
| "loss": 0.23528060913085938, |
| "memory(GiB)": 72.48, |
| "step": 4655, |
| "token_acc": 0.9224140938426653, |
| "train_speed(iter/s)": 0.080992 |
| }, |
| { |
| "epoch": 1.805677331847797, |
| "grad_norm": 0.5892844796180725, |
| "learning_rate": 3.7394671541458345e-06, |
| "loss": 0.24472272396087646, |
| "memory(GiB)": 72.48, |
| "step": 4660, |
| "token_acc": 0.9063246013304899, |
| "train_speed(iter/s)": 0.080987 |
| }, |
| { |
| "epoch": 1.807614987768547, |
| "grad_norm": 0.5627365112304688, |
| "learning_rate": 3.7291337212706057e-06, |
| "loss": 0.2330082893371582, |
| "memory(GiB)": 72.48, |
| "step": 4665, |
| "token_acc": 0.9164701813952026, |
| "train_speed(iter/s)": 0.080993 |
| }, |
| { |
| "epoch": 1.809552643689297, |
| "grad_norm": 0.5916730761528015, |
| "learning_rate": 3.7188060881592603e-06, |
| "loss": 0.22397446632385254, |
| "memory(GiB)": 72.48, |
| "step": 4670, |
| "token_acc": 0.9247956042501075, |
| "train_speed(iter/s)": 0.080995 |
| }, |
| { |
| "epoch": 1.8114902996100466, |
| "grad_norm": 0.5689864754676819, |
| "learning_rate": 3.708484301943298e-06, |
| "loss": 0.2298985242843628, |
| "memory(GiB)": 72.48, |
| "step": 4675, |
| "token_acc": 0.9312669969824535, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 1.8134279555307966, |
| "grad_norm": 0.5318856835365295, |
| "learning_rate": 3.6981684097275357e-06, |
| "loss": 0.23902740478515624, |
| "memory(GiB)": 72.48, |
| "step": 4680, |
| "token_acc": 0.9195728129738282, |
| "train_speed(iter/s)": 0.081 |
| }, |
| { |
| "epoch": 1.8153656114515466, |
| "grad_norm": 0.5639890432357788, |
| "learning_rate": 3.6878584585898913e-06, |
| "loss": 0.22230730056762696, |
| "memory(GiB)": 72.48, |
| "step": 4685, |
| "token_acc": 0.9186222446376123, |
| "train_speed(iter/s)": 0.080999 |
| }, |
| { |
| "epoch": 1.8173032673722964, |
| "grad_norm": 0.5812826752662659, |
| "learning_rate": 3.677554495581173e-06, |
| "loss": 0.233660888671875, |
| "memory(GiB)": 72.48, |
| "step": 4690, |
| "token_acc": 0.9195180279617365, |
| "train_speed(iter/s)": 0.080994 |
| }, |
| { |
| "epoch": 1.8192409232930462, |
| "grad_norm": 0.5809809565544128, |
| "learning_rate": 3.667256567724855e-06, |
| "loss": 0.23797879219055176, |
| "memory(GiB)": 72.48, |
| "step": 4695, |
| "token_acc": 0.9145623572554444, |
| "train_speed(iter/s)": 0.080996 |
| }, |
| { |
| "epoch": 1.8211785792137962, |
| "grad_norm": 0.5800526142120361, |
| "learning_rate": 3.656964722016875e-06, |
| "loss": 0.24539968967437745, |
| "memory(GiB)": 72.48, |
| "step": 4700, |
| "token_acc": 0.9213923231497565, |
| "train_speed(iter/s)": 0.080991 |
| }, |
| { |
| "epoch": 1.823116235134546, |
| "grad_norm": 0.5599920749664307, |
| "learning_rate": 3.646679005425412e-06, |
| "loss": 0.2380000591278076, |
| "memory(GiB)": 72.48, |
| "step": 4705, |
| "token_acc": 0.9146419233467253, |
| "train_speed(iter/s)": 0.080988 |
| }, |
| { |
| "epoch": 1.8250538910552958, |
| "grad_norm": 0.5796039700508118, |
| "learning_rate": 3.636399464890673e-06, |
| "loss": 0.22853050231933594, |
| "memory(GiB)": 72.48, |
| "step": 4710, |
| "token_acc": 0.9201017307438235, |
| "train_speed(iter/s)": 0.080978 |
| }, |
| { |
| "epoch": 1.8269915469760458, |
| "grad_norm": 0.5604573488235474, |
| "learning_rate": 3.626126147324682e-06, |
| "loss": 0.2464301347732544, |
| "memory(GiB)": 72.48, |
| "step": 4715, |
| "token_acc": 0.912274280136652, |
| "train_speed(iter/s)": 0.08097 |
| }, |
| { |
| "epoch": 1.8289292028967956, |
| "grad_norm": 0.6337606310844421, |
| "learning_rate": 3.615859099611063e-06, |
| "loss": 0.24546365737915038, |
| "memory(GiB)": 72.48, |
| "step": 4720, |
| "token_acc": 0.9116720267680213, |
| "train_speed(iter/s)": 0.080972 |
| }, |
| { |
| "epoch": 1.8308668588175454, |
| "grad_norm": 0.5712890625, |
| "learning_rate": 3.6055983686048267e-06, |
| "loss": 0.2261251449584961, |
| "memory(GiB)": 72.48, |
| "step": 4725, |
| "token_acc": 0.9267950729378777, |
| "train_speed(iter/s)": 0.080969 |
| }, |
| { |
| "epoch": 1.8328045147382954, |
| "grad_norm": 0.6142176389694214, |
| "learning_rate": 3.595344001132154e-06, |
| "loss": 0.22348663806915284, |
| "memory(GiB)": 72.48, |
| "step": 4730, |
| "token_acc": 0.9256462478458405, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 1.8347421706590452, |
| "grad_norm": 0.5970928072929382, |
| "learning_rate": 3.5850960439901882e-06, |
| "loss": 0.23217971324920655, |
| "memory(GiB)": 72.48, |
| "step": 4735, |
| "token_acc": 0.9128682072117826, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 1.836679826579795, |
| "grad_norm": 0.5526993870735168, |
| "learning_rate": 3.5748545439468175e-06, |
| "loss": 0.2441422939300537, |
| "memory(GiB)": 72.48, |
| "step": 4740, |
| "token_acc": 0.9143733759118374, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 1.838617482500545, |
| "grad_norm": 0.5681501626968384, |
| "learning_rate": 3.5646195477404622e-06, |
| "loss": 0.24079837799072265, |
| "memory(GiB)": 72.48, |
| "step": 4745, |
| "token_acc": 0.9231079654808468, |
| "train_speed(iter/s)": 0.080967 |
| }, |
| { |
| "epoch": 1.8405551384212948, |
| "grad_norm": 0.6051904559135437, |
| "learning_rate": 3.5543911020798633e-06, |
| "loss": 0.22717626094818116, |
| "memory(GiB)": 72.48, |
| "step": 4750, |
| "token_acc": 0.9185553304634084, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.8424927943420446, |
| "grad_norm": 0.5435159802436829, |
| "learning_rate": 3.544169253643861e-06, |
| "loss": 0.23873801231384278, |
| "memory(GiB)": 72.48, |
| "step": 4755, |
| "token_acc": 0.9283750440606274, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.8444304502627946, |
| "grad_norm": 0.5636653900146484, |
| "learning_rate": 3.533954049081196e-06, |
| "loss": 0.23597636222839355, |
| "memory(GiB)": 72.48, |
| "step": 4760, |
| "token_acc": 0.916999383257052, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.8463681061835444, |
| "grad_norm": 0.5626488327980042, |
| "learning_rate": 3.5237455350102846e-06, |
| "loss": 0.22850396633148193, |
| "memory(GiB)": 72.48, |
| "step": 4765, |
| "token_acc": 0.9220848468690639, |
| "train_speed(iter/s)": 0.080959 |
| }, |
| { |
| "epoch": 1.8483057621042942, |
| "grad_norm": 0.5658772587776184, |
| "learning_rate": 3.513543758019011e-06, |
| "loss": 0.23132677078247071, |
| "memory(GiB)": 72.48, |
| "step": 4770, |
| "token_acc": 0.915556797367359, |
| "train_speed(iter/s)": 0.08095 |
| }, |
| { |
| "epoch": 1.8502434180250442, |
| "grad_norm": 0.5570623278617859, |
| "learning_rate": 3.5033487646645137e-06, |
| "loss": 0.232391881942749, |
| "memory(GiB)": 72.48, |
| "step": 4775, |
| "token_acc": 0.916139132802396, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.8521810739457942, |
| "grad_norm": 0.5710665583610535, |
| "learning_rate": 3.4931606014729747e-06, |
| "loss": 0.2462557315826416, |
| "memory(GiB)": 72.48, |
| "step": 4780, |
| "token_acc": 0.9168437752566827, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.8541187298665438, |
| "grad_norm": 0.54609614610672, |
| "learning_rate": 3.482979314939404e-06, |
| "loss": 0.23276431560516359, |
| "memory(GiB)": 72.48, |
| "step": 4785, |
| "token_acc": 0.9173787210584344, |
| "train_speed(iter/s)": 0.080957 |
| }, |
| { |
| "epoch": 1.8560563857872938, |
| "grad_norm": 0.5884754657745361, |
| "learning_rate": 3.47280495152743e-06, |
| "loss": 0.23690245151519776, |
| "memory(GiB)": 72.48, |
| "step": 4790, |
| "token_acc": 0.9226331313000357, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.8579940417080438, |
| "grad_norm": 0.5663356781005859, |
| "learning_rate": 3.462637557669084e-06, |
| "loss": 0.22185473442077636, |
| "memory(GiB)": 72.48, |
| "step": 4795, |
| "token_acc": 0.9220468365894683, |
| "train_speed(iter/s)": 0.080968 |
| }, |
| { |
| "epoch": 1.8599316976287934, |
| "grad_norm": 0.6133237481117249, |
| "learning_rate": 3.452477179764595e-06, |
| "loss": 0.23785548210144042, |
| "memory(GiB)": 72.48, |
| "step": 4800, |
| "token_acc": 0.920966865833786, |
| "train_speed(iter/s)": 0.08097 |
| }, |
| { |
| "epoch": 1.8618693535495434, |
| "grad_norm": 0.6006550192832947, |
| "learning_rate": 3.4423238641821703e-06, |
| "loss": 0.2466524600982666, |
| "memory(GiB)": 72.48, |
| "step": 4805, |
| "token_acc": 0.9154687183640413, |
| "train_speed(iter/s)": 0.080969 |
| }, |
| { |
| "epoch": 1.8638070094702934, |
| "grad_norm": 0.5626029372215271, |
| "learning_rate": 3.4321776572577925e-06, |
| "loss": 0.23381493091583253, |
| "memory(GiB)": 72.48, |
| "step": 4810, |
| "token_acc": 0.916163872129071, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.8657446653910432, |
| "grad_norm": 0.5765612721443176, |
| "learning_rate": 3.4220386052949934e-06, |
| "loss": 0.22932858467102052, |
| "memory(GiB)": 72.48, |
| "step": 4815, |
| "token_acc": 0.9275064822817631, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 1.867682321311793, |
| "grad_norm": 0.6077262759208679, |
| "learning_rate": 3.411906754564662e-06, |
| "loss": 0.2330641746520996, |
| "memory(GiB)": 72.48, |
| "step": 4820, |
| "token_acc": 0.9138856860484269, |
| "train_speed(iter/s)": 0.080965 |
| }, |
| { |
| "epoch": 1.869619977232543, |
| "grad_norm": 0.5446533560752869, |
| "learning_rate": 3.4017821513048166e-06, |
| "loss": 0.2277933120727539, |
| "memory(GiB)": 72.48, |
| "step": 4825, |
| "token_acc": 0.9137432806139888, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.8715576331532928, |
| "grad_norm": 0.5742576122283936, |
| "learning_rate": 3.3916648417204057e-06, |
| "loss": 0.22510766983032227, |
| "memory(GiB)": 72.48, |
| "step": 4830, |
| "token_acc": 0.9291481701562445, |
| "train_speed(iter/s)": 0.080958 |
| }, |
| { |
| "epoch": 1.8734952890740426, |
| "grad_norm": 0.5845666527748108, |
| "learning_rate": 3.38155487198309e-06, |
| "loss": 0.23452870845794677, |
| "memory(GiB)": 72.48, |
| "step": 4835, |
| "token_acc": 0.9201796933493929, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.8754329449947926, |
| "grad_norm": 0.5497782826423645, |
| "learning_rate": 3.371452288231033e-06, |
| "loss": 0.23567650318145753, |
| "memory(GiB)": 72.48, |
| "step": 4840, |
| "token_acc": 0.9179969055436055, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.8773706009155424, |
| "grad_norm": 0.6008865237236023, |
| "learning_rate": 3.3613571365686937e-06, |
| "loss": 0.24633493423461914, |
| "memory(GiB)": 72.48, |
| "step": 4845, |
| "token_acc": 0.9117782152230971, |
| "train_speed(iter/s)": 0.080952 |
| }, |
| { |
| "epoch": 1.8793082568362922, |
| "grad_norm": 0.5858570337295532, |
| "learning_rate": 3.351269463066611e-06, |
| "loss": 0.22995858192443847, |
| "memory(GiB)": 72.48, |
| "step": 4850, |
| "token_acc": 0.9209995343783952, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.8812459127570422, |
| "grad_norm": 0.5978894233703613, |
| "learning_rate": 3.341189313761197e-06, |
| "loss": 0.23932342529296874, |
| "memory(GiB)": 72.48, |
| "step": 4855, |
| "token_acc": 0.9203655352480418, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.883183568677792, |
| "grad_norm": 0.563109278678894, |
| "learning_rate": 3.331116734654529e-06, |
| "loss": 0.22672557830810547, |
| "memory(GiB)": 72.48, |
| "step": 4860, |
| "token_acc": 0.9299508482638338, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.8851212245985418, |
| "grad_norm": 0.6089335083961487, |
| "learning_rate": 3.321051771714132e-06, |
| "loss": 0.24452657699584962, |
| "memory(GiB)": 72.48, |
| "step": 4865, |
| "token_acc": 0.9190753378830456, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.8870588805192918, |
| "grad_norm": 0.5918197631835938, |
| "learning_rate": 3.310994470872779e-06, |
| "loss": 0.22724080085754395, |
| "memory(GiB)": 72.48, |
| "step": 4870, |
| "token_acc": 0.9237190302815025, |
| "train_speed(iter/s)": 0.080954 |
| }, |
| { |
| "epoch": 1.8889965364400416, |
| "grad_norm": 0.5685690641403198, |
| "learning_rate": 3.3009448780282705e-06, |
| "loss": 0.22789173126220702, |
| "memory(GiB)": 72.48, |
| "step": 4875, |
| "token_acc": 0.9148283738540004, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 1.8909341923607914, |
| "grad_norm": 0.5765368342399597, |
| "learning_rate": 3.290903039043234e-06, |
| "loss": 0.23500416278839112, |
| "memory(GiB)": 72.48, |
| "step": 4880, |
| "token_acc": 0.9254320820955981, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.8928718482815414, |
| "grad_norm": 0.5873888731002808, |
| "learning_rate": 3.2808689997449097e-06, |
| "loss": 0.22656865119934083, |
| "memory(GiB)": 72.48, |
| "step": 4885, |
| "token_acc": 0.9189216209010931, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.8948095042022914, |
| "grad_norm": 0.6020394563674927, |
| "learning_rate": 3.2708428059249437e-06, |
| "loss": 0.2382340431213379, |
| "memory(GiB)": 72.48, |
| "step": 4890, |
| "token_acc": 0.9176993854615828, |
| "train_speed(iter/s)": 0.080958 |
| }, |
| { |
| "epoch": 1.896747160123041, |
| "grad_norm": 0.5413195490837097, |
| "learning_rate": 3.2608245033391785e-06, |
| "loss": 0.2260499954223633, |
| "memory(GiB)": 72.48, |
| "step": 4895, |
| "token_acc": 0.9201593520561024, |
| "train_speed(iter/s)": 0.080955 |
| }, |
| { |
| "epoch": 1.898684816043791, |
| "grad_norm": 0.5700716972351074, |
| "learning_rate": 3.250814137707444e-06, |
| "loss": 0.23337714672088622, |
| "memory(GiB)": 72.48, |
| "step": 4900, |
| "token_acc": 0.9231728610626091, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.900622471964541, |
| "grad_norm": 0.5829481482505798, |
| "learning_rate": 3.2408117547133483e-06, |
| "loss": 0.23229174613952636, |
| "memory(GiB)": 72.48, |
| "step": 4905, |
| "token_acc": 0.9205786313682806, |
| "train_speed(iter/s)": 0.080962 |
| }, |
| { |
| "epoch": 1.9025601278852906, |
| "grad_norm": 0.5741070508956909, |
| "learning_rate": 3.2308174000040684e-06, |
| "loss": 0.22675461769104005, |
| "memory(GiB)": 72.48, |
| "step": 4910, |
| "token_acc": 0.9256168249825498, |
| "train_speed(iter/s)": 0.080961 |
| }, |
| { |
| "epoch": 1.9044977838060406, |
| "grad_norm": 0.5426390767097473, |
| "learning_rate": 3.2208311191901454e-06, |
| "loss": 0.24353694915771484, |
| "memory(GiB)": 72.48, |
| "step": 4915, |
| "token_acc": 0.932967065778866, |
| "train_speed(iter/s)": 0.080959 |
| }, |
| { |
| "epoch": 1.9064354397267906, |
| "grad_norm": 0.5824358463287354, |
| "learning_rate": 3.210852957845274e-06, |
| "loss": 0.2382187843322754, |
| "memory(GiB)": 72.48, |
| "step": 4920, |
| "token_acc": 0.9137491742946117, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.9083730956475404, |
| "grad_norm": 0.5535913109779358, |
| "learning_rate": 3.200882961506092e-06, |
| "loss": 0.22503528594970704, |
| "memory(GiB)": 72.48, |
| "step": 4925, |
| "token_acc": 0.9320696191625022, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.9103107515682902, |
| "grad_norm": 0.5807521939277649, |
| "learning_rate": 3.1909211756719793e-06, |
| "loss": 0.23938980102539062, |
| "memory(GiB)": 72.48, |
| "step": 4930, |
| "token_acc": 0.9238847673186241, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 1.9122484074890402, |
| "grad_norm": 0.5811308026313782, |
| "learning_rate": 3.1809676458048435e-06, |
| "loss": 0.24059734344482422, |
| "memory(GiB)": 72.48, |
| "step": 4935, |
| "token_acc": 0.9185429646536303, |
| "train_speed(iter/s)": 0.080934 |
| }, |
| { |
| "epoch": 1.91418606340979, |
| "grad_norm": 0.5475001931190491, |
| "learning_rate": 3.171022417328913e-06, |
| "loss": 0.22782738208770753, |
| "memory(GiB)": 72.48, |
| "step": 4940, |
| "token_acc": 0.9167722277380259, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 1.9161237193305398, |
| "grad_norm": 0.5681920051574707, |
| "learning_rate": 3.1610855356305354e-06, |
| "loss": 0.23460836410522462, |
| "memory(GiB)": 72.48, |
| "step": 4945, |
| "token_acc": 0.926126769710352, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 1.9180613752512898, |
| "grad_norm": 0.65660160779953, |
| "learning_rate": 3.151157046057965e-06, |
| "loss": 0.23111975193023682, |
| "memory(GiB)": 72.48, |
| "step": 4950, |
| "token_acc": 0.934885964307862, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.9199990311720396, |
| "grad_norm": 0.5645193457603455, |
| "learning_rate": 3.141236993921158e-06, |
| "loss": 0.23473634719848632, |
| "memory(GiB)": 72.48, |
| "step": 4955, |
| "token_acc": 0.9231619679380874, |
| "train_speed(iter/s)": 0.080929 |
| }, |
| { |
| "epoch": 1.9219366870927894, |
| "grad_norm": 0.6071648001670837, |
| "learning_rate": 3.1313254244915653e-06, |
| "loss": 0.22611503601074218, |
| "memory(GiB)": 72.48, |
| "step": 4960, |
| "token_acc": 0.9172273912623548, |
| "train_speed(iter/s)": 0.080931 |
| }, |
| { |
| "epoch": 1.9238743430135394, |
| "grad_norm": 0.59253990650177, |
| "learning_rate": 3.121422383001927e-06, |
| "loss": 0.22836060523986818, |
| "memory(GiB)": 72.48, |
| "step": 4965, |
| "token_acc": 0.924332395735667, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 1.9258119989342892, |
| "grad_norm": 0.5572670102119446, |
| "learning_rate": 3.111527914646061e-06, |
| "loss": 0.2278214931488037, |
| "memory(GiB)": 72.48, |
| "step": 4970, |
| "token_acc": 0.9200324466826647, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 1.927749654855039, |
| "grad_norm": 0.5666624903678894, |
| "learning_rate": 3.101642064578664e-06, |
| "loss": 0.21897401809692382, |
| "memory(GiB)": 72.48, |
| "step": 4975, |
| "token_acc": 0.9254069046055032, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 1.929687310775789, |
| "grad_norm": 0.5414124131202698, |
| "learning_rate": 3.091764877915101e-06, |
| "loss": 0.21500952243804933, |
| "memory(GiB)": 72.48, |
| "step": 4980, |
| "token_acc": 0.9259556000667668, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 1.9316249666965388, |
| "grad_norm": 0.6081916689872742, |
| "learning_rate": 3.081896399731202e-06, |
| "loss": 0.23827474117279052, |
| "memory(GiB)": 72.48, |
| "step": 4985, |
| "token_acc": 0.9237981946679729, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.9335626226172886, |
| "grad_norm": 0.5481286644935608, |
| "learning_rate": 3.0720366750630524e-06, |
| "loss": 0.2238980531692505, |
| "memory(GiB)": 72.48, |
| "step": 4990, |
| "token_acc": 0.9258295380611581, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.9355002785380386, |
| "grad_norm": 0.5673938393592834, |
| "learning_rate": 3.0621857489067908e-06, |
| "loss": 0.23099775314331056, |
| "memory(GiB)": 72.48, |
| "step": 4995, |
| "token_acc": 0.9236520674207999, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 1.9374379344587886, |
| "grad_norm": 0.5904103517532349, |
| "learning_rate": 3.0523436662184013e-06, |
| "loss": 0.2294713020324707, |
| "memory(GiB)": 72.48, |
| "step": 5000, |
| "token_acc": 0.9182310696736802, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 1.9393755903795382, |
| "grad_norm": 0.5377696752548218, |
| "learning_rate": 3.0425104719135124e-06, |
| "loss": 0.23087844848632813, |
| "memory(GiB)": 72.48, |
| "step": 5005, |
| "token_acc": 0.9300810424280948, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 1.9413132463002882, |
| "grad_norm": 0.6162227392196655, |
| "learning_rate": 3.0326862108671863e-06, |
| "loss": 0.23395605087280275, |
| "memory(GiB)": 72.48, |
| "step": 5010, |
| "token_acc": 0.9195617918511293, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.9432509022210382, |
| "grad_norm": 0.5727110505104065, |
| "learning_rate": 3.022870927913719e-06, |
| "loss": 0.22884960174560548, |
| "memory(GiB)": 72.48, |
| "step": 5015, |
| "token_acc": 0.9202504168370718, |
| "train_speed(iter/s)": 0.080936 |
| }, |
| { |
| "epoch": 1.9451885581417878, |
| "grad_norm": 0.5721259117126465, |
| "learning_rate": 3.0130646678464344e-06, |
| "loss": 0.23694920539855957, |
| "memory(GiB)": 72.48, |
| "step": 5020, |
| "token_acc": 0.9184976587314582, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 1.9471262140625378, |
| "grad_norm": 0.5581057071685791, |
| "learning_rate": 3.0032674754174775e-06, |
| "loss": 0.23090701103210448, |
| "memory(GiB)": 72.48, |
| "step": 5025, |
| "token_acc": 0.9220899962811454, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 1.9490638699832878, |
| "grad_norm": 0.5250717401504517, |
| "learning_rate": 2.9934793953376135e-06, |
| "loss": 0.22847251892089843, |
| "memory(GiB)": 72.48, |
| "step": 5030, |
| "token_acc": 0.9218730397691632, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 1.9510015259040376, |
| "grad_norm": 0.6175058484077454, |
| "learning_rate": 2.98370047227602e-06, |
| "loss": 0.23140432834625244, |
| "memory(GiB)": 72.48, |
| "step": 5035, |
| "token_acc": 0.9123302326963654, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 1.9529391818247874, |
| "grad_norm": 0.6077125072479248, |
| "learning_rate": 2.973930750860088e-06, |
| "loss": 0.23387997150421141, |
| "memory(GiB)": 72.48, |
| "step": 5040, |
| "token_acc": 0.9297987071708477, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 1.9548768377455374, |
| "grad_norm": 0.5392444133758545, |
| "learning_rate": 2.9641702756752134e-06, |
| "loss": 0.22093567848205567, |
| "memory(GiB)": 72.48, |
| "step": 5045, |
| "token_acc": 0.9346273904775573, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 1.9568144936662872, |
| "grad_norm": 0.6002140045166016, |
| "learning_rate": 2.9544190912645978e-06, |
| "loss": 0.23936209678649903, |
| "memory(GiB)": 72.48, |
| "step": 5050, |
| "token_acc": 0.9177467430255535, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 1.958752149587037, |
| "grad_norm": 0.5801167488098145, |
| "learning_rate": 2.9446772421290427e-06, |
| "loss": 0.2447594165802002, |
| "memory(GiB)": 72.48, |
| "step": 5055, |
| "token_acc": 0.9178470254957507, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 1.960689805507787, |
| "grad_norm": 0.5460479259490967, |
| "learning_rate": 2.9349447727267444e-06, |
| "loss": 0.22198958396911622, |
| "memory(GiB)": 72.48, |
| "step": 5060, |
| "token_acc": 0.9201584184777183, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 1.9626274614285368, |
| "grad_norm": 0.5960065126419067, |
| "learning_rate": 2.9252217274730964e-06, |
| "loss": 0.23214805126190186, |
| "memory(GiB)": 72.48, |
| "step": 5065, |
| "token_acc": 0.918548799182422, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 1.9645651173492866, |
| "grad_norm": 0.6021069884300232, |
| "learning_rate": 2.9155081507404813e-06, |
| "loss": 0.22706222534179688, |
| "memory(GiB)": 72.48, |
| "step": 5070, |
| "token_acc": 0.9270862900936702, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 1.9665027732700366, |
| "grad_norm": 0.6196221113204956, |
| "learning_rate": 2.905804086858074e-06, |
| "loss": 0.23130836486816406, |
| "memory(GiB)": 72.48, |
| "step": 5075, |
| "token_acc": 0.9091763126992115, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 1.9684404291907864, |
| "grad_norm": 0.5536569356918335, |
| "learning_rate": 2.896109580111634e-06, |
| "loss": 0.2225879430770874, |
| "memory(GiB)": 72.48, |
| "step": 5080, |
| "token_acc": 0.9220095988186069, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 1.9703780851115362, |
| "grad_norm": 0.5561604499816895, |
| "learning_rate": 2.8864246747433065e-06, |
| "loss": 0.2250436782836914, |
| "memory(GiB)": 72.48, |
| "step": 5085, |
| "token_acc": 0.9154984677577102, |
| "train_speed(iter/s)": 0.080947 |
| }, |
| { |
| "epoch": 1.9723157410322862, |
| "grad_norm": 0.5968231558799744, |
| "learning_rate": 2.87674941495142e-06, |
| "loss": 0.23562178611755372, |
| "memory(GiB)": 72.48, |
| "step": 5090, |
| "token_acc": 0.9204119545024932, |
| "train_speed(iter/s)": 0.080949 |
| }, |
| { |
| "epoch": 1.974253396953036, |
| "grad_norm": 0.6080553531646729, |
| "learning_rate": 2.8670838448902815e-06, |
| "loss": 0.23601851463317872, |
| "memory(GiB)": 72.48, |
| "step": 5095, |
| "token_acc": 0.9168413922131544, |
| "train_speed(iter/s)": 0.08095 |
| }, |
| { |
| "epoch": 1.9761910528737858, |
| "grad_norm": 0.5545769333839417, |
| "learning_rate": 2.857428008669983e-06, |
| "loss": 0.23675503730773925, |
| "memory(GiB)": 72.48, |
| "step": 5100, |
| "token_acc": 0.9197643979057591, |
| "train_speed(iter/s)": 0.08095 |
| }, |
| { |
| "epoch": 1.9781287087945358, |
| "grad_norm": 0.5824362635612488, |
| "learning_rate": 2.8477819503561876e-06, |
| "loss": 0.2293771266937256, |
| "memory(GiB)": 72.48, |
| "step": 5105, |
| "token_acc": 0.9200192184497117, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.9800663647152856, |
| "grad_norm": 0.5837356448173523, |
| "learning_rate": 2.838145713969941e-06, |
| "loss": 0.22520647048950196, |
| "memory(GiB)": 72.48, |
| "step": 5110, |
| "token_acc": 0.91966621411305, |
| "train_speed(iter/s)": 0.080958 |
| }, |
| { |
| "epoch": 1.9820040206360354, |
| "grad_norm": 0.578384280204773, |
| "learning_rate": 2.8285193434874637e-06, |
| "loss": 0.23027491569519043, |
| "memory(GiB)": 72.48, |
| "step": 5115, |
| "token_acc": 0.9200089985859365, |
| "train_speed(iter/s)": 0.08095 |
| }, |
| { |
| "epoch": 1.9839416765567854, |
| "grad_norm": 0.5708916187286377, |
| "learning_rate": 2.8189028828399546e-06, |
| "loss": 0.23889336585998536, |
| "memory(GiB)": 72.48, |
| "step": 5120, |
| "token_acc": 0.9176338228327807, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 1.9858793324775355, |
| "grad_norm": 0.5448703765869141, |
| "learning_rate": 2.8092963759133806e-06, |
| "loss": 0.2348088264465332, |
| "memory(GiB)": 72.48, |
| "step": 5125, |
| "token_acc": 0.9210328730454086, |
| "train_speed(iter/s)": 0.080956 |
| }, |
| { |
| "epoch": 1.987816988398285, |
| "grad_norm": 0.5802229046821594, |
| "learning_rate": 2.79969986654829e-06, |
| "loss": 0.2369297981262207, |
| "memory(GiB)": 72.48, |
| "step": 5130, |
| "token_acc": 0.9280055353745027, |
| "train_speed(iter/s)": 0.080951 |
| }, |
| { |
| "epoch": 1.989754644319035, |
| "grad_norm": 0.5599392652511597, |
| "learning_rate": 2.7901133985396035e-06, |
| "loss": 0.2221320629119873, |
| "memory(GiB)": 72.48, |
| "step": 5135, |
| "token_acc": 0.9295444199320657, |
| "train_speed(iter/s)": 0.080959 |
| }, |
| { |
| "epoch": 1.991692300239785, |
| "grad_norm": 0.5506635308265686, |
| "learning_rate": 2.7805370156364182e-06, |
| "loss": 0.2278140068054199, |
| "memory(GiB)": 72.48, |
| "step": 5140, |
| "token_acc": 0.9242746872504658, |
| "train_speed(iter/s)": 0.080964 |
| }, |
| { |
| "epoch": 1.9936299561605348, |
| "grad_norm": 0.5609742403030396, |
| "learning_rate": 2.7709707615418046e-06, |
| "loss": 0.21695501804351808, |
| "memory(GiB)": 72.48, |
| "step": 5145, |
| "token_acc": 0.9342338352524358, |
| "train_speed(iter/s)": 0.080959 |
| }, |
| { |
| "epoch": 1.9955676120812846, |
| "grad_norm": 0.6262750625610352, |
| "learning_rate": 2.7614146799126106e-06, |
| "loss": 0.23981974124908448, |
| "memory(GiB)": 72.48, |
| "step": 5150, |
| "token_acc": 0.9218865924748277, |
| "train_speed(iter/s)": 0.080963 |
| }, |
| { |
| "epoch": 1.9975052680020347, |
| "grad_norm": 0.5987786054611206, |
| "learning_rate": 2.7518688143592593e-06, |
| "loss": 0.23971712589263916, |
| "memory(GiB)": 72.48, |
| "step": 5155, |
| "token_acc": 0.9039727507942227, |
| "train_speed(iter/s)": 0.080966 |
| }, |
| { |
| "epoch": 1.9994429239227844, |
| "grad_norm": 0.6018982529640198, |
| "learning_rate": 2.7423332084455543e-06, |
| "loss": 0.23073389530181884, |
| "memory(GiB)": 72.48, |
| "step": 5160, |
| "token_acc": 0.9174361075221379, |
| "train_speed(iter/s)": 0.080969 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.24373722076416016, |
| "eval_runtime": 104.8784, |
| "eval_samples_per_second": 31.789, |
| "eval_steps_per_second": 3.976, |
| "eval_token_acc": 0.9073567439055579, |
| "step": 5162 |
| }, |
| { |
| "epoch": 2.0011625935524497, |
| "grad_norm": 0.5473401546478271, |
| "learning_rate": 2.7328079056884727e-06, |
| "loss": 0.20911026000976562, |
| "memory(GiB)": 72.48, |
| "step": 5165, |
| "token_acc": 0.9121585000026832, |
| "train_speed(iter/s)": 0.080753 |
| }, |
| { |
| "epoch": 2.0031002494731998, |
| "grad_norm": 0.5424655675888062, |
| "learning_rate": 2.723292949557976e-06, |
| "loss": 0.17972030639648437, |
| "memory(GiB)": 72.48, |
| "step": 5170, |
| "token_acc": 0.9384443200895355, |
| "train_speed(iter/s)": 0.080747 |
| }, |
| { |
| "epoch": 2.0050379053939498, |
| "grad_norm": 0.5732024908065796, |
| "learning_rate": 2.7137883834768076e-06, |
| "loss": 0.19022332429885863, |
| "memory(GiB)": 72.48, |
| "step": 5175, |
| "token_acc": 0.9398598319451069, |
| "train_speed(iter/s)": 0.080743 |
| }, |
| { |
| "epoch": 2.0069755613146993, |
| "grad_norm": 0.5672215223312378, |
| "learning_rate": 2.704294250820293e-06, |
| "loss": 0.1836836814880371, |
| "memory(GiB)": 72.48, |
| "step": 5180, |
| "token_acc": 0.945720689172153, |
| "train_speed(iter/s)": 0.080735 |
| }, |
| { |
| "epoch": 2.0089132172354494, |
| "grad_norm": 0.5584644675254822, |
| "learning_rate": 2.6948105949161446e-06, |
| "loss": 0.1804587125778198, |
| "memory(GiB)": 72.48, |
| "step": 5185, |
| "token_acc": 0.9337713186011016, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 2.0108508731561994, |
| "grad_norm": 0.5705211162567139, |
| "learning_rate": 2.68533745904426e-06, |
| "loss": 0.1826365113258362, |
| "memory(GiB)": 72.48, |
| "step": 5190, |
| "token_acc": 0.937906090964376, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 2.012788529076949, |
| "grad_norm": 0.543353796005249, |
| "learning_rate": 2.67587488643653e-06, |
| "loss": 0.18609116077423096, |
| "memory(GiB)": 72.48, |
| "step": 5195, |
| "token_acc": 0.9352631412985211, |
| "train_speed(iter/s)": 0.080738 |
| }, |
| { |
| "epoch": 2.014726184997699, |
| "grad_norm": 0.5423488616943359, |
| "learning_rate": 2.6664229202766377e-06, |
| "loss": 0.18657855987548827, |
| "memory(GiB)": 72.48, |
| "step": 5200, |
| "token_acc": 0.9393792727932724, |
| "train_speed(iter/s)": 0.080735 |
| }, |
| { |
| "epoch": 2.016663840918449, |
| "grad_norm": 0.5571469068527222, |
| "learning_rate": 2.656981603699864e-06, |
| "loss": 0.18082787990570068, |
| "memory(GiB)": 72.48, |
| "step": 5205, |
| "token_acc": 0.9425005176136532, |
| "train_speed(iter/s)": 0.080729 |
| }, |
| { |
| "epoch": 2.018601496839199, |
| "grad_norm": 0.5621320009231567, |
| "learning_rate": 2.647550979792887e-06, |
| "loss": 0.18856477737426758, |
| "memory(GiB)": 72.48, |
| "step": 5210, |
| "token_acc": 0.9337371210375588, |
| "train_speed(iter/s)": 0.080733 |
| }, |
| { |
| "epoch": 2.0205391527599486, |
| "grad_norm": 0.5593150854110718, |
| "learning_rate": 2.6381310915935863e-06, |
| "loss": 0.17925558090209961, |
| "memory(GiB)": 72.48, |
| "step": 5215, |
| "token_acc": 0.9430511481898523, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 2.0224768086806986, |
| "grad_norm": 0.6031186580657959, |
| "learning_rate": 2.6287219820908505e-06, |
| "loss": 0.18746013641357423, |
| "memory(GiB)": 72.48, |
| "step": 5220, |
| "token_acc": 0.9389572271872836, |
| "train_speed(iter/s)": 0.080731 |
| }, |
| { |
| "epoch": 2.0244144646014486, |
| "grad_norm": 0.5469415783882141, |
| "learning_rate": 2.6193236942243793e-06, |
| "loss": 0.18313326835632324, |
| "memory(GiB)": 72.48, |
| "step": 5225, |
| "token_acc": 0.9326484751203852, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 2.026352120522198, |
| "grad_norm": 0.5326108336448669, |
| "learning_rate": 2.60993627088448e-06, |
| "loss": 0.1851900815963745, |
| "memory(GiB)": 72.48, |
| "step": 5230, |
| "token_acc": 0.9346198750085846, |
| "train_speed(iter/s)": 0.080736 |
| }, |
| { |
| "epoch": 2.028289776442948, |
| "grad_norm": 0.5510690808296204, |
| "learning_rate": 2.6005597549118844e-06, |
| "loss": 0.18629932403564453, |
| "memory(GiB)": 72.48, |
| "step": 5235, |
| "token_acc": 0.9409745831309697, |
| "train_speed(iter/s)": 0.080734 |
| }, |
| { |
| "epoch": 2.030227432363698, |
| "grad_norm": 0.5134409666061401, |
| "learning_rate": 2.5911941890975446e-06, |
| "loss": 0.1788806438446045, |
| "memory(GiB)": 72.48, |
| "step": 5240, |
| "token_acc": 0.9349969751966122, |
| "train_speed(iter/s)": 0.080737 |
| }, |
| { |
| "epoch": 2.0321650882844478, |
| "grad_norm": 0.5783552527427673, |
| "learning_rate": 2.5818396161824434e-06, |
| "loss": 0.18259315490722655, |
| "memory(GiB)": 72.48, |
| "step": 5245, |
| "token_acc": 0.9349454931343859, |
| "train_speed(iter/s)": 0.080742 |
| }, |
| { |
| "epoch": 2.0341027442051978, |
| "grad_norm": 0.5441116690635681, |
| "learning_rate": 2.5724960788573887e-06, |
| "loss": 0.1771859884262085, |
| "memory(GiB)": 72.48, |
| "step": 5250, |
| "token_acc": 0.9366488850601372, |
| "train_speed(iter/s)": 0.080746 |
| }, |
| { |
| "epoch": 2.036040400125948, |
| "grad_norm": 0.548576295375824, |
| "learning_rate": 2.563163619762833e-06, |
| "loss": 0.17985576391220093, |
| "memory(GiB)": 72.48, |
| "step": 5255, |
| "token_acc": 0.9376425855513308, |
| "train_speed(iter/s)": 0.080746 |
| }, |
| { |
| "epoch": 2.0379780560466974, |
| "grad_norm": 0.528372585773468, |
| "learning_rate": 2.55384228148867e-06, |
| "loss": 0.1838611841201782, |
| "memory(GiB)": 72.48, |
| "step": 5260, |
| "token_acc": 0.9355038872869292, |
| "train_speed(iter/s)": 0.080748 |
| }, |
| { |
| "epoch": 2.0399157119674474, |
| "grad_norm": 0.5744831562042236, |
| "learning_rate": 2.544532106574041e-06, |
| "loss": 0.18163129091262817, |
| "memory(GiB)": 72.48, |
| "step": 5265, |
| "token_acc": 0.9287501840129545, |
| "train_speed(iter/s)": 0.08075 |
| }, |
| { |
| "epoch": 2.0418533678881974, |
| "grad_norm": 0.5563311576843262, |
| "learning_rate": 2.5352331375071437e-06, |
| "loss": 0.1764971375465393, |
| "memory(GiB)": 72.48, |
| "step": 5270, |
| "token_acc": 0.9388310203072358, |
| "train_speed(iter/s)": 0.080749 |
| }, |
| { |
| "epoch": 2.043791023808947, |
| "grad_norm": 0.5354945659637451, |
| "learning_rate": 2.525945416725034e-06, |
| "loss": 0.17477788925170898, |
| "memory(GiB)": 72.48, |
| "step": 5275, |
| "token_acc": 0.9430880951598594, |
| "train_speed(iter/s)": 0.080752 |
| }, |
| { |
| "epoch": 2.045728679729697, |
| "grad_norm": 0.5532642602920532, |
| "learning_rate": 2.516668986613437e-06, |
| "loss": 0.1873680591583252, |
| "memory(GiB)": 72.48, |
| "step": 5280, |
| "token_acc": 0.9383436207013498, |
| "train_speed(iter/s)": 0.080751 |
| }, |
| { |
| "epoch": 2.047666335650447, |
| "grad_norm": 0.5849664807319641, |
| "learning_rate": 2.507403889506551e-06, |
| "loss": 0.1897585391998291, |
| "memory(GiB)": 72.48, |
| "step": 5285, |
| "token_acc": 0.9374327148375657, |
| "train_speed(iter/s)": 0.080757 |
| }, |
| { |
| "epoch": 2.0496039915711965, |
| "grad_norm": 0.5856564044952393, |
| "learning_rate": 2.4981501676868525e-06, |
| "loss": 0.18512728214263915, |
| "memory(GiB)": 72.48, |
| "step": 5290, |
| "token_acc": 0.9345540861308919, |
| "train_speed(iter/s)": 0.080749 |
| }, |
| { |
| "epoch": 2.0515416474919466, |
| "grad_norm": 0.5460135340690613, |
| "learning_rate": 2.488907863384907e-06, |
| "loss": 0.1801106095314026, |
| "memory(GiB)": 72.48, |
| "step": 5295, |
| "token_acc": 0.9387762374826272, |
| "train_speed(iter/s)": 0.08075 |
| }, |
| { |
| "epoch": 2.0534793034126966, |
| "grad_norm": 0.5635910034179688, |
| "learning_rate": 2.4796770187791746e-06, |
| "loss": 0.1819519281387329, |
| "memory(GiB)": 72.48, |
| "step": 5300, |
| "token_acc": 0.9433834024332223, |
| "train_speed(iter/s)": 0.080754 |
| }, |
| { |
| "epoch": 2.055416959333446, |
| "grad_norm": 0.5485233664512634, |
| "learning_rate": 2.470457675995821e-06, |
| "loss": 0.19221407175064087, |
| "memory(GiB)": 72.48, |
| "step": 5305, |
| "token_acc": 0.9369553014860114, |
| "train_speed(iter/s)": 0.080752 |
| }, |
| { |
| "epoch": 2.057354615254196, |
| "grad_norm": 0.5501575469970703, |
| "learning_rate": 2.461249877108513e-06, |
| "loss": 0.1826251745223999, |
| "memory(GiB)": 72.48, |
| "step": 5310, |
| "token_acc": 0.9387762324379799, |
| "train_speed(iter/s)": 0.08075 |
| }, |
| { |
| "epoch": 2.059292271174946, |
| "grad_norm": 0.5151733756065369, |
| "learning_rate": 2.452053664138244e-06, |
| "loss": 0.17927682399749756, |
| "memory(GiB)": 72.48, |
| "step": 5315, |
| "token_acc": 0.9408301346691822, |
| "train_speed(iter/s)": 0.080751 |
| }, |
| { |
| "epoch": 2.061229927095696, |
| "grad_norm": 0.5707798600196838, |
| "learning_rate": 2.4428690790531303e-06, |
| "loss": 0.17755191326141356, |
| "memory(GiB)": 72.48, |
| "step": 5320, |
| "token_acc": 0.9346226935572856, |
| "train_speed(iter/s)": 0.080755 |
| }, |
| { |
| "epoch": 2.0631675830164458, |
| "grad_norm": 0.5683720111846924, |
| "learning_rate": 2.4336961637682214e-06, |
| "loss": 0.18373007774353028, |
| "memory(GiB)": 72.48, |
| "step": 5325, |
| "token_acc": 0.9251355606087108, |
| "train_speed(iter/s)": 0.080753 |
| }, |
| { |
| "epoch": 2.0651052389371958, |
| "grad_norm": 0.564379096031189, |
| "learning_rate": 2.424534960145314e-06, |
| "loss": 0.18030145168304443, |
| "memory(GiB)": 72.48, |
| "step": 5330, |
| "token_acc": 0.932678254920862, |
| "train_speed(iter/s)": 0.080759 |
| }, |
| { |
| "epoch": 2.067042894857946, |
| "grad_norm": 0.5815133452415466, |
| "learning_rate": 2.415385509992752e-06, |
| "loss": 0.17914021015167236, |
| "memory(GiB)": 72.48, |
| "step": 5335, |
| "token_acc": 0.9349110494532398, |
| "train_speed(iter/s)": 0.080763 |
| }, |
| { |
| "epoch": 2.0689805507786954, |
| "grad_norm": 0.5522861480712891, |
| "learning_rate": 2.4062478550652457e-06, |
| "loss": 0.16629674434661865, |
| "memory(GiB)": 72.48, |
| "step": 5340, |
| "token_acc": 0.9421650160461024, |
| "train_speed(iter/s)": 0.080765 |
| }, |
| { |
| "epoch": 2.0709182066994454, |
| "grad_norm": 0.6132717728614807, |
| "learning_rate": 2.3971220370636754e-06, |
| "loss": 0.1797514796257019, |
| "memory(GiB)": 72.48, |
| "step": 5345, |
| "token_acc": 0.9354761661036612, |
| "train_speed(iter/s)": 0.080771 |
| }, |
| { |
| "epoch": 2.0728558626201954, |
| "grad_norm": 0.5450883507728577, |
| "learning_rate": 2.388008097634897e-06, |
| "loss": 0.17508809566497802, |
| "memory(GiB)": 72.48, |
| "step": 5350, |
| "token_acc": 0.9436914154696878, |
| "train_speed(iter/s)": 0.080771 |
| }, |
| { |
| "epoch": 2.074793518540945, |
| "grad_norm": 0.5589373707771301, |
| "learning_rate": 2.3789060783715625e-06, |
| "loss": 0.185202956199646, |
| "memory(GiB)": 72.48, |
| "step": 5355, |
| "token_acc": 0.9355623297444545, |
| "train_speed(iter/s)": 0.080772 |
| }, |
| { |
| "epoch": 2.076731174461695, |
| "grad_norm": 0.5466156005859375, |
| "learning_rate": 2.3698160208119233e-06, |
| "loss": 0.17637484073638915, |
| "memory(GiB)": 72.48, |
| "step": 5360, |
| "token_acc": 0.9371462897973379, |
| "train_speed(iter/s)": 0.080774 |
| }, |
| { |
| "epoch": 2.078668830382445, |
| "grad_norm": 0.572566568851471, |
| "learning_rate": 2.3607379664396414e-06, |
| "loss": 0.1739407777786255, |
| "memory(GiB)": 72.48, |
| "step": 5365, |
| "token_acc": 0.9440337552742616, |
| "train_speed(iter/s)": 0.080779 |
| }, |
| { |
| "epoch": 2.0806064863031946, |
| "grad_norm": 0.5492961406707764, |
| "learning_rate": 2.3516719566836032e-06, |
| "loss": 0.1829778552055359, |
| "memory(GiB)": 72.48, |
| "step": 5370, |
| "token_acc": 0.9381134293270643, |
| "train_speed(iter/s)": 0.080778 |
| }, |
| { |
| "epoch": 2.0825441422239446, |
| "grad_norm": 0.5619326233863831, |
| "learning_rate": 2.3426180329177217e-06, |
| "loss": 0.18022637367248534, |
| "memory(GiB)": 72.48, |
| "step": 5375, |
| "token_acc": 0.9355506145389414, |
| "train_speed(iter/s)": 0.08078 |
| }, |
| { |
| "epoch": 2.0844817981446946, |
| "grad_norm": 0.5690088868141174, |
| "learning_rate": 2.33357623646076e-06, |
| "loss": 0.17759082317352295, |
| "memory(GiB)": 72.48, |
| "step": 5380, |
| "token_acc": 0.9401349614395886, |
| "train_speed(iter/s)": 0.080782 |
| }, |
| { |
| "epoch": 2.086419454065444, |
| "grad_norm": 0.5527958273887634, |
| "learning_rate": 2.324546608576134e-06, |
| "loss": 0.18613047599792482, |
| "memory(GiB)": 72.48, |
| "step": 5385, |
| "token_acc": 0.9303143995579211, |
| "train_speed(iter/s)": 0.080788 |
| }, |
| { |
| "epoch": 2.088357109986194, |
| "grad_norm": 0.546678900718689, |
| "learning_rate": 2.3155291904717286e-06, |
| "loss": 0.17882769107818602, |
| "memory(GiB)": 72.48, |
| "step": 5390, |
| "token_acc": 0.9307346020053202, |
| "train_speed(iter/s)": 0.080798 |
| }, |
| { |
| "epoch": 2.090294765906944, |
| "grad_norm": 0.5436745882034302, |
| "learning_rate": 2.3065240232997055e-06, |
| "loss": 0.18277444839477539, |
| "memory(GiB)": 72.48, |
| "step": 5395, |
| "token_acc": 0.9328300921187308, |
| "train_speed(iter/s)": 0.080803 |
| }, |
| { |
| "epoch": 2.0922324218276938, |
| "grad_norm": 0.5892859697341919, |
| "learning_rate": 2.2975311481563186e-06, |
| "loss": 0.17380056381225586, |
| "memory(GiB)": 72.48, |
| "step": 5400, |
| "token_acc": 0.9400539580180298, |
| "train_speed(iter/s)": 0.08081 |
| }, |
| { |
| "epoch": 2.0941700777484438, |
| "grad_norm": 0.5891724228858948, |
| "learning_rate": 2.2885506060817274e-06, |
| "loss": 0.18320174217224122, |
| "memory(GiB)": 72.48, |
| "step": 5405, |
| "token_acc": 0.9279184917158636, |
| "train_speed(iter/s)": 0.080811 |
| }, |
| { |
| "epoch": 2.096107733669194, |
| "grad_norm": 0.5693268179893494, |
| "learning_rate": 2.2795824380598033e-06, |
| "loss": 0.18516907691955567, |
| "memory(GiB)": 72.48, |
| "step": 5410, |
| "token_acc": 0.9385831960461285, |
| "train_speed(iter/s)": 0.080815 |
| }, |
| { |
| "epoch": 2.0980453895899434, |
| "grad_norm": 0.5973000526428223, |
| "learning_rate": 2.2706266850179504e-06, |
| "loss": 0.18582874536514282, |
| "memory(GiB)": 72.48, |
| "step": 5415, |
| "token_acc": 0.9332009691748553, |
| "train_speed(iter/s)": 0.080808 |
| }, |
| { |
| "epoch": 2.0999830455106934, |
| "grad_norm": 0.552210807800293, |
| "learning_rate": 2.261683387826915e-06, |
| "loss": 0.17878878116607666, |
| "memory(GiB)": 72.48, |
| "step": 5420, |
| "token_acc": 0.9350939878825539, |
| "train_speed(iter/s)": 0.080809 |
| }, |
| { |
| "epoch": 2.1019207014314434, |
| "grad_norm": 0.5775700807571411, |
| "learning_rate": 2.2527525873006005e-06, |
| "loss": 0.18521888256073, |
| "memory(GiB)": 72.48, |
| "step": 5425, |
| "token_acc": 0.9341401817048771, |
| "train_speed(iter/s)": 0.08081 |
| }, |
| { |
| "epoch": 2.1038583573521934, |
| "grad_norm": 0.5570356845855713, |
| "learning_rate": 2.2438343241958793e-06, |
| "loss": 0.18898725509643555, |
| "memory(GiB)": 72.48, |
| "step": 5430, |
| "token_acc": 0.9432755241652183, |
| "train_speed(iter/s)": 0.08081 |
| }, |
| { |
| "epoch": 2.105796013272943, |
| "grad_norm": 0.5660508871078491, |
| "learning_rate": 2.2349286392124047e-06, |
| "loss": 0.18533942699432374, |
| "memory(GiB)": 72.48, |
| "step": 5435, |
| "token_acc": 0.9363217294547767, |
| "train_speed(iter/s)": 0.080809 |
| }, |
| { |
| "epoch": 2.107733669193693, |
| "grad_norm": 0.5466207265853882, |
| "learning_rate": 2.2260355729924323e-06, |
| "loss": 0.18058595657348633, |
| "memory(GiB)": 72.48, |
| "step": 5440, |
| "token_acc": 0.9440250535684852, |
| "train_speed(iter/s)": 0.080815 |
| }, |
| { |
| "epoch": 2.109671325114443, |
| "grad_norm": 0.555391788482666, |
| "learning_rate": 2.217155166120629e-06, |
| "loss": 0.17833871841430665, |
| "memory(GiB)": 72.48, |
| "step": 5445, |
| "token_acc": 0.9375606285972967, |
| "train_speed(iter/s)": 0.080813 |
| }, |
| { |
| "epoch": 2.1116089810351926, |
| "grad_norm": 0.571415901184082, |
| "learning_rate": 2.2082874591238875e-06, |
| "loss": 0.19260311126708984, |
| "memory(GiB)": 72.48, |
| "step": 5450, |
| "token_acc": 0.9347647782529732, |
| "train_speed(iter/s)": 0.08081 |
| }, |
| { |
| "epoch": 2.1135466369559426, |
| "grad_norm": 0.5604360103607178, |
| "learning_rate": 2.1994324924711458e-06, |
| "loss": 0.1859738349914551, |
| "memory(GiB)": 72.48, |
| "step": 5455, |
| "token_acc": 0.9434542469025228, |
| "train_speed(iter/s)": 0.080806 |
| }, |
| { |
| "epoch": 2.1154842928766926, |
| "grad_norm": 0.5471526384353638, |
| "learning_rate": 2.190590306573198e-06, |
| "loss": 0.1887765884399414, |
| "memory(GiB)": 72.48, |
| "step": 5460, |
| "token_acc": 0.9441244438312696, |
| "train_speed(iter/s)": 0.080807 |
| }, |
| { |
| "epoch": 2.117421948797442, |
| "grad_norm": 0.5706793069839478, |
| "learning_rate": 2.1817609417825124e-06, |
| "loss": 0.18401453495025635, |
| "memory(GiB)": 72.48, |
| "step": 5465, |
| "token_acc": 0.9318755256518082, |
| "train_speed(iter/s)": 0.080808 |
| }, |
| { |
| "epoch": 2.119359604718192, |
| "grad_norm": 0.5502917766571045, |
| "learning_rate": 2.172944438393044e-06, |
| "loss": 0.18263095617294312, |
| "memory(GiB)": 72.48, |
| "step": 5470, |
| "token_acc": 0.9409442551972239, |
| "train_speed(iter/s)": 0.080813 |
| }, |
| { |
| "epoch": 2.121297260638942, |
| "grad_norm": 0.5597527027130127, |
| "learning_rate": 2.164140836640056e-06, |
| "loss": 0.18310918807983398, |
| "memory(GiB)": 72.48, |
| "step": 5475, |
| "token_acc": 0.9404852285855752, |
| "train_speed(iter/s)": 0.080816 |
| }, |
| { |
| "epoch": 2.1232349165596918, |
| "grad_norm": 0.566792368888855, |
| "learning_rate": 2.155350176699932e-06, |
| "loss": 0.1773963212966919, |
| "memory(GiB)": 72.48, |
| "step": 5480, |
| "token_acc": 0.9416049569291219, |
| "train_speed(iter/s)": 0.080815 |
| }, |
| { |
| "epoch": 2.125172572480442, |
| "grad_norm": 0.5454405546188354, |
| "learning_rate": 2.146572498689994e-06, |
| "loss": 0.1833777904510498, |
| "memory(GiB)": 72.48, |
| "step": 5485, |
| "token_acc": 0.9389079172704519, |
| "train_speed(iter/s)": 0.080814 |
| }, |
| { |
| "epoch": 2.127110228401192, |
| "grad_norm": 0.5806962847709656, |
| "learning_rate": 2.137807842668323e-06, |
| "loss": 0.1891713857650757, |
| "memory(GiB)": 72.48, |
| "step": 5490, |
| "token_acc": 0.933165319281249, |
| "train_speed(iter/s)": 0.080814 |
| }, |
| { |
| "epoch": 2.1290478843219414, |
| "grad_norm": 0.5590419769287109, |
| "learning_rate": 2.1290562486335635e-06, |
| "loss": 0.1789409875869751, |
| "memory(GiB)": 72.48, |
| "step": 5495, |
| "token_acc": 0.9396403965720047, |
| "train_speed(iter/s)": 0.080817 |
| }, |
| { |
| "epoch": 2.1309855402426914, |
| "grad_norm": 0.5818547606468201, |
| "learning_rate": 2.120317756524758e-06, |
| "loss": 0.18473920822143555, |
| "memory(GiB)": 72.48, |
| "step": 5500, |
| "token_acc": 0.9352483427802863, |
| "train_speed(iter/s)": 0.080813 |
| }, |
| { |
| "epoch": 2.1329231961634414, |
| "grad_norm": 0.5636138319969177, |
| "learning_rate": 2.111592406221154e-06, |
| "loss": 0.18380820751190186, |
| "memory(GiB)": 72.48, |
| "step": 5505, |
| "token_acc": 0.9377715487035739, |
| "train_speed(iter/s)": 0.080818 |
| }, |
| { |
| "epoch": 2.134860852084191, |
| "grad_norm": 0.5725641846656799, |
| "learning_rate": 2.1028802375420244e-06, |
| "loss": 0.17626445293426513, |
| "memory(GiB)": 72.48, |
| "step": 5510, |
| "token_acc": 0.9363154366506186, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.136798508004941, |
| "grad_norm": 0.5447889566421509, |
| "learning_rate": 2.094181290246487e-06, |
| "loss": 0.17057907581329346, |
| "memory(GiB)": 72.48, |
| "step": 5515, |
| "token_acc": 0.9342668222481525, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.138736163925691, |
| "grad_norm": 0.5622581243515015, |
| "learning_rate": 2.085495604033321e-06, |
| "loss": 0.17494313716888427, |
| "memory(GiB)": 72.48, |
| "step": 5520, |
| "token_acc": 0.9453718261800421, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 2.1406738198464406, |
| "grad_norm": 0.583169162273407, |
| "learning_rate": 2.076823218540789e-06, |
| "loss": 0.1729996919631958, |
| "memory(GiB)": 72.48, |
| "step": 5525, |
| "token_acc": 0.9369956443934226, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.1426114757671906, |
| "grad_norm": 0.5594836473464966, |
| "learning_rate": 2.068164173346449e-06, |
| "loss": 0.18059495687484742, |
| "memory(GiB)": 72.48, |
| "step": 5530, |
| "token_acc": 0.927891023765402, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.1445491316879406, |
| "grad_norm": 0.5576629638671875, |
| "learning_rate": 2.0595185079669837e-06, |
| "loss": 0.18530012369155885, |
| "memory(GiB)": 72.48, |
| "step": 5535, |
| "token_acc": 0.9356248957116636, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.1464867876086906, |
| "grad_norm": 0.5433487892150879, |
| "learning_rate": 2.050886261858015e-06, |
| "loss": 0.18785598278045654, |
| "memory(GiB)": 72.48, |
| "step": 5540, |
| "token_acc": 0.9393884512058921, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.14842444352944, |
| "grad_norm": 0.6088826060295105, |
| "learning_rate": 2.0422674744139216e-06, |
| "loss": 0.18208863735198974, |
| "memory(GiB)": 72.48, |
| "step": 5545, |
| "token_acc": 0.940564921147756, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.15036209945019, |
| "grad_norm": 0.6054071187973022, |
| "learning_rate": 2.033662184967663e-06, |
| "loss": 0.18218772411346434, |
| "memory(GiB)": 72.48, |
| "step": 5550, |
| "token_acc": 0.9345688912649618, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.15229975537094, |
| "grad_norm": 0.5496972799301147, |
| "learning_rate": 2.0250704327906025e-06, |
| "loss": 0.17930705547332765, |
| "memory(GiB)": 72.48, |
| "step": 5555, |
| "token_acc": 0.9460105112279026, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 2.1542374112916898, |
| "grad_norm": 0.576370120048523, |
| "learning_rate": 2.016492257092316e-06, |
| "loss": 0.1762238025665283, |
| "memory(GiB)": 72.48, |
| "step": 5560, |
| "token_acc": 0.9480714235893144, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.15617506721244, |
| "grad_norm": 0.5276342034339905, |
| "learning_rate": 2.0079276970204278e-06, |
| "loss": 0.17920913696289062, |
| "memory(GiB)": 72.48, |
| "step": 5565, |
| "token_acc": 0.9295376792944352, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.15811272313319, |
| "grad_norm": 0.5416987538337708, |
| "learning_rate": 1.9993767916604244e-06, |
| "loss": 0.17366034984588624, |
| "memory(GiB)": 72.48, |
| "step": 5570, |
| "token_acc": 0.9397382920110193, |
| "train_speed(iter/s)": 0.080829 |
| }, |
| { |
| "epoch": 2.1600503790539394, |
| "grad_norm": 0.5710574984550476, |
| "learning_rate": 1.9908395800354768e-06, |
| "loss": 0.17503013610839843, |
| "memory(GiB)": 72.48, |
| "step": 5575, |
| "token_acc": 0.947594501718213, |
| "train_speed(iter/s)": 0.080832 |
| }, |
| { |
| "epoch": 2.1619880349746894, |
| "grad_norm": 0.5497194528579712, |
| "learning_rate": 1.982316101106263e-06, |
| "loss": 0.17725342512130737, |
| "memory(GiB)": 72.48, |
| "step": 5580, |
| "token_acc": 0.9366713820442807, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.1639256908954394, |
| "grad_norm": 0.5640018582344055, |
| "learning_rate": 1.97380639377079e-06, |
| "loss": 0.1781161069869995, |
| "memory(GiB)": 72.48, |
| "step": 5585, |
| "token_acc": 0.9356641580134006, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.165863346816189, |
| "grad_norm": 0.5462841391563416, |
| "learning_rate": 1.965310496864217e-06, |
| "loss": 0.17803661823272704, |
| "memory(GiB)": 72.48, |
| "step": 5590, |
| "token_acc": 0.9411239374193762, |
| "train_speed(iter/s)": 0.080837 |
| }, |
| { |
| "epoch": 2.167801002736939, |
| "grad_norm": 0.5980976819992065, |
| "learning_rate": 1.956828449158675e-06, |
| "loss": 0.18186450004577637, |
| "memory(GiB)": 72.48, |
| "step": 5595, |
| "token_acc": 0.9349667665841261, |
| "train_speed(iter/s)": 0.08084 |
| }, |
| { |
| "epoch": 2.169738658657689, |
| "grad_norm": 0.5738852620124817, |
| "learning_rate": 1.948360289363094e-06, |
| "loss": 0.17838164567947387, |
| "memory(GiB)": 72.48, |
| "step": 5600, |
| "token_acc": 0.9373933206660011, |
| "train_speed(iter/s)": 0.080837 |
| }, |
| { |
| "epoch": 2.1716763145784386, |
| "grad_norm": 0.5519325137138367, |
| "learning_rate": 1.939906056123025e-06, |
| "loss": 0.18007891178131102, |
| "memory(GiB)": 72.48, |
| "step": 5605, |
| "token_acc": 0.9323807416305757, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 2.1736139704991886, |
| "grad_norm": 0.5649546384811401, |
| "learning_rate": 1.931465788020464e-06, |
| "loss": 0.17921049594879152, |
| "memory(GiB)": 72.48, |
| "step": 5610, |
| "token_acc": 0.9420893516963386, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 2.1755516264199386, |
| "grad_norm": 0.5733870267868042, |
| "learning_rate": 1.923039523573676e-06, |
| "loss": 0.17436559200286866, |
| "memory(GiB)": 72.48, |
| "step": 5615, |
| "token_acc": 0.9356518043701526, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 2.177489282340688, |
| "grad_norm": 0.5561407208442688, |
| "learning_rate": 1.914627301237014e-06, |
| "loss": 0.1824689269065857, |
| "memory(GiB)": 72.48, |
| "step": 5620, |
| "token_acc": 0.9439788895624577, |
| "train_speed(iter/s)": 0.080824 |
| }, |
| { |
| "epoch": 2.179426938261438, |
| "grad_norm": 0.552563488483429, |
| "learning_rate": 1.9062291594007508e-06, |
| "loss": 0.17798593044281005, |
| "memory(GiB)": 72.48, |
| "step": 5625, |
| "token_acc": 0.9391477839532564, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.181364594182188, |
| "grad_norm": 0.5480149388313293, |
| "learning_rate": 1.8978451363909028e-06, |
| "loss": 0.17788171768188477, |
| "memory(GiB)": 72.48, |
| "step": 5630, |
| "token_acc": 0.9355552744750822, |
| "train_speed(iter/s)": 0.08083 |
| }, |
| { |
| "epoch": 2.1833022501029378, |
| "grad_norm": 0.5488108992576599, |
| "learning_rate": 1.889475270469051e-06, |
| "loss": 0.18669912815093995, |
| "memory(GiB)": 72.48, |
| "step": 5635, |
| "token_acc": 0.9317667314989, |
| "train_speed(iter/s)": 0.080832 |
| }, |
| { |
| "epoch": 2.185239906023688, |
| "grad_norm": 0.5238755941390991, |
| "learning_rate": 1.8811195998321696e-06, |
| "loss": 0.17910051345825195, |
| "memory(GiB)": 72.48, |
| "step": 5640, |
| "token_acc": 0.9435528162567889, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.187177561944438, |
| "grad_norm": 0.5725862383842468, |
| "learning_rate": 1.8727781626124502e-06, |
| "loss": 0.1775214672088623, |
| "memory(GiB)": 72.48, |
| "step": 5645, |
| "token_acc": 0.9405904786954234, |
| "train_speed(iter/s)": 0.080837 |
| }, |
| { |
| "epoch": 2.1891152178651874, |
| "grad_norm": 0.5793642401695251, |
| "learning_rate": 1.8644509968771302e-06, |
| "loss": 0.1762074589729309, |
| "memory(GiB)": 72.48, |
| "step": 5650, |
| "token_acc": 0.9406073468368653, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.1910528737859374, |
| "grad_norm": 0.5786473155021667, |
| "learning_rate": 1.8561381406283125e-06, |
| "loss": 0.18379080295562744, |
| "memory(GiB)": 72.48, |
| "step": 5655, |
| "token_acc": 0.939537311971808, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.1929905297066874, |
| "grad_norm": 0.5768445134162903, |
| "learning_rate": 1.847839631802802e-06, |
| "loss": 0.1796635150909424, |
| "memory(GiB)": 72.48, |
| "step": 5660, |
| "token_acc": 0.9358396971477058, |
| "train_speed(iter/s)": 0.080837 |
| }, |
| { |
| "epoch": 2.1949281856274374, |
| "grad_norm": 0.5747734308242798, |
| "learning_rate": 1.8395555082719242e-06, |
| "loss": 0.1772952437400818, |
| "memory(GiB)": 72.48, |
| "step": 5665, |
| "token_acc": 0.9418783422459893, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.196865841548187, |
| "grad_norm": 0.5827616453170776, |
| "learning_rate": 1.831285807841357e-06, |
| "loss": 0.1837351679801941, |
| "memory(GiB)": 72.48, |
| "step": 5670, |
| "token_acc": 0.9460246982100735, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.198803497468937, |
| "grad_norm": 0.5412510633468628, |
| "learning_rate": 1.823030568250958e-06, |
| "loss": 0.1785785436630249, |
| "memory(GiB)": 72.48, |
| "step": 5675, |
| "token_acc": 0.9367165906023012, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.200741153389687, |
| "grad_norm": 0.577472448348999, |
| "learning_rate": 1.8147898271745856e-06, |
| "loss": 0.17977702617645264, |
| "memory(GiB)": 72.48, |
| "step": 5680, |
| "token_acc": 0.9380172565592534, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.2026788093104366, |
| "grad_norm": 0.599717378616333, |
| "learning_rate": 1.8065636222199363e-06, |
| "loss": 0.18185811042785643, |
| "memory(GiB)": 72.48, |
| "step": 5685, |
| "token_acc": 0.9329965688143347, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.2046164652311866, |
| "grad_norm": 0.5418193936347961, |
| "learning_rate": 1.7983519909283698e-06, |
| "loss": 0.1911879301071167, |
| "memory(GiB)": 72.48, |
| "step": 5690, |
| "token_acc": 0.9374051225735602, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.2065541211519366, |
| "grad_norm": 0.5883736610412598, |
| "learning_rate": 1.7901549707747346e-06, |
| "loss": 0.17937839031219482, |
| "memory(GiB)": 72.48, |
| "step": 5695, |
| "token_acc": 0.9443733016834807, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.208491777072686, |
| "grad_norm": 0.5832462310791016, |
| "learning_rate": 1.7819725991672004e-06, |
| "loss": 0.1827946901321411, |
| "memory(GiB)": 72.48, |
| "step": 5700, |
| "token_acc": 0.9389562126353207, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.210429432993436, |
| "grad_norm": 0.5429852604866028, |
| "learning_rate": 1.7738049134470875e-06, |
| "loss": 0.1846563220024109, |
| "memory(GiB)": 72.48, |
| "step": 5705, |
| "token_acc": 0.9425343639606869, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.212367088914186, |
| "grad_norm": 0.5365347266197205, |
| "learning_rate": 1.7656519508886943e-06, |
| "loss": 0.17892229557037354, |
| "memory(GiB)": 72.48, |
| "step": 5710, |
| "token_acc": 0.9343960349619905, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.214304744834936, |
| "grad_norm": 0.5458106398582458, |
| "learning_rate": 1.7575137486991255e-06, |
| "loss": 0.18492650985717773, |
| "memory(GiB)": 72.48, |
| "step": 5715, |
| "token_acc": 0.9375423428073685, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.216242400755686, |
| "grad_norm": 0.5663112998008728, |
| "learning_rate": 1.7493903440181293e-06, |
| "loss": 0.1814013957977295, |
| "memory(GiB)": 72.48, |
| "step": 5720, |
| "token_acc": 0.9399794251912814, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.218180056676436, |
| "grad_norm": 0.5446045994758606, |
| "learning_rate": 1.7412817739179211e-06, |
| "loss": 0.17452262639999389, |
| "memory(GiB)": 72.48, |
| "step": 5725, |
| "token_acc": 0.936608194789894, |
| "train_speed(iter/s)": 0.080845 |
| }, |
| { |
| "epoch": 2.2201177125971854, |
| "grad_norm": 0.5571759343147278, |
| "learning_rate": 1.7331880754030172e-06, |
| "loss": 0.1752955675125122, |
| "memory(GiB)": 72.48, |
| "step": 5730, |
| "token_acc": 0.9398055752269362, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 2.2220553685179354, |
| "grad_norm": 0.605722188949585, |
| "learning_rate": 1.725109285410066e-06, |
| "loss": 0.18015103340148925, |
| "memory(GiB)": 72.48, |
| "step": 5735, |
| "token_acc": 0.9428771929824561, |
| "train_speed(iter/s)": 0.08084 |
| }, |
| { |
| "epoch": 2.2239930244386854, |
| "grad_norm": 0.5884843468666077, |
| "learning_rate": 1.7170454408076797e-06, |
| "loss": 0.18069713115692138, |
| "memory(GiB)": 72.48, |
| "step": 5740, |
| "token_acc": 0.9442363301787592, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.225930680359435, |
| "grad_norm": 0.5465793609619141, |
| "learning_rate": 1.7089965783962608e-06, |
| "loss": 0.17759050130844117, |
| "memory(GiB)": 72.48, |
| "step": 5745, |
| "token_acc": 0.942534179533327, |
| "train_speed(iter/s)": 0.080837 |
| }, |
| { |
| "epoch": 2.227868336280185, |
| "grad_norm": 0.5535234212875366, |
| "learning_rate": 1.7009627349078434e-06, |
| "loss": 0.18377819061279296, |
| "memory(GiB)": 72.48, |
| "step": 5750, |
| "token_acc": 0.9349665677107899, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.229805992200935, |
| "grad_norm": 0.5973391532897949, |
| "learning_rate": 1.6929439470059195e-06, |
| "loss": 0.18841125965118408, |
| "memory(GiB)": 72.48, |
| "step": 5755, |
| "token_acc": 0.9280665540751861, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.231743648121685, |
| "grad_norm": 0.5607689619064331, |
| "learning_rate": 1.6849402512852724e-06, |
| "loss": 0.1824529767036438, |
| "memory(GiB)": 72.48, |
| "step": 5760, |
| "token_acc": 0.9380976346250629, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.2336813040424346, |
| "grad_norm": 0.5884976983070374, |
| "learning_rate": 1.6769516842718115e-06, |
| "loss": 0.1874626874923706, |
| "memory(GiB)": 72.48, |
| "step": 5765, |
| "token_acc": 0.9303225806451613, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.2356189599631846, |
| "grad_norm": 0.5460970401763916, |
| "learning_rate": 1.6689782824224037e-06, |
| "loss": 0.18645325899124146, |
| "memory(GiB)": 72.48, |
| "step": 5770, |
| "token_acc": 0.9363728328740364, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.2375566158839346, |
| "grad_norm": 0.577038586139679, |
| "learning_rate": 1.6610200821247051e-06, |
| "loss": 0.19113829135894775, |
| "memory(GiB)": 72.48, |
| "step": 5775, |
| "token_acc": 0.9348496530454896, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.239494271804684, |
| "grad_norm": 0.5635644197463989, |
| "learning_rate": 1.6530771196970014e-06, |
| "loss": 0.17582175731658936, |
| "memory(GiB)": 72.48, |
| "step": 5780, |
| "token_acc": 0.9394200897480152, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.241431927725434, |
| "grad_norm": 0.5600139498710632, |
| "learning_rate": 1.6451494313880362e-06, |
| "loss": 0.18222497701644896, |
| "memory(GiB)": 72.48, |
| "step": 5785, |
| "token_acc": 0.9339602988006309, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.243369583646184, |
| "grad_norm": 0.5830628275871277, |
| "learning_rate": 1.637237053376849e-06, |
| "loss": 0.18120429515838624, |
| "memory(GiB)": 72.48, |
| "step": 5790, |
| "token_acc": 0.9346733668341709, |
| "train_speed(iter/s)": 0.08084 |
| }, |
| { |
| "epoch": 2.245307239566934, |
| "grad_norm": 0.5865716934204102, |
| "learning_rate": 1.6293400217726074e-06, |
| "loss": 0.17290226221084595, |
| "memory(GiB)": 72.48, |
| "step": 5795, |
| "token_acc": 0.9306092690039942, |
| "train_speed(iter/s)": 0.080847 |
| }, |
| { |
| "epoch": 2.247244895487684, |
| "grad_norm": 0.558928370475769, |
| "learning_rate": 1.6214583726144462e-06, |
| "loss": 0.18007926940917968, |
| "memory(GiB)": 72.48, |
| "step": 5800, |
| "token_acc": 0.938637446403049, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.249182551408434, |
| "grad_norm": 0.5890582799911499, |
| "learning_rate": 1.6135921418712959e-06, |
| "loss": 0.18051011562347413, |
| "memory(GiB)": 72.48, |
| "step": 5805, |
| "token_acc": 0.9370391190767501, |
| "train_speed(iter/s)": 0.080848 |
| }, |
| { |
| "epoch": 2.2511202073291834, |
| "grad_norm": 0.5818348526954651, |
| "learning_rate": 1.605741365441726e-06, |
| "loss": 0.17387816905975342, |
| "memory(GiB)": 72.48, |
| "step": 5810, |
| "token_acc": 0.9401293419561304, |
| "train_speed(iter/s)": 0.080842 |
| }, |
| { |
| "epoch": 2.2530578632499334, |
| "grad_norm": 0.5488393902778625, |
| "learning_rate": 1.597906079153778e-06, |
| "loss": 0.1793942928314209, |
| "memory(GiB)": 72.48, |
| "step": 5815, |
| "token_acc": 0.9430531840890407, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.2549955191706834, |
| "grad_norm": 0.5455806851387024, |
| "learning_rate": 1.590086318764803e-06, |
| "loss": 0.1723968505859375, |
| "memory(GiB)": 72.48, |
| "step": 5820, |
| "token_acc": 0.9418294573643411, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.256933175091433, |
| "grad_norm": 0.5931349396705627, |
| "learning_rate": 1.582282119961296e-06, |
| "loss": 0.1813875675201416, |
| "memory(GiB)": 72.48, |
| "step": 5825, |
| "token_acc": 0.9381025025154988, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.258870831012183, |
| "grad_norm": 0.6027455925941467, |
| "learning_rate": 1.5744935183587362e-06, |
| "loss": 0.17939608097076415, |
| "memory(GiB)": 72.48, |
| "step": 5830, |
| "token_acc": 0.9448940650085538, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.260808486932933, |
| "grad_norm": 0.5705295205116272, |
| "learning_rate": 1.5667205495014203e-06, |
| "loss": 0.1888546347618103, |
| "memory(GiB)": 72.48, |
| "step": 5835, |
| "token_acc": 0.927797833935018, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.2627461428536826, |
| "grad_norm": 0.545686662197113, |
| "learning_rate": 1.5589632488623053e-06, |
| "loss": 0.1704582691192627, |
| "memory(GiB)": 72.48, |
| "step": 5840, |
| "token_acc": 0.9393606027269393, |
| "train_speed(iter/s)": 0.080833 |
| }, |
| { |
| "epoch": 2.2646837987744326, |
| "grad_norm": 0.5886187553405762, |
| "learning_rate": 1.5512216518428435e-06, |
| "loss": 0.1811639666557312, |
| "memory(GiB)": 72.48, |
| "step": 5845, |
| "token_acc": 0.9381139489194499, |
| "train_speed(iter/s)": 0.080833 |
| }, |
| { |
| "epoch": 2.2666214546951826, |
| "grad_norm": 0.5708662271499634, |
| "learning_rate": 1.5434957937728223e-06, |
| "loss": 0.18458144664764403, |
| "memory(GiB)": 72.48, |
| "step": 5850, |
| "token_acc": 0.9392434210526316, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 2.268559110615932, |
| "grad_norm": 0.5548220872879028, |
| "learning_rate": 1.535785709910202e-06, |
| "loss": 0.1823875665664673, |
| "memory(GiB)": 72.48, |
| "step": 5855, |
| "token_acc": 0.9409633096783935, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 2.270496766536682, |
| "grad_norm": 0.5868509411811829, |
| "learning_rate": 1.528091435440956e-06, |
| "loss": 0.1741629958152771, |
| "memory(GiB)": 72.48, |
| "step": 5860, |
| "token_acc": 0.937952826274297, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.272434422457432, |
| "grad_norm": 0.570006251335144, |
| "learning_rate": 1.5204130054789056e-06, |
| "loss": 0.17339808940887452, |
| "memory(GiB)": 72.48, |
| "step": 5865, |
| "token_acc": 0.9392643559899685, |
| "train_speed(iter/s)": 0.080823 |
| }, |
| { |
| "epoch": 2.274372078378182, |
| "grad_norm": 0.5686207413673401, |
| "learning_rate": 1.5127504550655687e-06, |
| "loss": 0.17772071361541747, |
| "memory(GiB)": 72.48, |
| "step": 5870, |
| "token_acc": 0.9382895076050528, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.276309734298932, |
| "grad_norm": 0.5607608556747437, |
| "learning_rate": 1.5051038191699919e-06, |
| "loss": 0.1857010006904602, |
| "memory(GiB)": 72.48, |
| "step": 5875, |
| "token_acc": 0.9359289971495206, |
| "train_speed(iter/s)": 0.080832 |
| }, |
| { |
| "epoch": 2.278247390219682, |
| "grad_norm": 0.5881747603416443, |
| "learning_rate": 1.497473132688595e-06, |
| "loss": 0.18277556896209718, |
| "memory(GiB)": 72.48, |
| "step": 5880, |
| "token_acc": 0.937333792197228, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 2.2801850461404314, |
| "grad_norm": 0.5419440865516663, |
| "learning_rate": 1.4898584304450102e-06, |
| "loss": 0.16934518814086913, |
| "memory(GiB)": 72.48, |
| "step": 5885, |
| "token_acc": 0.9370211175481219, |
| "train_speed(iter/s)": 0.080831 |
| }, |
| { |
| "epoch": 2.2821227020611814, |
| "grad_norm": 0.5387718081474304, |
| "learning_rate": 1.4822597471899257e-06, |
| "loss": 0.1737461805343628, |
| "memory(GiB)": 72.48, |
| "step": 5890, |
| "token_acc": 0.9424770290964778, |
| "train_speed(iter/s)": 0.080829 |
| }, |
| { |
| "epoch": 2.2840603579819314, |
| "grad_norm": 0.5882220268249512, |
| "learning_rate": 1.4746771176009184e-06, |
| "loss": 0.18355174064636232, |
| "memory(GiB)": 72.48, |
| "step": 5895, |
| "token_acc": 0.9305537914390957, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 2.2859980139026814, |
| "grad_norm": 0.5630345344543457, |
| "learning_rate": 1.4671105762823097e-06, |
| "loss": 0.17446522712707518, |
| "memory(GiB)": 72.48, |
| "step": 5900, |
| "token_acc": 0.9483853713980076, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.287935669823431, |
| "grad_norm": 0.5718346238136292, |
| "learning_rate": 1.4595601577649954e-06, |
| "loss": 0.1837272524833679, |
| "memory(GiB)": 72.48, |
| "step": 5905, |
| "token_acc": 0.9470140331004971, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 2.289873325744181, |
| "grad_norm": 0.5395617485046387, |
| "learning_rate": 1.4520258965062955e-06, |
| "loss": 0.19387768507003783, |
| "memory(GiB)": 72.48, |
| "step": 5910, |
| "token_acc": 0.9369055466616443, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.291810981664931, |
| "grad_norm": 0.5457859039306641, |
| "learning_rate": 1.4445078268897928e-06, |
| "loss": 0.1853898286819458, |
| "memory(GiB)": 72.48, |
| "step": 5915, |
| "token_acc": 0.9348705882352941, |
| "train_speed(iter/s)": 0.080823 |
| }, |
| { |
| "epoch": 2.2937486375856806, |
| "grad_norm": 0.5322543382644653, |
| "learning_rate": 1.4370059832251771e-06, |
| "loss": 0.17845585346221923, |
| "memory(GiB)": 72.48, |
| "step": 5920, |
| "token_acc": 0.9355810524966348, |
| "train_speed(iter/s)": 0.08082 |
| }, |
| { |
| "epoch": 2.2956862935064306, |
| "grad_norm": 0.5761004090309143, |
| "learning_rate": 1.4295203997480927e-06, |
| "loss": 0.17757489681243896, |
| "memory(GiB)": 72.48, |
| "step": 5925, |
| "token_acc": 0.9361775170089258, |
| "train_speed(iter/s)": 0.080827 |
| }, |
| { |
| "epoch": 2.2976239494271806, |
| "grad_norm": 0.6075831651687622, |
| "learning_rate": 1.4220511106199707e-06, |
| "loss": 0.18235840797424316, |
| "memory(GiB)": 72.48, |
| "step": 5930, |
| "token_acc": 0.940095087163233, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 2.29956160534793, |
| "grad_norm": 0.5502698421478271, |
| "learning_rate": 1.4145981499278877e-06, |
| "loss": 0.17435197830200194, |
| "memory(GiB)": 72.48, |
| "step": 5935, |
| "token_acc": 0.9347957784650703, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.30149926126868, |
| "grad_norm": 0.5805485844612122, |
| "learning_rate": 1.4071615516844012e-06, |
| "loss": 0.1907791018486023, |
| "memory(GiB)": 72.48, |
| "step": 5940, |
| "token_acc": 0.9363292452254561, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.30343691718943, |
| "grad_norm": 0.5583797097206116, |
| "learning_rate": 1.399741349827396e-06, |
| "loss": 0.1786908984184265, |
| "memory(GiB)": 72.48, |
| "step": 5945, |
| "token_acc": 0.9454643241305127, |
| "train_speed(iter/s)": 0.08082 |
| }, |
| { |
| "epoch": 2.30537457311018, |
| "grad_norm": 0.5587339997291565, |
| "learning_rate": 1.3923375782199312e-06, |
| "loss": 0.18267627954483032, |
| "memory(GiB)": 72.48, |
| "step": 5950, |
| "token_acc": 0.9357506361323156, |
| "train_speed(iter/s)": 0.080821 |
| }, |
| { |
| "epoch": 2.30731222903093, |
| "grad_norm": 0.6145024299621582, |
| "learning_rate": 1.3849502706500833e-06, |
| "loss": 0.18850283622741698, |
| "memory(GiB)": 72.48, |
| "step": 5955, |
| "token_acc": 0.9302015806766717, |
| "train_speed(iter/s)": 0.080825 |
| }, |
| { |
| "epoch": 2.30924988495168, |
| "grad_norm": 0.5499513149261475, |
| "learning_rate": 1.377579460830792e-06, |
| "loss": 0.1795741558074951, |
| "memory(GiB)": 72.48, |
| "step": 5960, |
| "token_acc": 0.9391224050070112, |
| "train_speed(iter/s)": 0.080822 |
| }, |
| { |
| "epoch": 2.3111875408724294, |
| "grad_norm": 0.5719257593154907, |
| "learning_rate": 1.3702251823997082e-06, |
| "loss": 0.1977135419845581, |
| "memory(GiB)": 72.48, |
| "step": 5965, |
| "token_acc": 0.929769291964996, |
| "train_speed(iter/s)": 0.080826 |
| }, |
| { |
| "epoch": 2.3131251967931794, |
| "grad_norm": 0.553665816783905, |
| "learning_rate": 1.3628874689190409e-06, |
| "loss": 0.1760319709777832, |
| "memory(GiB)": 72.48, |
| "step": 5970, |
| "token_acc": 0.9368856121537087, |
| "train_speed(iter/s)": 0.080828 |
| }, |
| { |
| "epoch": 2.3150628527139294, |
| "grad_norm": 0.5423727631568909, |
| "learning_rate": 1.3555663538754016e-06, |
| "loss": 0.1752350687980652, |
| "memory(GiB)": 72.48, |
| "step": 5975, |
| "token_acc": 0.9409250955490686, |
| "train_speed(iter/s)": 0.08083 |
| }, |
| { |
| "epoch": 2.3170005086346794, |
| "grad_norm": 0.5965731143951416, |
| "learning_rate": 1.3482618706796536e-06, |
| "loss": 0.18348853588104247, |
| "memory(GiB)": 72.48, |
| "step": 5980, |
| "token_acc": 0.9422314911366007, |
| "train_speed(iter/s)": 0.08083 |
| }, |
| { |
| "epoch": 2.318938164555429, |
| "grad_norm": 0.5590642690658569, |
| "learning_rate": 1.3409740526667581e-06, |
| "loss": 0.17766213417053223, |
| "memory(GiB)": 72.48, |
| "step": 5985, |
| "token_acc": 0.9440682217205919, |
| "train_speed(iter/s)": 0.080834 |
| }, |
| { |
| "epoch": 2.320875820476179, |
| "grad_norm": 0.5313416719436646, |
| "learning_rate": 1.3337029330956203e-06, |
| "loss": 0.17021008729934692, |
| "memory(GiB)": 72.48, |
| "step": 5990, |
| "token_acc": 0.9451896876964997, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.322813476396929, |
| "grad_norm": 0.5804678797721863, |
| "learning_rate": 1.3264485451489435e-06, |
| "loss": 0.17493901252746583, |
| "memory(GiB)": 72.48, |
| "step": 5995, |
| "token_acc": 0.9351591363590144, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.3247511323176786, |
| "grad_norm": 0.5881349444389343, |
| "learning_rate": 1.3192109219330717e-06, |
| "loss": 0.1815449118614197, |
| "memory(GiB)": 72.48, |
| "step": 6000, |
| "token_acc": 0.931446340860101, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 2.3266887882384286, |
| "grad_norm": 0.6008525490760803, |
| "learning_rate": 1.311990096477842e-06, |
| "loss": 0.1786556601524353, |
| "memory(GiB)": 72.48, |
| "step": 6005, |
| "token_acc": 0.9421632364334557, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.3286264441591786, |
| "grad_norm": 0.5839970111846924, |
| "learning_rate": 1.3047861017364332e-06, |
| "loss": 0.18735158443450928, |
| "memory(GiB)": 72.48, |
| "step": 6010, |
| "token_acc": 0.9399279134961954, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 2.330564100079928, |
| "grad_norm": 0.5671572685241699, |
| "learning_rate": 1.2975989705852144e-06, |
| "loss": 0.17721595764160156, |
| "memory(GiB)": 72.48, |
| "step": 6015, |
| "token_acc": 0.9393546294794876, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.332501756000678, |
| "grad_norm": 0.5906935334205627, |
| "learning_rate": 1.290428735823593e-06, |
| "loss": 0.1887272357940674, |
| "memory(GiB)": 72.48, |
| "step": 6020, |
| "token_acc": 0.9377751858871556, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.3344394119214282, |
| "grad_norm": 0.5749350190162659, |
| "learning_rate": 1.283275430173871e-06, |
| "loss": 0.18697314262390136, |
| "memory(GiB)": 72.48, |
| "step": 6025, |
| "token_acc": 0.944213984667972, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.336377067842178, |
| "grad_norm": 0.5701440572738647, |
| "learning_rate": 1.2761390862810907e-06, |
| "loss": 0.1756414532661438, |
| "memory(GiB)": 72.48, |
| "step": 6030, |
| "token_acc": 0.9355313030976343, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.338314723762928, |
| "grad_norm": 0.5958893299102783, |
| "learning_rate": 1.2690197367128886e-06, |
| "loss": 0.18113667964935304, |
| "memory(GiB)": 72.48, |
| "step": 6035, |
| "token_acc": 0.9414217825623121, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.340252379683678, |
| "grad_norm": 0.5517078042030334, |
| "learning_rate": 1.2619174139593426e-06, |
| "loss": 0.17177793979644776, |
| "memory(GiB)": 72.48, |
| "step": 6040, |
| "token_acc": 0.9472622376384808, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.3421900356044274, |
| "grad_norm": 0.539323627948761, |
| "learning_rate": 1.2548321504328309e-06, |
| "loss": 0.16735206842422484, |
| "memory(GiB)": 72.48, |
| "step": 6045, |
| "token_acc": 0.937607171866069, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.3441276915251774, |
| "grad_norm": 0.5506569743156433, |
| "learning_rate": 1.247763978467874e-06, |
| "loss": 0.1754169464111328, |
| "memory(GiB)": 72.48, |
| "step": 6050, |
| "token_acc": 0.9432072879762852, |
| "train_speed(iter/s)": 0.080839 |
| }, |
| { |
| "epoch": 2.3460653474459274, |
| "grad_norm": 0.5305989384651184, |
| "learning_rate": 1.2407129303209964e-06, |
| "loss": 0.1766362190246582, |
| "memory(GiB)": 72.48, |
| "step": 6055, |
| "token_acc": 0.9392191659272404, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.348003003366677, |
| "grad_norm": 0.5523601770401001, |
| "learning_rate": 1.233679038170576e-06, |
| "loss": 0.18580877780914307, |
| "memory(GiB)": 72.48, |
| "step": 6060, |
| "token_acc": 0.9315313647443612, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.349940659287427, |
| "grad_norm": 0.5426312685012817, |
| "learning_rate": 1.2266623341166955e-06, |
| "loss": 0.1706899881362915, |
| "memory(GiB)": 72.48, |
| "step": 6065, |
| "token_acc": 0.9475939237279133, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 2.351878315208177, |
| "grad_norm": 0.5808708667755127, |
| "learning_rate": 1.2196628501809994e-06, |
| "loss": 0.18040728569030762, |
| "memory(GiB)": 72.48, |
| "step": 6070, |
| "token_acc": 0.941207314675231, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.3538159711289266, |
| "grad_norm": 0.5775749087333679, |
| "learning_rate": 1.2126806183065449e-06, |
| "loss": 0.18611918687820433, |
| "memory(GiB)": 72.48, |
| "step": 6075, |
| "token_acc": 0.9329947627360736, |
| "train_speed(iter/s)": 0.080846 |
| }, |
| { |
| "epoch": 2.3557536270496766, |
| "grad_norm": 0.5580968856811523, |
| "learning_rate": 1.2057156703576557e-06, |
| "loss": 0.17953190803527833, |
| "memory(GiB)": 72.48, |
| "step": 6080, |
| "token_acc": 0.934079196294605, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.3576912829704266, |
| "grad_norm": 0.5725741982460022, |
| "learning_rate": 1.1987680381197797e-06, |
| "loss": 0.1781769275665283, |
| "memory(GiB)": 72.48, |
| "step": 6085, |
| "token_acc": 0.9443928376949934, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.359628938891176, |
| "grad_norm": 0.5721781849861145, |
| "learning_rate": 1.1918377532993425e-06, |
| "loss": 0.17660874128341675, |
| "memory(GiB)": 72.48, |
| "step": 6090, |
| "token_acc": 0.9424791673421744, |
| "train_speed(iter/s)": 0.080836 |
| }, |
| { |
| "epoch": 2.361566594811926, |
| "grad_norm": 0.5958847403526306, |
| "learning_rate": 1.184924847523602e-06, |
| "loss": 0.17371095418930055, |
| "memory(GiB)": 72.48, |
| "step": 6095, |
| "token_acc": 0.9402736922880164, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.3635042507326762, |
| "grad_norm": 0.5600293278694153, |
| "learning_rate": 1.1780293523405044e-06, |
| "loss": 0.17254823446273804, |
| "memory(GiB)": 72.48, |
| "step": 6100, |
| "token_acc": 0.9396461656334889, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.365441906653426, |
| "grad_norm": 0.5692081451416016, |
| "learning_rate": 1.1711512992185408e-06, |
| "loss": 0.18158187866210937, |
| "memory(GiB)": 72.48, |
| "step": 6105, |
| "token_acc": 0.9332443257676902, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.367379562574176, |
| "grad_norm": 0.5824898481369019, |
| "learning_rate": 1.1642907195466047e-06, |
| "loss": 0.1743251323699951, |
| "memory(GiB)": 72.48, |
| "step": 6110, |
| "token_acc": 0.9418575383840926, |
| "train_speed(iter/s)": 0.080842 |
| }, |
| { |
| "epoch": 2.369317218494926, |
| "grad_norm": 0.5314090847969055, |
| "learning_rate": 1.1574476446338423e-06, |
| "loss": 0.17454535961151124, |
| "memory(GiB)": 72.48, |
| "step": 6115, |
| "token_acc": 0.9452273082330885, |
| "train_speed(iter/s)": 0.080841 |
| }, |
| { |
| "epoch": 2.3712548744156754, |
| "grad_norm": 0.5183026790618896, |
| "learning_rate": 1.1506221057095191e-06, |
| "loss": 0.1725538730621338, |
| "memory(GiB)": 72.48, |
| "step": 6120, |
| "token_acc": 0.9378591705324378, |
| "train_speed(iter/s)": 0.080834 |
| }, |
| { |
| "epoch": 2.3731925303364254, |
| "grad_norm": 0.5685054063796997, |
| "learning_rate": 1.143814133922872e-06, |
| "loss": 0.182060706615448, |
| "memory(GiB)": 72.48, |
| "step": 6125, |
| "token_acc": 0.9310846806665428, |
| "train_speed(iter/s)": 0.080835 |
| }, |
| { |
| "epoch": 2.3751301862571754, |
| "grad_norm": 0.5826152563095093, |
| "learning_rate": 1.137023760342967e-06, |
| "loss": 0.18058472871780396, |
| "memory(GiB)": 72.48, |
| "step": 6130, |
| "token_acc": 0.9364295232214241, |
| "train_speed(iter/s)": 0.080842 |
| }, |
| { |
| "epoch": 2.3770678421779254, |
| "grad_norm": 0.5600957870483398, |
| "learning_rate": 1.13025101595856e-06, |
| "loss": 0.18131771087646484, |
| "memory(GiB)": 72.48, |
| "step": 6135, |
| "token_acc": 0.9257859586251832, |
| "train_speed(iter/s)": 0.080838 |
| }, |
| { |
| "epoch": 2.379005498098675, |
| "grad_norm": 0.5904654860496521, |
| "learning_rate": 1.1234959316779509e-06, |
| "loss": 0.17485294342041016, |
| "memory(GiB)": 72.48, |
| "step": 6140, |
| "token_acc": 0.9428003462759306, |
| "train_speed(iter/s)": 0.080842 |
| }, |
| { |
| "epoch": 2.380943154019425, |
| "grad_norm": 0.6105183362960815, |
| "learning_rate": 1.116758538328847e-06, |
| "loss": 0.1759654998779297, |
| "memory(GiB)": 72.48, |
| "step": 6145, |
| "token_acc": 0.9417246357789736, |
| "train_speed(iter/s)": 0.080843 |
| }, |
| { |
| "epoch": 2.382880809940175, |
| "grad_norm": 0.5727225542068481, |
| "learning_rate": 1.1100388666582224e-06, |
| "loss": 0.17617188692092894, |
| "memory(GiB)": 72.48, |
| "step": 6150, |
| "token_acc": 0.9286671773266948, |
| "train_speed(iter/s)": 0.080848 |
| }, |
| { |
| "epoch": 2.3848184658609246, |
| "grad_norm": 0.5402427315711975, |
| "learning_rate": 1.1033369473321737e-06, |
| "loss": 0.1745760917663574, |
| "memory(GiB)": 72.48, |
| "step": 6155, |
| "token_acc": 0.9433132808884775, |
| "train_speed(iter/s)": 0.080844 |
| }, |
| { |
| "epoch": 2.3867561217816746, |
| "grad_norm": 0.5623111724853516, |
| "learning_rate": 1.0966528109357833e-06, |
| "loss": 0.17613692283630372, |
| "memory(GiB)": 72.48, |
| "step": 6160, |
| "token_acc": 0.939906103286385, |
| "train_speed(iter/s)": 0.080849 |
| }, |
| { |
| "epoch": 2.3886937777024246, |
| "grad_norm": 0.5594596266746521, |
| "learning_rate": 1.0899864879729782e-06, |
| "loss": 0.17606816291809083, |
| "memory(GiB)": 72.48, |
| "step": 6165, |
| "token_acc": 0.9393115942028986, |
| "train_speed(iter/s)": 0.080852 |
| }, |
| { |
| "epoch": 2.390631433623174, |
| "grad_norm": 0.5597702264785767, |
| "learning_rate": 1.083338008866394e-06, |
| "loss": 0.1854230523109436, |
| "memory(GiB)": 72.48, |
| "step": 6170, |
| "token_acc": 0.9366223908918406, |
| "train_speed(iter/s)": 0.080855 |
| }, |
| { |
| "epoch": 2.392569089543924, |
| "grad_norm": 0.5472501516342163, |
| "learning_rate": 1.076707403957229e-06, |
| "loss": 0.1821220636367798, |
| "memory(GiB)": 72.48, |
| "step": 6175, |
| "token_acc": 0.9418488444722048, |
| "train_speed(iter/s)": 0.080853 |
| }, |
| { |
| "epoch": 2.3945067454646742, |
| "grad_norm": 0.5826203227043152, |
| "learning_rate": 1.070094703505114e-06, |
| "loss": 0.18362678289413453, |
| "memory(GiB)": 72.48, |
| "step": 6180, |
| "token_acc": 0.9364526939932972, |
| "train_speed(iter/s)": 0.080857 |
| }, |
| { |
| "epoch": 2.396444401385424, |
| "grad_norm": 0.5729585289955139, |
| "learning_rate": 1.0634999376879684e-06, |
| "loss": 0.1807836651802063, |
| "memory(GiB)": 72.48, |
| "step": 6185, |
| "token_acc": 0.9344845892015115, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 2.398382057306174, |
| "grad_norm": 0.5837453603744507, |
| "learning_rate": 1.0569231366018667e-06, |
| "loss": 0.18412914276123046, |
| "memory(GiB)": 72.48, |
| "step": 6190, |
| "token_acc": 0.9381114618822868, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 2.400319713226924, |
| "grad_norm": 0.5625714063644409, |
| "learning_rate": 1.0503643302608986e-06, |
| "loss": 0.18067336082458496, |
| "memory(GiB)": 72.48, |
| "step": 6195, |
| "token_acc": 0.9334223206146633, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 2.402257369147674, |
| "grad_norm": 0.5841348767280579, |
| "learning_rate": 1.0438235485970288e-06, |
| "loss": 0.18118083477020264, |
| "memory(GiB)": 72.48, |
| "step": 6200, |
| "token_acc": 0.933785735882017, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.4041950250684234, |
| "grad_norm": 0.5591301918029785, |
| "learning_rate": 1.0373008214599678e-06, |
| "loss": 0.17193083763122557, |
| "memory(GiB)": 72.48, |
| "step": 6205, |
| "token_acc": 0.9449988472812304, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.4061326809891734, |
| "grad_norm": 0.5522191524505615, |
| "learning_rate": 1.0307961786170318e-06, |
| "loss": 0.18121827840805055, |
| "memory(GiB)": 72.48, |
| "step": 6210, |
| "token_acc": 0.934540164861807, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.4080703369099234, |
| "grad_norm": 0.5809552073478699, |
| "learning_rate": 1.0243096497530058e-06, |
| "loss": 0.17905534505844117, |
| "memory(GiB)": 72.48, |
| "step": 6215, |
| "token_acc": 0.9423424701251729, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.410007992830673, |
| "grad_norm": 0.5794199705123901, |
| "learning_rate": 1.0178412644700093e-06, |
| "loss": 0.1741298794746399, |
| "memory(GiB)": 72.48, |
| "step": 6220, |
| "token_acc": 0.9405881573718373, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.411945648751423, |
| "grad_norm": 0.5728258490562439, |
| "learning_rate": 1.0113910522873615e-06, |
| "loss": 0.1761408805847168, |
| "memory(GiB)": 72.48, |
| "step": 6225, |
| "token_acc": 0.9384154460719041, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.413883304672173, |
| "grad_norm": 0.5635868906974792, |
| "learning_rate": 1.0049590426414479e-06, |
| "loss": 0.1836428999900818, |
| "memory(GiB)": 72.48, |
| "step": 6230, |
| "token_acc": 0.9472552491778397, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.4158209605929226, |
| "grad_norm": 0.5687157511711121, |
| "learning_rate": 9.985452648855803e-07, |
| "loss": 0.17928725481033325, |
| "memory(GiB)": 72.48, |
| "step": 6235, |
| "token_acc": 0.9374187884108868, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.4177586165136726, |
| "grad_norm": 0.5609813332557678, |
| "learning_rate": 9.921497482898702e-07, |
| "loss": 0.16830335855484008, |
| "memory(GiB)": 72.48, |
| "step": 6240, |
| "token_acc": 0.9397059267534192, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.4196962724344226, |
| "grad_norm": 0.5949937701225281, |
| "learning_rate": 9.857725220410908e-07, |
| "loss": 0.17889876365661622, |
| "memory(GiB)": 72.48, |
| "step": 6245, |
| "token_acc": 0.9311128386055134, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.421633928355172, |
| "grad_norm": 0.5891610980033875, |
| "learning_rate": 9.79413615242546e-07, |
| "loss": 0.1865074396133423, |
| "memory(GiB)": 72.48, |
| "step": 6250, |
| "token_acc": 0.9318126741790864, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 2.4235715842759222, |
| "grad_norm": 0.5652107000350952, |
| "learning_rate": 9.730730569139368e-07, |
| "loss": 0.18406026363372802, |
| "memory(GiB)": 72.48, |
| "step": 6255, |
| "token_acc": 0.9368455497382199, |
| "train_speed(iter/s)": 0.080858 |
| }, |
| { |
| "epoch": 2.4255092401966722, |
| "grad_norm": 0.5708385705947876, |
| "learning_rate": 9.667508759912242e-07, |
| "loss": 0.1803309679031372, |
| "memory(GiB)": 72.48, |
| "step": 6260, |
| "token_acc": 0.9434427336574824, |
| "train_speed(iter/s)": 0.080858 |
| }, |
| { |
| "epoch": 2.427446896117422, |
| "grad_norm": 0.5924727916717529, |
| "learning_rate": 9.604471013265064e-07, |
| "loss": 0.1720449686050415, |
| "memory(GiB)": 72.48, |
| "step": 6265, |
| "token_acc": 0.9419114576082417, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.429384552038172, |
| "grad_norm": 0.5573844313621521, |
| "learning_rate": 9.541617616878812e-07, |
| "loss": 0.18263744115829467, |
| "memory(GiB)": 72.48, |
| "step": 6270, |
| "token_acc": 0.9388622714083271, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 2.431322207958922, |
| "grad_norm": 0.583974301815033, |
| "learning_rate": 9.478948857593146e-07, |
| "loss": 0.18000080585479736, |
| "memory(GiB)": 72.48, |
| "step": 6275, |
| "token_acc": 0.9437852006659346, |
| "train_speed(iter/s)": 0.080857 |
| }, |
| { |
| "epoch": 2.4332598638796714, |
| "grad_norm": 0.5686377286911011, |
| "learning_rate": 9.416465021405108e-07, |
| "loss": 0.18695411682128907, |
| "memory(GiB)": 72.48, |
| "step": 6280, |
| "token_acc": 0.9373716297721123, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 2.4351975198004214, |
| "grad_norm": 0.5770736336708069, |
| "learning_rate": 9.354166393467845e-07, |
| "loss": 0.18186668157577515, |
| "memory(GiB)": 72.48, |
| "step": 6285, |
| "token_acc": 0.940144604777449, |
| "train_speed(iter/s)": 0.080863 |
| }, |
| { |
| "epoch": 2.4371351757211714, |
| "grad_norm": 0.5655450224876404, |
| "learning_rate": 9.292053258089251e-07, |
| "loss": 0.18221802711486818, |
| "memory(GiB)": 72.48, |
| "step": 6290, |
| "token_acc": 0.9383114885237831, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.439072831641921, |
| "grad_norm": 0.5566943883895874, |
| "learning_rate": 9.23012589873073e-07, |
| "loss": 0.18132885694503784, |
| "memory(GiB)": 72.48, |
| "step": 6295, |
| "token_acc": 0.9439205058717254, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.441010487562671, |
| "grad_norm": 0.548239529132843, |
| "learning_rate": 9.168384598005831e-07, |
| "loss": 0.17681779861450195, |
| "memory(GiB)": 72.48, |
| "step": 6300, |
| "token_acc": 0.9441413081497027, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.442948143483421, |
| "grad_norm": 0.5674452185630798, |
| "learning_rate": 9.106829637679043e-07, |
| "loss": 0.18225634098052979, |
| "memory(GiB)": 72.48, |
| "step": 6305, |
| "token_acc": 0.9364469845805798, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 2.4448857994041706, |
| "grad_norm": 0.5836702585220337, |
| "learning_rate": 9.045461298664443e-07, |
| "loss": 0.1857957124710083, |
| "memory(GiB)": 72.48, |
| "step": 6310, |
| "token_acc": 0.9381540015876453, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 2.4468234553249206, |
| "grad_norm": 0.5767743587493896, |
| "learning_rate": 8.984279861024453e-07, |
| "loss": 0.18774926662445068, |
| "memory(GiB)": 72.48, |
| "step": 6315, |
| "token_acc": 0.9405742821473159, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.4487611112456706, |
| "grad_norm": 0.5304026007652283, |
| "learning_rate": 8.92328560396854e-07, |
| "loss": 0.17166202068328856, |
| "memory(GiB)": 72.48, |
| "step": 6320, |
| "token_acc": 0.938264787094899, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 2.45069876716642, |
| "grad_norm": 0.5317398309707642, |
| "learning_rate": 8.862478805851921e-07, |
| "loss": 0.17200467586517335, |
| "memory(GiB)": 72.48, |
| "step": 6325, |
| "token_acc": 0.94351630867144, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 2.4526364230871702, |
| "grad_norm": 0.525132954120636, |
| "learning_rate": 8.801859744174357e-07, |
| "loss": 0.17509570121765136, |
| "memory(GiB)": 72.48, |
| "step": 6330, |
| "token_acc": 0.9369052979138526, |
| "train_speed(iter/s)": 0.080858 |
| }, |
| { |
| "epoch": 2.4545740790079202, |
| "grad_norm": 0.5590113401412964, |
| "learning_rate": 8.741428695578841e-07, |
| "loss": 0.18100138902664184, |
| "memory(GiB)": 72.48, |
| "step": 6335, |
| "token_acc": 0.9329828274896727, |
| "train_speed(iter/s)": 0.080856 |
| }, |
| { |
| "epoch": 2.45651173492867, |
| "grad_norm": 0.5894602537155151, |
| "learning_rate": 8.681185935850334e-07, |
| "loss": 0.18703469038009643, |
| "memory(GiB)": 72.48, |
| "step": 6340, |
| "token_acc": 0.9299990313519099, |
| "train_speed(iter/s)": 0.080856 |
| }, |
| { |
| "epoch": 2.45844939084942, |
| "grad_norm": 0.5846702456474304, |
| "learning_rate": 8.621131739914524e-07, |
| "loss": 0.1720949411392212, |
| "memory(GiB)": 72.48, |
| "step": 6345, |
| "token_acc": 0.9361929221840499, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 2.46038704677017, |
| "grad_norm": 0.5550934076309204, |
| "learning_rate": 8.561266381836558e-07, |
| "loss": 0.18395935297012328, |
| "memory(GiB)": 72.48, |
| "step": 6350, |
| "token_acc": 0.9337567171470444, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.46232470269092, |
| "grad_norm": 0.548347532749176, |
| "learning_rate": 8.501590134819809e-07, |
| "loss": 0.17986433506011962, |
| "memory(GiB)": 72.48, |
| "step": 6355, |
| "token_acc": 0.9395786970703899, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.4642623586116694, |
| "grad_norm": 0.5795870423316956, |
| "learning_rate": 8.442103271204588e-07, |
| "loss": 0.18020589351654054, |
| "memory(GiB)": 72.48, |
| "step": 6360, |
| "token_acc": 0.9434226980205569, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.4662000145324194, |
| "grad_norm": 0.6125761270523071, |
| "learning_rate": 8.382806062466958e-07, |
| "loss": 0.1750793695449829, |
| "memory(GiB)": 72.48, |
| "step": 6365, |
| "token_acc": 0.9413035279596804, |
| "train_speed(iter/s)": 0.080863 |
| }, |
| { |
| "epoch": 2.4681376704531695, |
| "grad_norm": 0.5819019079208374, |
| "learning_rate": 8.323698779217465e-07, |
| "loss": 0.17794256210327147, |
| "memory(GiB)": 72.48, |
| "step": 6370, |
| "token_acc": 0.9363123419158181, |
| "train_speed(iter/s)": 0.080863 |
| }, |
| { |
| "epoch": 2.470075326373919, |
| "grad_norm": 0.5621497631072998, |
| "learning_rate": 8.264781691199892e-07, |
| "loss": 0.17670561075210572, |
| "memory(GiB)": 72.48, |
| "step": 6375, |
| "token_acc": 0.937870866343779, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.472012982294669, |
| "grad_norm": 0.5306990742683411, |
| "learning_rate": 8.206055067290059e-07, |
| "loss": 0.17103338241577148, |
| "memory(GiB)": 72.48, |
| "step": 6380, |
| "token_acc": 0.9375408569560825, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 2.473950638215419, |
| "grad_norm": 0.5630704760551453, |
| "learning_rate": 8.14751917549455e-07, |
| "loss": 0.18032557964324952, |
| "memory(GiB)": 72.48, |
| "step": 6385, |
| "token_acc": 0.9378391711889492, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.4758882941361686, |
| "grad_norm": 0.5820339322090149, |
| "learning_rate": 8.089174282949547e-07, |
| "loss": 0.1891242265701294, |
| "memory(GiB)": 72.48, |
| "step": 6390, |
| "token_acc": 0.9376766041210274, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.4778259500569186, |
| "grad_norm": 0.5377040505409241, |
| "learning_rate": 8.031020655919563e-07, |
| "loss": 0.18622909784317015, |
| "memory(GiB)": 72.48, |
| "step": 6395, |
| "token_acc": 0.9344472204871955, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.4797636059776687, |
| "grad_norm": 0.5398117303848267, |
| "learning_rate": 7.973058559796265e-07, |
| "loss": 0.1743963360786438, |
| "memory(GiB)": 72.48, |
| "step": 6400, |
| "token_acc": 0.9385559164124266, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.481701261898418, |
| "grad_norm": 0.5849392414093018, |
| "learning_rate": 7.915288259097226e-07, |
| "loss": 0.17278592586517333, |
| "memory(GiB)": 72.48, |
| "step": 6405, |
| "token_acc": 0.9324333727645954, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.4836389178191682, |
| "grad_norm": 0.5697488188743591, |
| "learning_rate": 7.857710017464737e-07, |
| "loss": 0.17992591857910156, |
| "memory(GiB)": 72.48, |
| "step": 6410, |
| "token_acc": 0.9392217270361619, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.4855765737399182, |
| "grad_norm": 0.5471551418304443, |
| "learning_rate": 7.800324097664629e-07, |
| "loss": 0.17690662145614625, |
| "memory(GiB)": 72.48, |
| "step": 6415, |
| "token_acc": 0.9364684527210999, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.487514229660668, |
| "grad_norm": 0.5691121816635132, |
| "learning_rate": 7.743130761584999e-07, |
| "loss": 0.17598928213119508, |
| "memory(GiB)": 72.48, |
| "step": 6420, |
| "token_acc": 0.9407388727777077, |
| "train_speed(iter/s)": 0.080858 |
| }, |
| { |
| "epoch": 2.489451885581418, |
| "grad_norm": 0.5582486391067505, |
| "learning_rate": 7.686130270235098e-07, |
| "loss": 0.17861016988754272, |
| "memory(GiB)": 72.48, |
| "step": 6425, |
| "token_acc": 0.9379861845790031, |
| "train_speed(iter/s)": 0.080861 |
| }, |
| { |
| "epoch": 2.491389541502168, |
| "grad_norm": 0.5467748641967773, |
| "learning_rate": 7.629322883744095e-07, |
| "loss": 0.17899925708770753, |
| "memory(GiB)": 72.48, |
| "step": 6430, |
| "token_acc": 0.9343893494406608, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.493327197422918, |
| "grad_norm": 0.5240247845649719, |
| "learning_rate": 7.572708861359912e-07, |
| "loss": 0.1765903949737549, |
| "memory(GiB)": 72.48, |
| "step": 6435, |
| "token_acc": 0.9447293978675122, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.4952648533436674, |
| "grad_norm": 0.5655544996261597, |
| "learning_rate": 7.516288461448018e-07, |
| "loss": 0.17434144020080566, |
| "memory(GiB)": 72.48, |
| "step": 6440, |
| "token_acc": 0.941343003467252, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.4972025092644174, |
| "grad_norm": 0.5669127106666565, |
| "learning_rate": 7.460061941490243e-07, |
| "loss": 0.17970057725906372, |
| "memory(GiB)": 72.48, |
| "step": 6445, |
| "token_acc": 0.931559674649211, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.4991401651851675, |
| "grad_norm": 0.61557537317276, |
| "learning_rate": 7.404029558083653e-07, |
| "loss": 0.17963045835494995, |
| "memory(GiB)": 72.48, |
| "step": 6450, |
| "token_acc": 0.9467308217308217, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.501077821105917, |
| "grad_norm": 0.5478566288948059, |
| "learning_rate": 7.348191566939322e-07, |
| "loss": 0.17327165603637695, |
| "memory(GiB)": 72.48, |
| "step": 6455, |
| "token_acc": 0.9342934293429342, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.503015477026667, |
| "grad_norm": 0.5557734966278076, |
| "learning_rate": 7.292548222881213e-07, |
| "loss": 0.17639074325561524, |
| "memory(GiB)": 72.48, |
| "step": 6460, |
| "token_acc": 0.9434013791220045, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.504953132947417, |
| "grad_norm": 0.5693845748901367, |
| "learning_rate": 7.237099779844964e-07, |
| "loss": 0.1701158881187439, |
| "memory(GiB)": 72.48, |
| "step": 6465, |
| "token_acc": 0.9314411035534931, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5068907888681666, |
| "grad_norm": 0.6034876704216003, |
| "learning_rate": 7.181846490876781e-07, |
| "loss": 0.1744617462158203, |
| "memory(GiB)": 72.48, |
| "step": 6470, |
| "token_acc": 0.9363396040821346, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.5088284447889166, |
| "grad_norm": 0.5935420393943787, |
| "learning_rate": 7.126788608132252e-07, |
| "loss": 0.17597038745880128, |
| "memory(GiB)": 72.48, |
| "step": 6475, |
| "token_acc": 0.9392177771662481, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5107661007096667, |
| "grad_norm": 0.6055991053581238, |
| "learning_rate": 7.071926382875194e-07, |
| "loss": 0.17976925373077393, |
| "memory(GiB)": 72.48, |
| "step": 6480, |
| "token_acc": 0.9430797089070347, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5127037566304162, |
| "grad_norm": 0.5665844082832336, |
| "learning_rate": 7.017260065476517e-07, |
| "loss": 0.17805242538452148, |
| "memory(GiB)": 72.48, |
| "step": 6485, |
| "token_acc": 0.9451027397260274, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 2.5146414125511662, |
| "grad_norm": 0.5817480087280273, |
| "learning_rate": 6.962789905413086e-07, |
| "loss": 0.1649151086807251, |
| "memory(GiB)": 72.48, |
| "step": 6490, |
| "token_acc": 0.941207838954806, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.5165790684719163, |
| "grad_norm": 0.5650894045829773, |
| "learning_rate": 6.908516151266581e-07, |
| "loss": 0.18006772994995118, |
| "memory(GiB)": 72.48, |
| "step": 6495, |
| "token_acc": 0.936968601459463, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.518516724392666, |
| "grad_norm": 0.5909514427185059, |
| "learning_rate": 6.854439050722356e-07, |
| "loss": 0.1848963975906372, |
| "memory(GiB)": 72.48, |
| "step": 6500, |
| "token_acc": 0.9373421717171717, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 2.520454380313416, |
| "grad_norm": 0.5733851790428162, |
| "learning_rate": 6.800558850568295e-07, |
| "loss": 0.17741067409515382, |
| "memory(GiB)": 72.48, |
| "step": 6505, |
| "token_acc": 0.9419108778563745, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 2.522392036234166, |
| "grad_norm": 0.5257155895233154, |
| "learning_rate": 6.746875796693714e-07, |
| "loss": 0.17399654388427735, |
| "memory(GiB)": 72.48, |
| "step": 6510, |
| "token_acc": 0.9378819399297881, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.5243296921549154, |
| "grad_norm": 0.5499019622802734, |
| "learning_rate": 6.693390134088229e-07, |
| "loss": 0.17450027465820311, |
| "memory(GiB)": 72.48, |
| "step": 6515, |
| "token_acc": 0.9319403386186509, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 2.5262673480756654, |
| "grad_norm": 0.5747637748718262, |
| "learning_rate": 6.640102106840635e-07, |
| "loss": 0.18348932266235352, |
| "memory(GiB)": 72.48, |
| "step": 6520, |
| "token_acc": 0.9365109034267913, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.5282050039964155, |
| "grad_norm": 0.5292996168136597, |
| "learning_rate": 6.587011958137779e-07, |
| "loss": 0.1740797519683838, |
| "memory(GiB)": 72.48, |
| "step": 6525, |
| "token_acc": 0.9383220106422141, |
| "train_speed(iter/s)": 0.08086 |
| }, |
| { |
| "epoch": 2.530142659917165, |
| "grad_norm": 0.550775945186615, |
| "learning_rate": 6.534119930263488e-07, |
| "loss": 0.18158359527587892, |
| "memory(GiB)": 72.48, |
| "step": 6530, |
| "token_acc": 0.9338427133805441, |
| "train_speed(iter/s)": 0.080859 |
| }, |
| { |
| "epoch": 2.532080315837915, |
| "grad_norm": 0.5693374872207642, |
| "learning_rate": 6.481426264597412e-07, |
| "loss": 0.17761898040771484, |
| "memory(GiB)": 72.48, |
| "step": 6535, |
| "token_acc": 0.9389302357932809, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.534017971758665, |
| "grad_norm": 0.5608578324317932, |
| "learning_rate": 6.42893120161398e-07, |
| "loss": 0.17133185863494874, |
| "memory(GiB)": 72.48, |
| "step": 6540, |
| "token_acc": 0.9415438337306518, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5359556276794146, |
| "grad_norm": 0.547332763671875, |
| "learning_rate": 6.376634980881224e-07, |
| "loss": 0.18398122787475585, |
| "memory(GiB)": 72.48, |
| "step": 6545, |
| "token_acc": 0.9410398930556533, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.5378932836001646, |
| "grad_norm": 0.5538179278373718, |
| "learning_rate": 6.324537841059781e-07, |
| "loss": 0.18358099460601807, |
| "memory(GiB)": 72.48, |
| "step": 6550, |
| "token_acc": 0.940280399029388, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5398309395209147, |
| "grad_norm": 0.6050009727478027, |
| "learning_rate": 6.272640019901732e-07, |
| "loss": 0.17833110094070434, |
| "memory(GiB)": 72.48, |
| "step": 6555, |
| "token_acc": 0.9338536585365854, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5417685954416642, |
| "grad_norm": 0.5568081736564636, |
| "learning_rate": 6.22094175424956e-07, |
| "loss": 0.17815144062042237, |
| "memory(GiB)": 72.48, |
| "step": 6560, |
| "token_acc": 0.9374870053364751, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5437062513624142, |
| "grad_norm": 0.5535020232200623, |
| "learning_rate": 6.16944328003502e-07, |
| "loss": 0.18283143043518066, |
| "memory(GiB)": 72.48, |
| "step": 6565, |
| "token_acc": 0.9375201599896781, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.5456439072831643, |
| "grad_norm": 0.5756219625473022, |
| "learning_rate": 6.118144832278117e-07, |
| "loss": 0.17421250343322753, |
| "memory(GiB)": 72.48, |
| "step": 6570, |
| "token_acc": 0.944570943075616, |
| "train_speed(iter/s)": 0.080863 |
| }, |
| { |
| "epoch": 2.547581563203914, |
| "grad_norm": 0.5519009828567505, |
| "learning_rate": 6.067046645086e-07, |
| "loss": 0.17970068454742433, |
| "memory(GiB)": 72.48, |
| "step": 6575, |
| "token_acc": 0.9271296054350666, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 2.549519219124664, |
| "grad_norm": 0.5751408338546753, |
| "learning_rate": 6.016148951651912e-07, |
| "loss": 0.17249932289123535, |
| "memory(GiB)": 72.48, |
| "step": 6580, |
| "token_acc": 0.9375794932002739, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.551456875045414, |
| "grad_norm": 0.5762319564819336, |
| "learning_rate": 5.965451984254106e-07, |
| "loss": 0.17329201698303223, |
| "memory(GiB)": 72.48, |
| "step": 6585, |
| "token_acc": 0.9398695054945055, |
| "train_speed(iter/s)": 0.080862 |
| }, |
| { |
| "epoch": 2.5533945309661634, |
| "grad_norm": 0.5738294720649719, |
| "learning_rate": 5.914955974254804e-07, |
| "loss": 0.17728983163833617, |
| "memory(GiB)": 72.48, |
| "step": 6590, |
| "token_acc": 0.9353730148138263, |
| "train_speed(iter/s)": 0.080865 |
| }, |
| { |
| "epoch": 2.5553321868869134, |
| "grad_norm": 0.5713756084442139, |
| "learning_rate": 5.864661152099122e-07, |
| "loss": 0.18523712158203126, |
| "memory(GiB)": 72.48, |
| "step": 6595, |
| "token_acc": 0.9360657869076253, |
| "train_speed(iter/s)": 0.080866 |
| }, |
| { |
| "epoch": 2.5572698428076635, |
| "grad_norm": 0.5726301074028015, |
| "learning_rate": 5.814567747314049e-07, |
| "loss": 0.18754193782806397, |
| "memory(GiB)": 72.48, |
| "step": 6600, |
| "token_acc": 0.9317076065029073, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.5592074987284135, |
| "grad_norm": 0.5507714748382568, |
| "learning_rate": 5.76467598850734e-07, |
| "loss": 0.173997163772583, |
| "memory(GiB)": 72.48, |
| "step": 6605, |
| "token_acc": 0.941759388038943, |
| "train_speed(iter/s)": 0.08087 |
| }, |
| { |
| "epoch": 2.561145154649163, |
| "grad_norm": 0.5710182189941406, |
| "learning_rate": 5.71498610336656e-07, |
| "loss": 0.17184269428253174, |
| "memory(GiB)": 72.48, |
| "step": 6610, |
| "token_acc": 0.9332494515421345, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 2.563082810569913, |
| "grad_norm": 0.5885529518127441, |
| "learning_rate": 5.665498318657963e-07, |
| "loss": 0.18314269781112671, |
| "memory(GiB)": 72.48, |
| "step": 6615, |
| "token_acc": 0.9375413683133763, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.565020466490663, |
| "grad_norm": 0.5782006978988647, |
| "learning_rate": 5.616212860225529e-07, |
| "loss": 0.1815364956855774, |
| "memory(GiB)": 72.48, |
| "step": 6620, |
| "token_acc": 0.936763468013468, |
| "train_speed(iter/s)": 0.080868 |
| }, |
| { |
| "epoch": 2.5669581224114126, |
| "grad_norm": 0.5746456384658813, |
| "learning_rate": 5.567129952989831e-07, |
| "loss": 0.18321220874786376, |
| "memory(GiB)": 72.48, |
| "step": 6625, |
| "token_acc": 0.9419481429572529, |
| "train_speed(iter/s)": 0.080867 |
| }, |
| { |
| "epoch": 2.5688957783321626, |
| "grad_norm": 0.5613101720809937, |
| "learning_rate": 5.518249820947141e-07, |
| "loss": 0.18082406520843505, |
| "memory(GiB)": 72.48, |
| "step": 6630, |
| "token_acc": 0.9322963191128908, |
| "train_speed(iter/s)": 0.080864 |
| }, |
| { |
| "epoch": 2.5708334342529127, |
| "grad_norm": 0.5367494225502014, |
| "learning_rate": 5.469572687168295e-07, |
| "loss": 0.1775603175163269, |
| "memory(GiB)": 72.48, |
| "step": 6635, |
| "token_acc": 0.9408753045677452, |
| "train_speed(iter/s)": 0.080869 |
| }, |
| { |
| "epoch": 2.5727710901736627, |
| "grad_norm": 0.5576823353767395, |
| "learning_rate": 5.421098773797751e-07, |
| "loss": 0.17280523777008056, |
| "memory(GiB)": 72.48, |
| "step": 6640, |
| "token_acc": 0.941501756338469, |
| "train_speed(iter/s)": 0.080871 |
| }, |
| { |
| "epoch": 2.5747087460944122, |
| "grad_norm": 0.5695961713790894, |
| "learning_rate": 5.372828302052524e-07, |
| "loss": 0.17974162101745605, |
| "memory(GiB)": 72.48, |
| "step": 6645, |
| "token_acc": 0.9428788338900699, |
| "train_speed(iter/s)": 0.080873 |
| }, |
| { |
| "epoch": 2.5766464020151623, |
| "grad_norm": 0.5692358613014221, |
| "learning_rate": 5.324761492221203e-07, |
| "loss": 0.17853889465332032, |
| "memory(GiB)": 72.48, |
| "step": 6650, |
| "token_acc": 0.9366494749269869, |
| "train_speed(iter/s)": 0.080877 |
| }, |
| { |
| "epoch": 2.5785840579359123, |
| "grad_norm": 0.5954664349555969, |
| "learning_rate": 5.276898563662936e-07, |
| "loss": 0.18170109987258912, |
| "memory(GiB)": 72.48, |
| "step": 6655, |
| "token_acc": 0.94287518277283, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 2.580521713856662, |
| "grad_norm": 0.5591299533843994, |
| "learning_rate": 5.22923973480644e-07, |
| "loss": 0.1883463978767395, |
| "memory(GiB)": 72.48, |
| "step": 6660, |
| "token_acc": 0.9334826559783089, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 2.582459369777412, |
| "grad_norm": 0.55705726146698, |
| "learning_rate": 5.181785223148999e-07, |
| "loss": 0.1746220827102661, |
| "memory(GiB)": 72.48, |
| "step": 6665, |
| "token_acc": 0.9413660832285724, |
| "train_speed(iter/s)": 0.080887 |
| }, |
| { |
| "epoch": 2.584397025698162, |
| "grad_norm": 0.5565838813781738, |
| "learning_rate": 5.134535245255439e-07, |
| "loss": 0.17614197731018066, |
| "memory(GiB)": 72.48, |
| "step": 6670, |
| "token_acc": 0.9389331402365436, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 2.5863346816189114, |
| "grad_norm": 0.5887038707733154, |
| "learning_rate": 5.087490016757202e-07, |
| "loss": 0.18511323928833007, |
| "memory(GiB)": 72.48, |
| "step": 6675, |
| "token_acc": 0.9347578538415166, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 2.5882723375396615, |
| "grad_norm": 0.559231698513031, |
| "learning_rate": 5.040649752351323e-07, |
| "loss": 0.1750473976135254, |
| "memory(GiB)": 72.48, |
| "step": 6680, |
| "token_acc": 0.944081865416297, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 2.5902099934604115, |
| "grad_norm": 0.5665563344955444, |
| "learning_rate": 4.994014665799463e-07, |
| "loss": 0.17605587244033813, |
| "memory(GiB)": 72.48, |
| "step": 6685, |
| "token_acc": 0.9398994906157054, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 2.592147649381161, |
| "grad_norm": 0.529827892780304, |
| "learning_rate": 4.947584969926894e-07, |
| "loss": 0.17327487468719482, |
| "memory(GiB)": 72.48, |
| "step": 6690, |
| "token_acc": 0.9405542557748706, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 2.594085305301911, |
| "grad_norm": 0.5781688094139099, |
| "learning_rate": 4.901360876621597e-07, |
| "loss": 0.1757380723953247, |
| "memory(GiB)": 72.48, |
| "step": 6695, |
| "token_acc": 0.9384161130443952, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 2.596022961222661, |
| "grad_norm": 0.5731996297836304, |
| "learning_rate": 4.855342596833241e-07, |
| "loss": 0.18313926458358765, |
| "memory(GiB)": 72.48, |
| "step": 6700, |
| "token_acc": 0.9378850621149379, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 2.5979606171434106, |
| "grad_norm": 0.5984396934509277, |
| "learning_rate": 4.809530340572244e-07, |
| "loss": 0.17600476741790771, |
| "memory(GiB)": 72.48, |
| "step": 6705, |
| "token_acc": 0.9409620058846104, |
| "train_speed(iter/s)": 0.080885 |
| }, |
| { |
| "epoch": 2.5998982730641607, |
| "grad_norm": 0.5436455607414246, |
| "learning_rate": 4.7639243169088134e-07, |
| "loss": 0.18034532070159912, |
| "memory(GiB)": 72.48, |
| "step": 6710, |
| "token_acc": 0.9404441788666468, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 2.6018359289849107, |
| "grad_norm": 0.584514856338501, |
| "learning_rate": 4.718524733971974e-07, |
| "loss": 0.17961220741271972, |
| "memory(GiB)": 72.48, |
| "step": 6715, |
| "token_acc": 0.9375684556407448, |
| "train_speed(iter/s)": 0.080884 |
| }, |
| { |
| "epoch": 2.6037735849056602, |
| "grad_norm": 0.5552295446395874, |
| "learning_rate": 4.6733317989486435e-07, |
| "loss": 0.181429922580719, |
| "memory(GiB)": 72.48, |
| "step": 6720, |
| "token_acc": 0.9331636472525827, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 2.6057112408264103, |
| "grad_norm": 0.5627444386482239, |
| "learning_rate": 4.628345718082677e-07, |
| "loss": 0.1798389196395874, |
| "memory(GiB)": 72.48, |
| "step": 6725, |
| "token_acc": 0.9303203159280387, |
| "train_speed(iter/s)": 0.080889 |
| }, |
| { |
| "epoch": 2.6076488967471603, |
| "grad_norm": 0.5578245520591736, |
| "learning_rate": 4.583566696673908e-07, |
| "loss": 0.17884159088134766, |
| "memory(GiB)": 72.48, |
| "step": 6730, |
| "token_acc": 0.939159530833993, |
| "train_speed(iter/s)": 0.080891 |
| }, |
| { |
| "epoch": 2.60958655266791, |
| "grad_norm": 0.5763718485832214, |
| "learning_rate": 4.5389949390772293e-07, |
| "loss": 0.18293684720993042, |
| "memory(GiB)": 72.48, |
| "step": 6735, |
| "token_acc": 0.9430318635339556, |
| "train_speed(iter/s)": 0.080889 |
| }, |
| { |
| "epoch": 2.61152420858866, |
| "grad_norm": 0.5593513250350952, |
| "learning_rate": 4.494630648701681e-07, |
| "loss": 0.18049242496490478, |
| "memory(GiB)": 72.48, |
| "step": 6740, |
| "token_acc": 0.9235769828926905, |
| "train_speed(iter/s)": 0.080883 |
| }, |
| { |
| "epoch": 2.61346186450941, |
| "grad_norm": 0.5691155791282654, |
| "learning_rate": 4.4504740280094824e-07, |
| "loss": 0.17617813348770142, |
| "memory(GiB)": 72.48, |
| "step": 6745, |
| "token_acc": 0.937477328936521, |
| "train_speed(iter/s)": 0.080882 |
| }, |
| { |
| "epoch": 2.6153995204301594, |
| "grad_norm": 0.5468465089797974, |
| "learning_rate": 4.4065252785151113e-07, |
| "loss": 0.18133144378662108, |
| "memory(GiB)": 72.48, |
| "step": 6750, |
| "token_acc": 0.9441213328620546, |
| "train_speed(iter/s)": 0.080883 |
| }, |
| { |
| "epoch": 2.6173371763509095, |
| "grad_norm": 0.5726532936096191, |
| "learning_rate": 4.3627846007844257e-07, |
| "loss": 0.16658930778503417, |
| "memory(GiB)": 72.48, |
| "step": 6755, |
| "token_acc": 0.938378446669404, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 2.6192748322716595, |
| "grad_norm": 0.5524346232414246, |
| "learning_rate": 4.31925219443371e-07, |
| "loss": 0.16769044399261473, |
| "memory(GiB)": 72.48, |
| "step": 6760, |
| "token_acc": 0.9425934919864012, |
| "train_speed(iter/s)": 0.080889 |
| }, |
| { |
| "epoch": 2.621212488192409, |
| "grad_norm": 0.573850691318512, |
| "learning_rate": 4.275928258128764e-07, |
| "loss": 0.18391640186309816, |
| "memory(GiB)": 72.48, |
| "step": 6765, |
| "token_acc": 0.9425073364677669, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 2.623150144113159, |
| "grad_norm": 0.5574968457221985, |
| "learning_rate": 4.2328129895840233e-07, |
| "loss": 0.17317439317703248, |
| "memory(GiB)": 72.48, |
| "step": 6770, |
| "token_acc": 0.938301515004684, |
| "train_speed(iter/s)": 0.080889 |
| }, |
| { |
| "epoch": 2.625087800033909, |
| "grad_norm": 0.5664544701576233, |
| "learning_rate": 4.189906585561637e-07, |
| "loss": 0.1772672176361084, |
| "memory(GiB)": 72.48, |
| "step": 6775, |
| "token_acc": 0.934027002010788, |
| "train_speed(iter/s)": 0.08089 |
| }, |
| { |
| "epoch": 2.6270254559546586, |
| "grad_norm": 0.5962421894073486, |
| "learning_rate": 4.14720924187057e-07, |
| "loss": 0.17836084365844726, |
| "memory(GiB)": 72.48, |
| "step": 6780, |
| "token_acc": 0.9277760689018764, |
| "train_speed(iter/s)": 0.080886 |
| }, |
| { |
| "epoch": 2.6289631118754087, |
| "grad_norm": 0.6081134080886841, |
| "learning_rate": 4.1047211533657203e-07, |
| "loss": 0.18367011547088624, |
| "memory(GiB)": 72.48, |
| "step": 6785, |
| "token_acc": 0.9310902801026671, |
| "train_speed(iter/s)": 0.080892 |
| }, |
| { |
| "epoch": 2.6309007677961587, |
| "grad_norm": 0.5423617959022522, |
| "learning_rate": 4.062442513947007e-07, |
| "loss": 0.1798401355743408, |
| "memory(GiB)": 72.48, |
| "step": 6790, |
| "token_acc": 0.9351920360530033, |
| "train_speed(iter/s)": 0.080893 |
| }, |
| { |
| "epoch": 2.6328384237169082, |
| "grad_norm": 0.5457631945610046, |
| "learning_rate": 4.0203735165585067e-07, |
| "loss": 0.1814468502998352, |
| "memory(GiB)": 72.48, |
| "step": 6795, |
| "token_acc": 0.9389636855644272, |
| "train_speed(iter/s)": 0.080888 |
| }, |
| { |
| "epoch": 2.6347760796376583, |
| "grad_norm": 0.5330853462219238, |
| "learning_rate": 3.9785143531875845e-07, |
| "loss": 0.17347393035888672, |
| "memory(GiB)": 72.48, |
| "step": 6800, |
| "token_acc": 0.936892710837255, |
| "train_speed(iter/s)": 0.080891 |
| }, |
| { |
| "epoch": 2.6367137355584083, |
| "grad_norm": 0.5479428768157959, |
| "learning_rate": 3.9368652148639883e-07, |
| "loss": 0.17252148389816285, |
| "memory(GiB)": 72.48, |
| "step": 6805, |
| "token_acc": 0.9414675828675187, |
| "train_speed(iter/s)": 0.080897 |
| }, |
| { |
| "epoch": 2.638651391479158, |
| "grad_norm": 0.549821138381958, |
| "learning_rate": 3.8954262916589716e-07, |
| "loss": 0.17428038120269776, |
| "memory(GiB)": 72.48, |
| "step": 6810, |
| "token_acc": 0.9399053094352384, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 2.640589047399908, |
| "grad_norm": 0.5740098357200623, |
| "learning_rate": 3.854197772684476e-07, |
| "loss": 0.17955343723297118, |
| "memory(GiB)": 72.48, |
| "step": 6815, |
| "token_acc": 0.9381714466803993, |
| "train_speed(iter/s)": 0.080897 |
| }, |
| { |
| "epoch": 2.642526703320658, |
| "grad_norm": 0.5651427507400513, |
| "learning_rate": 3.813179846092213e-07, |
| "loss": 0.18090170621871948, |
| "memory(GiB)": 72.48, |
| "step": 6820, |
| "token_acc": 0.9276593234834409, |
| "train_speed(iter/s)": 0.080898 |
| }, |
| { |
| "epoch": 2.6444643592414074, |
| "grad_norm": 0.5585702061653137, |
| "learning_rate": 3.7723726990728404e-07, |
| "loss": 0.1783726453781128, |
| "memory(GiB)": 72.48, |
| "step": 6825, |
| "token_acc": 0.9355342256468165, |
| "train_speed(iter/s)": 0.0809 |
| }, |
| { |
| "epoch": 2.6464020151621575, |
| "grad_norm": 0.5618979334831238, |
| "learning_rate": 3.7317765178550904e-07, |
| "loss": 0.17897303104400636, |
| "memory(GiB)": 72.48, |
| "step": 6830, |
| "token_acc": 0.9334919962181724, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 2.6483396710829075, |
| "grad_norm": 0.5554330348968506, |
| "learning_rate": 3.6913914877049263e-07, |
| "loss": 0.17849816083908082, |
| "memory(GiB)": 72.48, |
| "step": 6835, |
| "token_acc": 0.9372031160934828, |
| "train_speed(iter/s)": 0.080903 |
| }, |
| { |
| "epoch": 2.6502773270036575, |
| "grad_norm": 0.5622228384017944, |
| "learning_rate": 3.6512177929246997e-07, |
| "loss": 0.1788189649581909, |
| "memory(GiB)": 72.48, |
| "step": 6840, |
| "token_acc": 0.9408510241506853, |
| "train_speed(iter/s)": 0.0809 |
| }, |
| { |
| "epoch": 2.652214982924407, |
| "grad_norm": 0.575397253036499, |
| "learning_rate": 3.6112556168522996e-07, |
| "loss": 0.17994626760482788, |
| "memory(GiB)": 72.48, |
| "step": 6845, |
| "token_acc": 0.9398202072693606, |
| "train_speed(iter/s)": 0.080898 |
| }, |
| { |
| "epoch": 2.654152638845157, |
| "grad_norm": 0.579742968082428, |
| "learning_rate": 3.5715051418603263e-07, |
| "loss": 0.17810392379760742, |
| "memory(GiB)": 72.48, |
| "step": 6850, |
| "token_acc": 0.938123884445032, |
| "train_speed(iter/s)": 0.080896 |
| }, |
| { |
| "epoch": 2.656090294765907, |
| "grad_norm": 0.623775064945221, |
| "learning_rate": 3.531966549355248e-07, |
| "loss": 0.17560627460479736, |
| "memory(GiB)": 72.48, |
| "step": 6855, |
| "token_acc": 0.9456114270941055, |
| "train_speed(iter/s)": 0.0809 |
| }, |
| { |
| "epoch": 2.658027950686657, |
| "grad_norm": 0.5556665062904358, |
| "learning_rate": 3.492640019776583e-07, |
| "loss": 0.18241939544677735, |
| "memory(GiB)": 72.48, |
| "step": 6860, |
| "token_acc": 0.9434278944250641, |
| "train_speed(iter/s)": 0.0809 |
| }, |
| { |
| "epoch": 2.6599656066074067, |
| "grad_norm": 0.6107257008552551, |
| "learning_rate": 3.4535257325960916e-07, |
| "loss": 0.18052737712860106, |
| "memory(GiB)": 72.48, |
| "step": 6865, |
| "token_acc": 0.9402803168799513, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 2.6619032625281567, |
| "grad_norm": 0.5620692372322083, |
| "learning_rate": 3.414623866316891e-07, |
| "loss": 0.1779445767402649, |
| "memory(GiB)": 72.48, |
| "step": 6870, |
| "token_acc": 0.9431987134751343, |
| "train_speed(iter/s)": 0.080901 |
| }, |
| { |
| "epoch": 2.6638409184489067, |
| "grad_norm": 0.5509908199310303, |
| "learning_rate": 3.375934598472741e-07, |
| "loss": 0.1766197443008423, |
| "memory(GiB)": 72.48, |
| "step": 6875, |
| "token_acc": 0.9389340890392175, |
| "train_speed(iter/s)": 0.080901 |
| }, |
| { |
| "epoch": 2.6657785743696563, |
| "grad_norm": 0.5386033058166504, |
| "learning_rate": 3.337458105627145e-07, |
| "loss": 0.186127233505249, |
| "memory(GiB)": 72.48, |
| "step": 6880, |
| "token_acc": 0.9373184977322413, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 2.6677162302904063, |
| "grad_norm": 0.5640909075737, |
| "learning_rate": 3.299194563372604e-07, |
| "loss": 0.17956843376159667, |
| "memory(GiB)": 72.48, |
| "step": 6885, |
| "token_acc": 0.9340066880502286, |
| "train_speed(iter/s)": 0.080901 |
| }, |
| { |
| "epoch": 2.6696538862111563, |
| "grad_norm": 0.591550350189209, |
| "learning_rate": 3.26114414632977e-07, |
| "loss": 0.18439195156097413, |
| "memory(GiB)": 72.48, |
| "step": 6890, |
| "token_acc": 0.9411013858497447, |
| "train_speed(iter/s)": 0.080902 |
| }, |
| { |
| "epoch": 2.671591542131906, |
| "grad_norm": 0.5314768552780151, |
| "learning_rate": 3.223307028146677e-07, |
| "loss": 0.1863186001777649, |
| "memory(GiB)": 72.48, |
| "step": 6895, |
| "token_acc": 0.9312440529175235, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.673529198052656, |
| "grad_norm": 0.5642319917678833, |
| "learning_rate": 3.1856833814979617e-07, |
| "loss": 0.1659186601638794, |
| "memory(GiB)": 72.48, |
| "step": 6900, |
| "token_acc": 0.9432498411530615, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.675466853973406, |
| "grad_norm": 0.5679530501365662, |
| "learning_rate": 3.1482733780840215e-07, |
| "loss": 0.18756985664367676, |
| "memory(GiB)": 72.48, |
| "step": 6905, |
| "token_acc": 0.9293795510621653, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.6774045098941555, |
| "grad_norm": 0.5828523635864258, |
| "learning_rate": 3.111077188630296e-07, |
| "loss": 0.173581326007843, |
| "memory(GiB)": 72.48, |
| "step": 6910, |
| "token_acc": 0.9433222128533196, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.6793421658149055, |
| "grad_norm": 0.5647770166397095, |
| "learning_rate": 3.074094982886433e-07, |
| "loss": 0.17866160869598388, |
| "memory(GiB)": 72.48, |
| "step": 6915, |
| "token_acc": 0.9379952000526022, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.6812798217356555, |
| "grad_norm": 0.5703522562980652, |
| "learning_rate": 3.037326929625545e-07, |
| "loss": 0.185385799407959, |
| "memory(GiB)": 72.48, |
| "step": 6920, |
| "token_acc": 0.93567492240451, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.683217477656405, |
| "grad_norm": 0.5823149085044861, |
| "learning_rate": 3.000773196643453e-07, |
| "loss": 0.17492568492889404, |
| "memory(GiB)": 72.48, |
| "step": 6925, |
| "token_acc": 0.9457313494666344, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.685155133577155, |
| "grad_norm": 0.5752143263816833, |
| "learning_rate": 2.964433950757861e-07, |
| "loss": 0.17690329551696776, |
| "memory(GiB)": 72.48, |
| "step": 6930, |
| "token_acc": 0.9423112086304626, |
| "train_speed(iter/s)": 0.080919 |
| }, |
| { |
| "epoch": 2.687092789497905, |
| "grad_norm": 0.5538632869720459, |
| "learning_rate": 2.928309357807663e-07, |
| "loss": 0.18144471645355226, |
| "memory(GiB)": 72.48, |
| "step": 6935, |
| "token_acc": 0.9387541327048349, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 2.6890304454186547, |
| "grad_norm": 0.6096916198730469, |
| "learning_rate": 2.8923995826521387e-07, |
| "loss": 0.17612223625183104, |
| "memory(GiB)": 72.48, |
| "step": 6940, |
| "token_acc": 0.9423413840570642, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.6909681013394047, |
| "grad_norm": 0.5589750409126282, |
| "learning_rate": 2.8567047891702394e-07, |
| "loss": 0.18112629652023315, |
| "memory(GiB)": 72.48, |
| "step": 6945, |
| "token_acc": 0.936176827183138, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.6929057572601547, |
| "grad_norm": 0.5414318442344666, |
| "learning_rate": 2.8212251402597977e-07, |
| "loss": 0.1684859037399292, |
| "memory(GiB)": 72.48, |
| "step": 6950, |
| "token_acc": 0.9418085690744301, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.6948434131809043, |
| "grad_norm": 0.5703111886978149, |
| "learning_rate": 2.7859607978368175e-07, |
| "loss": 0.18538738489151002, |
| "memory(GiB)": 72.48, |
| "step": 6955, |
| "token_acc": 0.9352054537494527, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.6967810691016543, |
| "grad_norm": 0.5782099962234497, |
| "learning_rate": 2.750911922834726e-07, |
| "loss": 0.1821625828742981, |
| "memory(GiB)": 72.48, |
| "step": 6960, |
| "token_acc": 0.9416863672182821, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.6987187250224043, |
| "grad_norm": 0.5848382711410522, |
| "learning_rate": 2.7160786752036206e-07, |
| "loss": 0.1709179162979126, |
| "memory(GiB)": 72.48, |
| "step": 6965, |
| "token_acc": 0.939601422943044, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 2.700656380943154, |
| "grad_norm": 0.5618194937705994, |
| "learning_rate": 2.6814612139095863e-07, |
| "loss": 0.17481892108917235, |
| "memory(GiB)": 72.48, |
| "step": 6970, |
| "token_acc": 0.9394167579408543, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.702594036863904, |
| "grad_norm": 0.5825572609901428, |
| "learning_rate": 2.6470596969338957e-07, |
| "loss": 0.17760159969329833, |
| "memory(GiB)": 72.48, |
| "step": 6975, |
| "token_acc": 0.938193398164635, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.704531692784654, |
| "grad_norm": 0.5450993180274963, |
| "learning_rate": 2.612874281272371e-07, |
| "loss": 0.1793541431427002, |
| "memory(GiB)": 72.48, |
| "step": 6980, |
| "token_acc": 0.9428062216461439, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 2.7064693487054035, |
| "grad_norm": 0.5646445155143738, |
| "learning_rate": 2.5789051229346054e-07, |
| "loss": 0.1738759994506836, |
| "memory(GiB)": 72.48, |
| "step": 6985, |
| "token_acc": 0.9388832085490946, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.7084070046261535, |
| "grad_norm": 0.5360503792762756, |
| "learning_rate": 2.5451523769432774e-07, |
| "loss": 0.17443559169769288, |
| "memory(GiB)": 72.48, |
| "step": 6990, |
| "token_acc": 0.9433324034035431, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.7103446605469035, |
| "grad_norm": 0.5633649826049805, |
| "learning_rate": 2.5116161973334443e-07, |
| "loss": 0.17268643379211426, |
| "memory(GiB)": 72.48, |
| "step": 6995, |
| "token_acc": 0.9407816564436879, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.712282316467653, |
| "grad_norm": 0.5596593022346497, |
| "learning_rate": 2.4782967371518363e-07, |
| "loss": 0.17048782110214233, |
| "memory(GiB)": 72.48, |
| "step": 7000, |
| "token_acc": 0.9404626696156375, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 2.714219972388403, |
| "grad_norm": 0.5548799633979797, |
| "learning_rate": 2.445194148456148e-07, |
| "loss": 0.17602903842926027, |
| "memory(GiB)": 72.48, |
| "step": 7005, |
| "token_acc": 0.9416142492370928, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.716157628309153, |
| "grad_norm": 0.5687563419342041, |
| "learning_rate": 2.4123085823143543e-07, |
| "loss": 0.18110830783843995, |
| "memory(GiB)": 72.48, |
| "step": 7010, |
| "token_acc": 0.9394876468864946, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.7180952842299027, |
| "grad_norm": 0.5731750130653381, |
| "learning_rate": 2.3796401888040277e-07, |
| "loss": 0.1813871145248413, |
| "memory(GiB)": 72.48, |
| "step": 7015, |
| "token_acc": 0.9472184931731116, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.7200329401506527, |
| "grad_norm": 0.5686662197113037, |
| "learning_rate": 2.3471891170116333e-07, |
| "loss": 0.18984251022338866, |
| "memory(GiB)": 72.48, |
| "step": 7020, |
| "token_acc": 0.935462094106528, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.7219705960714027, |
| "grad_norm": 1.3723950386047363, |
| "learning_rate": 2.3149555150318748e-07, |
| "loss": 0.18166351318359375, |
| "memory(GiB)": 72.48, |
| "step": 7025, |
| "token_acc": 0.9368168127517006, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.7239082519921523, |
| "grad_norm": 0.5821495652198792, |
| "learning_rate": 2.2829395299669878e-07, |
| "loss": 0.1856691598892212, |
| "memory(GiB)": 72.48, |
| "step": 7030, |
| "token_acc": 0.9391278994427886, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.7258459079129023, |
| "grad_norm": 0.5817452073097229, |
| "learning_rate": 2.2511413079261024e-07, |
| "loss": 0.1843825101852417, |
| "memory(GiB)": 72.48, |
| "step": 7035, |
| "token_acc": 0.94321184930244, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.7277835638336523, |
| "grad_norm": 0.5707274079322815, |
| "learning_rate": 2.2195609940245388e-07, |
| "loss": 0.18705679178237916, |
| "memory(GiB)": 72.48, |
| "step": 7040, |
| "token_acc": 0.9346490475373262, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.729721219754402, |
| "grad_norm": 0.5833871960639954, |
| "learning_rate": 2.1881987323831734e-07, |
| "loss": 0.1628001570701599, |
| "memory(GiB)": 72.48, |
| "step": 7045, |
| "token_acc": 0.9400068138879425, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 2.731658875675152, |
| "grad_norm": 0.5860168933868408, |
| "learning_rate": 2.1570546661277893e-07, |
| "loss": 0.18646140098571778, |
| "memory(GiB)": 72.48, |
| "step": 7050, |
| "token_acc": 0.9412682085144507, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.733596531595902, |
| "grad_norm": 0.5721165537834167, |
| "learning_rate": 2.126128937388372e-07, |
| "loss": 0.18164710998535155, |
| "memory(GiB)": 72.48, |
| "step": 7055, |
| "token_acc": 0.9363445717051138, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.735534187516652, |
| "grad_norm": 0.5800652503967285, |
| "learning_rate": 2.0954216872985267e-07, |
| "loss": 0.1815112590789795, |
| "memory(GiB)": 72.48, |
| "step": 7060, |
| "token_acc": 0.9349871443505778, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.7374718434374015, |
| "grad_norm": 0.5445681214332581, |
| "learning_rate": 2.0649330559947888e-07, |
| "loss": 0.18451664447784424, |
| "memory(GiB)": 72.48, |
| "step": 7065, |
| "token_acc": 0.9358861279235986, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.7394094993581515, |
| "grad_norm": 0.5537438988685608, |
| "learning_rate": 2.034663182615998e-07, |
| "loss": 0.17921509742736816, |
| "memory(GiB)": 72.48, |
| "step": 7070, |
| "token_acc": 0.9403338276591046, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.7413471552789015, |
| "grad_norm": 0.5611521601676941, |
| "learning_rate": 2.0046122053026697e-07, |
| "loss": 0.18472495079040527, |
| "memory(GiB)": 72.48, |
| "step": 7075, |
| "token_acc": 0.9353880089458043, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.743284811199651, |
| "grad_norm": 0.6002687215805054, |
| "learning_rate": 1.9747802611963573e-07, |
| "loss": 0.17965627908706666, |
| "memory(GiB)": 72.48, |
| "step": 7080, |
| "token_acc": 0.9414823121622999, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.745222467120401, |
| "grad_norm": 0.7260130047798157, |
| "learning_rate": 1.9451674864390146e-07, |
| "loss": 0.18268961906433107, |
| "memory(GiB)": 72.48, |
| "step": 7085, |
| "token_acc": 0.9378769786595634, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.747160123041151, |
| "grad_norm": 0.5571488738059998, |
| "learning_rate": 1.9157740161724114e-07, |
| "loss": 0.1752035140991211, |
| "memory(GiB)": 72.48, |
| "step": 7090, |
| "token_acc": 0.9346371590324241, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.749097778961901, |
| "grad_norm": 0.54665207862854, |
| "learning_rate": 1.8865999845374794e-07, |
| "loss": 0.1749187707901001, |
| "memory(GiB)": 72.48, |
| "step": 7095, |
| "token_acc": 0.9437026673560631, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.7510354348826507, |
| "grad_norm": 0.5501448512077332, |
| "learning_rate": 1.857645524673707e-07, |
| "loss": 0.16233822107315063, |
| "memory(GiB)": 72.48, |
| "step": 7100, |
| "token_acc": 0.9529371052859243, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.7529730908034007, |
| "grad_norm": 0.5393111109733582, |
| "learning_rate": 1.8289107687185448e-07, |
| "loss": 0.18919681310653685, |
| "memory(GiB)": 72.48, |
| "step": 7105, |
| "token_acc": 0.9318746623446785, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.7549107467241507, |
| "grad_norm": 0.5541280508041382, |
| "learning_rate": 1.800395847806802e-07, |
| "loss": 0.1748568296432495, |
| "memory(GiB)": 72.48, |
| "step": 7110, |
| "token_acc": 0.9377408026185686, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.7568484026449003, |
| "grad_norm": 0.5741930603981018, |
| "learning_rate": 1.7721008920700277e-07, |
| "loss": 0.17860870361328124, |
| "memory(GiB)": 72.48, |
| "step": 7115, |
| "token_acc": 0.9323363982351555, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.7587860585656503, |
| "grad_norm": 0.5515788197517395, |
| "learning_rate": 1.744026030635948e-07, |
| "loss": 0.1757514238357544, |
| "memory(GiB)": 72.48, |
| "step": 7120, |
| "token_acc": 0.9459343180840384, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.7607237144864003, |
| "grad_norm": 0.5688087940216064, |
| "learning_rate": 1.7161713916278467e-07, |
| "loss": 0.17468688488006592, |
| "memory(GiB)": 72.48, |
| "step": 7125, |
| "token_acc": 0.9401110690395053, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.76266137040715, |
| "grad_norm": 0.532461404800415, |
| "learning_rate": 1.6885371021640007e-07, |
| "loss": 0.17514437437057495, |
| "memory(GiB)": 72.48, |
| "step": 7130, |
| "token_acc": 0.9459091512817107, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 2.7645990263279, |
| "grad_norm": 0.5280036330223083, |
| "learning_rate": 1.661123288357097e-07, |
| "loss": 0.17472249269485474, |
| "memory(GiB)": 72.48, |
| "step": 7135, |
| "token_acc": 0.9471332836932241, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.76653668224865, |
| "grad_norm": 0.535383939743042, |
| "learning_rate": 1.633930075313639e-07, |
| "loss": 0.16930484771728516, |
| "memory(GiB)": 72.48, |
| "step": 7140, |
| "token_acc": 0.9404903371067609, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.7684743381693995, |
| "grad_norm": 0.5797159671783447, |
| "learning_rate": 1.606957587133401e-07, |
| "loss": 0.18371963500976562, |
| "memory(GiB)": 72.48, |
| "step": 7145, |
| "token_acc": 0.9424690928086366, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.7704119940901495, |
| "grad_norm": 0.5747814178466797, |
| "learning_rate": 1.5802059469088472e-07, |
| "loss": 0.1853799343109131, |
| "memory(GiB)": 72.48, |
| "step": 7150, |
| "token_acc": 0.9366994386378439, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.7723496500108995, |
| "grad_norm": 0.5973690152168274, |
| "learning_rate": 1.553675276724581e-07, |
| "loss": 0.18086956739425658, |
| "memory(GiB)": 72.48, |
| "step": 7155, |
| "token_acc": 0.9434806576088719, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.774287305931649, |
| "grad_norm": 0.6191303730010986, |
| "learning_rate": 1.527365697656763e-07, |
| "loss": 0.17998770475387574, |
| "memory(GiB)": 72.48, |
| "step": 7160, |
| "token_acc": 0.9484810014466653, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.776224961852399, |
| "grad_norm": 0.5710081458091736, |
| "learning_rate": 1.5012773297725935e-07, |
| "loss": 0.1800989866256714, |
| "memory(GiB)": 72.48, |
| "step": 7165, |
| "token_acc": 0.9347461042450296, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.778162617773149, |
| "grad_norm": 0.5566663146018982, |
| "learning_rate": 1.4754102921297363e-07, |
| "loss": 0.18220709562301635, |
| "memory(GiB)": 72.48, |
| "step": 7170, |
| "token_acc": 0.9431165210295468, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.7801002736938987, |
| "grad_norm": 0.6151481866836548, |
| "learning_rate": 1.449764702775791e-07, |
| "loss": 0.17154158353805543, |
| "memory(GiB)": 72.48, |
| "step": 7175, |
| "token_acc": 0.9446883230904302, |
| "train_speed(iter/s)": 0.080903 |
| }, |
| { |
| "epoch": 2.7820379296146487, |
| "grad_norm": 0.5727818012237549, |
| "learning_rate": 1.4243406787477377e-07, |
| "loss": 0.17165830135345458, |
| "memory(GiB)": 72.48, |
| "step": 7180, |
| "token_acc": 0.9406975253237595, |
| "train_speed(iter/s)": 0.080903 |
| }, |
| { |
| "epoch": 2.7839755855353987, |
| "grad_norm": 0.5685797929763794, |
| "learning_rate": 1.3991383360714318e-07, |
| "loss": 0.17021799087524414, |
| "memory(GiB)": 72.48, |
| "step": 7185, |
| "token_acc": 0.9400856973995272, |
| "train_speed(iter/s)": 0.080904 |
| }, |
| { |
| "epoch": 2.7859132414561483, |
| "grad_norm": 0.5543227195739746, |
| "learning_rate": 1.3741577897610492e-07, |
| "loss": 0.17734270095825194, |
| "memory(GiB)": 72.48, |
| "step": 7190, |
| "token_acc": 0.9402452126651077, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.7878508973768983, |
| "grad_norm": 0.5909552574157715, |
| "learning_rate": 1.3493991538185648e-07, |
| "loss": 0.180253267288208, |
| "memory(GiB)": 72.48, |
| "step": 7195, |
| "token_acc": 0.9438191560191497, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.7897885532976483, |
| "grad_norm": 0.587429940700531, |
| "learning_rate": 1.3248625412332406e-07, |
| "loss": 0.1743743896484375, |
| "memory(GiB)": 72.48, |
| "step": 7200, |
| "token_acc": 0.9424674429571589, |
| "train_speed(iter/s)": 0.080905 |
| }, |
| { |
| "epoch": 2.791726209218398, |
| "grad_norm": 0.5480507612228394, |
| "learning_rate": 1.3005480639811053e-07, |
| "loss": 0.1738879919052124, |
| "memory(GiB)": 72.48, |
| "step": 7205, |
| "token_acc": 0.9413131514306579, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.793663865139148, |
| "grad_norm": 0.5716990232467651, |
| "learning_rate": 1.2764558330244537e-07, |
| "loss": 0.1896296739578247, |
| "memory(GiB)": 72.48, |
| "step": 7210, |
| "token_acc": 0.9348259519529335, |
| "train_speed(iter/s)": 0.080907 |
| }, |
| { |
| "epoch": 2.795601521059898, |
| "grad_norm": 0.5980350971221924, |
| "learning_rate": 1.2525859583113087e-07, |
| "loss": 0.18182060718536378, |
| "memory(GiB)": 72.48, |
| "step": 7215, |
| "token_acc": 0.944327968261136, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.7975391769806475, |
| "grad_norm": 0.538253128528595, |
| "learning_rate": 1.2289385487749605e-07, |
| "loss": 0.17818151712417601, |
| "memory(GiB)": 72.48, |
| "step": 7220, |
| "token_acc": 0.9330373658897522, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 2.7994768329013975, |
| "grad_norm": 0.5648424029350281, |
| "learning_rate": 1.2055137123334448e-07, |
| "loss": 0.17778961658477782, |
| "memory(GiB)": 72.48, |
| "step": 7225, |
| "token_acc": 0.9421487603305785, |
| "train_speed(iter/s)": 0.080908 |
| }, |
| { |
| "epoch": 2.8014144888221475, |
| "grad_norm": 0.5715829730033875, |
| "learning_rate": 1.1823115558890542e-07, |
| "loss": 0.17621153593063354, |
| "memory(GiB)": 72.48, |
| "step": 7230, |
| "token_acc": 0.9278312608637518, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.803352144742897, |
| "grad_norm": 0.5605965852737427, |
| "learning_rate": 1.1593321853278493e-07, |
| "loss": 0.16748225688934326, |
| "memory(GiB)": 72.48, |
| "step": 7235, |
| "token_acc": 0.9397157162539064, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.805289800663647, |
| "grad_norm": 0.5902793407440186, |
| "learning_rate": 1.1365757055191883e-07, |
| "loss": 0.17629606723785402, |
| "memory(GiB)": 72.48, |
| "step": 7240, |
| "token_acc": 0.9356316976875353, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.807227456584397, |
| "grad_norm": 0.5694670677185059, |
| "learning_rate": 1.1140422203152256e-07, |
| "loss": 0.1794296383857727, |
| "memory(GiB)": 72.48, |
| "step": 7245, |
| "token_acc": 0.9338077379235182, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 2.8091651125051467, |
| "grad_norm": 0.5887570977210999, |
| "learning_rate": 1.0917318325504688e-07, |
| "loss": 0.17631058692932128, |
| "memory(GiB)": 72.48, |
| "step": 7250, |
| "token_acc": 0.9417443297591609, |
| "train_speed(iter/s)": 0.080906 |
| }, |
| { |
| "epoch": 2.8111027684258967, |
| "grad_norm": 0.5737782716751099, |
| "learning_rate": 1.0696446440412678e-07, |
| "loss": 0.17871012687683105, |
| "memory(GiB)": 72.48, |
| "step": 7255, |
| "token_acc": 0.9362381117886821, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.8130404243466467, |
| "grad_norm": 0.5355263948440552, |
| "learning_rate": 1.0477807555853925e-07, |
| "loss": 0.17086371183395385, |
| "memory(GiB)": 72.48, |
| "step": 7260, |
| "token_acc": 0.9380856031128405, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.8149780802673963, |
| "grad_norm": 0.6204429268836975, |
| "learning_rate": 1.0261402669615505e-07, |
| "loss": 0.17964229583740235, |
| "memory(GiB)": 72.48, |
| "step": 7265, |
| "token_acc": 0.939118504544675, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.8169157361881463, |
| "grad_norm": 0.5910000801086426, |
| "learning_rate": 1.0047232769289206e-07, |
| "loss": 0.1701977014541626, |
| "memory(GiB)": 72.48, |
| "step": 7270, |
| "token_acc": 0.9451066297274661, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.8188533921088963, |
| "grad_norm": 0.5505579710006714, |
| "learning_rate": 9.835298832267415e-08, |
| "loss": 0.17648329734802246, |
| "memory(GiB)": 72.48, |
| "step": 7275, |
| "token_acc": 0.9310598783221261, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.820791048029646, |
| "grad_norm": 0.5610338449478149, |
| "learning_rate": 9.625601825738185e-08, |
| "loss": 0.1816624879837036, |
| "memory(GiB)": 72.48, |
| "step": 7280, |
| "token_acc": 0.9337944986458707, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.822728703950396, |
| "grad_norm": 0.5854775309562683, |
| "learning_rate": 9.418142706681122e-08, |
| "loss": 0.1665945529937744, |
| "memory(GiB)": 72.48, |
| "step": 7285, |
| "token_acc": 0.9456264775413712, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 2.824666359871146, |
| "grad_norm": 0.5666484832763672, |
| "learning_rate": 9.212922421863058e-08, |
| "loss": 0.17162128686904907, |
| "memory(GiB)": 72.48, |
| "step": 7290, |
| "token_acc": 0.9401981166687049, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.826604015791896, |
| "grad_norm": 0.5913862586021423, |
| "learning_rate": 9.009941907833386e-08, |
| "loss": 0.17618238925933838, |
| "memory(GiB)": 72.48, |
| "step": 7295, |
| "token_acc": 0.9368721151235406, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.8285416717126455, |
| "grad_norm": 0.5444965362548828, |
| "learning_rate": 8.809202090920178e-08, |
| "loss": 0.17910236120224, |
| "memory(GiB)": 72.48, |
| "step": 7300, |
| "token_acc": 0.944204860762879, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.8304793276333955, |
| "grad_norm": 0.523826003074646, |
| "learning_rate": 8.610703887225735e-08, |
| "loss": 0.17348699569702147, |
| "memory(GiB)": 72.48, |
| "step": 7305, |
| "token_acc": 0.9402267485300329, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.8324169835541455, |
| "grad_norm": 0.5677260160446167, |
| "learning_rate": 8.414448202622494e-08, |
| "loss": 0.1860441207885742, |
| "memory(GiB)": 72.48, |
| "step": 7310, |
| "token_acc": 0.9366769209498462, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.8343546394748955, |
| "grad_norm": 0.556863009929657, |
| "learning_rate": 8.22043593274885e-08, |
| "loss": 0.17637474536895753, |
| "memory(GiB)": 72.48, |
| "step": 7315, |
| "token_acc": 0.940773748044973, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.836292295395645, |
| "grad_norm": 0.6017458438873291, |
| "learning_rate": 8.02866796300511e-08, |
| "loss": 0.17668163776397705, |
| "memory(GiB)": 72.48, |
| "step": 7320, |
| "token_acc": 0.9348431345918783, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 2.838229951316395, |
| "grad_norm": 0.5390684008598328, |
| "learning_rate": 7.839145168549333e-08, |
| "loss": 0.18287774324417114, |
| "memory(GiB)": 72.48, |
| "step": 7325, |
| "token_acc": 0.9365806018433742, |
| "train_speed(iter/s)": 0.08091 |
| }, |
| { |
| "epoch": 2.840167607237145, |
| "grad_norm": 0.5609269142150879, |
| "learning_rate": 7.651868414293495e-08, |
| "loss": 0.17237355709075927, |
| "memory(GiB)": 72.48, |
| "step": 7330, |
| "token_acc": 0.9359338981528605, |
| "train_speed(iter/s)": 0.080909 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 0.5909106731414795, |
| "learning_rate": 7.466838554899547e-08, |
| "loss": 0.17883057594299318, |
| "memory(GiB)": 72.48, |
| "step": 7335, |
| "token_acc": 0.9382247853068388, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.8440429190786447, |
| "grad_norm": 0.5627955198287964, |
| "learning_rate": 7.284056434775367e-08, |
| "loss": 0.17918133735656738, |
| "memory(GiB)": 72.48, |
| "step": 7340, |
| "token_acc": 0.9392019891072697, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.8459805749993947, |
| "grad_norm": 0.5700935125350952, |
| "learning_rate": 7.103522888070868e-08, |
| "loss": 0.18129817247390748, |
| "memory(GiB)": 72.48, |
| "step": 7345, |
| "token_acc": 0.9376185879298292, |
| "train_speed(iter/s)": 0.080912 |
| }, |
| { |
| "epoch": 2.8479182309201443, |
| "grad_norm": 0.5400469303131104, |
| "learning_rate": 6.925238738674511e-08, |
| "loss": 0.17303977012634278, |
| "memory(GiB)": 72.48, |
| "step": 7350, |
| "token_acc": 0.9393620750087627, |
| "train_speed(iter/s)": 0.080916 |
| }, |
| { |
| "epoch": 2.8498558868408943, |
| "grad_norm": 0.5996050238609314, |
| "learning_rate": 6.74920480020913e-08, |
| "loss": 0.1782202124595642, |
| "memory(GiB)": 72.48, |
| "step": 7355, |
| "token_acc": 0.9333392377409736, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.8517935427616443, |
| "grad_norm": 0.5571048855781555, |
| "learning_rate": 6.575421876028721e-08, |
| "loss": 0.18081109523773192, |
| "memory(GiB)": 72.48, |
| "step": 7360, |
| "token_acc": 0.944696919067032, |
| "train_speed(iter/s)": 0.080916 |
| }, |
| { |
| "epoch": 2.853731198682394, |
| "grad_norm": 0.581684410572052, |
| "learning_rate": 6.40389075921416e-08, |
| "loss": 0.17170381546020508, |
| "memory(GiB)": 72.48, |
| "step": 7365, |
| "token_acc": 0.9400785854616895, |
| "train_speed(iter/s)": 0.080919 |
| }, |
| { |
| "epoch": 2.855668854603144, |
| "grad_norm": 0.555248498916626, |
| "learning_rate": 6.234612232570103e-08, |
| "loss": 0.17882840633392333, |
| "memory(GiB)": 72.48, |
| "step": 7370, |
| "token_acc": 0.93646096555393, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.857606510523894, |
| "grad_norm": 0.5772862434387207, |
| "learning_rate": 6.067587068621205e-08, |
| "loss": 0.17019734382629395, |
| "memory(GiB)": 72.48, |
| "step": 7375, |
| "token_acc": 0.9507737160798385, |
| "train_speed(iter/s)": 0.080916 |
| }, |
| { |
| "epoch": 2.8595441664446435, |
| "grad_norm": 0.5468375086784363, |
| "learning_rate": 5.902816029608516e-08, |
| "loss": 0.17486650943756105, |
| "memory(GiB)": 72.48, |
| "step": 7380, |
| "token_acc": 0.9327562219817591, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 2.8614818223653935, |
| "grad_norm": 0.5444268584251404, |
| "learning_rate": 5.740299867486143e-08, |
| "loss": 0.1715664267539978, |
| "memory(GiB)": 72.48, |
| "step": 7385, |
| "token_acc": 0.9419283134203945, |
| "train_speed(iter/s)": 0.080921 |
| }, |
| { |
| "epoch": 2.8634194782861435, |
| "grad_norm": 0.5390260815620422, |
| "learning_rate": 5.580039323917819e-08, |
| "loss": 0.16665502786636352, |
| "memory(GiB)": 72.48, |
| "step": 7390, |
| "token_acc": 0.9419317681336179, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 2.865357134206893, |
| "grad_norm": 0.5891969799995422, |
| "learning_rate": 5.42203513027334e-08, |
| "loss": 0.17798895835876466, |
| "memory(GiB)": 72.48, |
| "step": 7395, |
| "token_acc": 0.9460712379935966, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 2.867294790127643, |
| "grad_norm": 0.5550128221511841, |
| "learning_rate": 5.266288007625575e-08, |
| "loss": 0.1693821668624878, |
| "memory(GiB)": 72.48, |
| "step": 7400, |
| "token_acc": 0.9406276505513147, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 2.869232446048393, |
| "grad_norm": 0.5216034054756165, |
| "learning_rate": 5.112798666746688e-08, |
| "loss": 0.176193904876709, |
| "memory(GiB)": 72.48, |
| "step": 7405, |
| "token_acc": 0.9479760597797217, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.8711701019691427, |
| "grad_norm": 0.552652895450592, |
| "learning_rate": 4.9615678081053055e-08, |
| "loss": 0.17783741950988768, |
| "memory(GiB)": 72.48, |
| "step": 7410, |
| "token_acc": 0.9402217679489562, |
| "train_speed(iter/s)": 0.080915 |
| }, |
| { |
| "epoch": 2.8731077578898927, |
| "grad_norm": 0.5211761593818665, |
| "learning_rate": 4.8125961218632446e-08, |
| "loss": 0.17638933658599854, |
| "memory(GiB)": 72.48, |
| "step": 7415, |
| "token_acc": 0.9420168067226891, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.8750454138106427, |
| "grad_norm": 0.5861157178878784, |
| "learning_rate": 4.665884287872069e-08, |
| "loss": 0.17872381210327148, |
| "memory(GiB)": 72.48, |
| "step": 7420, |
| "token_acc": 0.9381103104586167, |
| "train_speed(iter/s)": 0.080911 |
| }, |
| { |
| "epoch": 2.8769830697313923, |
| "grad_norm": 0.5576848983764648, |
| "learning_rate": 4.521432975670481e-08, |
| "loss": 0.17486473321914672, |
| "memory(GiB)": 72.48, |
| "step": 7425, |
| "token_acc": 0.9473684210526315, |
| "train_speed(iter/s)": 0.080913 |
| }, |
| { |
| "epoch": 2.8789207256521423, |
| "grad_norm": 0.5525920391082764, |
| "learning_rate": 4.3792428444808245e-08, |
| "loss": 0.17528605461120605, |
| "memory(GiB)": 72.48, |
| "step": 7430, |
| "token_acc": 0.9383393355996096, |
| "train_speed(iter/s)": 0.080914 |
| }, |
| { |
| "epoch": 2.8808583815728923, |
| "grad_norm": 0.558647632598877, |
| "learning_rate": 4.2393145432062524e-08, |
| "loss": 0.17458267211914064, |
| "memory(GiB)": 72.48, |
| "step": 7435, |
| "token_acc": 0.9422948409516172, |
| "train_speed(iter/s)": 0.080919 |
| }, |
| { |
| "epoch": 2.882796037493642, |
| "grad_norm": 0.5761637091636658, |
| "learning_rate": 4.101648710427841e-08, |
| "loss": 0.17908756732940673, |
| "memory(GiB)": 72.48, |
| "step": 7440, |
| "token_acc": 0.9369502550077611, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 2.884733693414392, |
| "grad_norm": 0.5622467994689941, |
| "learning_rate": 3.9662459744015945e-08, |
| "loss": 0.1826641082763672, |
| "memory(GiB)": 72.48, |
| "step": 7445, |
| "token_acc": 0.939357744940791, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 2.886671349335142, |
| "grad_norm": 0.56577068567276, |
| "learning_rate": 3.833106953055443e-08, |
| "loss": 0.17491524219512938, |
| "memory(GiB)": 72.48, |
| "step": 7450, |
| "token_acc": 0.936007640878701, |
| "train_speed(iter/s)": 0.080923 |
| }, |
| { |
| "epoch": 2.8886090052558915, |
| "grad_norm": 0.5660979747772217, |
| "learning_rate": 3.702232253986804e-08, |
| "loss": 0.1770297646522522, |
| "memory(GiB)": 72.48, |
| "step": 7455, |
| "token_acc": 0.9419789820207648, |
| "train_speed(iter/s)": 0.080918 |
| }, |
| { |
| "epoch": 2.8905466611766415, |
| "grad_norm": 0.6129657626152039, |
| "learning_rate": 3.573622474459304e-08, |
| "loss": 0.17652267217636108, |
| "memory(GiB)": 72.48, |
| "step": 7460, |
| "token_acc": 0.9396475302294245, |
| "train_speed(iter/s)": 0.080917 |
| }, |
| { |
| "epoch": 2.8924843170973915, |
| "grad_norm": 0.5563213229179382, |
| "learning_rate": 3.44727820140045e-08, |
| "loss": 0.1800151586532593, |
| "memory(GiB)": 72.48, |
| "step": 7465, |
| "token_acc": 0.940325372101073, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 2.894421973018141, |
| "grad_norm": 0.5570213198661804, |
| "learning_rate": 3.323200011398853e-08, |
| "loss": 0.1769587755203247, |
| "memory(GiB)": 72.48, |
| "step": 7470, |
| "token_acc": 0.9385570317676186, |
| "train_speed(iter/s)": 0.080922 |
| }, |
| { |
| "epoch": 2.896359628938891, |
| "grad_norm": 0.5758700370788574, |
| "learning_rate": 3.2013884707015053e-08, |
| "loss": 0.17842193841934204, |
| "memory(GiB)": 72.48, |
| "step": 7475, |
| "token_acc": 0.9400053407708513, |
| "train_speed(iter/s)": 0.080924 |
| }, |
| { |
| "epoch": 2.898297284859641, |
| "grad_norm": 0.5742579102516174, |
| "learning_rate": 3.081844135211176e-08, |
| "loss": 0.17959511280059814, |
| "memory(GiB)": 72.48, |
| "step": 7480, |
| "token_acc": 0.9387418855241045, |
| "train_speed(iter/s)": 0.080933 |
| }, |
| { |
| "epoch": 2.9002349407803907, |
| "grad_norm": 0.5965311527252197, |
| "learning_rate": 2.964567550484021e-08, |
| "loss": 0.17918524742126465, |
| "memory(GiB)": 72.48, |
| "step": 7485, |
| "token_acc": 0.938234600747651, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 2.9021725967011407, |
| "grad_norm": 0.5796821713447571, |
| "learning_rate": 2.8495592517270853e-08, |
| "loss": 0.1829594373703003, |
| "memory(GiB)": 72.48, |
| "step": 7490, |
| "token_acc": 0.9416605091690737, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 2.9041102526218907, |
| "grad_norm": 0.5433580279350281, |
| "learning_rate": 2.736819763795695e-08, |
| "loss": 0.17868415117263795, |
| "memory(GiB)": 72.48, |
| "step": 7495, |
| "token_acc": 0.9428167520763385, |
| "train_speed(iter/s)": 0.080928 |
| }, |
| { |
| "epoch": 2.9060479085426403, |
| "grad_norm": 0.5626754760742188, |
| "learning_rate": 2.6263496011911805e-08, |
| "loss": 0.17260587215423584, |
| "memory(GiB)": 72.48, |
| "step": 7500, |
| "token_acc": 0.9441806517090144, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 2.9079855644633903, |
| "grad_norm": 0.6491982936859131, |
| "learning_rate": 2.518149268058545e-08, |
| "loss": 0.17835769653320313, |
| "memory(GiB)": 72.48, |
| "step": 7505, |
| "token_acc": 0.9360804983229516, |
| "train_speed(iter/s)": 0.080924 |
| }, |
| { |
| "epoch": 2.9099232203841403, |
| "grad_norm": 0.5484529137611389, |
| "learning_rate": 2.4122192581840786e-08, |
| "loss": 0.17280452251434325, |
| "memory(GiB)": 72.48, |
| "step": 7510, |
| "token_acc": 0.9375745131805537, |
| "train_speed(iter/s)": 0.08092 |
| }, |
| { |
| "epoch": 2.9118608763048903, |
| "grad_norm": 0.5870897173881531, |
| "learning_rate": 2.3085600549932476e-08, |
| "loss": 0.18518280982971191, |
| "memory(GiB)": 72.48, |
| "step": 7515, |
| "token_acc": 0.9352595330085671, |
| "train_speed(iter/s)": 0.080924 |
| }, |
| { |
| "epoch": 2.91379853222564, |
| "grad_norm": 0.569111704826355, |
| "learning_rate": 2.2071721315483074e-08, |
| "loss": 0.18087258338928222, |
| "memory(GiB)": 72.48, |
| "step": 7520, |
| "token_acc": 0.9298930464311784, |
| "train_speed(iter/s)": 0.080926 |
| }, |
| { |
| "epoch": 2.91573618814639, |
| "grad_norm": 0.5556108951568604, |
| "learning_rate": 2.1080559505462504e-08, |
| "loss": 0.17546112537384034, |
| "memory(GiB)": 72.48, |
| "step": 7525, |
| "token_acc": 0.9410911406736484, |
| "train_speed(iter/s)": 0.080931 |
| }, |
| { |
| "epoch": 2.91767384406714, |
| "grad_norm": 0.5702312588691711, |
| "learning_rate": 2.011211964316695e-08, |
| "loss": 0.17208282947540282, |
| "memory(GiB)": 72.48, |
| "step": 7530, |
| "token_acc": 0.9407693723541322, |
| "train_speed(iter/s)": 0.080931 |
| }, |
| { |
| "epoch": 2.9196114999878895, |
| "grad_norm": 0.5893260836601257, |
| "learning_rate": 1.916640614819776e-08, |
| "loss": 0.17540626525878905, |
| "memory(GiB)": 72.48, |
| "step": 7535, |
| "token_acc": 0.945633251124329, |
| "train_speed(iter/s)": 0.08093 |
| }, |
| { |
| "epoch": 2.9215491559086395, |
| "grad_norm": 0.5966506600379944, |
| "learning_rate": 1.8243423336442022e-08, |
| "loss": 0.18844203948974608, |
| "memory(GiB)": 72.48, |
| "step": 7540, |
| "token_acc": 0.9377620422994503, |
| "train_speed(iter/s)": 0.080932 |
| }, |
| { |
| "epoch": 2.9234868118293895, |
| "grad_norm": 0.5934481620788574, |
| "learning_rate": 1.7343175420051485e-08, |
| "loss": 0.16864123344421386, |
| "memory(GiB)": 72.48, |
| "step": 7545, |
| "token_acc": 0.948881469115192, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 2.9254244677501395, |
| "grad_norm": 0.5466935038566589, |
| "learning_rate": 1.6465666507425314e-08, |
| "loss": 0.17849595546722413, |
| "memory(GiB)": 72.48, |
| "step": 7550, |
| "token_acc": 0.9363413561239239, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.927362123670889, |
| "grad_norm": 0.5641075968742371, |
| "learning_rate": 1.5610900603189593e-08, |
| "loss": 0.17939648628234864, |
| "memory(GiB)": 72.48, |
| "step": 7555, |
| "token_acc": 0.9402934467665373, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 2.929299779591639, |
| "grad_norm": 0.5786232948303223, |
| "learning_rate": 1.4778881608180085e-08, |
| "loss": 0.1795699715614319, |
| "memory(GiB)": 72.48, |
| "step": 7560, |
| "token_acc": 0.933832010905659, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.931237435512389, |
| "grad_norm": 0.5693191289901733, |
| "learning_rate": 1.3969613319423924e-08, |
| "loss": 0.18208067417144774, |
| "memory(GiB)": 72.48, |
| "step": 7565, |
| "token_acc": 0.9298536732538845, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 2.9331750914331387, |
| "grad_norm": 0.5985451340675354, |
| "learning_rate": 1.3183099430122414e-08, |
| "loss": 0.18023189306259155, |
| "memory(GiB)": 72.48, |
| "step": 7570, |
| "token_acc": 0.9351883803369201, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 2.9351127473538887, |
| "grad_norm": 0.5915820598602295, |
| "learning_rate": 1.2419343529633809e-08, |
| "loss": 0.17908819913864135, |
| "memory(GiB)": 72.48, |
| "step": 7575, |
| "token_acc": 0.9367020741882518, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.9370504032746387, |
| "grad_norm": 0.5795514583587646, |
| "learning_rate": 1.167834910345833e-08, |
| "loss": 0.17875158786773682, |
| "memory(GiB)": 72.48, |
| "step": 7580, |
| "token_acc": 0.9413019342020686, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.9389880591953883, |
| "grad_norm": 0.5513561367988586, |
| "learning_rate": 1.0960119533220404e-08, |
| "loss": 0.17047605514526368, |
| "memory(GiB)": 72.48, |
| "step": 7585, |
| "token_acc": 0.9344127422911114, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 2.9409257151161383, |
| "grad_norm": 0.5630007982254028, |
| "learning_rate": 1.0264658096653669e-08, |
| "loss": 0.1801111578941345, |
| "memory(GiB)": 72.48, |
| "step": 7590, |
| "token_acc": 0.9363969165748981, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 2.9428633710368883, |
| "grad_norm": 0.5613774657249451, |
| "learning_rate": 9.591967967588212e-09, |
| "loss": 0.17825416326522828, |
| "memory(GiB)": 72.48, |
| "step": 7595, |
| "token_acc": 0.9386145606497558, |
| "train_speed(iter/s)": 0.080936 |
| }, |
| { |
| "epoch": 2.944801026957638, |
| "grad_norm": 0.5781858563423157, |
| "learning_rate": 8.942052215931695e-09, |
| "loss": 0.18551340103149414, |
| "memory(GiB)": 72.48, |
| "step": 7600, |
| "token_acc": 0.9380983393031586, |
| "train_speed(iter/s)": 0.080936 |
| }, |
| { |
| "epoch": 2.946738682878388, |
| "grad_norm": 0.5731537938117981, |
| "learning_rate": 8.314913807659918e-09, |
| "loss": 0.16966052055358888, |
| "memory(GiB)": 72.48, |
| "step": 7605, |
| "token_acc": 0.9390907609581228, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.948676338799138, |
| "grad_norm": 0.5632595419883728, |
| "learning_rate": 7.710555604801273e-09, |
| "loss": 0.18501553535461426, |
| "memory(GiB)": 72.48, |
| "step": 7610, |
| "token_acc": 0.9322401892478693, |
| "train_speed(iter/s)": 0.080935 |
| }, |
| { |
| "epoch": 2.9506139947198875, |
| "grad_norm": 0.5846831798553467, |
| "learning_rate": 7.128980365422866e-09, |
| "loss": 0.1800399661064148, |
| "memory(GiB)": 72.48, |
| "step": 7615, |
| "token_acc": 0.9379122666903305, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 2.9525516506406375, |
| "grad_norm": 0.569674551486969, |
| "learning_rate": 6.570190743618865e-09, |
| "loss": 0.17378931045532225, |
| "memory(GiB)": 72.48, |
| "step": 7620, |
| "token_acc": 0.9357696937274308, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.9544893065613875, |
| "grad_norm": 0.5462942719459534, |
| "learning_rate": 6.0341892894982825e-09, |
| "loss": 0.180399489402771, |
| "memory(GiB)": 72.48, |
| "step": 7625, |
| "token_acc": 0.9430610889774237, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.956426962482137, |
| "grad_norm": 0.5479962229728699, |
| "learning_rate": 5.52097844917443e-09, |
| "loss": 0.1843660831451416, |
| "memory(GiB)": 72.48, |
| "step": 7630, |
| "token_acc": 0.9352336893666235, |
| "train_speed(iter/s)": 0.080936 |
| }, |
| { |
| "epoch": 2.958364618402887, |
| "grad_norm": 0.6159018278121948, |
| "learning_rate": 5.030560564751042e-09, |
| "loss": 0.17185438871383668, |
| "memory(GiB)": 72.48, |
| "step": 7635, |
| "token_acc": 0.9426850041006877, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 2.960302274323637, |
| "grad_norm": 0.599929928779602, |
| "learning_rate": 4.5629378743150544e-09, |
| "loss": 0.1817415952682495, |
| "memory(GiB)": 72.48, |
| "step": 7640, |
| "token_acc": 0.9399086010504059, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 2.9622399302443867, |
| "grad_norm": 0.5604813694953918, |
| "learning_rate": 4.1181125119221785e-09, |
| "loss": 0.17877774238586425, |
| "memory(GiB)": 72.48, |
| "step": 7645, |
| "token_acc": 0.9378071957533259, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 2.9641775861651367, |
| "grad_norm": 0.5399024486541748, |
| "learning_rate": 3.696086507592456e-09, |
| "loss": 0.1777539372444153, |
| "memory(GiB)": 72.48, |
| "step": 7650, |
| "token_acc": 0.942624539639465, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.9661152420858867, |
| "grad_norm": 0.528237521648407, |
| "learning_rate": 3.296861787295269e-09, |
| "loss": 0.1777519941329956, |
| "memory(GiB)": 72.48, |
| "step": 7655, |
| "token_acc": 0.934471078686816, |
| "train_speed(iter/s)": 0.080938 |
| }, |
| { |
| "epoch": 2.9680528980066363, |
| "grad_norm": 0.5477710366249084, |
| "learning_rate": 2.920440172944905e-09, |
| "loss": 0.169677734375, |
| "memory(GiB)": 72.48, |
| "step": 7660, |
| "token_acc": 0.9360831656606304, |
| "train_speed(iter/s)": 0.080937 |
| }, |
| { |
| "epoch": 2.9699905539273863, |
| "grad_norm": 0.5542213320732117, |
| "learning_rate": 2.5668233823911147e-09, |
| "loss": 0.17264974117279053, |
| "memory(GiB)": 72.48, |
| "step": 7665, |
| "token_acc": 0.9443621747408504, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 2.9719282098481363, |
| "grad_norm": 0.5807104706764221, |
| "learning_rate": 2.236013029409678e-09, |
| "loss": 0.18212735652923584, |
| "memory(GiB)": 72.48, |
| "step": 7670, |
| "token_acc": 0.929225645295587, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.973865865768886, |
| "grad_norm": 0.5461880564689636, |
| "learning_rate": 1.9280106236968523e-09, |
| "loss": 0.17995672225952147, |
| "memory(GiB)": 72.48, |
| "step": 7675, |
| "token_acc": 0.9400897334389021, |
| "train_speed(iter/s)": 0.080943 |
| }, |
| { |
| "epoch": 2.975803521689636, |
| "grad_norm": 0.5512206554412842, |
| "learning_rate": 1.642817570861599e-09, |
| "loss": 0.17052674293518066, |
| "memory(GiB)": 72.48, |
| "step": 7680, |
| "token_acc": 0.9422564949967869, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.977741177610386, |
| "grad_norm": 0.5648573637008667, |
| "learning_rate": 1.380435172420036e-09, |
| "loss": 0.18253281116485595, |
| "memory(GiB)": 72.48, |
| "step": 7685, |
| "token_acc": 0.9338983584864048, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 2.9796788335311355, |
| "grad_norm": 0.5563445091247559, |
| "learning_rate": 1.1408646257882183e-09, |
| "loss": 0.1693689703941345, |
| "memory(GiB)": 72.48, |
| "step": 7690, |
| "token_acc": 0.943661542215715, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.9816164894518855, |
| "grad_norm": 0.5370368957519531, |
| "learning_rate": 9.241070242771433e-10, |
| "loss": 0.16747430562973023, |
| "memory(GiB)": 72.48, |
| "step": 7695, |
| "token_acc": 0.9449776269094275, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.9835541453726355, |
| "grad_norm": 0.5579628348350525, |
| "learning_rate": 7.301633570888645e-10, |
| "loss": 0.17679812908172607, |
| "memory(GiB)": 72.48, |
| "step": 7700, |
| "token_acc": 0.9357514362443485, |
| "train_speed(iter/s)": 0.080939 |
| }, |
| { |
| "epoch": 2.985491801293385, |
| "grad_norm": 0.5460850596427917, |
| "learning_rate": 5.590345093109406e-10, |
| "loss": 0.16887528896331788, |
| "memory(GiB)": 72.48, |
| "step": 7705, |
| "token_acc": 0.9432771576566717, |
| "train_speed(iter/s)": 0.080941 |
| }, |
| { |
| "epoch": 2.987429457214135, |
| "grad_norm": 0.5764380097389221, |
| "learning_rate": 4.107212619108847e-10, |
| "loss": 0.18432481288909913, |
| "memory(GiB)": 72.48, |
| "step": 7710, |
| "token_acc": 0.9365053901437371, |
| "train_speed(iter/s)": 0.08094 |
| }, |
| { |
| "epoch": 2.989367113134885, |
| "grad_norm": 0.5813378691673279, |
| "learning_rate": 2.8522429173671875e-10, |
| "loss": 0.18421580791473388, |
| "memory(GiB)": 72.48, |
| "step": 7715, |
| "token_acc": 0.9358071332522251, |
| "train_speed(iter/s)": 0.080944 |
| }, |
| { |
| "epoch": 2.9913047690556347, |
| "grad_norm": 0.6146759390830994, |
| "learning_rate": 1.8254417150975756e-10, |
| "loss": 0.17415223121643067, |
| "memory(GiB)": 72.48, |
| "step": 7720, |
| "token_acc": 0.9437247617509288, |
| "train_speed(iter/s)": 0.080942 |
| }, |
| { |
| "epoch": 2.9932424249763847, |
| "grad_norm": 0.5900871753692627, |
| "learning_rate": 1.0268136982405363e-10, |
| "loss": 0.18007500171661378, |
| "memory(GiB)": 72.48, |
| "step": 7725, |
| "token_acc": 0.9351921381697085, |
| "train_speed(iter/s)": 0.080945 |
| }, |
| { |
| "epoch": 2.9951800808971347, |
| "grad_norm": 0.5420626401901245, |
| "learning_rate": 4.563625114417658e-11, |
| "loss": 0.1813086152076721, |
| "memory(GiB)": 72.48, |
| "step": 7730, |
| "token_acc": 0.9420055078567957, |
| "train_speed(iter/s)": 0.080946 |
| }, |
| { |
| "epoch": 2.9971177368178843, |
| "grad_norm": 0.5917452573776245, |
| "learning_rate": 1.1409075802992775e-11, |
| "loss": 0.17294812202453613, |
| "memory(GiB)": 72.48, |
| "step": 7735, |
| "token_acc": 0.9346650914688719, |
| "train_speed(iter/s)": 0.080948 |
| }, |
| { |
| "epoch": 2.9990553927386343, |
| "grad_norm": 0.5623822212219238, |
| "learning_rate": 0.0, |
| "loss": 0.17589629888534547, |
| "memory(GiB)": 72.48, |
| "step": 7740, |
| "token_acc": 0.9381395076887096, |
| "train_speed(iter/s)": 0.080953 |
| }, |
| { |
| "epoch": 2.9990553927386343, |
| "eval_loss": 0.2572384476661682, |
| "eval_runtime": 105.2178, |
| "eval_samples_per_second": 31.687, |
| "eval_steps_per_second": 3.963, |
| "eval_token_acc": 0.908205277166483, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.9990553927386343, |
| "eval_loss": 0.2572384476661682, |
| "eval_runtime": 105.452, |
| "eval_samples_per_second": 31.616, |
| "eval_steps_per_second": 3.954, |
| "eval_token_acc": 0.908205277166483, |
| "step": 7740 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 7740, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.690206829043148e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |