| { | |
| "best_global_step": 1280, | |
| "best_metric": 1.0, | |
| "best_model_checkpoint": "/projects/bffw/darora1/llm_ipc/final_models/mpi_async_n4/checkpoint-1280", | |
| "epoch": 0.6287425149700598, | |
| "eval_steps": 40, | |
| "global_step": 1680, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007485029940119761, | |
| "grad_norm": 7.328390598297119, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 0.48, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0014970059880239522, | |
| "grad_norm": 7.235108852386475, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.4252, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.002245508982035928, | |
| "grad_norm": 8.011260986328125, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.4299, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0029940119760479044, | |
| "grad_norm": 6.425393104553223, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 0.4424, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0037425149700598802, | |
| "grad_norm": 6.826442241668701, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 0.4549, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004491017964071856, | |
| "grad_norm": 4.996034622192383, | |
| "learning_rate": 2.2e-06, | |
| "loss": 0.3498, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.005239520958083832, | |
| "grad_norm": 4.402273654937744, | |
| "learning_rate": 2.6e-06, | |
| "loss": 0.3288, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.005988023952095809, | |
| "grad_norm": 4.156887054443359, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2507, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006736526946107785, | |
| "grad_norm": 2.647883176803589, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 0.197, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0074850299401197605, | |
| "grad_norm": 2.444559097290039, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 0.1474, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008233532934131737, | |
| "grad_norm": 1.8110377788543701, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 0.1494, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.008982035928143712, | |
| "grad_norm": 1.4763550758361816, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 0.107, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.009730538922155689, | |
| "grad_norm": 1.2829464673995972, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0854, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.010479041916167664, | |
| "grad_norm": 1.1430706977844238, | |
| "learning_rate": 5.400000000000001e-06, | |
| "loss": 0.0597, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.01122754491017964, | |
| "grad_norm": 1.3779264688491821, | |
| "learning_rate": 5.8e-06, | |
| "loss": 0.0642, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011976047904191617, | |
| "grad_norm": 0.9946982860565186, | |
| "learning_rate": 6.200000000000001e-06, | |
| "loss": 0.0398, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.012724550898203593, | |
| "grad_norm": 1.1442718505859375, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 0.04, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.01347305389221557, | |
| "grad_norm": 0.6475897431373596, | |
| "learning_rate": 7e-06, | |
| "loss": 0.0283, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.014221556886227544, | |
| "grad_norm": 0.8502711057662964, | |
| "learning_rate": 7.4e-06, | |
| "loss": 0.0345, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.014970059880239521, | |
| "grad_norm": 0.5282578468322754, | |
| "learning_rate": 7.800000000000002e-06, | |
| "loss": 0.0173, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014970059880239521, | |
| "eval_accuracy": 0.9910189955595328, | |
| "eval_loss": 0.02636127918958664, | |
| "eval_runtime": 156.2607, | |
| "eval_samples_per_second": 31.998, | |
| "eval_steps_per_second": 7.999, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.015718562874251496, | |
| "grad_norm": 0.755415141582489, | |
| "learning_rate": 8.2e-06, | |
| "loss": 0.023, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.016467065868263474, | |
| "grad_norm": 0.6637702584266663, | |
| "learning_rate": 8.6e-06, | |
| "loss": 0.0165, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.01721556886227545, | |
| "grad_norm": 0.42257505655288696, | |
| "learning_rate": 9e-06, | |
| "loss": 0.0149, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.017964071856287425, | |
| "grad_norm": 0.6686341166496277, | |
| "learning_rate": 9.4e-06, | |
| "loss": 0.019, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0187125748502994, | |
| "grad_norm": 0.5314021110534668, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.0169, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019461077844311378, | |
| "grad_norm": 0.39661431312561035, | |
| "learning_rate": 9.999998993000299e-06, | |
| "loss": 0.0152, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.020209580838323353, | |
| "grad_norm": 0.571976900100708, | |
| "learning_rate": 9.999990937005126e-06, | |
| "loss": 0.0139, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.020958083832335328, | |
| "grad_norm": 0.5158469676971436, | |
| "learning_rate": 9.999974825027756e-06, | |
| "loss": 0.0092, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.021706586826347306, | |
| "grad_norm": 0.7198213338851929, | |
| "learning_rate": 9.999950657094151e-06, | |
| "loss": 0.0113, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.02245508982035928, | |
| "grad_norm": 0.48938679695129395, | |
| "learning_rate": 9.999918433243253e-06, | |
| "loss": 0.0085, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.023203592814371257, | |
| "grad_norm": 0.5157604813575745, | |
| "learning_rate": 9.999878153526974e-06, | |
| "loss": 0.0114, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.023952095808383235, | |
| "grad_norm": 0.510836124420166, | |
| "learning_rate": 9.99982981801022e-06, | |
| "loss": 0.009, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02470059880239521, | |
| "grad_norm": 0.34386318922042847, | |
| "learning_rate": 9.999773426770864e-06, | |
| "loss": 0.0102, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.025449101796407185, | |
| "grad_norm": 0.31605905294418335, | |
| "learning_rate": 9.999708979899769e-06, | |
| "loss": 0.0095, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02619760479041916, | |
| "grad_norm": 0.6626418828964233, | |
| "learning_rate": 9.999636477500765e-06, | |
| "loss": 0.0079, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02694610778443114, | |
| "grad_norm": 0.49883756041526794, | |
| "learning_rate": 9.999555919690673e-06, | |
| "loss": 0.0065, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.027694610778443114, | |
| "grad_norm": 0.3710748255252838, | |
| "learning_rate": 9.999467306599285e-06, | |
| "loss": 0.0055, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.02844311377245509, | |
| "grad_norm": 0.33792468905448914, | |
| "learning_rate": 9.999370638369377e-06, | |
| "loss": 0.0065, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.029191616766467067, | |
| "grad_norm": 0.33830082416534424, | |
| "learning_rate": 9.999265915156697e-06, | |
| "loss": 0.0067, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.029940119760479042, | |
| "grad_norm": 0.3766763210296631, | |
| "learning_rate": 9.999153137129978e-06, | |
| "loss": 0.0054, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.029940119760479042, | |
| "eval_accuracy": 0.9975511997129987, | |
| "eval_loss": 0.006336509715765715, | |
| "eval_runtime": 152.2618, | |
| "eval_samples_per_second": 32.838, | |
| "eval_steps_per_second": 8.21, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.030688622754491017, | |
| "grad_norm": 0.3682909607887268, | |
| "learning_rate": 9.999032304470926e-06, | |
| "loss": 0.0052, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.03143712574850299, | |
| "grad_norm": 0.48871251940727234, | |
| "learning_rate": 9.998903417374228e-06, | |
| "loss": 0.0065, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.03218562874251497, | |
| "grad_norm": 0.4313011169433594, | |
| "learning_rate": 9.998766476047546e-06, | |
| "loss": 0.0056, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.03293413173652695, | |
| "grad_norm": 0.3613654673099518, | |
| "learning_rate": 9.998621480711522e-06, | |
| "loss": 0.0034, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.033682634730538924, | |
| "grad_norm": 0.39512524008750916, | |
| "learning_rate": 9.998468431599768e-06, | |
| "loss": 0.0039, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0344311377245509, | |
| "grad_norm": 0.26590684056282043, | |
| "learning_rate": 9.99830732895888e-06, | |
| "loss": 0.0041, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.035179640718562874, | |
| "grad_norm": 0.29519563913345337, | |
| "learning_rate": 9.998138173048424e-06, | |
| "loss": 0.0048, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.03592814371257485, | |
| "grad_norm": 0.8653535842895508, | |
| "learning_rate": 9.997960964140946e-06, | |
| "loss": 0.0037, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.036676646706586824, | |
| "grad_norm": 0.5562458038330078, | |
| "learning_rate": 9.997775702521965e-06, | |
| "loss": 0.004, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0374251497005988, | |
| "grad_norm": 0.31169670820236206, | |
| "learning_rate": 9.997582388489975e-06, | |
| "loss": 0.004, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03817365269461078, | |
| "grad_norm": 0.3139854371547699, | |
| "learning_rate": 9.99738102235644e-06, | |
| "loss": 0.0032, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.038922155688622756, | |
| "grad_norm": 0.4420141875743866, | |
| "learning_rate": 9.997171604445803e-06, | |
| "loss": 0.0037, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.03967065868263473, | |
| "grad_norm": 0.46555566787719727, | |
| "learning_rate": 9.99695413509548e-06, | |
| "loss": 0.0034, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.040419161676646706, | |
| "grad_norm": 0.2851720154285431, | |
| "learning_rate": 9.996728614655854e-06, | |
| "loss": 0.0029, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.04116766467065868, | |
| "grad_norm": 0.3109307885169983, | |
| "learning_rate": 9.996495043490285e-06, | |
| "loss": 0.0029, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.041916167664670656, | |
| "grad_norm": 0.37666594982147217, | |
| "learning_rate": 9.996253421975103e-06, | |
| "loss": 0.0038, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.04266467065868264, | |
| "grad_norm": 0.5034800171852112, | |
| "learning_rate": 9.996003750499608e-06, | |
| "loss": 0.0032, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.04341317365269461, | |
| "grad_norm": 0.3710559606552124, | |
| "learning_rate": 9.995746029466071e-06, | |
| "loss": 0.0022, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04416167664670659, | |
| "grad_norm": 0.4710935056209564, | |
| "learning_rate": 9.995480259289731e-06, | |
| "loss": 0.0025, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04491017964071856, | |
| "grad_norm": 0.31052565574645996, | |
| "learning_rate": 9.995206440398798e-06, | |
| "loss": 0.0024, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04491017964071856, | |
| "eval_accuracy": 0.9988225992721916, | |
| "eval_loss": 0.002896190620958805, | |
| "eval_runtime": 154.576, | |
| "eval_samples_per_second": 32.347, | |
| "eval_steps_per_second": 8.087, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04565868263473054, | |
| "grad_norm": 0.34983423352241516, | |
| "learning_rate": 9.994924573234448e-06, | |
| "loss": 0.0022, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.04640718562874251, | |
| "grad_norm": 0.2754887640476227, | |
| "learning_rate": 9.994634658250825e-06, | |
| "loss": 0.0021, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.04715568862275449, | |
| "grad_norm": 0.49522289633750916, | |
| "learning_rate": 9.994336695915041e-06, | |
| "loss": 0.0021, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.04790419161676647, | |
| "grad_norm": 0.37913596630096436, | |
| "learning_rate": 9.994030686707171e-06, | |
| "loss": 0.002, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.048652694610778445, | |
| "grad_norm": 0.3330959379673004, | |
| "learning_rate": 9.993716631120259e-06, | |
| "loss": 0.0017, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04940119760479042, | |
| "grad_norm": 0.2224518060684204, | |
| "learning_rate": 9.993394529660307e-06, | |
| "loss": 0.0018, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.050149700598802395, | |
| "grad_norm": 0.2787413001060486, | |
| "learning_rate": 9.99306438284629e-06, | |
| "loss": 0.0015, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.05089820359281437, | |
| "grad_norm": 0.43909233808517456, | |
| "learning_rate": 9.992726191210139e-06, | |
| "loss": 0.0023, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.051646706586826345, | |
| "grad_norm": 0.1608552634716034, | |
| "learning_rate": 9.992379955296745e-06, | |
| "loss": 0.0012, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.05239520958083832, | |
| "grad_norm": 0.34503915905952454, | |
| "learning_rate": 9.992025675663966e-06, | |
| "loss": 0.0018, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0531437125748503, | |
| "grad_norm": 0.17146268486976624, | |
| "learning_rate": 9.991663352882615e-06, | |
| "loss": 0.0013, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05389221556886228, | |
| "grad_norm": 0.47353699803352356, | |
| "learning_rate": 9.991292987536469e-06, | |
| "loss": 0.002, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05464071856287425, | |
| "grad_norm": 0.10907532274723053, | |
| "learning_rate": 9.990914580222258e-06, | |
| "loss": 0.001, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.05538922155688623, | |
| "grad_norm": 0.195388525724411, | |
| "learning_rate": 9.990528131549674e-06, | |
| "loss": 0.0013, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.0561377245508982, | |
| "grad_norm": 0.124148428440094, | |
| "learning_rate": 9.990133642141359e-06, | |
| "loss": 0.0007, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05688622754491018, | |
| "grad_norm": 0.3281680643558502, | |
| "learning_rate": 9.989731112632917e-06, | |
| "loss": 0.0018, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.05763473053892216, | |
| "grad_norm": 0.3646385669708252, | |
| "learning_rate": 9.989320543672904e-06, | |
| "loss": 0.0014, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.058383233532934134, | |
| "grad_norm": 0.20738907158374786, | |
| "learning_rate": 9.988901935922826e-06, | |
| "loss": 0.0012, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.05913173652694611, | |
| "grad_norm": 0.19206871092319489, | |
| "learning_rate": 9.988475290057145e-06, | |
| "loss": 0.0008, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.059880239520958084, | |
| "grad_norm": 0.4680192470550537, | |
| "learning_rate": 9.988040606763272e-06, | |
| "loss": 0.0011, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.059880239520958084, | |
| "eval_accuracy": 0.9995344699843772, | |
| "eval_loss": 0.0012882280861958861, | |
| "eval_runtime": 155.4759, | |
| "eval_samples_per_second": 32.159, | |
| "eval_steps_per_second": 8.04, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06062874251497006, | |
| "grad_norm": 0.10511677712202072, | |
| "learning_rate": 9.98759788674157e-06, | |
| "loss": 0.0006, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.061377245508982034, | |
| "grad_norm": 0.264397531747818, | |
| "learning_rate": 9.987147130705347e-06, | |
| "loss": 0.0008, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.06212574850299401, | |
| "grad_norm": 0.15092360973358154, | |
| "learning_rate": 9.986688339380863e-06, | |
| "loss": 0.001, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.06287425149700598, | |
| "grad_norm": 0.23679876327514648, | |
| "learning_rate": 9.98622151350732e-06, | |
| "loss": 0.0009, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.06362275449101797, | |
| "grad_norm": 0.3080887198448181, | |
| "learning_rate": 9.985746653836867e-06, | |
| "loss": 0.0015, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06437125748502993, | |
| "grad_norm": 0.13096538186073303, | |
| "learning_rate": 9.985263761134602e-06, | |
| "loss": 0.001, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.06511976047904192, | |
| "grad_norm": 0.27316954731941223, | |
| "learning_rate": 9.984772836178559e-06, | |
| "loss": 0.0008, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.0658682634730539, | |
| "grad_norm": 0.314272940158844, | |
| "learning_rate": 9.984273879759713e-06, | |
| "loss": 0.0017, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.06661676646706587, | |
| "grad_norm": 0.20915231108665466, | |
| "learning_rate": 9.983766892681985e-06, | |
| "loss": 0.0012, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.06736526946107785, | |
| "grad_norm": 0.18497829139232635, | |
| "learning_rate": 9.983251875762234e-06, | |
| "loss": 0.0008, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06811377245508982, | |
| "grad_norm": 0.20126977562904358, | |
| "learning_rate": 9.982728829830252e-06, | |
| "loss": 0.0008, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.0688622754491018, | |
| "grad_norm": 0.15316377580165863, | |
| "learning_rate": 9.982197755728771e-06, | |
| "loss": 0.001, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.06961077844311377, | |
| "grad_norm": 0.14749199151992798, | |
| "learning_rate": 9.981658654313458e-06, | |
| "loss": 0.0005, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.07035928143712575, | |
| "grad_norm": 0.25107651948928833, | |
| "learning_rate": 9.981111526452912e-06, | |
| "loss": 0.0011, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.07110778443113773, | |
| "grad_norm": 0.07325785607099533, | |
| "learning_rate": 9.980556373028665e-06, | |
| "loss": 0.0004, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0718562874251497, | |
| "grad_norm": 0.11805955320596695, | |
| "learning_rate": 9.979993194935182e-06, | |
| "loss": 0.0005, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07260479041916168, | |
| "grad_norm": 0.19970782101154327, | |
| "learning_rate": 9.979421993079853e-06, | |
| "loss": 0.0008, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.07335329341317365, | |
| "grad_norm": 0.24476714432239532, | |
| "learning_rate": 9.978842768382999e-06, | |
| "loss": 0.0005, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.07410179640718563, | |
| "grad_norm": 0.12824182212352753, | |
| "learning_rate": 9.978255521777865e-06, | |
| "loss": 0.0004, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.0748502994011976, | |
| "grad_norm": 0.08068165183067322, | |
| "learning_rate": 9.977660254210623e-06, | |
| "loss": 0.0004, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0748502994011976, | |
| "eval_accuracy": 0.9997569708964628, | |
| "eval_loss": 0.000611252966336906, | |
| "eval_runtime": 156.7213, | |
| "eval_samples_per_second": 31.904, | |
| "eval_steps_per_second": 7.976, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07559880239520958, | |
| "grad_norm": 0.08569593727588654, | |
| "learning_rate": 9.977056966640368e-06, | |
| "loss": 0.0005, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.07634730538922156, | |
| "grad_norm": 0.10873577743768692, | |
| "learning_rate": 9.976445660039118e-06, | |
| "loss": 0.0003, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.07709580838323353, | |
| "grad_norm": 0.06685052067041397, | |
| "learning_rate": 9.975826335391808e-06, | |
| "loss": 0.0004, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.07784431137724551, | |
| "grad_norm": 0.171136736869812, | |
| "learning_rate": 9.975198993696294e-06, | |
| "loss": 0.0005, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.07859281437125748, | |
| "grad_norm": 0.2799069881439209, | |
| "learning_rate": 9.974563635963348e-06, | |
| "loss": 0.0009, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07934131736526946, | |
| "grad_norm": 0.09249293059110641, | |
| "learning_rate": 9.973920263216658e-06, | |
| "loss": 0.0005, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.08008982035928144, | |
| "grad_norm": 0.19255271553993225, | |
| "learning_rate": 9.973268876492827e-06, | |
| "loss": 0.0004, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.08083832335329341, | |
| "grad_norm": 0.1604669839143753, | |
| "learning_rate": 9.972609476841368e-06, | |
| "loss": 0.0004, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.0815868263473054, | |
| "grad_norm": 0.08825163543224335, | |
| "learning_rate": 9.971942065324704e-06, | |
| "loss": 0.0007, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.08233532934131736, | |
| "grad_norm": 0.2524869441986084, | |
| "learning_rate": 9.971266643018171e-06, | |
| "loss": 0.0006, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08308383233532934, | |
| "grad_norm": 0.10447513312101364, | |
| "learning_rate": 9.970583211010008e-06, | |
| "loss": 0.0006, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.08383233532934131, | |
| "grad_norm": 0.17385387420654297, | |
| "learning_rate": 9.969891770401358e-06, | |
| "loss": 0.0003, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.0845808383233533, | |
| "grad_norm": 0.0575445182621479, | |
| "learning_rate": 9.969192322306271e-06, | |
| "loss": 0.0002, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.08532934131736528, | |
| "grad_norm": 0.20742414891719818, | |
| "learning_rate": 9.968484867851698e-06, | |
| "loss": 0.0004, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.08607784431137724, | |
| "grad_norm": 0.22014112770557404, | |
| "learning_rate": 9.96776940817749e-06, | |
| "loss": 0.0005, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08682634730538923, | |
| "grad_norm": 0.1331041306257248, | |
| "learning_rate": 9.967045944436392e-06, | |
| "loss": 0.0004, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.0875748502994012, | |
| "grad_norm": 0.14387176930904388, | |
| "learning_rate": 9.966314477794052e-06, | |
| "loss": 0.0006, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.08832335329341318, | |
| "grad_norm": 0.1632365584373474, | |
| "learning_rate": 9.965575009429006e-06, | |
| "loss": 0.0003, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.08907185628742514, | |
| "grad_norm": 0.1252838671207428, | |
| "learning_rate": 9.964827540532685e-06, | |
| "loss": 0.0005, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.08982035928143713, | |
| "grad_norm": 0.08947388827800751, | |
| "learning_rate": 9.964072072309412e-06, | |
| "loss": 0.0004, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08982035928143713, | |
| "eval_accuracy": 0.99981676586026, | |
| "eval_loss": 0.000518214248586446, | |
| "eval_runtime": 154.0762, | |
| "eval_samples_per_second": 32.451, | |
| "eval_steps_per_second": 8.113, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09056886227544911, | |
| "grad_norm": 0.1822632998228073, | |
| "learning_rate": 9.963308605976397e-06, | |
| "loss": 0.0003, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.09131736526946108, | |
| "grad_norm": 0.1965271681547165, | |
| "learning_rate": 9.962537142763733e-06, | |
| "loss": 0.0003, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.09206586826347306, | |
| "grad_norm": 0.12774410843849182, | |
| "learning_rate": 9.961757683914406e-06, | |
| "loss": 0.0004, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.09281437125748503, | |
| "grad_norm": 0.06404659152030945, | |
| "learning_rate": 9.960970230684276e-06, | |
| "loss": 0.0003, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.09356287425149701, | |
| "grad_norm": 0.07961199432611465, | |
| "learning_rate": 9.96017478434209e-06, | |
| "loss": 0.0002, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09431137724550898, | |
| "grad_norm": 0.07755598425865173, | |
| "learning_rate": 9.959371346169466e-06, | |
| "loss": 0.0001, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.09505988023952096, | |
| "grad_norm": 0.10230294615030289, | |
| "learning_rate": 9.958559917460909e-06, | |
| "loss": 0.0004, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.09580838323353294, | |
| "grad_norm": 0.4232734441757202, | |
| "learning_rate": 9.957740499523787e-06, | |
| "loss": 0.0002, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.09655688622754491, | |
| "grad_norm": 0.45036637783050537, | |
| "learning_rate": 9.95691309367835e-06, | |
| "loss": 0.0006, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.09730538922155689, | |
| "grad_norm": 0.2974064350128174, | |
| "learning_rate": 9.95607770125771e-06, | |
| "loss": 0.0006, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09805389221556886, | |
| "grad_norm": 0.12492769956588745, | |
| "learning_rate": 9.955234323607854e-06, | |
| "loss": 0.0005, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.09880239520958084, | |
| "grad_norm": 0.08176768571138382, | |
| "learning_rate": 9.954382962087628e-06, | |
| "loss": 0.0003, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.09955089820359281, | |
| "grad_norm": 0.11267261207103729, | |
| "learning_rate": 9.95352361806875e-06, | |
| "loss": 0.0004, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.10029940119760479, | |
| "grad_norm": 0.07069454342126846, | |
| "learning_rate": 9.95265629293579e-06, | |
| "loss": 0.0002, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.10104790419161677, | |
| "grad_norm": 0.13988761603832245, | |
| "learning_rate": 9.951780988086183e-06, | |
| "loss": 0.0004, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10179640718562874, | |
| "grad_norm": 0.07328484207391739, | |
| "learning_rate": 9.950897704930223e-06, | |
| "loss": 0.0002, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.10254491017964072, | |
| "grad_norm": 0.1726737767457962, | |
| "learning_rate": 9.95000644489105e-06, | |
| "loss": 0.0003, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.10329341317365269, | |
| "grad_norm": 0.189790740609169, | |
| "learning_rate": 9.949107209404664e-06, | |
| "loss": 0.0005, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.10404191616766467, | |
| "grad_norm": 0.08902551233768463, | |
| "learning_rate": 9.948199999919914e-06, | |
| "loss": 0.0001, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.10479041916167664, | |
| "grad_norm": 0.10343684256076813, | |
| "learning_rate": 9.947284817898493e-06, | |
| "loss": 0.0002, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10479041916167664, | |
| "eval_accuracy": 0.9998052416354287, | |
| "eval_loss": 0.0006224001408554614, | |
| "eval_runtime": 156.154, | |
| "eval_samples_per_second": 32.02, | |
| "eval_steps_per_second": 8.005, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10553892215568862, | |
| "grad_norm": 0.20946663618087769, | |
| "learning_rate": 9.946361664814942e-06, | |
| "loss": 0.0007, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1062874251497006, | |
| "grad_norm": 0.024475887417793274, | |
| "learning_rate": 9.945430542156647e-06, | |
| "loss": 0.0001, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.10703592814371257, | |
| "grad_norm": 0.12402810901403427, | |
| "learning_rate": 9.944491451423829e-06, | |
| "loss": 0.0003, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.10778443113772455, | |
| "grad_norm": 0.3434118330478668, | |
| "learning_rate": 9.943544394129552e-06, | |
| "loss": 0.0004, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.10853293413173652, | |
| "grad_norm": 0.21301892399787903, | |
| "learning_rate": 9.942589371799715e-06, | |
| "loss": 0.0003, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1092814371257485, | |
| "grad_norm": 0.2948126196861267, | |
| "learning_rate": 9.941626385973047e-06, | |
| "loss": 0.0006, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.11002994011976049, | |
| "grad_norm": 0.1591068059206009, | |
| "learning_rate": 9.940655438201113e-06, | |
| "loss": 0.0003, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.11077844311377245, | |
| "grad_norm": 0.04139701649546623, | |
| "learning_rate": 9.9396765300483e-06, | |
| "loss": 0.0002, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.11152694610778444, | |
| "grad_norm": 0.11029073596000671, | |
| "learning_rate": 9.938689663091828e-06, | |
| "loss": 0.0003, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.1122754491017964, | |
| "grad_norm": 0.0646573156118393, | |
| "learning_rate": 9.937694838921734e-06, | |
| "loss": 0.0002, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11302395209580839, | |
| "grad_norm": 0.14302918314933777, | |
| "learning_rate": 9.93669205914088e-06, | |
| "loss": 0.0003, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.11377245508982035, | |
| "grad_norm": 0.17884957790374756, | |
| "learning_rate": 9.93568132536494e-06, | |
| "loss": 0.0004, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.11452095808383234, | |
| "grad_norm": 0.09195531904697418, | |
| "learning_rate": 9.934662639222412e-06, | |
| "loss": 0.0002, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.11526946107784432, | |
| "grad_norm": 0.2769736647605896, | |
| "learning_rate": 9.9336360023546e-06, | |
| "loss": 0.0003, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.11601796407185629, | |
| "grad_norm": 0.029257414862513542, | |
| "learning_rate": 9.932601416415622e-06, | |
| "loss": 0.0003, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11676646706586827, | |
| "grad_norm": 0.08587785065174103, | |
| "learning_rate": 9.931558883072403e-06, | |
| "loss": 0.0004, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.11751497005988024, | |
| "grad_norm": 0.20471642911434174, | |
| "learning_rate": 9.930508404004668e-06, | |
| "loss": 0.0004, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.11826347305389222, | |
| "grad_norm": 0.22900666296482086, | |
| "learning_rate": 9.929449980904952e-06, | |
| "loss": 0.0006, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.11901197604790419, | |
| "grad_norm": 0.16436566412448883, | |
| "learning_rate": 9.928383615478586e-06, | |
| "loss": 0.0003, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.11976047904191617, | |
| "grad_norm": 0.05877704173326492, | |
| "learning_rate": 9.927309309443696e-06, | |
| "loss": 0.0001, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11976047904191617, | |
| "eval_accuracy": 0.9999357040300619, | |
| "eval_loss": 0.00022764925961382687, | |
| "eval_runtime": 158.1146, | |
| "eval_samples_per_second": 31.623, | |
| "eval_steps_per_second": 7.906, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12050898203592815, | |
| "grad_norm": 0.261000394821167, | |
| "learning_rate": 9.9262270645312e-06, | |
| "loss": 0.0003, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.12125748502994012, | |
| "grad_norm": 0.17999576032161713, | |
| "learning_rate": 9.925136882484816e-06, | |
| "loss": 0.0003, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1220059880239521, | |
| "grad_norm": 0.15744219720363617, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 0.0006, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.12275449101796407, | |
| "grad_norm": 0.031700655817985535, | |
| "learning_rate": 9.922932714029163e-06, | |
| "loss": 0.0004, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.12350299401197605, | |
| "grad_norm": 0.2377641499042511, | |
| "learning_rate": 9.921818731171249e-06, | |
| "loss": 0.0003, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12425149700598802, | |
| "grad_norm": 0.08403676003217697, | |
| "learning_rate": 9.920696818282147e-06, | |
| "loss": 0.0002, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 0.1424562782049179, | |
| "learning_rate": 9.919566977169486e-06, | |
| "loss": 0.0004, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.12574850299401197, | |
| "grad_norm": 0.0928482636809349, | |
| "learning_rate": 9.918429209653662e-06, | |
| "loss": 0.0002, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.12649700598802396, | |
| "grad_norm": 0.08917529135942459, | |
| "learning_rate": 9.917283517567845e-06, | |
| "loss": 0.0004, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.12724550898203593, | |
| "grad_norm": 0.09952011704444885, | |
| "learning_rate": 9.916129902757977e-06, | |
| "loss": 0.0003, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1279940119760479, | |
| "grad_norm": 0.05392898619174957, | |
| "learning_rate": 9.914968367082756e-06, | |
| "loss": 0.0001, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.12874251497005987, | |
| "grad_norm": 0.12771159410476685, | |
| "learning_rate": 9.913798912413653e-06, | |
| "loss": 0.0002, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.12949101796407186, | |
| "grad_norm": 0.9677438735961914, | |
| "learning_rate": 9.912621540634889e-06, | |
| "loss": 0.0003, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.13023952095808383, | |
| "grad_norm": 0.03891558572649956, | |
| "learning_rate": 9.911436253643445e-06, | |
| "loss": 0.0001, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1309880239520958, | |
| "grad_norm": 0.03757692128419876, | |
| "learning_rate": 9.910243053349055e-06, | |
| "loss": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1317365269461078, | |
| "grad_norm": 0.20588494837284088, | |
| "learning_rate": 9.909041941674205e-06, | |
| "loss": 0.0004, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.13248502994011976, | |
| "grad_norm": 0.29803666472435, | |
| "learning_rate": 9.90783292055412e-06, | |
| "loss": 0.0004, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.13323353293413173, | |
| "grad_norm": 0.14101789891719818, | |
| "learning_rate": 9.906615991936781e-06, | |
| "loss": 0.0002, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1339820359281437, | |
| "grad_norm": 0.24130620062351227, | |
| "learning_rate": 9.905391157782897e-06, | |
| "loss": 0.0002, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.1347305389221557, | |
| "grad_norm": 0.2917313575744629, | |
| "learning_rate": 9.904158420065923e-06, | |
| "loss": 0.0005, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1347305389221557, | |
| "eval_accuracy": 0.9999076577000782, | |
| "eval_loss": 0.0005576053517870605, | |
| "eval_runtime": 155.2214, | |
| "eval_samples_per_second": 32.212, | |
| "eval_steps_per_second": 8.053, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13547904191616766, | |
| "grad_norm": 0.12759952247142792, | |
| "learning_rate": 9.902917780772043e-06, | |
| "loss": 0.0003, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.13622754491017963, | |
| "grad_norm": 0.1657952070236206, | |
| "learning_rate": 9.901669241900178e-06, | |
| "loss": 0.0007, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.13697604790419163, | |
| "grad_norm": 0.10384248197078705, | |
| "learning_rate": 9.900412805461968e-06, | |
| "loss": 0.0005, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1377245508982036, | |
| "grad_norm": 0.20811188220977783, | |
| "learning_rate": 9.899148473481786e-06, | |
| "loss": 0.0006, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.13847305389221556, | |
| "grad_norm": 0.051202207803726196, | |
| "learning_rate": 9.89787624799672e-06, | |
| "loss": 0.0003, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.13922155688622753, | |
| "grad_norm": 0.13106031715869904, | |
| "learning_rate": 9.896596131056583e-06, | |
| "loss": 0.0002, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.13997005988023953, | |
| "grad_norm": 0.1166054904460907, | |
| "learning_rate": 9.895308124723897e-06, | |
| "loss": 0.0003, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.1407185628742515, | |
| "grad_norm": 0.10474357008934021, | |
| "learning_rate": 9.894012231073895e-06, | |
| "loss": 0.0003, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.14146706586826346, | |
| "grad_norm": 0.08845887333154678, | |
| "learning_rate": 9.892708452194522e-06, | |
| "loss": 0.0004, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.14221556886227546, | |
| "grad_norm": 0.1545616239309311, | |
| "learning_rate": 9.891396790186424e-06, | |
| "loss": 0.0004, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14296407185628743, | |
| "grad_norm": 0.04785681515932083, | |
| "learning_rate": 9.890077247162951e-06, | |
| "loss": 0.0001, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.1437125748502994, | |
| "grad_norm": 0.11323319375514984, | |
| "learning_rate": 9.888749825250151e-06, | |
| "loss": 0.0001, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.14446107784431136, | |
| "grad_norm": 0.1407540738582611, | |
| "learning_rate": 9.887414526586764e-06, | |
| "loss": 0.0002, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.14520958083832336, | |
| "grad_norm": 0.09322088956832886, | |
| "learning_rate": 9.886071353324223e-06, | |
| "loss": 0.0001, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.14595808383233533, | |
| "grad_norm": 0.07416640967130661, | |
| "learning_rate": 9.884720307626647e-06, | |
| "loss": 0.0001, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1467065868263473, | |
| "grad_norm": 0.031197911128401756, | |
| "learning_rate": 9.883361391670841e-06, | |
| "loss": 0.0, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.1474550898203593, | |
| "grad_norm": 0.1820898950099945, | |
| "learning_rate": 9.881994607646288e-06, | |
| "loss": 0.0003, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.14820359281437126, | |
| "grad_norm": 0.1383231282234192, | |
| "learning_rate": 9.880619957755151e-06, | |
| "loss": 0.0002, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.14895209580838323, | |
| "grad_norm": 0.019146692007780075, | |
| "learning_rate": 9.879237444212265e-06, | |
| "loss": 0.0, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.1497005988023952, | |
| "grad_norm": 0.04791894555091858, | |
| "learning_rate": 9.877847069245134e-06, | |
| "loss": 0.0001, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1497005988023952, | |
| "eval_accuracy": 0.9999685074988971, | |
| "eval_loss": 0.00012279710790608078, | |
| "eval_runtime": 156.0918, | |
| "eval_samples_per_second": 32.032, | |
| "eval_steps_per_second": 8.008, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1504491017964072, | |
| "grad_norm": 0.06451380997896194, | |
| "learning_rate": 9.87644883509393e-06, | |
| "loss": 0.0001, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.15119760479041916, | |
| "grad_norm": 0.10077822208404541, | |
| "learning_rate": 9.875042744011487e-06, | |
| "loss": 0.0001, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.15194610778443113, | |
| "grad_norm": 0.07988882809877396, | |
| "learning_rate": 9.873628798263297e-06, | |
| "loss": 0.0001, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.15269461077844312, | |
| "grad_norm": 0.08547152578830719, | |
| "learning_rate": 9.87220700012751e-06, | |
| "loss": 0.0003, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.1534431137724551, | |
| "grad_norm": 0.06369513273239136, | |
| "learning_rate": 9.870777351894926e-06, | |
| "loss": 0.0001, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15419161676646706, | |
| "grad_norm": 0.1190333142876625, | |
| "learning_rate": 9.869339855868992e-06, | |
| "loss": 0.0002, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.15494011976047903, | |
| "grad_norm": 0.4799070954322815, | |
| "learning_rate": 9.867894514365802e-06, | |
| "loss": 0.0001, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.15568862275449102, | |
| "grad_norm": 0.05317097157239914, | |
| "learning_rate": 9.86644132971409e-06, | |
| "loss": 0.0001, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.156437125748503, | |
| "grad_norm": 0.08004628121852875, | |
| "learning_rate": 9.864980304255222e-06, | |
| "loss": 0.0003, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.15718562874251496, | |
| "grad_norm": 0.06639832258224487, | |
| "learning_rate": 9.863511440343206e-06, | |
| "loss": 0.0001, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.15793413173652696, | |
| "grad_norm": 0.20095159113407135, | |
| "learning_rate": 9.862034740344673e-06, | |
| "loss": 0.0002, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.15868263473053892, | |
| "grad_norm": 0.14772972464561462, | |
| "learning_rate": 9.860550206638881e-06, | |
| "loss": 0.0002, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.1594311377245509, | |
| "grad_norm": 0.15753412246704102, | |
| "learning_rate": 9.859057841617709e-06, | |
| "loss": 0.0002, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.1601796407185629, | |
| "grad_norm": 0.08705739676952362, | |
| "learning_rate": 9.857557647685657e-06, | |
| "loss": 0.0002, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.16092814371257486, | |
| "grad_norm": 0.32878294587135315, | |
| "learning_rate": 9.856049627259833e-06, | |
| "loss": 0.0006, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16167664670658682, | |
| "grad_norm": 0.19281232357025146, | |
| "learning_rate": 9.85453378276996e-06, | |
| "loss": 0.0001, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1624251497005988, | |
| "grad_norm": 0.4002825617790222, | |
| "learning_rate": 9.853010116658368e-06, | |
| "loss": 0.0009, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1631736526946108, | |
| "grad_norm": 0.15032881498336792, | |
| "learning_rate": 9.851478631379982e-06, | |
| "loss": 0.0002, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.16392215568862276, | |
| "grad_norm": 0.46663233637809753, | |
| "learning_rate": 9.849939329402337e-06, | |
| "loss": 0.0009, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.16467065868263472, | |
| "grad_norm": 0.032840508967638016, | |
| "learning_rate": 9.848392213205549e-06, | |
| "loss": 0.0003, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16467065868263472, | |
| "eval_accuracy": 0.9997846752245753, | |
| "eval_loss": 0.0007238321122713387, | |
| "eval_runtime": 154.7968, | |
| "eval_samples_per_second": 32.3, | |
| "eval_steps_per_second": 8.075, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16541916167664672, | |
| "grad_norm": 0.17962802946567535, | |
| "learning_rate": 9.846837285282331e-06, | |
| "loss": 0.0006, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.1661676646706587, | |
| "grad_norm": 0.03923157975077629, | |
| "learning_rate": 9.845274548137986e-06, | |
| "loss": 0.0002, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.16691616766467066, | |
| "grad_norm": 0.07774964720010757, | |
| "learning_rate": 9.843704004290393e-06, | |
| "loss": 0.0002, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.16766467065868262, | |
| "grad_norm": 0.2827122211456299, | |
| "learning_rate": 9.842125656270011e-06, | |
| "loss": 0.0006, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.16841317365269462, | |
| "grad_norm": 0.30080848932266235, | |
| "learning_rate": 9.840539506619874e-06, | |
| "loss": 0.0003, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1691616766467066, | |
| "grad_norm": 0.19179034233093262, | |
| "learning_rate": 9.838945557895586e-06, | |
| "loss": 0.0002, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.16991017964071856, | |
| "grad_norm": 0.044639382511377335, | |
| "learning_rate": 9.837343812665311e-06, | |
| "loss": 0.0002, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.17065868263473055, | |
| "grad_norm": 0.14254966378211975, | |
| "learning_rate": 9.835734273509787e-06, | |
| "loss": 0.0007, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.17140718562874252, | |
| "grad_norm": 0.10285581648349762, | |
| "learning_rate": 9.834116943022299e-06, | |
| "loss": 0.0003, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.1721556886227545, | |
| "grad_norm": 0.12203399091959, | |
| "learning_rate": 9.832491823808688e-06, | |
| "loss": 0.0003, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.17290419161676646, | |
| "grad_norm": 0.10512761771678925, | |
| "learning_rate": 9.830858918487347e-06, | |
| "loss": 0.0001, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.17365269461077845, | |
| "grad_norm": 0.14217980206012726, | |
| "learning_rate": 9.829218229689211e-06, | |
| "loss": 0.0004, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.17440119760479042, | |
| "grad_norm": 0.05573190748691559, | |
| "learning_rate": 9.827569760057755e-06, | |
| "loss": 0.0002, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.1751497005988024, | |
| "grad_norm": 0.1435333788394928, | |
| "learning_rate": 9.825913512248996e-06, | |
| "loss": 0.0002, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.17589820359281438, | |
| "grad_norm": 0.14290957152843475, | |
| "learning_rate": 9.824249488931477e-06, | |
| "loss": 0.0005, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.17664670658682635, | |
| "grad_norm": 0.0923268049955368, | |
| "learning_rate": 9.822577692786272e-06, | |
| "loss": 0.0003, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.17739520958083832, | |
| "grad_norm": 0.0938134640455246, | |
| "learning_rate": 9.820898126506978e-06, | |
| "loss": 0.0002, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.1781437125748503, | |
| "grad_norm": 0.09895174205303192, | |
| "learning_rate": 9.819210792799711e-06, | |
| "loss": 0.0003, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.17889221556886228, | |
| "grad_norm": 0.010202400386333466, | |
| "learning_rate": 9.817515694383102e-06, | |
| "loss": 0.0001, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.17964071856287425, | |
| "grad_norm": 0.045472726225852966, | |
| "learning_rate": 9.815812833988292e-06, | |
| "loss": 0.0001, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.17964071856287425, | |
| "eval_accuracy": 0.9998838001657226, | |
| "eval_loss": 0.000438039394794032, | |
| "eval_runtime": 154.4563, | |
| "eval_samples_per_second": 32.372, | |
| "eval_steps_per_second": 8.093, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.18038922155688622, | |
| "grad_norm": 0.1489792764186859, | |
| "learning_rate": 9.814102214358928e-06, | |
| "loss": 0.0002, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.18113772455089822, | |
| "grad_norm": 0.15599974989891052, | |
| "learning_rate": 9.81238383825116e-06, | |
| "loss": 0.0005, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.18188622754491018, | |
| "grad_norm": 0.03606925159692764, | |
| "learning_rate": 9.810657708433637e-06, | |
| "loss": 0.0004, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.18263473053892215, | |
| "grad_norm": 0.04655231162905693, | |
| "learning_rate": 9.808923827687494e-06, | |
| "loss": 0.0001, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.18338323353293412, | |
| "grad_norm": 0.2198714017868042, | |
| "learning_rate": 9.807182198806362e-06, | |
| "loss": 0.0002, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.18413173652694612, | |
| "grad_norm": 0.05768256261944771, | |
| "learning_rate": 9.805432824596347e-06, | |
| "loss": 0.0003, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.18488023952095808, | |
| "grad_norm": 0.17893020808696747, | |
| "learning_rate": 9.803675707876048e-06, | |
| "loss": 0.0005, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.18562874251497005, | |
| "grad_norm": 0.12833981215953827, | |
| "learning_rate": 9.801910851476524e-06, | |
| "loss": 0.0002, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.18637724550898205, | |
| "grad_norm": 0.03174396604299545, | |
| "learning_rate": 9.800138258241311e-06, | |
| "loss": 0.0001, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.18712574850299402, | |
| "grad_norm": 0.11265647411346436, | |
| "learning_rate": 9.798357931026411e-06, | |
| "loss": 0.0002, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18787425149700598, | |
| "grad_norm": 0.10834460705518723, | |
| "learning_rate": 9.796569872700287e-06, | |
| "loss": 0.0004, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.18862275449101795, | |
| "grad_norm": 0.061082735657691956, | |
| "learning_rate": 9.79477408614386e-06, | |
| "loss": 0.0001, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.18937125748502995, | |
| "grad_norm": 0.16802391409873962, | |
| "learning_rate": 9.792970574250493e-06, | |
| "loss": 0.0002, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.19011976047904192, | |
| "grad_norm": 0.11000331491231918, | |
| "learning_rate": 9.791159339926009e-06, | |
| "loss": 0.0001, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.19086826347305388, | |
| "grad_norm": 0.06801439821720123, | |
| "learning_rate": 9.789340386088663e-06, | |
| "loss": 0.0002, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19161676646706588, | |
| "grad_norm": 0.012815337628126144, | |
| "learning_rate": 9.787513715669158e-06, | |
| "loss": 0.0, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.19236526946107785, | |
| "grad_norm": 0.011311142705380917, | |
| "learning_rate": 9.78567933161062e-06, | |
| "loss": 0.0, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.19311377245508982, | |
| "grad_norm": 0.06330162286758423, | |
| "learning_rate": 9.78383723686861e-06, | |
| "loss": 0.0, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.19386227544910178, | |
| "grad_norm": 0.071534164249897, | |
| "learning_rate": 9.781987434411106e-06, | |
| "loss": 0.0001, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.19461077844311378, | |
| "grad_norm": 0.11816436052322388, | |
| "learning_rate": 9.780129927218513e-06, | |
| "loss": 0.0001, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.19461077844311378, | |
| "eval_accuracy": 0.9999860097407319, | |
| "eval_loss": 5.2422070439206436e-05, | |
| "eval_runtime": 155.6149, | |
| "eval_samples_per_second": 32.131, | |
| "eval_steps_per_second": 8.033, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.19535928143712575, | |
| "grad_norm": 0.06640541553497314, | |
| "learning_rate": 9.778264718283644e-06, | |
| "loss": 0.0, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.19610778443113772, | |
| "grad_norm": 0.026967424899339676, | |
| "learning_rate": 9.776391810611719e-06, | |
| "loss": 0.0, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.1968562874251497, | |
| "grad_norm": 0.11123115569353104, | |
| "learning_rate": 9.774511207220369e-06, | |
| "loss": 0.0001, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.19760479041916168, | |
| "grad_norm": 0.13741283118724823, | |
| "learning_rate": 9.772622911139622e-06, | |
| "loss": 0.0001, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.19835329341317365, | |
| "grad_norm": 0.009464044123888016, | |
| "learning_rate": 9.770726925411898e-06, | |
| "loss": 0.0, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.19910179640718562, | |
| "grad_norm": 0.0769435316324234, | |
| "learning_rate": 9.768823253092008e-06, | |
| "loss": 0.0001, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.1998502994011976, | |
| "grad_norm": 0.046003557741642, | |
| "learning_rate": 9.766911897247147e-06, | |
| "loss": 0.0001, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.20059880239520958, | |
| "grad_norm": 0.10196753591299057, | |
| "learning_rate": 9.76499286095689e-06, | |
| "loss": 0.0002, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.20134730538922155, | |
| "grad_norm": 0.020359348505735397, | |
| "learning_rate": 9.763066147313189e-06, | |
| "loss": 0.0, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.20209580838323354, | |
| "grad_norm": 0.20479270815849304, | |
| "learning_rate": 9.76113175942036e-06, | |
| "loss": 0.0001, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2028443113772455, | |
| "grad_norm": 0.11673811078071594, | |
| "learning_rate": 9.759189700395096e-06, | |
| "loss": 0.0001, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.20359281437125748, | |
| "grad_norm": 0.04004862159490585, | |
| "learning_rate": 9.75723997336643e-06, | |
| "loss": 0.0001, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.20434131736526945, | |
| "grad_norm": 0.13865888118743896, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.0004, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.20508982035928144, | |
| "grad_norm": 0.08988627046346664, | |
| "learning_rate": 9.753317527876857e-06, | |
| "loss": 0.0002, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.2058383233532934, | |
| "grad_norm": 0.09014202654361725, | |
| "learning_rate": 9.751344815735791e-06, | |
| "loss": 0.0003, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.20658682634730538, | |
| "grad_norm": 0.17278143763542175, | |
| "learning_rate": 9.749364448231001e-06, | |
| "loss": 0.0003, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.20733532934131738, | |
| "grad_norm": 0.07624712586402893, | |
| "learning_rate": 9.747376428553255e-06, | |
| "loss": 0.0002, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.20808383233532934, | |
| "grad_norm": 0.02646615356206894, | |
| "learning_rate": 9.745380759905648e-06, | |
| "loss": 0.0005, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.2088323353293413, | |
| "grad_norm": 0.1350707858800888, | |
| "learning_rate": 9.743377445503598e-06, | |
| "loss": 0.0005, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.20958083832335328, | |
| "grad_norm": 0.045723576098680496, | |
| "learning_rate": 9.74136648857485e-06, | |
| "loss": 0.0004, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.20958083832335328, | |
| "eval_accuracy": 0.9998195029826557, | |
| "eval_loss": 0.0005425158306024969, | |
| "eval_runtime": 155.8793, | |
| "eval_samples_per_second": 32.076, | |
| "eval_steps_per_second": 8.019, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21032934131736528, | |
| "grad_norm": 0.11474994570016861, | |
| "learning_rate": 9.739347892359453e-06, | |
| "loss": 0.0003, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.21107784431137724, | |
| "grad_norm": 0.0819924846291542, | |
| "learning_rate": 9.737321660109767e-06, | |
| "loss": 0.0002, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.2118263473053892, | |
| "grad_norm": 0.098919577896595, | |
| "learning_rate": 9.735287795090455e-06, | |
| "loss": 0.0004, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.2125748502994012, | |
| "grad_norm": 0.034899163991212845, | |
| "learning_rate": 9.733246300578482e-06, | |
| "loss": 0.0004, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.21332335329341318, | |
| "grad_norm": 0.10499320924282074, | |
| "learning_rate": 9.731197179863104e-06, | |
| "loss": 0.0003, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.21407185628742514, | |
| "grad_norm": 0.078518345952034, | |
| "learning_rate": 9.729140436245857e-06, | |
| "loss": 0.0001, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.2148203592814371, | |
| "grad_norm": 0.04776620492339134, | |
| "learning_rate": 9.72707607304057e-06, | |
| "loss": 0.0002, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.2155688622754491, | |
| "grad_norm": 0.043205343186855316, | |
| "learning_rate": 9.725004093573343e-06, | |
| "loss": 0.0001, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.21631736526946108, | |
| "grad_norm": 0.0973254144191742, | |
| "learning_rate": 9.722924501182546e-06, | |
| "loss": 0.0002, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.21706586826347304, | |
| "grad_norm": 0.07782719284296036, | |
| "learning_rate": 9.72083729921882e-06, | |
| "loss": 0.0001, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.21781437125748504, | |
| "grad_norm": 0.04242849349975586, | |
| "learning_rate": 9.718742491045061e-06, | |
| "loss": 0.0001, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.218562874251497, | |
| "grad_norm": 0.04837155342102051, | |
| "learning_rate": 9.716640080036423e-06, | |
| "loss": 0.0001, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.21931137724550898, | |
| "grad_norm": 0.0814133882522583, | |
| "learning_rate": 9.71453006958031e-06, | |
| "loss": 0.0002, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.22005988023952097, | |
| "grad_norm": 0.047387998551130295, | |
| "learning_rate": 9.712412463076368e-06, | |
| "loss": 0.0, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.22080838323353294, | |
| "grad_norm": 0.017673810943961143, | |
| "learning_rate": 9.710287263936485e-06, | |
| "loss": 0.0, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2215568862275449, | |
| "grad_norm": 0.021801825612783432, | |
| "learning_rate": 9.708154475584779e-06, | |
| "loss": 0.0001, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.22230538922155688, | |
| "grad_norm": 0.03839518874883652, | |
| "learning_rate": 9.7060141014576e-06, | |
| "loss": 0.0002, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.22305389221556887, | |
| "grad_norm": 0.007782716304063797, | |
| "learning_rate": 9.703866145003512e-06, | |
| "loss": 0.0001, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.22380239520958084, | |
| "grad_norm": 0.02108747325837612, | |
| "learning_rate": 9.701710609683305e-06, | |
| "loss": 0.0001, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.2245508982035928, | |
| "grad_norm": 0.0026378484908491373, | |
| "learning_rate": 9.699547498969978e-06, | |
| "loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2245508982035928, | |
| "eval_accuracy": 0.9999875145529564, | |
| "eval_loss": 4.2638039303710684e-05, | |
| "eval_runtime": 160.7051, | |
| "eval_samples_per_second": 31.113, | |
| "eval_steps_per_second": 7.778, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2252994011976048, | |
| "grad_norm": 0.02909325808286667, | |
| "learning_rate": 9.697376816348732e-06, | |
| "loss": 0.0001, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.22604790419161677, | |
| "grad_norm": 0.0025581123773008585, | |
| "learning_rate": 9.695198565316966e-06, | |
| "loss": 0.0001, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.22679640718562874, | |
| "grad_norm": 0.02005714178085327, | |
| "learning_rate": 9.69301274938428e-06, | |
| "loss": 0.0, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.2275449101796407, | |
| "grad_norm": 0.0037004246842116117, | |
| "learning_rate": 9.690819372072457e-06, | |
| "loss": 0.0, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.2282934131736527, | |
| "grad_norm": 0.032148100435733795, | |
| "learning_rate": 9.68861843691547e-06, | |
| "loss": 0.0001, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.22904191616766467, | |
| "grad_norm": 0.014080125838518143, | |
| "learning_rate": 9.68640994745946e-06, | |
| "loss": 0.0002, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.22979041916167664, | |
| "grad_norm": 0.010853869840502739, | |
| "learning_rate": 9.684193907262742e-06, | |
| "loss": 0.0, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.23053892215568864, | |
| "grad_norm": 0.032357871532440186, | |
| "learning_rate": 9.681970319895804e-06, | |
| "loss": 0.0, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.2312874251497006, | |
| "grad_norm": 0.008318758569657803, | |
| "learning_rate": 9.679739188941283e-06, | |
| "loss": 0.0, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.23203592814371257, | |
| "grad_norm": 0.037990834563970566, | |
| "learning_rate": 9.677500517993983e-06, | |
| "loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.23278443113772454, | |
| "grad_norm": 0.00843075942248106, | |
| "learning_rate": 9.675254310660842e-06, | |
| "loss": 0.0001, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.23353293413173654, | |
| "grad_norm": 0.05007459223270416, | |
| "learning_rate": 9.673000570560952e-06, | |
| "loss": 0.0, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.2342814371257485, | |
| "grad_norm": 0.0009229404386132956, | |
| "learning_rate": 9.670739301325534e-06, | |
| "loss": 0.0, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.23502994011976047, | |
| "grad_norm": 0.02507946826517582, | |
| "learning_rate": 9.668470506597946e-06, | |
| "loss": 0.0, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.23577844311377247, | |
| "grad_norm": 0.09565775096416473, | |
| "learning_rate": 9.66619419003367e-06, | |
| "loss": 0.0002, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.23652694610778444, | |
| "grad_norm": 0.0022729237098246813, | |
| "learning_rate": 9.663910355300306e-06, | |
| "loss": 0.0, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.2372754491017964, | |
| "grad_norm": 0.0015811071498319507, | |
| "learning_rate": 9.661619006077562e-06, | |
| "loss": 0.0, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.23802395209580837, | |
| "grad_norm": 0.10619401931762695, | |
| "learning_rate": 9.659320146057263e-06, | |
| "loss": 0.0001, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.23877245508982037, | |
| "grad_norm": 0.0017936922376975417, | |
| "learning_rate": 9.657013778943328e-06, | |
| "loss": 0.0, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.23952095808383234, | |
| "grad_norm": 0.00227470719255507, | |
| "learning_rate": 9.654699908451777e-06, | |
| "loss": 0.0, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.23952095808383234, | |
| "eval_accuracy": 0.9999895288063639, | |
| "eval_loss": 2.794685133267194e-05, | |
| "eval_runtime": 156.1163, | |
| "eval_samples_per_second": 32.027, | |
| "eval_steps_per_second": 8.007, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2402694610778443, | |
| "grad_norm": 0.007332763634622097, | |
| "learning_rate": 9.652378538310715e-06, | |
| "loss": 0.0, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.2410179640718563, | |
| "grad_norm": 0.08600316196680069, | |
| "learning_rate": 9.650049672260333e-06, | |
| "loss": 0.0, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.24176646706586827, | |
| "grad_norm": 0.005560212302953005, | |
| "learning_rate": 9.647713314052896e-06, | |
| "loss": 0.0, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.24251497005988024, | |
| "grad_norm": 0.00411292864009738, | |
| "learning_rate": 9.645369467452746e-06, | |
| "loss": 0.0, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.2432634730538922, | |
| "grad_norm": 0.0018659079214558005, | |
| "learning_rate": 9.643018136236286e-06, | |
| "loss": 0.0, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2440119760479042, | |
| "grad_norm": 0.004269044380635023, | |
| "learning_rate": 9.64065932419198e-06, | |
| "loss": 0.0, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.24476047904191617, | |
| "grad_norm": 0.00309938658028841, | |
| "learning_rate": 9.638293035120342e-06, | |
| "loss": 0.0, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.24550898203592814, | |
| "grad_norm": 0.0024809043388813734, | |
| "learning_rate": 9.635919272833938e-06, | |
| "loss": 0.0, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.24625748502994013, | |
| "grad_norm": 0.003469419199973345, | |
| "learning_rate": 9.63353804115737e-06, | |
| "loss": 0.0, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.2470059880239521, | |
| "grad_norm": 0.0016053810250014067, | |
| "learning_rate": 9.63114934392728e-06, | |
| "loss": 0.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.24775449101796407, | |
| "grad_norm": 0.02661885879933834, | |
| "learning_rate": 9.628753184992334e-06, | |
| "loss": 0.0, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.24850299401197604, | |
| "grad_norm": 0.0016741787549108267, | |
| "learning_rate": 9.62634956821322e-06, | |
| "loss": 0.0001, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.24925149700598803, | |
| "grad_norm": 0.0019377709832042456, | |
| "learning_rate": 9.623938497462647e-06, | |
| "loss": 0.0, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.0012623120564967394, | |
| "learning_rate": 9.621519976625327e-06, | |
| "loss": 0.0, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.25074850299401197, | |
| "grad_norm": 0.0024038818664848804, | |
| "learning_rate": 9.619094009597982e-06, | |
| "loss": 0.0, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.25149700598802394, | |
| "grad_norm": 0.006172757130116224, | |
| "learning_rate": 9.616660600289329e-06, | |
| "loss": 0.0, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.2522455089820359, | |
| "grad_norm": 0.0028510144911706448, | |
| "learning_rate": 9.614219752620074e-06, | |
| "loss": 0.0, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.25299401197604793, | |
| "grad_norm": 0.02679716795682907, | |
| "learning_rate": 9.611771470522908e-06, | |
| "loss": 0.0, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.2537425149700599, | |
| "grad_norm": 0.02851109206676483, | |
| "learning_rate": 9.609315757942504e-06, | |
| "loss": 0.0, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.25449101796407186, | |
| "grad_norm": 0.0017305930377915502, | |
| "learning_rate": 9.606852618835503e-06, | |
| "loss": 0.0001, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.25449101796407186, | |
| "eval_accuracy": 0.9999997747747748, | |
| "eval_loss": 8.644859917694703e-06, | |
| "eval_runtime": 159.0892, | |
| "eval_samples_per_second": 31.429, | |
| "eval_steps_per_second": 7.857, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.25523952095808383, | |
| "grad_norm": 0.00403413875028491, | |
| "learning_rate": 9.604382057170514e-06, | |
| "loss": 0.0, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.2559880239520958, | |
| "grad_norm": 0.0027754653710871935, | |
| "learning_rate": 9.601904076928103e-06, | |
| "loss": 0.0, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.25673652694610777, | |
| "grad_norm": 0.0013081474462524056, | |
| "learning_rate": 9.599418682100793e-06, | |
| "loss": 0.0, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.25748502994011974, | |
| "grad_norm": 0.05064619705080986, | |
| "learning_rate": 9.596925876693047e-06, | |
| "loss": 0.0, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.25823353293413176, | |
| "grad_norm": 0.002823168644681573, | |
| "learning_rate": 9.594425664721275e-06, | |
| "loss": 0.0, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.25898203592814373, | |
| "grad_norm": 0.030349284410476685, | |
| "learning_rate": 9.591918050213814e-06, | |
| "loss": 0.0, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.2597305389221557, | |
| "grad_norm": 0.001790383132174611, | |
| "learning_rate": 9.589403037210933e-06, | |
| "loss": 0.0001, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.26047904191616766, | |
| "grad_norm": 0.010972312651574612, | |
| "learning_rate": 9.586880629764817e-06, | |
| "loss": 0.0, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.26122754491017963, | |
| "grad_norm": 0.06688281893730164, | |
| "learning_rate": 9.584350831939571e-06, | |
| "loss": 0.0001, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.2619760479041916, | |
| "grad_norm": 0.149211123585701, | |
| "learning_rate": 9.581813647811199e-06, | |
| "loss": 0.0001, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.26272455089820357, | |
| "grad_norm": 0.00245782732963562, | |
| "learning_rate": 9.579269081467614e-06, | |
| "loss": 0.0, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2634730538922156, | |
| "grad_norm": 0.01430213451385498, | |
| "learning_rate": 9.576717137008617e-06, | |
| "loss": 0.0001, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.26422155688622756, | |
| "grad_norm": 0.013654684647917747, | |
| "learning_rate": 9.574157818545902e-06, | |
| "loss": 0.0, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.26497005988023953, | |
| "grad_norm": 0.015040101483464241, | |
| "learning_rate": 9.57159113020304e-06, | |
| "loss": 0.0, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2657185628742515, | |
| "grad_norm": 0.01307929027825594, | |
| "learning_rate": 9.569017076115476e-06, | |
| "loss": 0.0001, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.26646706586826346, | |
| "grad_norm": 0.02330423705279827, | |
| "learning_rate": 9.566435660430528e-06, | |
| "loss": 0.0, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.26721556886227543, | |
| "grad_norm": 0.002268057782202959, | |
| "learning_rate": 9.563846887307369e-06, | |
| "loss": 0.0, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.2679640718562874, | |
| "grad_norm": 0.011261685751378536, | |
| "learning_rate": 9.561250760917026e-06, | |
| "loss": 0.0001, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2687125748502994, | |
| "grad_norm": 0.03315627574920654, | |
| "learning_rate": 9.558647285442382e-06, | |
| "loss": 0.0, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.2694610778443114, | |
| "grad_norm": 0.002093307441100478, | |
| "learning_rate": 9.55603646507815e-06, | |
| "loss": 0.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2694610778443114, | |
| "eval_accuracy": 0.9999976841259713, | |
| "eval_loss": 9.612030225980561e-06, | |
| "eval_runtime": 155.2847, | |
| "eval_samples_per_second": 32.199, | |
| "eval_steps_per_second": 8.05, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.27020958083832336, | |
| "grad_norm": 0.001716041355393827, | |
| "learning_rate": 9.553418304030886e-06, | |
| "loss": 0.0, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.27095808383233533, | |
| "grad_norm": 0.0027342389803379774, | |
| "learning_rate": 9.550792806518967e-06, | |
| "loss": 0.0, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.2717065868263473, | |
| "grad_norm": 0.1821688860654831, | |
| "learning_rate": 9.548159976772593e-06, | |
| "loss": 0.0001, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.27245508982035926, | |
| "grad_norm": 0.0016638662200421095, | |
| "learning_rate": 9.545519819033777e-06, | |
| "loss": 0.0001, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.27320359281437123, | |
| "grad_norm": 0.021991174668073654, | |
| "learning_rate": 9.542872337556341e-06, | |
| "loss": 0.0, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.27395209580838326, | |
| "grad_norm": 0.0012851693900302052, | |
| "learning_rate": 9.540217536605906e-06, | |
| "loss": 0.0, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.2747005988023952, | |
| "grad_norm": 0.0014544121222570539, | |
| "learning_rate": 9.537555420459883e-06, | |
| "loss": 0.0, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2754491017964072, | |
| "grad_norm": 0.009950781241059303, | |
| "learning_rate": 9.534885993407474e-06, | |
| "loss": 0.0, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.27619760479041916, | |
| "grad_norm": 0.00411807419732213, | |
| "learning_rate": 9.532209259749658e-06, | |
| "loss": 0.0, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.27694610778443113, | |
| "grad_norm": 0.006487260106950998, | |
| "learning_rate": 9.529525223799185e-06, | |
| "loss": 0.0, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2776946107784431, | |
| "grad_norm": 0.007635013200342655, | |
| "learning_rate": 9.526833889880573e-06, | |
| "loss": 0.0, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.27844311377245506, | |
| "grad_norm": 0.000996310613118112, | |
| "learning_rate": 9.524135262330098e-06, | |
| "loss": 0.0, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.2791916167664671, | |
| "grad_norm": 0.0031566142570227385, | |
| "learning_rate": 9.521429345495787e-06, | |
| "loss": 0.0001, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.27994011976047906, | |
| "grad_norm": 0.002025540452450514, | |
| "learning_rate": 9.51871614373741e-06, | |
| "loss": 0.0, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.280688622754491, | |
| "grad_norm": 0.1011413112282753, | |
| "learning_rate": 9.515995661426478e-06, | |
| "loss": 0.0001, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.281437125748503, | |
| "grad_norm": 0.021610310301184654, | |
| "learning_rate": 9.513267902946228e-06, | |
| "loss": 0.0, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.28218562874251496, | |
| "grad_norm": 0.0016732689691707492, | |
| "learning_rate": 9.510532872691624e-06, | |
| "loss": 0.0, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.28293413173652693, | |
| "grad_norm": 0.11272062361240387, | |
| "learning_rate": 9.507790575069347e-06, | |
| "loss": 0.0001, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.2836826347305389, | |
| "grad_norm": 0.0009099426679313183, | |
| "learning_rate": 9.50504101449778e-06, | |
| "loss": 0.0, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.2844311377245509, | |
| "grad_norm": 0.0009794794023036957, | |
| "learning_rate": 9.50228419540702e-06, | |
| "loss": 0.0, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2844311377245509, | |
| "eval_accuracy": 0.9999983934801854, | |
| "eval_loss": 9.44385647017043e-06, | |
| "eval_runtime": 156.9097, | |
| "eval_samples_per_second": 31.865, | |
| "eval_steps_per_second": 7.966, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2851796407185629, | |
| "grad_norm": 0.03243451938033104, | |
| "learning_rate": 9.499520122238846e-06, | |
| "loss": 0.0, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.28592814371257486, | |
| "grad_norm": 0.02839779108762741, | |
| "learning_rate": 9.496748799446733e-06, | |
| "loss": 0.0001, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2866766467065868, | |
| "grad_norm": 0.0816827118396759, | |
| "learning_rate": 9.493970231495836e-06, | |
| "loss": 0.0, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.2874251497005988, | |
| "grad_norm": 0.0025276602245867252, | |
| "learning_rate": 9.49118442286298e-06, | |
| "loss": 0.0, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.28817365269461076, | |
| "grad_norm": 0.0015131831169128418, | |
| "learning_rate": 9.488391378036662e-06, | |
| "loss": 0.0, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.28892215568862273, | |
| "grad_norm": 0.001832049572840333, | |
| "learning_rate": 9.485591101517027e-06, | |
| "loss": 0.0, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.28967065868263475, | |
| "grad_norm": 0.047806382179260254, | |
| "learning_rate": 9.482783597815883e-06, | |
| "loss": 0.0, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.2904191616766467, | |
| "grad_norm": 0.03347828611731529, | |
| "learning_rate": 9.47996887145668e-06, | |
| "loss": 0.0, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.2911676646706587, | |
| "grad_norm": 0.0017931102775037289, | |
| "learning_rate": 9.477146926974501e-06, | |
| "loss": 0.0, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.29191616766467066, | |
| "grad_norm": 0.009210226126015186, | |
| "learning_rate": 9.47431776891606e-06, | |
| "loss": 0.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2926646706586826, | |
| "grad_norm": 0.0013418138260021806, | |
| "learning_rate": 9.471481401839696e-06, | |
| "loss": 0.0, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2934131736526946, | |
| "grad_norm": 0.0009674608591012657, | |
| "learning_rate": 9.468637830315364e-06, | |
| "loss": 0.0, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.29416167664670656, | |
| "grad_norm": 0.0006195507594384253, | |
| "learning_rate": 9.46578705892462e-06, | |
| "loss": 0.0, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.2949101796407186, | |
| "grad_norm": 0.0013804810587316751, | |
| "learning_rate": 9.46292909226063e-06, | |
| "loss": 0.0, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.29565868263473055, | |
| "grad_norm": 0.0004127651918679476, | |
| "learning_rate": 9.460063934928142e-06, | |
| "loss": 0.0, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2964071856287425, | |
| "grad_norm": 0.0004895281745120883, | |
| "learning_rate": 9.4571915915435e-06, | |
| "loss": 0.0, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.2971556886227545, | |
| "grad_norm": 0.00033658542088232934, | |
| "learning_rate": 9.454312066734624e-06, | |
| "loss": 0.0, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.29790419161676646, | |
| "grad_norm": 0.07587553560733795, | |
| "learning_rate": 9.451425365140997e-06, | |
| "loss": 0.0, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.2986526946107784, | |
| "grad_norm": 0.00075916713103652, | |
| "learning_rate": 9.448531491413673e-06, | |
| "loss": 0.0, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.2994011976047904, | |
| "grad_norm": 0.0008038659580051899, | |
| "learning_rate": 9.445630450215259e-06, | |
| "loss": 0.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2994011976047904, | |
| "eval_accuracy": 0.9999963675587793, | |
| "eval_loss": 1.0820390343724284e-05, | |
| "eval_runtime": 155.3675, | |
| "eval_samples_per_second": 32.182, | |
| "eval_steps_per_second": 8.045, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3001497005988024, | |
| "grad_norm": 0.0022112810984253883, | |
| "learning_rate": 9.442722246219915e-06, | |
| "loss": 0.0, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.3008982035928144, | |
| "grad_norm": 0.0013486716197803617, | |
| "learning_rate": 9.439806884113331e-06, | |
| "loss": 0.0, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.30164670658682635, | |
| "grad_norm": 0.005311549641191959, | |
| "learning_rate": 9.43688436859274e-06, | |
| "loss": 0.0002, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.3023952095808383, | |
| "grad_norm": 0.000981526798568666, | |
| "learning_rate": 9.433954704366897e-06, | |
| "loss": 0.0, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.3031437125748503, | |
| "grad_norm": 0.09638898819684982, | |
| "learning_rate": 9.431017896156074e-06, | |
| "loss": 0.0, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.30389221556886226, | |
| "grad_norm": 0.04560961201786995, | |
| "learning_rate": 9.428073948692056e-06, | |
| "loss": 0.0001, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.3046407185628742, | |
| "grad_norm": 0.040918540209531784, | |
| "learning_rate": 9.425122866718128e-06, | |
| "loss": 0.0003, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.30538922155688625, | |
| "grad_norm": 0.03442908823490143, | |
| "learning_rate": 9.422164654989073e-06, | |
| "loss": 0.0, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.3061377245508982, | |
| "grad_norm": 0.13045716285705566, | |
| "learning_rate": 9.419199318271158e-06, | |
| "loss": 0.0001, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.3068862275449102, | |
| "grad_norm": 0.027492402121424675, | |
| "learning_rate": 9.416226861342132e-06, | |
| "loss": 0.0001, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.30763473053892215, | |
| "grad_norm": 0.003682814771309495, | |
| "learning_rate": 9.413247288991216e-06, | |
| "loss": 0.0, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.3083832335329341, | |
| "grad_norm": 0.10141133517026901, | |
| "learning_rate": 9.410260606019095e-06, | |
| "loss": 0.0002, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.3091317365269461, | |
| "grad_norm": 0.0007527911802753806, | |
| "learning_rate": 9.40726681723791e-06, | |
| "loss": 0.0001, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.30988023952095806, | |
| "grad_norm": 0.005670532584190369, | |
| "learning_rate": 9.404265927471255e-06, | |
| "loss": 0.0, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.3106287425149701, | |
| "grad_norm": 0.03817495331168175, | |
| "learning_rate": 9.401257941554157e-06, | |
| "loss": 0.0, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.31137724550898205, | |
| "grad_norm": 0.009812482632696629, | |
| "learning_rate": 9.398242864333084e-06, | |
| "loss": 0.0, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.312125748502994, | |
| "grad_norm": 0.007045481353998184, | |
| "learning_rate": 9.395220700665924e-06, | |
| "loss": 0.0, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.312874251497006, | |
| "grad_norm": 0.003998721018433571, | |
| "learning_rate": 9.392191455421989e-06, | |
| "loss": 0.0, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.31362275449101795, | |
| "grad_norm": 0.031697846949100494, | |
| "learning_rate": 9.389155133481993e-06, | |
| "loss": 0.0001, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.3143712574850299, | |
| "grad_norm": 0.0006167310057207942, | |
| "learning_rate": 9.386111739738057e-06, | |
| "loss": 0.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3143712574850299, | |
| "eval_accuracy": 0.9999984969179706, | |
| "eval_loss": 6.494924491562415e-06, | |
| "eval_runtime": 155.2786, | |
| "eval_samples_per_second": 32.2, | |
| "eval_steps_per_second": 8.05, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.31511976047904194, | |
| "grad_norm": 0.47339311242103577, | |
| "learning_rate": 9.383061279093697e-06, | |
| "loss": 0.0002, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.3158682634730539, | |
| "grad_norm": 0.0039043284486979246, | |
| "learning_rate": 9.380003756463812e-06, | |
| "loss": 0.0, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.3166167664670659, | |
| "grad_norm": 0.18402549624443054, | |
| "learning_rate": 9.376939176774678e-06, | |
| "loss": 0.0001, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.31736526946107785, | |
| "grad_norm": 0.03785166144371033, | |
| "learning_rate": 9.373867544963949e-06, | |
| "loss": 0.0004, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3181137724550898, | |
| "grad_norm": 0.07002092897891998, | |
| "learning_rate": 9.370788865980633e-06, | |
| "loss": 0.0001, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3188622754491018, | |
| "grad_norm": 0.009300635196268559, | |
| "learning_rate": 9.367703144785097e-06, | |
| "loss": 0.0, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.31961077844311375, | |
| "grad_norm": 0.2740118205547333, | |
| "learning_rate": 9.364610386349048e-06, | |
| "loss": 0.0003, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.3203592814371258, | |
| "grad_norm": 0.023412982001900673, | |
| "learning_rate": 9.361510595655545e-06, | |
| "loss": 0.0001, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.32110778443113774, | |
| "grad_norm": 0.10502910614013672, | |
| "learning_rate": 9.358403777698962e-06, | |
| "loss": 0.0001, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.3218562874251497, | |
| "grad_norm": 0.17004919052124023, | |
| "learning_rate": 9.355289937485005e-06, | |
| "loss": 0.0001, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3226047904191617, | |
| "grad_norm": 0.020658617839217186, | |
| "learning_rate": 9.35216908003069e-06, | |
| "loss": 0.0, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.32335329341317365, | |
| "grad_norm": 0.2423926740884781, | |
| "learning_rate": 9.349041210364343e-06, | |
| "loss": 0.0003, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.3241017964071856, | |
| "grad_norm": 0.02749599702656269, | |
| "learning_rate": 9.345906333525582e-06, | |
| "loss": 0.0001, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.3248502994011976, | |
| "grad_norm": 0.10116691887378693, | |
| "learning_rate": 9.342764454565321e-06, | |
| "loss": 0.0001, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3255988023952096, | |
| "grad_norm": 0.09531649202108383, | |
| "learning_rate": 9.339615578545753e-06, | |
| "loss": 0.0001, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3263473053892216, | |
| "grad_norm": 0.023796789348125458, | |
| "learning_rate": 9.336459710540344e-06, | |
| "loss": 0.0, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.32709580838323354, | |
| "grad_norm": 0.08885123580694199, | |
| "learning_rate": 9.333296855633828e-06, | |
| "loss": 0.0001, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.3278443113772455, | |
| "grad_norm": 0.13661184906959534, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 0.0001, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.3285928143712575, | |
| "grad_norm": 0.009723243303596973, | |
| "learning_rate": 9.326950205512682e-06, | |
| "loss": 0.0001, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.32934131736526945, | |
| "grad_norm": 0.017450012266635895, | |
| "learning_rate": 9.323766420523768e-06, | |
| "loss": 0.0001, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.32934131736526945, | |
| "eval_accuracy": 0.9999853499863853, | |
| "eval_loss": 5.076894740341231e-05, | |
| "eval_runtime": 154.2114, | |
| "eval_samples_per_second": 32.423, | |
| "eval_steps_per_second": 8.106, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3300898203592814, | |
| "grad_norm": 0.09020084142684937, | |
| "learning_rate": 9.32057566908517e-06, | |
| "loss": 0.0001, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.33083832335329344, | |
| "grad_norm": 0.014794589951634407, | |
| "learning_rate": 9.31737795633782e-06, | |
| "loss": 0.0, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.3315868263473054, | |
| "grad_norm": 0.1351051777601242, | |
| "learning_rate": 9.314173287433874e-06, | |
| "loss": 0.0001, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.3323353293413174, | |
| "grad_norm": 0.02759048528969288, | |
| "learning_rate": 9.310961667536689e-06, | |
| "loss": 0.0, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.33308383233532934, | |
| "grad_norm": 0.006297203712165356, | |
| "learning_rate": 9.307743101820828e-06, | |
| "loss": 0.0, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3338323353293413, | |
| "grad_norm": 0.1679803431034088, | |
| "learning_rate": 9.30451759547204e-06, | |
| "loss": 0.0004, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.3345808383233533, | |
| "grad_norm": 0.018898937851190567, | |
| "learning_rate": 9.301285153687261e-06, | |
| "loss": 0.0001, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.33532934131736525, | |
| "grad_norm": 0.010490099899470806, | |
| "learning_rate": 9.298045781674595e-06, | |
| "loss": 0.0001, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.33607784431137727, | |
| "grad_norm": 0.08461616188287735, | |
| "learning_rate": 9.294799484653323e-06, | |
| "loss": 0.0002, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.33682634730538924, | |
| "grad_norm": 0.009152884595096111, | |
| "learning_rate": 9.291546267853871e-06, | |
| "loss": 0.0001, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3375748502994012, | |
| "grad_norm": 0.04316161200404167, | |
| "learning_rate": 9.28828613651782e-06, | |
| "loss": 0.0001, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.3383233532934132, | |
| "grad_norm": 0.04677840694785118, | |
| "learning_rate": 9.285019095897894e-06, | |
| "loss": 0.0, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.33907185628742514, | |
| "grad_norm": 0.006453138776123524, | |
| "learning_rate": 9.281745151257946e-06, | |
| "loss": 0.0002, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.3398203592814371, | |
| "grad_norm": 0.00727870361879468, | |
| "learning_rate": 9.278464307872952e-06, | |
| "loss": 0.0, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.3405688622754491, | |
| "grad_norm": 0.15015535056591034, | |
| "learning_rate": 9.275176571029008e-06, | |
| "loss": 0.0002, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3413173652694611, | |
| "grad_norm": 0.01364520750939846, | |
| "learning_rate": 9.271881946023309e-06, | |
| "loss": 0.0, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.34206586826347307, | |
| "grad_norm": 0.13824740052223206, | |
| "learning_rate": 9.268580438164157e-06, | |
| "loss": 0.0001, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.34281437125748504, | |
| "grad_norm": 0.02371104806661606, | |
| "learning_rate": 9.265272052770936e-06, | |
| "loss": 0.0, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.343562874251497, | |
| "grad_norm": 0.07769843935966492, | |
| "learning_rate": 9.261956795174116e-06, | |
| "loss": 0.0002, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.344311377245509, | |
| "grad_norm": 0.0038983135018497705, | |
| "learning_rate": 9.25863467071524e-06, | |
| "loss": 0.0, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.344311377245509, | |
| "eval_accuracy": 0.9999698145371103, | |
| "eval_loss": 0.0001175394281744957, | |
| "eval_runtime": 154.4022, | |
| "eval_samples_per_second": 32.383, | |
| "eval_steps_per_second": 8.096, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.34505988023952094, | |
| "grad_norm": 0.04528482258319855, | |
| "learning_rate": 9.255305684746908e-06, | |
| "loss": 0.0001, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.3458083832335329, | |
| "grad_norm": 0.04112999513745308, | |
| "learning_rate": 9.251969842632785e-06, | |
| "loss": 0.0, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.34655688622754494, | |
| "grad_norm": 0.01982693374156952, | |
| "learning_rate": 9.248627149747573e-06, | |
| "loss": 0.0, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.3473053892215569, | |
| "grad_norm": 0.002507114317268133, | |
| "learning_rate": 9.24527761147702e-06, | |
| "loss": 0.0, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.34805389221556887, | |
| "grad_norm": 0.018373820930719376, | |
| "learning_rate": 9.241921233217899e-06, | |
| "loss": 0.0, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.34880239520958084, | |
| "grad_norm": 0.015127432532608509, | |
| "learning_rate": 9.238558020378003e-06, | |
| "loss": 0.0, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.3495508982035928, | |
| "grad_norm": 0.006092644762247801, | |
| "learning_rate": 9.235187978376141e-06, | |
| "loss": 0.0001, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.3502994011976048, | |
| "grad_norm": 0.14546248316764832, | |
| "learning_rate": 9.231811112642121e-06, | |
| "loss": 0.0002, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.35104790419161674, | |
| "grad_norm": 0.003949570469558239, | |
| "learning_rate": 9.228427428616749e-06, | |
| "loss": 0.0001, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.35179640718562877, | |
| "grad_norm": 0.008468257263302803, | |
| "learning_rate": 9.225036931751811e-06, | |
| "loss": 0.0002, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.35254491017964074, | |
| "grad_norm": 0.10494138300418854, | |
| "learning_rate": 9.221639627510076e-06, | |
| "loss": 0.0002, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.3532934131736527, | |
| "grad_norm": 0.06659938395023346, | |
| "learning_rate": 9.218235521365278e-06, | |
| "loss": 0.0004, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.35404191616766467, | |
| "grad_norm": 0.09659219533205032, | |
| "learning_rate": 9.214824618802108e-06, | |
| "loss": 0.0001, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.35479041916167664, | |
| "grad_norm": 0.022609582170844078, | |
| "learning_rate": 9.211406925316214e-06, | |
| "loss": 0.0001, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.3555389221556886, | |
| "grad_norm": 0.017719948664307594, | |
| "learning_rate": 9.20798244641418e-06, | |
| "loss": 0.0001, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3562874251497006, | |
| "grad_norm": 0.06319057196378708, | |
| "learning_rate": 9.204551187613521e-06, | |
| "loss": 0.0002, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.3570359281437126, | |
| "grad_norm": 0.03745066374540329, | |
| "learning_rate": 9.201113154442685e-06, | |
| "loss": 0.0001, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.35778443113772457, | |
| "grad_norm": 0.021028850227594376, | |
| "learning_rate": 9.197668352441025e-06, | |
| "loss": 0.0, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.35853293413173654, | |
| "grad_norm": 0.02389431931078434, | |
| "learning_rate": 9.194216787158805e-06, | |
| "loss": 0.0001, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.3592814371257485, | |
| "grad_norm": 0.03340911120176315, | |
| "learning_rate": 9.190758464157184e-06, | |
| "loss": 0.0002, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3592814371257485, | |
| "eval_accuracy": 0.9999895989441676, | |
| "eval_loss": 5.814629912492819e-05, | |
| "eval_runtime": 155.1915, | |
| "eval_samples_per_second": 32.218, | |
| "eval_steps_per_second": 8.055, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.36002994011976047, | |
| "grad_norm": 0.016582539305090904, | |
| "learning_rate": 9.18729338900821e-06, | |
| "loss": 0.0, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.36077844311377244, | |
| "grad_norm": 0.009625283069908619, | |
| "learning_rate": 9.18382156729481e-06, | |
| "loss": 0.0, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.3615269461077844, | |
| "grad_norm": 0.0010095112957060337, | |
| "learning_rate": 9.18034300461078e-06, | |
| "loss": 0.0, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.36227544910179643, | |
| "grad_norm": 0.0017203768948093057, | |
| "learning_rate": 9.17685770656078e-06, | |
| "loss": 0.0, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.3630239520958084, | |
| "grad_norm": 0.03041454404592514, | |
| "learning_rate": 9.173365678760318e-06, | |
| "loss": 0.0, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.36377245508982037, | |
| "grad_norm": 0.3855910897254944, | |
| "learning_rate": 9.169866926835749e-06, | |
| "loss": 0.0002, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.36452095808383234, | |
| "grad_norm": 0.04365074634552002, | |
| "learning_rate": 9.166361456424257e-06, | |
| "loss": 0.0001, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.3652694610778443, | |
| "grad_norm": 0.007284363266080618, | |
| "learning_rate": 9.162849273173857e-06, | |
| "loss": 0.0, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.36601796407185627, | |
| "grad_norm": 0.043903883546590805, | |
| "learning_rate": 9.159330382743375e-06, | |
| "loss": 0.0, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.36676646706586824, | |
| "grad_norm": 0.016840385273098946, | |
| "learning_rate": 9.155804790802444e-06, | |
| "loss": 0.0, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.36751497005988026, | |
| "grad_norm": 0.11350879073143005, | |
| "learning_rate": 9.152272503031496e-06, | |
| "loss": 0.0, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.36826347305389223, | |
| "grad_norm": 0.06382304430007935, | |
| "learning_rate": 9.148733525121751e-06, | |
| "loss": 0.0002, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.3690119760479042, | |
| "grad_norm": 0.09389964491128922, | |
| "learning_rate": 9.145187862775208e-06, | |
| "loss": 0.0001, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.36976047904191617, | |
| "grad_norm": 0.002736086491495371, | |
| "learning_rate": 9.141635521704638e-06, | |
| "loss": 0.0001, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.37050898203592814, | |
| "grad_norm": 0.07442247867584229, | |
| "learning_rate": 9.138076507633566e-06, | |
| "loss": 0.0001, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3712574850299401, | |
| "grad_norm": 0.026373956352472305, | |
| "learning_rate": 9.134510826296277e-06, | |
| "loss": 0.0, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.37200598802395207, | |
| "grad_norm": 0.0026233713142573833, | |
| "learning_rate": 9.130938483437792e-06, | |
| "loss": 0.0001, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.3727544910179641, | |
| "grad_norm": 0.1102319285273552, | |
| "learning_rate": 9.12735948481387e-06, | |
| "loss": 0.0001, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.37350299401197606, | |
| "grad_norm": 0.08953434228897095, | |
| "learning_rate": 9.12377383619099e-06, | |
| "loss": 0.0, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.37425149700598803, | |
| "grad_norm": 0.026983065530657768, | |
| "learning_rate": 9.120181543346348e-06, | |
| "loss": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.37425149700598803, | |
| "eval_accuracy": 0.9999914303936028, | |
| "eval_loss": 4.0267019357997924e-05, | |
| "eval_runtime": 154.1351, | |
| "eval_samples_per_second": 32.439, | |
| "eval_steps_per_second": 8.11, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 0.03703652322292328, | |
| "learning_rate": 9.11658261206784e-06, | |
| "loss": 0.0, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.37574850299401197, | |
| "grad_norm": 0.06015906482934952, | |
| "learning_rate": 9.112977048154066e-06, | |
| "loss": 0.0, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.37649700598802394, | |
| "grad_norm": 0.171669602394104, | |
| "learning_rate": 9.109364857414306e-06, | |
| "loss": 0.0001, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.3772455089820359, | |
| "grad_norm": 0.027005095034837723, | |
| "learning_rate": 9.10574604566852e-06, | |
| "loss": 0.0, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.37799401197604793, | |
| "grad_norm": 0.06816914677619934, | |
| "learning_rate": 9.102120618747336e-06, | |
| "loss": 0.0, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3787425149700599, | |
| "grad_norm": 0.029688792303204536, | |
| "learning_rate": 9.09848858249204e-06, | |
| "loss": 0.0, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.37949101796407186, | |
| "grad_norm": 0.0352199524641037, | |
| "learning_rate": 9.094849942754564e-06, | |
| "loss": 0.0, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.38023952095808383, | |
| "grad_norm": 0.42947277426719666, | |
| "learning_rate": 9.091204705397485e-06, | |
| "loss": 0.0002, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.3809880239520958, | |
| "grad_norm": 0.038584258407354355, | |
| "learning_rate": 9.087552876294003e-06, | |
| "loss": 0.0, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.38173652694610777, | |
| "grad_norm": 0.2603873312473297, | |
| "learning_rate": 9.083894461327946e-06, | |
| "loss": 0.0015, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.38248502994011974, | |
| "grad_norm": 0.13592231273651123, | |
| "learning_rate": 9.08022946639375e-06, | |
| "loss": 0.0002, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.38323353293413176, | |
| "grad_norm": 0.013513598591089249, | |
| "learning_rate": 9.076557897396452e-06, | |
| "loss": 0.0001, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.38398203592814373, | |
| "grad_norm": 0.06492534279823303, | |
| "learning_rate": 9.07287976025168e-06, | |
| "loss": 0.0001, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.3847305389221557, | |
| "grad_norm": 0.04138237237930298, | |
| "learning_rate": 9.069195060885647e-06, | |
| "loss": 0.0002, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.38547904191616766, | |
| "grad_norm": 0.013964397832751274, | |
| "learning_rate": 9.065503805235139e-06, | |
| "loss": 0.0001, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.38622754491017963, | |
| "grad_norm": 0.1758122593164444, | |
| "learning_rate": 9.061805999247504e-06, | |
| "loss": 0.0001, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.3869760479041916, | |
| "grad_norm": 0.185356006026268, | |
| "learning_rate": 9.058101648880646e-06, | |
| "loss": 0.0003, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.38772455089820357, | |
| "grad_norm": 0.020207742229104042, | |
| "learning_rate": 9.05439076010301e-06, | |
| "loss": 0.0003, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.3884730538922156, | |
| "grad_norm": 0.07574658840894699, | |
| "learning_rate": 9.050673338893578e-06, | |
| "loss": 0.0002, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.38922155688622756, | |
| "grad_norm": 0.15675880014896393, | |
| "learning_rate": 9.046949391241859e-06, | |
| "loss": 0.0003, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.38922155688622756, | |
| "eval_accuracy": 0.9999522992784509, | |
| "eval_loss": 0.00014203271712176502, | |
| "eval_runtime": 154.4084, | |
| "eval_samples_per_second": 32.382, | |
| "eval_steps_per_second": 8.095, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.38997005988023953, | |
| "grad_norm": 0.10081563144922256, | |
| "learning_rate": 9.043218923147874e-06, | |
| "loss": 0.0001, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.3907185628742515, | |
| "grad_norm": 0.028760971501469612, | |
| "learning_rate": 9.039481940622148e-06, | |
| "loss": 0.0003, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.39146706586826346, | |
| "grad_norm": 0.37775400280952454, | |
| "learning_rate": 9.035738449685707e-06, | |
| "loss": 0.0007, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.39221556886227543, | |
| "grad_norm": 0.14730341732501984, | |
| "learning_rate": 9.031988456370062e-06, | |
| "loss": 0.0003, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.3929640718562874, | |
| "grad_norm": 0.16259920597076416, | |
| "learning_rate": 9.0282319667172e-06, | |
| "loss": 0.0006, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3937125748502994, | |
| "grad_norm": 0.11165869981050491, | |
| "learning_rate": 9.02446898677957e-06, | |
| "loss": 0.0002, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.3944610778443114, | |
| "grad_norm": 0.236286461353302, | |
| "learning_rate": 9.020699522620091e-06, | |
| "loss": 0.0006, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.39520958083832336, | |
| "grad_norm": 0.17146489024162292, | |
| "learning_rate": 9.016923580312114e-06, | |
| "loss": 0.0006, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.39595808383233533, | |
| "grad_norm": 0.13749942183494568, | |
| "learning_rate": 9.013141165939439e-06, | |
| "loss": 0.0005, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.3967065868263473, | |
| "grad_norm": 0.0854322612285614, | |
| "learning_rate": 9.009352285596287e-06, | |
| "loss": 0.0004, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.39745508982035926, | |
| "grad_norm": 0.3005140423774719, | |
| "learning_rate": 9.005556945387301e-06, | |
| "loss": 0.0009, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.39820359281437123, | |
| "grad_norm": 0.061198897659778595, | |
| "learning_rate": 9.001755151427532e-06, | |
| "loss": 0.0002, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.39895209580838326, | |
| "grad_norm": 0.13300061225891113, | |
| "learning_rate": 8.997946909842426e-06, | |
| "loss": 0.0003, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.3997005988023952, | |
| "grad_norm": 0.05639196187257767, | |
| "learning_rate": 8.99413222676782e-06, | |
| "loss": 0.0002, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.4004491017964072, | |
| "grad_norm": 0.0920565128326416, | |
| "learning_rate": 8.990311108349926e-06, | |
| "loss": 0.0002, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.40119760479041916, | |
| "grad_norm": 0.2794632613658905, | |
| "learning_rate": 8.986483560745335e-06, | |
| "loss": 0.0003, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.40194610778443113, | |
| "grad_norm": 0.05511578544974327, | |
| "learning_rate": 8.982649590120982e-06, | |
| "loss": 0.0001, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.4026946107784431, | |
| "grad_norm": 0.11161552369594574, | |
| "learning_rate": 8.978809202654161e-06, | |
| "loss": 0.0003, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.40344311377245506, | |
| "grad_norm": 0.04912755638360977, | |
| "learning_rate": 8.974962404532503e-06, | |
| "loss": 0.0002, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.4041916167664671, | |
| "grad_norm": 0.130497545003891, | |
| "learning_rate": 8.971109201953962e-06, | |
| "loss": 0.0002, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4041916167664671, | |
| "eval_accuracy": 0.9999309907446557, | |
| "eval_loss": 0.00030308307032100856, | |
| "eval_runtime": 157.0526, | |
| "eval_samples_per_second": 31.836, | |
| "eval_steps_per_second": 7.959, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.40494011976047906, | |
| "grad_norm": 0.06516057252883911, | |
| "learning_rate": 8.967249601126821e-06, | |
| "loss": 0.0001, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.405688622754491, | |
| "grad_norm": 0.0653974711894989, | |
| "learning_rate": 8.963383608269665e-06, | |
| "loss": 0.0001, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.406437125748503, | |
| "grad_norm": 0.1652081459760666, | |
| "learning_rate": 8.959511229611377e-06, | |
| "loss": 0.0005, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.40718562874251496, | |
| "grad_norm": 0.2547818720340729, | |
| "learning_rate": 8.955632471391132e-06, | |
| "loss": 0.0004, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.40793413173652693, | |
| "grad_norm": 0.11153703182935715, | |
| "learning_rate": 8.951747339858383e-06, | |
| "loss": 0.0001, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4086826347305389, | |
| "grad_norm": 0.20618999004364014, | |
| "learning_rate": 8.947855841272852e-06, | |
| "loss": 0.0004, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.4094311377245509, | |
| "grad_norm": 0.06252986937761307, | |
| "learning_rate": 8.943957981904518e-06, | |
| "loss": 0.0003, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.4101796407185629, | |
| "grad_norm": 0.12335634976625443, | |
| "learning_rate": 8.94005376803361e-06, | |
| "loss": 0.0002, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.41092814371257486, | |
| "grad_norm": 0.15102048218250275, | |
| "learning_rate": 8.936143205950596e-06, | |
| "loss": 0.0003, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.4116766467065868, | |
| "grad_norm": 0.2645941376686096, | |
| "learning_rate": 8.93222630195617e-06, | |
| "loss": 0.0002, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4124251497005988, | |
| "grad_norm": 0.16175216436386108, | |
| "learning_rate": 8.928303062361244e-06, | |
| "loss": 0.0002, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.41317365269461076, | |
| "grad_norm": 0.390656977891922, | |
| "learning_rate": 8.924373493486941e-06, | |
| "loss": 0.0008, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.41392215568862273, | |
| "grad_norm": 0.19943471252918243, | |
| "learning_rate": 8.92043760166458e-06, | |
| "loss": 0.0006, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.41467065868263475, | |
| "grad_norm": 0.08877554535865784, | |
| "learning_rate": 8.916495393235666e-06, | |
| "loss": 0.0003, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.4154191616766467, | |
| "grad_norm": 0.02073746733367443, | |
| "learning_rate": 8.912546874551883e-06, | |
| "loss": 0.0003, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.4161676646706587, | |
| "grad_norm": 0.229649618268013, | |
| "learning_rate": 8.908592051975083e-06, | |
| "loss": 0.0003, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.41691616766467066, | |
| "grad_norm": 0.2585594952106476, | |
| "learning_rate": 8.904630931877271e-06, | |
| "loss": 0.0005, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.4176646706586826, | |
| "grad_norm": 0.09236887842416763, | |
| "learning_rate": 8.900663520640605e-06, | |
| "loss": 0.0003, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.4184131736526946, | |
| "grad_norm": 0.1604318916797638, | |
| "learning_rate": 8.896689824657371e-06, | |
| "loss": 0.0008, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.41916167664670656, | |
| "grad_norm": 0.1640581637620926, | |
| "learning_rate": 8.892709850329991e-06, | |
| "loss": 0.0009, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.41916167664670656, | |
| "eval_accuracy": 0.9998298540459971, | |
| "eval_loss": 0.0006637079059146345, | |
| "eval_runtime": 158.0379, | |
| "eval_samples_per_second": 31.638, | |
| "eval_steps_per_second": 7.909, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4199101796407186, | |
| "grad_norm": 0.1830235719680786, | |
| "learning_rate": 8.88872360407099e-06, | |
| "loss": 0.0007, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.42065868263473055, | |
| "grad_norm": 0.15978126227855682, | |
| "learning_rate": 8.884731092303011e-06, | |
| "loss": 0.0008, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.4214071856287425, | |
| "grad_norm": 0.07531040906906128, | |
| "learning_rate": 8.880732321458785e-06, | |
| "loss": 0.0004, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.4221556886227545, | |
| "grad_norm": 0.07047852128744125, | |
| "learning_rate": 8.876727297981129e-06, | |
| "loss": 0.0004, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.42290419161676646, | |
| "grad_norm": 0.11832007020711899, | |
| "learning_rate": 8.872716028322931e-06, | |
| "loss": 0.0006, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.4236526946107784, | |
| "grad_norm": 0.11789973080158234, | |
| "learning_rate": 8.868698518947152e-06, | |
| "loss": 0.0003, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.4244011976047904, | |
| "grad_norm": 0.06593231111764908, | |
| "learning_rate": 8.864674776326798e-06, | |
| "loss": 0.0003, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.4251497005988024, | |
| "grad_norm": 0.12147919833660126, | |
| "learning_rate": 8.860644806944917e-06, | |
| "loss": 0.0003, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.4258982035928144, | |
| "grad_norm": 0.014330295845866203, | |
| "learning_rate": 8.8566086172946e-06, | |
| "loss": 0.0001, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.42664670658682635, | |
| "grad_norm": 0.13002386689186096, | |
| "learning_rate": 8.852566213878947e-06, | |
| "loss": 0.0002, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4273952095808383, | |
| "grad_norm": 0.028262050822377205, | |
| "learning_rate": 8.84851760321108e-06, | |
| "loss": 0.0001, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.4281437125748503, | |
| "grad_norm": 0.058746110647916794, | |
| "learning_rate": 8.844462791814113e-06, | |
| "loss": 0.0002, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.42889221556886226, | |
| "grad_norm": 0.006739677395671606, | |
| "learning_rate": 8.84040178622116e-06, | |
| "loss": 0.0, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.4296407185628742, | |
| "grad_norm": 0.0508301667869091, | |
| "learning_rate": 8.83633459297531e-06, | |
| "loss": 0.0, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.43038922155688625, | |
| "grad_norm": 0.06738423556089401, | |
| "learning_rate": 8.83226121862962e-06, | |
| "loss": 0.0, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4311377245508982, | |
| "grad_norm": 0.093570277094841, | |
| "learning_rate": 8.828181669747111e-06, | |
| "loss": 0.0002, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.4318862275449102, | |
| "grad_norm": 0.22780318558216095, | |
| "learning_rate": 8.824095952900746e-06, | |
| "loss": 0.0003, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.43263473053892215, | |
| "grad_norm": 0.006822109688073397, | |
| "learning_rate": 8.820004074673433e-06, | |
| "loss": 0.0, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.4333832335329341, | |
| "grad_norm": 0.03218008950352669, | |
| "learning_rate": 8.815906041658001e-06, | |
| "loss": 0.0, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.4341317365269461, | |
| "grad_norm": 0.021438656374812126, | |
| "learning_rate": 8.8118018604572e-06, | |
| "loss": 0.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4341317365269461, | |
| "eval_accuracy": 0.9999750413955637, | |
| "eval_loss": 0.000117507777758874, | |
| "eval_runtime": 153.4917, | |
| "eval_samples_per_second": 32.575, | |
| "eval_steps_per_second": 8.144, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.43488023952095806, | |
| "grad_norm": 0.003635610453784466, | |
| "learning_rate": 8.807691537683685e-06, | |
| "loss": 0.0001, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.4356287425149701, | |
| "grad_norm": 0.03301112353801727, | |
| "learning_rate": 8.80357507996e-06, | |
| "loss": 0.0, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.43637724550898205, | |
| "grad_norm": 0.09721909463405609, | |
| "learning_rate": 8.799452493918586e-06, | |
| "loss": 0.0002, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.437125748502994, | |
| "grad_norm": 0.042455609887838364, | |
| "learning_rate": 8.795323786201746e-06, | |
| "loss": 0.0, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.437874251497006, | |
| "grad_norm": 0.022428715601563454, | |
| "learning_rate": 8.791188963461653e-06, | |
| "loss": 0.0001, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.43862275449101795, | |
| "grad_norm": 0.0063305930234491825, | |
| "learning_rate": 8.787048032360332e-06, | |
| "loss": 0.0, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.4393712574850299, | |
| "grad_norm": 0.027178645133972168, | |
| "learning_rate": 8.782900999569646e-06, | |
| "loss": 0.0001, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.44011976047904194, | |
| "grad_norm": 0.016478972509503365, | |
| "learning_rate": 8.778747871771293e-06, | |
| "loss": 0.0001, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.4408682634730539, | |
| "grad_norm": 0.0024430316407233477, | |
| "learning_rate": 8.774588655656787e-06, | |
| "loss": 0.0, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.4416167664670659, | |
| "grad_norm": 0.0025271910708397627, | |
| "learning_rate": 8.770423357927463e-06, | |
| "loss": 0.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.44236526946107785, | |
| "grad_norm": 0.003522343933582306, | |
| "learning_rate": 8.766251985294435e-06, | |
| "loss": 0.0001, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.4431137724550898, | |
| "grad_norm": 0.0037217664066702127, | |
| "learning_rate": 8.762074544478622e-06, | |
| "loss": 0.0002, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.4438622754491018, | |
| "grad_norm": 0.003973813261836767, | |
| "learning_rate": 8.757891042210713e-06, | |
| "loss": 0.0, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.44461077844311375, | |
| "grad_norm": 0.005763462278991938, | |
| "learning_rate": 8.753701485231165e-06, | |
| "loss": 0.0, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.4453592814371258, | |
| "grad_norm": 0.005196116399019957, | |
| "learning_rate": 8.749505880290188e-06, | |
| "loss": 0.0, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.44610778443113774, | |
| "grad_norm": 0.0032948690932244062, | |
| "learning_rate": 8.74530423414774e-06, | |
| "loss": 0.0, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.4468562874251497, | |
| "grad_norm": 0.009100310504436493, | |
| "learning_rate": 8.741096553573506e-06, | |
| "loss": 0.0, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.4476047904191617, | |
| "grad_norm": 0.0061983345076441765, | |
| "learning_rate": 8.736882845346906e-06, | |
| "loss": 0.0, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.44835329341317365, | |
| "grad_norm": 0.0011341345962136984, | |
| "learning_rate": 8.732663116257057e-06, | |
| "loss": 0.0, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.4491017964071856, | |
| "grad_norm": 0.0010620451066643, | |
| "learning_rate": 8.728437373102784e-06, | |
| "loss": 0.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4491017964071856, | |
| "eval_accuracy": 0.9999988700564972, | |
| "eval_loss": 9.44121893553529e-06, | |
| "eval_runtime": 153.9703, | |
| "eval_samples_per_second": 32.474, | |
| "eval_steps_per_second": 8.118, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4498502994011976, | |
| "grad_norm": 0.0015724517870694399, | |
| "learning_rate": 8.724205622692608e-06, | |
| "loss": 0.0001, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.4505988023952096, | |
| "grad_norm": 0.0011631123488768935, | |
| "learning_rate": 8.719967871844715e-06, | |
| "loss": 0.0, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.4513473053892216, | |
| "grad_norm": 0.003892699722200632, | |
| "learning_rate": 8.715724127386971e-06, | |
| "loss": 0.0, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.45209580838323354, | |
| "grad_norm": 0.001457210979424417, | |
| "learning_rate": 8.711474396156894e-06, | |
| "loss": 0.0, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.4528443113772455, | |
| "grad_norm": 0.04398849606513977, | |
| "learning_rate": 8.707218685001648e-06, | |
| "loss": 0.0, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4535928143712575, | |
| "grad_norm": 0.0016321828588843346, | |
| "learning_rate": 8.702957000778029e-06, | |
| "loss": 0.0, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.45434131736526945, | |
| "grad_norm": 0.006306509952992201, | |
| "learning_rate": 8.698689350352465e-06, | |
| "loss": 0.0, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.4550898203592814, | |
| "grad_norm": 0.003088061697781086, | |
| "learning_rate": 8.69441574060099e-06, | |
| "loss": 0.0, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.45583832335329344, | |
| "grad_norm": 0.0059152874164283276, | |
| "learning_rate": 8.690136178409237e-06, | |
| "loss": 0.0, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.4565868263473054, | |
| "grad_norm": 0.0006292685866355896, | |
| "learning_rate": 8.685850670672438e-06, | |
| "loss": 0.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4573353293413174, | |
| "grad_norm": 0.001894032466225326, | |
| "learning_rate": 8.681559224295401e-06, | |
| "loss": 0.0, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.45808383233532934, | |
| "grad_norm": 0.000890376337338239, | |
| "learning_rate": 8.6772618461925e-06, | |
| "loss": 0.0, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.4588323353293413, | |
| "grad_norm": 0.047846511006355286, | |
| "learning_rate": 8.672958543287666e-06, | |
| "loss": 0.0, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.4595808383233533, | |
| "grad_norm": 0.0039296639151871204, | |
| "learning_rate": 8.668649322514382e-06, | |
| "loss": 0.0, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.46032934131736525, | |
| "grad_norm": 0.0010013898136094213, | |
| "learning_rate": 8.66433419081566e-06, | |
| "loss": 0.0, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.46107784431137727, | |
| "grad_norm": 0.0013401862233877182, | |
| "learning_rate": 8.660013155144036e-06, | |
| "loss": 0.0, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.46182634730538924, | |
| "grad_norm": 0.01721956580877304, | |
| "learning_rate": 8.655686222461561e-06, | |
| "loss": 0.0, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.4625748502994012, | |
| "grad_norm": 0.10839847475290298, | |
| "learning_rate": 8.651353399739787e-06, | |
| "loss": 0.0, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.4633233532934132, | |
| "grad_norm": 0.0019374943803995848, | |
| "learning_rate": 8.647014693959754e-06, | |
| "loss": 0.0, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.46407185628742514, | |
| "grad_norm": 0.0010845274664461613, | |
| "learning_rate": 8.642670112111982e-06, | |
| "loss": 0.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.46407185628742514, | |
| "eval_accuracy": 0.999998779749899, | |
| "eval_loss": 7.805577297403943e-06, | |
| "eval_runtime": 153.998, | |
| "eval_samples_per_second": 32.468, | |
| "eval_steps_per_second": 8.117, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.4648203592814371, | |
| "grad_norm": 0.14908014237880707, | |
| "learning_rate": 8.63831966119646e-06, | |
| "loss": 0.0002, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.4655688622754491, | |
| "grad_norm": 0.0005565496394410729, | |
| "learning_rate": 8.633963348222628e-06, | |
| "loss": 0.0, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.4663173652694611, | |
| "grad_norm": 0.016671478748321533, | |
| "learning_rate": 8.629601180209382e-06, | |
| "loss": 0.0, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.46706586826347307, | |
| "grad_norm": 0.0019113154849037528, | |
| "learning_rate": 8.625233164185035e-06, | |
| "loss": 0.0, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.46781437125748504, | |
| "grad_norm": 0.24114775657653809, | |
| "learning_rate": 8.620859307187339e-06, | |
| "loss": 0.0002, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.468562874251497, | |
| "grad_norm": 0.12908697128295898, | |
| "learning_rate": 8.616479616263444e-06, | |
| "loss": 0.0001, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.469311377245509, | |
| "grad_norm": 0.04974567890167236, | |
| "learning_rate": 8.61209409846991e-06, | |
| "loss": 0.0, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.47005988023952094, | |
| "grad_norm": 0.14523474872112274, | |
| "learning_rate": 8.607702760872679e-06, | |
| "loss": 0.0005, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.4708083832335329, | |
| "grad_norm": 0.0028332837391644716, | |
| "learning_rate": 8.60330561054707e-06, | |
| "loss": 0.0003, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.47155688622754494, | |
| "grad_norm": 0.09443452209234238, | |
| "learning_rate": 8.598902654577768e-06, | |
| "loss": 0.0002, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4723053892215569, | |
| "grad_norm": 0.1735847145318985, | |
| "learning_rate": 8.594493900058817e-06, | |
| "loss": 0.0005, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.47305389221556887, | |
| "grad_norm": 0.09729497134685516, | |
| "learning_rate": 8.590079354093594e-06, | |
| "loss": 0.0001, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.47380239520958084, | |
| "grad_norm": 0.014571278356015682, | |
| "learning_rate": 8.585659023794818e-06, | |
| "loss": 0.0001, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.4745508982035928, | |
| "grad_norm": 0.08880387991666794, | |
| "learning_rate": 8.581232916284519e-06, | |
| "loss": 0.0002, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.4752994011976048, | |
| "grad_norm": 0.056134164333343506, | |
| "learning_rate": 8.57680103869404e-06, | |
| "loss": 0.0001, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.47604790419161674, | |
| "grad_norm": 0.06601478904485703, | |
| "learning_rate": 8.572363398164017e-06, | |
| "loss": 0.0001, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.47679640718562877, | |
| "grad_norm": 0.04634417966008186, | |
| "learning_rate": 8.567920001844376e-06, | |
| "loss": 0.0001, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.47754491017964074, | |
| "grad_norm": 0.06786518543958664, | |
| "learning_rate": 8.563470856894316e-06, | |
| "loss": 0.0002, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.4782934131736527, | |
| "grad_norm": 0.0038419270422309637, | |
| "learning_rate": 8.559015970482292e-06, | |
| "loss": 0.0, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.47904191616766467, | |
| "grad_norm": 0.005290990229696035, | |
| "learning_rate": 8.554555349786016e-06, | |
| "loss": 0.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.47904191616766467, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 1.6949796190601774e-05, | |
| "eval_runtime": 162.9441, | |
| "eval_samples_per_second": 30.685, | |
| "eval_steps_per_second": 7.671, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.47979041916167664, | |
| "grad_norm": 0.015714962035417557, | |
| "learning_rate": 8.550089001992438e-06, | |
| "loss": 0.0, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.4805389221556886, | |
| "grad_norm": 0.016865752637386322, | |
| "learning_rate": 8.545616934297733e-06, | |
| "loss": 0.0, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.4812874251497006, | |
| "grad_norm": 0.0030540430452674627, | |
| "learning_rate": 8.541139153907296e-06, | |
| "loss": 0.0, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.4820359281437126, | |
| "grad_norm": 0.004076346755027771, | |
| "learning_rate": 8.536655668035723e-06, | |
| "loss": 0.0, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.48278443113772457, | |
| "grad_norm": 0.0015489828074350953, | |
| "learning_rate": 8.532166483906804e-06, | |
| "loss": 0.0, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.48353293413173654, | |
| "grad_norm": 0.0032020832877606153, | |
| "learning_rate": 8.527671608753508e-06, | |
| "loss": 0.0, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.4842814371257485, | |
| "grad_norm": 0.0029930637683719397, | |
| "learning_rate": 8.523171049817974e-06, | |
| "loss": 0.0, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.48502994011976047, | |
| "grad_norm": 0.00045903853606432676, | |
| "learning_rate": 8.518664814351502e-06, | |
| "loss": 0.0, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.48577844311377244, | |
| "grad_norm": 0.002175545785576105, | |
| "learning_rate": 8.514152909614538e-06, | |
| "loss": 0.0, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.4865269461077844, | |
| "grad_norm": 0.00034521182533353567, | |
| "learning_rate": 8.509635342876655e-06, | |
| "loss": 0.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.48727544910179643, | |
| "grad_norm": 0.0007213663193397224, | |
| "learning_rate": 8.505112121416554e-06, | |
| "loss": 0.0, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.4880239520958084, | |
| "grad_norm": 0.000988309970125556, | |
| "learning_rate": 8.500583252522053e-06, | |
| "loss": 0.0, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.48877245508982037, | |
| "grad_norm": 0.0006475381087511778, | |
| "learning_rate": 8.496048743490053e-06, | |
| "loss": 0.0, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.48952095808383234, | |
| "grad_norm": 0.004469580017030239, | |
| "learning_rate": 8.49150860162656e-06, | |
| "loss": 0.0, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.4902694610778443, | |
| "grad_norm": 0.0006323789712041616, | |
| "learning_rate": 8.486962834246646e-06, | |
| "loss": 0.0, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.49101796407185627, | |
| "grad_norm": 0.0003597615868784487, | |
| "learning_rate": 8.482411448674445e-06, | |
| "loss": 0.0, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.49176646706586824, | |
| "grad_norm": 0.0009737180080264807, | |
| "learning_rate": 8.477854452243149e-06, | |
| "loss": 0.0, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.49251497005988026, | |
| "grad_norm": 0.00047102788812480867, | |
| "learning_rate": 8.473291852294986e-06, | |
| "loss": 0.0, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.49326347305389223, | |
| "grad_norm": 0.0006197803886607289, | |
| "learning_rate": 8.468723656181219e-06, | |
| "loss": 0.0, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.4940119760479042, | |
| "grad_norm": 0.0006234439788386226, | |
| "learning_rate": 8.464149871262118e-06, | |
| "loss": 0.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4940119760479042, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 1.9553074253053637e-06, | |
| "eval_runtime": 156.0817, | |
| "eval_samples_per_second": 32.035, | |
| "eval_steps_per_second": 8.009, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.49476047904191617, | |
| "grad_norm": 0.0012577202869579196, | |
| "learning_rate": 8.459570504906962e-06, | |
| "loss": 0.0, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.49550898203592814, | |
| "grad_norm": 0.0013276775134727359, | |
| "learning_rate": 8.454985564494025e-06, | |
| "loss": 0.0, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.4962574850299401, | |
| "grad_norm": 0.0007339988951571286, | |
| "learning_rate": 8.450395057410561e-06, | |
| "loss": 0.0, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.49700598802395207, | |
| "grad_norm": 0.00023488645092584193, | |
| "learning_rate": 8.445798991052791e-06, | |
| "loss": 0.0, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.4977544910179641, | |
| "grad_norm": 0.008007602766156197, | |
| "learning_rate": 8.441197372825892e-06, | |
| "loss": 0.0, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.49850299401197606, | |
| "grad_norm": 0.00031445815693587065, | |
| "learning_rate": 8.436590210143991e-06, | |
| "loss": 0.0, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.49925149700598803, | |
| "grad_norm": 0.000408270803745836, | |
| "learning_rate": 8.431977510430145e-06, | |
| "loss": 0.0, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.00037585021345876157, | |
| "learning_rate": 8.427359281116335e-06, | |
| "loss": 0.0, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.500748502994012, | |
| "grad_norm": 0.00020924248383380473, | |
| "learning_rate": 8.422735529643445e-06, | |
| "loss": 0.0, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.5014970059880239, | |
| "grad_norm": 0.00034080087789334357, | |
| "learning_rate": 8.418106263461261e-06, | |
| "loss": 0.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5022455089820359, | |
| "grad_norm": 0.001094786450266838, | |
| "learning_rate": 8.413471490028456e-06, | |
| "loss": 0.0, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.5029940119760479, | |
| "grad_norm": 0.00020074410713277757, | |
| "learning_rate": 8.408831216812574e-06, | |
| "loss": 0.0, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.5037425149700598, | |
| "grad_norm": 0.00031989437411539257, | |
| "learning_rate": 8.404185451290017e-06, | |
| "loss": 0.0, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.5044910179640718, | |
| "grad_norm": 0.0004511797451414168, | |
| "learning_rate": 8.399534200946044e-06, | |
| "loss": 0.0, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.5052395209580839, | |
| "grad_norm": 0.0033039976842701435, | |
| "learning_rate": 8.394877473274743e-06, | |
| "loss": 0.0, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5059880239520959, | |
| "grad_norm": 0.00033330474980175495, | |
| "learning_rate": 8.39021527577903e-06, | |
| "loss": 0.0, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.5067365269461078, | |
| "grad_norm": 0.00710050156340003, | |
| "learning_rate": 8.38554761597064e-06, | |
| "loss": 0.0, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.5074850299401198, | |
| "grad_norm": 0.00019886674999725074, | |
| "learning_rate": 8.380874501370098e-06, | |
| "loss": 0.0, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.5082335329341318, | |
| "grad_norm": 0.0007205790607258677, | |
| "learning_rate": 8.376195939506727e-06, | |
| "loss": 0.0, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.5089820359281437, | |
| "grad_norm": 0.00022511072165798396, | |
| "learning_rate": 8.371511937918616e-06, | |
| "loss": 0.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5089820359281437, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 1.2852336794821895e-06, | |
| "eval_runtime": 155.6867, | |
| "eval_samples_per_second": 32.116, | |
| "eval_steps_per_second": 8.029, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5097305389221557, | |
| "grad_norm": 0.000407641549827531, | |
| "learning_rate": 8.366822504152636e-06, | |
| "loss": 0.0, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.5104790419161677, | |
| "grad_norm": 0.001749478979036212, | |
| "learning_rate": 8.362127645764392e-06, | |
| "loss": 0.0, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.5112275449101796, | |
| "grad_norm": 0.00031519352342002094, | |
| "learning_rate": 8.357427370318239e-06, | |
| "loss": 0.0, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.5119760479041916, | |
| "grad_norm": 0.0004909691051580012, | |
| "learning_rate": 8.352721685387258e-06, | |
| "loss": 0.0, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.5127245508982036, | |
| "grad_norm": 0.0003617761831264943, | |
| "learning_rate": 8.348010598553245e-06, | |
| "loss": 0.0, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5134730538922155, | |
| "grad_norm": 0.0003041178279090673, | |
| "learning_rate": 8.3432941174067e-06, | |
| "loss": 0.0, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.5142215568862275, | |
| "grad_norm": 0.0009227189584635198, | |
| "learning_rate": 8.338572249546813e-06, | |
| "loss": 0.0, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.5149700598802395, | |
| "grad_norm": 0.00037840052391402423, | |
| "learning_rate": 8.33384500258146e-06, | |
| "loss": 0.0, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.5157185628742516, | |
| "grad_norm": 0.0003028454084414989, | |
| "learning_rate": 8.329112384127172e-06, | |
| "loss": 0.0, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.5164670658682635, | |
| "grad_norm": 0.00024328533618245274, | |
| "learning_rate": 8.324374401809144e-06, | |
| "loss": 0.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5172155688622755, | |
| "grad_norm": 0.00041966489516198635, | |
| "learning_rate": 8.319631063261209e-06, | |
| "loss": 0.0, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.5179640718562875, | |
| "grad_norm": 0.0001933051535161212, | |
| "learning_rate": 8.314882376125832e-06, | |
| "loss": 0.0, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.5187125748502994, | |
| "grad_norm": 0.00017571724310982972, | |
| "learning_rate": 8.310128348054093e-06, | |
| "loss": 0.0, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.5194610778443114, | |
| "grad_norm": 0.00010965206456603482, | |
| "learning_rate": 8.305368986705683e-06, | |
| "loss": 0.0, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.5202095808383234, | |
| "grad_norm": 0.00016223240527324378, | |
| "learning_rate": 8.300604299748876e-06, | |
| "loss": 0.0, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5209580838323353, | |
| "grad_norm": 0.00020105067233089358, | |
| "learning_rate": 8.295834294860535e-06, | |
| "loss": 0.0, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.5217065868263473, | |
| "grad_norm": 0.00012361881090328097, | |
| "learning_rate": 8.291058979726092e-06, | |
| "loss": 0.0, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.5224550898203593, | |
| "grad_norm": 0.00031712997588329017, | |
| "learning_rate": 8.286278362039527e-06, | |
| "loss": 0.0, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.5232035928143712, | |
| "grad_norm": 0.0007049996056593955, | |
| "learning_rate": 8.281492449503372e-06, | |
| "loss": 0.0, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.5239520958083832, | |
| "grad_norm": 0.00017834010941442102, | |
| "learning_rate": 8.276701249828684e-06, | |
| "loss": 0.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5239520958083832, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 1.0108310561918188e-06, | |
| "eval_runtime": 164.1638, | |
| "eval_samples_per_second": 30.457, | |
| "eval_steps_per_second": 7.614, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5247005988023952, | |
| "grad_norm": 0.00016547701670788229, | |
| "learning_rate": 8.271904770735042e-06, | |
| "loss": 0.0, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.5254491017964071, | |
| "grad_norm": 0.00018670795543584973, | |
| "learning_rate": 8.267103019950529e-06, | |
| "loss": 0.0, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.5261976047904192, | |
| "grad_norm": 0.0005327853723429143, | |
| "learning_rate": 8.262296005211722e-06, | |
| "loss": 0.0, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.5269461077844312, | |
| "grad_norm": 0.00011932725465158, | |
| "learning_rate": 8.257483734263682e-06, | |
| "loss": 0.0, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.5276946107784432, | |
| "grad_norm": 0.0010636606020852923, | |
| "learning_rate": 8.252666214859936e-06, | |
| "loss": 0.0, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5284431137724551, | |
| "grad_norm": 0.00017524044960737228, | |
| "learning_rate": 8.247843454762467e-06, | |
| "loss": 0.0, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.5291916167664671, | |
| "grad_norm": 0.00048075238009914756, | |
| "learning_rate": 8.243015461741707e-06, | |
| "loss": 0.0, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.5299401197604791, | |
| "grad_norm": 0.000147451224620454, | |
| "learning_rate": 8.238182243576512e-06, | |
| "loss": 0.0, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.530688622754491, | |
| "grad_norm": 0.00012748232984449714, | |
| "learning_rate": 8.233343808054159e-06, | |
| "loss": 0.0, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.531437125748503, | |
| "grad_norm": 0.00022851829999126494, | |
| "learning_rate": 8.228500162970333e-06, | |
| "loss": 0.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.532185628742515, | |
| "grad_norm": 0.00014870429004076868, | |
| "learning_rate": 8.223651316129115e-06, | |
| "loss": 0.0, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.5329341317365269, | |
| "grad_norm": 0.00023895545746199787, | |
| "learning_rate": 8.21879727534296e-06, | |
| "loss": 0.0, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.5336826347305389, | |
| "grad_norm": 0.0001619049144210294, | |
| "learning_rate": 8.213938048432697e-06, | |
| "loss": 0.0, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.5344311377245509, | |
| "grad_norm": 0.0002205699129262939, | |
| "learning_rate": 8.20907364322751e-06, | |
| "loss": 0.0, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.5351796407185628, | |
| "grad_norm": 0.0001235086820088327, | |
| "learning_rate": 8.204204067564924e-06, | |
| "loss": 0.0, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5359281437125748, | |
| "grad_norm": 0.00023735742433927953, | |
| "learning_rate": 8.199329329290798e-06, | |
| "loss": 0.0, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.5366766467065869, | |
| "grad_norm": 0.00017734240100253373, | |
| "learning_rate": 8.194449436259305e-06, | |
| "loss": 0.0, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.5374251497005988, | |
| "grad_norm": 0.00017838150961324573, | |
| "learning_rate": 8.189564396332927e-06, | |
| "loss": 0.0, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.5381736526946108, | |
| "grad_norm": 0.0001554026093799621, | |
| "learning_rate": 8.184674217382438e-06, | |
| "loss": 0.0, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.5389221556886228, | |
| "grad_norm": 0.00021117751020938158, | |
| "learning_rate": 8.179778907286889e-06, | |
| "loss": 0.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5389221556886228, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 8.898123837752792e-07, | |
| "eval_runtime": 163.5535, | |
| "eval_samples_per_second": 30.571, | |
| "eval_steps_per_second": 7.643, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5396706586826348, | |
| "grad_norm": 0.0002229697274742648, | |
| "learning_rate": 8.174878473933601e-06, | |
| "loss": 0.0, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.5404191616766467, | |
| "grad_norm": 9.283604595111683e-05, | |
| "learning_rate": 8.16997292521815e-06, | |
| "loss": 0.0, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.5411676646706587, | |
| "grad_norm": 0.0001886676182039082, | |
| "learning_rate": 8.165062269044353e-06, | |
| "loss": 0.0, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.5419161676646707, | |
| "grad_norm": 0.0001873478468041867, | |
| "learning_rate": 8.160146513324256e-06, | |
| "loss": 0.0, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.5426646706586826, | |
| "grad_norm": 0.00011518682003952563, | |
| "learning_rate": 8.15522566597812e-06, | |
| "loss": 0.0, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5434131736526946, | |
| "grad_norm": 0.0001653393410379067, | |
| "learning_rate": 8.150299734934413e-06, | |
| "loss": 0.0, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.5441616766467066, | |
| "grad_norm": 0.0001261346333194524, | |
| "learning_rate": 8.14536872812979e-06, | |
| "loss": 0.0, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.5449101796407185, | |
| "grad_norm": 0.00019827600044663996, | |
| "learning_rate": 8.140432653509089e-06, | |
| "loss": 0.0, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.5456586826347305, | |
| "grad_norm": 0.00042794988257810473, | |
| "learning_rate": 8.135491519025307e-06, | |
| "loss": 0.0, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.5464071856287425, | |
| "grad_norm": 0.00011408683349145576, | |
| "learning_rate": 8.130545332639599e-06, | |
| "loss": 0.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5471556886227545, | |
| "grad_norm": 0.0001599421666469425, | |
| "learning_rate": 8.125594102321256e-06, | |
| "loss": 0.0, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.5479041916167665, | |
| "grad_norm": 0.00013200732064433396, | |
| "learning_rate": 8.120637836047698e-06, | |
| "loss": 0.0, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.5486526946107785, | |
| "grad_norm": 0.00012747867731377482, | |
| "learning_rate": 8.115676541804456e-06, | |
| "loss": 0.0, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.5494011976047904, | |
| "grad_norm": 0.00013039227633271366, | |
| "learning_rate": 8.110710227585169e-06, | |
| "loss": 0.0, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.5501497005988024, | |
| "grad_norm": 0.0001378858432872221, | |
| "learning_rate": 8.105738901391553e-06, | |
| "loss": 0.0, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5508982035928144, | |
| "grad_norm": 0.00019915043958462775, | |
| "learning_rate": 8.100762571233409e-06, | |
| "loss": 0.0, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.5516467065868264, | |
| "grad_norm": 0.00013786421914119273, | |
| "learning_rate": 8.095781245128598e-06, | |
| "loss": 0.0, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.5523952095808383, | |
| "grad_norm": 0.00011769870616262779, | |
| "learning_rate": 8.090794931103026e-06, | |
| "loss": 0.0, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.5531437125748503, | |
| "grad_norm": 0.0001314223773078993, | |
| "learning_rate": 8.085803637190643e-06, | |
| "loss": 0.0, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.5538922155688623, | |
| "grad_norm": 0.0017582617001608014, | |
| "learning_rate": 8.080807371433415e-06, | |
| "loss": 0.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5538922155688623, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.754564421702526e-07, | |
| "eval_runtime": 165.5806, | |
| "eval_samples_per_second": 30.197, | |
| "eval_steps_per_second": 7.549, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5546407185628742, | |
| "grad_norm": 0.0001497814228059724, | |
| "learning_rate": 8.075806141881327e-06, | |
| "loss": 0.0001, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.5553892215568862, | |
| "grad_norm": 0.022318042814731598, | |
| "learning_rate": 8.07079995659235e-06, | |
| "loss": 0.0, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.5561377245508982, | |
| "grad_norm": 0.00037483617779798806, | |
| "learning_rate": 8.065788823632451e-06, | |
| "loss": 0.0, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.5568862275449101, | |
| "grad_norm": 0.017027398571372032, | |
| "learning_rate": 8.060772751075564e-06, | |
| "loss": 0.0, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.5576347305389222, | |
| "grad_norm": 0.0005349895800463855, | |
| "learning_rate": 8.05575174700358e-06, | |
| "loss": 0.0, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5583832335329342, | |
| "grad_norm": 9.526366193313152e-05, | |
| "learning_rate": 8.05072581950634e-06, | |
| "loss": 0.0, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.5591317365269461, | |
| "grad_norm": 0.0018775092903524637, | |
| "learning_rate": 8.045694976681613e-06, | |
| "loss": 0.0, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.5598802395209581, | |
| "grad_norm": 0.006572918966412544, | |
| "learning_rate": 8.04065922663509e-06, | |
| "loss": 0.0, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.5606287425149701, | |
| "grad_norm": 0.00012014804087812081, | |
| "learning_rate": 8.035618577480369e-06, | |
| "loss": 0.0, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.561377245508982, | |
| "grad_norm": 0.00019066958338953555, | |
| "learning_rate": 8.030573037338942e-06, | |
| "loss": 0.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.562125748502994, | |
| "grad_norm": 0.000158317168825306, | |
| "learning_rate": 8.025522614340177e-06, | |
| "loss": 0.0, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.562874251497006, | |
| "grad_norm": 0.0006968253292143345, | |
| "learning_rate": 8.020467316621316e-06, | |
| "loss": 0.0, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.563622754491018, | |
| "grad_norm": 0.00015155051369220018, | |
| "learning_rate": 8.015407152327448e-06, | |
| "loss": 0.0, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.5643712574850299, | |
| "grad_norm": 0.00017856295744422823, | |
| "learning_rate": 8.010342129611508e-06, | |
| "loss": 0.0, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.5651197604790419, | |
| "grad_norm": 0.0007032952271401882, | |
| "learning_rate": 8.005272256634257e-06, | |
| "loss": 0.0, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5658682634730539, | |
| "grad_norm": 0.0001873714500106871, | |
| "learning_rate": 8.000197541564273e-06, | |
| "loss": 0.0, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.5666167664670658, | |
| "grad_norm": 0.00024626540835015476, | |
| "learning_rate": 7.99511799257793e-06, | |
| "loss": 0.0, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.5673652694610778, | |
| "grad_norm": 0.00011799616186181083, | |
| "learning_rate": 7.990033617859396e-06, | |
| "loss": 0.0, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.5681137724550899, | |
| "grad_norm": 0.00017817386833485216, | |
| "learning_rate": 7.984944425600614e-06, | |
| "loss": 0.0, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.5688622754491018, | |
| "grad_norm": 0.00012566296209115535, | |
| "learning_rate": 7.979850424001283e-06, | |
| "loss": 0.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5688622754491018, | |
| "eval_accuracy": 0.9999997747747748, | |
| "eval_loss": 2.201959887315752e-06, | |
| "eval_runtime": 164.4185, | |
| "eval_samples_per_second": 30.41, | |
| "eval_steps_per_second": 7.603, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5696107784431138, | |
| "grad_norm": 0.00012715034245047718, | |
| "learning_rate": 7.97475162126886e-06, | |
| "loss": 0.0, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.5703592814371258, | |
| "grad_norm": 0.0005135888350196183, | |
| "learning_rate": 7.96964802561853e-06, | |
| "loss": 0.0, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.5711077844311377, | |
| "grad_norm": 0.0008899507811293006, | |
| "learning_rate": 7.964539645273204e-06, | |
| "loss": 0.0, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.5718562874251497, | |
| "grad_norm": 0.00014234622358344495, | |
| "learning_rate": 7.9594264884635e-06, | |
| "loss": 0.0, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.5726047904191617, | |
| "grad_norm": 0.00044490184518508613, | |
| "learning_rate": 7.954308563427732e-06, | |
| "loss": 0.0, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5733532934131736, | |
| "grad_norm": 0.0004495025204960257, | |
| "learning_rate": 7.9491858784119e-06, | |
| "loss": 0.0, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.5741017964071856, | |
| "grad_norm": 0.0003148759133182466, | |
| "learning_rate": 7.944058441669671e-06, | |
| "loss": 0.0, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.5748502994011976, | |
| "grad_norm": 0.01583685912191868, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.0, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.5755988023952096, | |
| "grad_norm": 0.0001242299476871267, | |
| "learning_rate": 7.933789346058951e-06, | |
| "loss": 0.0, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.5763473053892215, | |
| "grad_norm": 0.0006354754441417754, | |
| "learning_rate": 7.928647703736024e-06, | |
| "loss": 0.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5770958083832335, | |
| "grad_norm": 0.00038198617403395474, | |
| "learning_rate": 7.923501342777788e-06, | |
| "loss": 0.0, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.5778443113772455, | |
| "grad_norm": 0.0022715749219059944, | |
| "learning_rate": 7.918350271476064e-06, | |
| "loss": 0.0, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.5785928143712575, | |
| "grad_norm": 7.122563692973927e-05, | |
| "learning_rate": 7.913194498130252e-06, | |
| "loss": 0.0, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.5793413173652695, | |
| "grad_norm": 0.00045244794455356896, | |
| "learning_rate": 7.90803403104733e-06, | |
| "loss": 0.0, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.5800898203592815, | |
| "grad_norm": 0.00015517730207648128, | |
| "learning_rate": 7.90286887854184e-06, | |
| "loss": 0.0, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5808383233532934, | |
| "grad_norm": 0.0001861519122030586, | |
| "learning_rate": 7.897699048935875e-06, | |
| "loss": 0.0, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.5815868263473054, | |
| "grad_norm": 0.00018396999803371727, | |
| "learning_rate": 7.892524550559056e-06, | |
| "loss": 0.0, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.5823353293413174, | |
| "grad_norm": 0.000191383485798724, | |
| "learning_rate": 7.887345391748533e-06, | |
| "loss": 0.0, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.5830838323353293, | |
| "grad_norm": 0.0018197696190327406, | |
| "learning_rate": 7.882161580848966e-06, | |
| "loss": 0.0, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.5838323353293413, | |
| "grad_norm": 0.0004387347144074738, | |
| "learning_rate": 7.876973126212507e-06, | |
| "loss": 0.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5838323353293413, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 1.0737475122368778e-06, | |
| "eval_runtime": 162.5348, | |
| "eval_samples_per_second": 30.763, | |
| "eval_steps_per_second": 7.691, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5845808383233533, | |
| "grad_norm": 0.0022077385801821947, | |
| "learning_rate": 7.87178003619879e-06, | |
| "loss": 0.0, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.5853293413173652, | |
| "grad_norm": 0.0002099570701830089, | |
| "learning_rate": 7.866582319174918e-06, | |
| "loss": 0.0, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.5860778443113772, | |
| "grad_norm": 0.00012316112406551838, | |
| "learning_rate": 7.861379983515449e-06, | |
| "loss": 0.0, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.5868263473053892, | |
| "grad_norm": 0.00024173619749490172, | |
| "learning_rate": 7.856173037602383e-06, | |
| "loss": 0.0, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.5875748502994012, | |
| "grad_norm": 8.62416927702725e-05, | |
| "learning_rate": 7.85096148982515e-06, | |
| "loss": 0.0, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5883233532934131, | |
| "grad_norm": 0.0014817145420238376, | |
| "learning_rate": 7.845745348580592e-06, | |
| "loss": 0.0, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.5890718562874252, | |
| "grad_norm": 0.0001866584934759885, | |
| "learning_rate": 7.840524622272949e-06, | |
| "loss": 0.0, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.5898203592814372, | |
| "grad_norm": 0.00014589863712899387, | |
| "learning_rate": 7.835299319313854e-06, | |
| "loss": 0.0, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.5905688622754491, | |
| "grad_norm": 0.0001201587583636865, | |
| "learning_rate": 7.830069448122313e-06, | |
| "loss": 0.0, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.5913173652694611, | |
| "grad_norm": 0.0004075410251971334, | |
| "learning_rate": 7.82483501712469e-06, | |
| "loss": 0.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5920658682634731, | |
| "grad_norm": 0.0007497837650589645, | |
| "learning_rate": 7.819596034754696e-06, | |
| "loss": 0.0, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.592814371257485, | |
| "grad_norm": 0.0001937482156790793, | |
| "learning_rate": 7.81435250945338e-06, | |
| "loss": 0.0, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.593562874251497, | |
| "grad_norm": 0.00015605123189743608, | |
| "learning_rate": 7.8091044496691e-06, | |
| "loss": 0.0, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.594311377245509, | |
| "grad_norm": 0.0008134747622534633, | |
| "learning_rate": 7.803851863857533e-06, | |
| "loss": 0.0, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.5950598802395209, | |
| "grad_norm": 0.016990555450320244, | |
| "learning_rate": 7.798594760481639e-06, | |
| "loss": 0.0, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5958083832335329, | |
| "grad_norm": 8.968533074948937e-05, | |
| "learning_rate": 7.793333148011658e-06, | |
| "loss": 0.0, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.5965568862275449, | |
| "grad_norm": 0.00021849350014235824, | |
| "learning_rate": 7.7880670349251e-06, | |
| "loss": 0.0, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.5973053892215568, | |
| "grad_norm": 0.00010406466026324779, | |
| "learning_rate": 7.782796429706721e-06, | |
| "loss": 0.0, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.5980538922155688, | |
| "grad_norm": 0.0001633252395549789, | |
| "learning_rate": 7.777521340848515e-06, | |
| "loss": 0.0, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.5988023952095808, | |
| "grad_norm": 0.00010362563625676557, | |
| "learning_rate": 7.772241776849705e-06, | |
| "loss": 0.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5988023952095808, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 8.31741829188104e-07, | |
| "eval_runtime": 160.7648, | |
| "eval_samples_per_second": 31.101, | |
| "eval_steps_per_second": 7.775, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5995508982035929, | |
| "grad_norm": 0.0001440924679627642, | |
| "learning_rate": 7.76695774621672e-06, | |
| "loss": 0.0, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.6002994011976048, | |
| "grad_norm": 0.00024095825210679322, | |
| "learning_rate": 7.761669257463188e-06, | |
| "loss": 0.0, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.6010479041916168, | |
| "grad_norm": 0.0010255592642351985, | |
| "learning_rate": 7.756376319109917e-06, | |
| "loss": 0.0, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.6017964071856288, | |
| "grad_norm": 0.0003512499970383942, | |
| "learning_rate": 7.751078939684886e-06, | |
| "loss": 0.0, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.6025449101796407, | |
| "grad_norm": 0.0001344636984867975, | |
| "learning_rate": 7.74577712772323e-06, | |
| "loss": 0.0, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6032934131736527, | |
| "grad_norm": 0.00015981386241037399, | |
| "learning_rate": 7.740470891767225e-06, | |
| "loss": 0.0, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.6040419161676647, | |
| "grad_norm": 0.00013109891733620316, | |
| "learning_rate": 7.735160240366276e-06, | |
| "loss": 0.0, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.6047904191616766, | |
| "grad_norm": 0.00010152284085052088, | |
| "learning_rate": 7.729845182076896e-06, | |
| "loss": 0.0, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.6055389221556886, | |
| "grad_norm": 0.0010211137123405933, | |
| "learning_rate": 7.72452572546271e-06, | |
| "loss": 0.0, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.6062874251497006, | |
| "grad_norm": 0.00015486738993786275, | |
| "learning_rate": 7.71920187909442e-06, | |
| "loss": 0.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6070359281437125, | |
| "grad_norm": 0.00011970350897172466, | |
| "learning_rate": 7.713873651549805e-06, | |
| "loss": 0.0, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.6077844311377245, | |
| "grad_norm": 7.165825081756338e-05, | |
| "learning_rate": 7.7085410514137e-06, | |
| "loss": 0.0, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.6085329341317365, | |
| "grad_norm": 0.00015439889102708548, | |
| "learning_rate": 7.703204087277989e-06, | |
| "loss": 0.0, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.6092814371257484, | |
| "grad_norm": 9.300145757151768e-05, | |
| "learning_rate": 7.697862767741584e-06, | |
| "loss": 0.0, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.6100299401197605, | |
| "grad_norm": 0.00014098809333518147, | |
| "learning_rate": 7.692517101410414e-06, | |
| "loss": 0.0, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6107784431137725, | |
| "grad_norm": 0.00011235267447773367, | |
| "learning_rate": 7.68716709689742e-06, | |
| "loss": 0.0, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.6115269461077845, | |
| "grad_norm": 0.00014485006977338344, | |
| "learning_rate": 7.681812762822517e-06, | |
| "loss": 0.0, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.6122754491017964, | |
| "grad_norm": 8.417559729423374e-05, | |
| "learning_rate": 7.676454107812608e-06, | |
| "loss": 0.0, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.6130239520958084, | |
| "grad_norm": 0.00010664058936526999, | |
| "learning_rate": 7.671091140501557e-06, | |
| "loss": 0.0, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.6137724550898204, | |
| "grad_norm": 0.00019991857698187232, | |
| "learning_rate": 7.66572386953017e-06, | |
| "loss": 0.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6137724550898204, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.937613648005936e-07, | |
| "eval_runtime": 164.9592, | |
| "eval_samples_per_second": 30.311, | |
| "eval_steps_per_second": 7.578, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6145209580838323, | |
| "grad_norm": 0.00013954796304460615, | |
| "learning_rate": 7.660352303546192e-06, | |
| "loss": 0.0, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.6152694610778443, | |
| "grad_norm": 0.00022430805256590247, | |
| "learning_rate": 7.654976451204288e-06, | |
| "loss": 0.0, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.6160179640718563, | |
| "grad_norm": 8.612870442448184e-05, | |
| "learning_rate": 7.649596321166024e-06, | |
| "loss": 0.0, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.6167664670658682, | |
| "grad_norm": 0.00018191659182775766, | |
| "learning_rate": 7.644211922099867e-06, | |
| "loss": 0.0, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.6175149700598802, | |
| "grad_norm": 0.00012960025924257934, | |
| "learning_rate": 7.638823262681155e-06, | |
| "loss": 0.0, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6182634730538922, | |
| "grad_norm": 0.00048115866957232356, | |
| "learning_rate": 7.633430351592093e-06, | |
| "loss": 0.0, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.6190119760479041, | |
| "grad_norm": 0.004453903064131737, | |
| "learning_rate": 7.6280331975217356e-06, | |
| "loss": 0.0, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.6197604790419161, | |
| "grad_norm": 0.00019366122432984412, | |
| "learning_rate": 7.622631809165972e-06, | |
| "loss": 0.0, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.6205089820359282, | |
| "grad_norm": 0.0001328593207290396, | |
| "learning_rate": 7.617226195227518e-06, | |
| "loss": 0.0, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.6212574850299402, | |
| "grad_norm": 0.00019288925977889448, | |
| "learning_rate": 7.611816364415896e-06, | |
| "loss": 0.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6220059880239521, | |
| "grad_norm": 0.00020607073383871466, | |
| "learning_rate": 7.606402325447421e-06, | |
| "loss": 0.0, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.6227544910179641, | |
| "grad_norm": 0.0010563160758465528, | |
| "learning_rate": 7.600984087045187e-06, | |
| "loss": 0.0, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.6235029940119761, | |
| "grad_norm": 7.731228834018111e-05, | |
| "learning_rate": 7.595561657939061e-06, | |
| "loss": 0.0, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.624251497005988, | |
| "grad_norm": 0.00039579844451509416, | |
| "learning_rate": 7.590135046865652e-06, | |
| "loss": 0.0, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.00012335921928752214, | |
| "learning_rate": 7.584704262568315e-06, | |
| "loss": 0.0, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.625748502994012, | |
| "grad_norm": 8.330697164637968e-05, | |
| "learning_rate": 7.579269313797126e-06, | |
| "loss": 0.0, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.6264970059880239, | |
| "grad_norm": 0.00023956908262334764, | |
| "learning_rate": 7.573830209308872e-06, | |
| "loss": 0.0, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.6272455089820359, | |
| "grad_norm": 0.00013747882621828467, | |
| "learning_rate": 7.568386957867033e-06, | |
| "loss": 0.0, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.6279940119760479, | |
| "grad_norm": 0.00012644138769246638, | |
| "learning_rate": 7.562939568241772e-06, | |
| "loss": 0.0, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.6287425149700598, | |
| "grad_norm": 0.00021938206919003278, | |
| "learning_rate": 7.557488049209921e-06, | |
| "loss": 0.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6287425149700598, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 6.026603500686178e-07, | |
| "eval_runtime": 164.079, | |
| "eval_samples_per_second": 30.473, | |
| "eval_steps_per_second": 7.618, | |
| "step": 1680 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 40, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5997706414881505e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |