| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0710377267927425, | |
| "eval_steps": 500, | |
| "global_step": 8000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003839877123932034, | |
| "grad_norm": 9.855803343729887, | |
| "learning_rate": 1.9984639016897083e-05, | |
| "loss": 0.9511, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007679754247864068, | |
| "grad_norm": 7.900613749182144, | |
| "learning_rate": 1.9969278033794163e-05, | |
| "loss": 0.1502, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011519631371796103, | |
| "grad_norm": 0.22927913857697063, | |
| "learning_rate": 1.9953917050691244e-05, | |
| "loss": 0.0234, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.015359508495728137, | |
| "grad_norm": 0.08089452288889179, | |
| "learning_rate": 1.993855606758833e-05, | |
| "loss": 0.0068, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01919938561966017, | |
| "grad_norm": 0.1111623671137423, | |
| "learning_rate": 1.992319508448541e-05, | |
| "loss": 0.0042, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.023039262743592206, | |
| "grad_norm": 0.0743738067650186, | |
| "learning_rate": 1.990783410138249e-05, | |
| "loss": 0.0027, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02687913986752424, | |
| "grad_norm": 0.08445026035259467, | |
| "learning_rate": 1.989247311827957e-05, | |
| "loss": 0.0023, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.030719016991456273, | |
| "grad_norm": 0.032381204224608405, | |
| "learning_rate": 1.9877112135176652e-05, | |
| "loss": 0.0019, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03455889411538831, | |
| "grad_norm": 0.034738835887687565, | |
| "learning_rate": 1.9861751152073733e-05, | |
| "loss": 0.0015, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03839877123932034, | |
| "grad_norm": 0.03125979639961295, | |
| "learning_rate": 1.9846390168970814e-05, | |
| "loss": 0.0012, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.042238648363252376, | |
| "grad_norm": 0.027922819061575684, | |
| "learning_rate": 1.98310291858679e-05, | |
| "loss": 0.001, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04607852548718441, | |
| "grad_norm": 0.027107384881600128, | |
| "learning_rate": 1.981566820276498e-05, | |
| "loss": 0.0009, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04991840261111644, | |
| "grad_norm": 0.030852661769757015, | |
| "learning_rate": 1.980030721966206e-05, | |
| "loss": 0.0008, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05375827973504848, | |
| "grad_norm": 0.017018191812887797, | |
| "learning_rate": 1.978494623655914e-05, | |
| "loss": 0.0007, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05759815685898051, | |
| "grad_norm": 0.02039007906173804, | |
| "learning_rate": 1.9769585253456222e-05, | |
| "loss": 0.0007, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.061438033982912546, | |
| "grad_norm": 0.01751117077975313, | |
| "learning_rate": 1.9754224270353303e-05, | |
| "loss": 0.0007, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06527791110684458, | |
| "grad_norm": 0.025527484156853922, | |
| "learning_rate": 1.9738863287250384e-05, | |
| "loss": 0.0007, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06911778823077662, | |
| "grad_norm": 0.010356304510994301, | |
| "learning_rate": 1.9723502304147465e-05, | |
| "loss": 0.0006, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07295766535470866, | |
| "grad_norm": 0.019221562602418918, | |
| "learning_rate": 1.970814132104455e-05, | |
| "loss": 0.0006, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07679754247864068, | |
| "grad_norm": 0.019408746838376397, | |
| "learning_rate": 1.969278033794163e-05, | |
| "loss": 0.0006, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08063741960257272, | |
| "grad_norm": 0.012272661989691892, | |
| "learning_rate": 1.967741935483871e-05, | |
| "loss": 0.0006, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08447729672650475, | |
| "grad_norm": 0.01575335759164804, | |
| "learning_rate": 1.9662058371735792e-05, | |
| "loss": 0.0006, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08831717385043679, | |
| "grad_norm": 0.02337266868962172, | |
| "learning_rate": 1.9646697388632873e-05, | |
| "loss": 0.0006, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09215705097436883, | |
| "grad_norm": 0.02044371550228021, | |
| "learning_rate": 1.9631336405529954e-05, | |
| "loss": 0.0005, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09599692809830085, | |
| "grad_norm": 0.013592727470100728, | |
| "learning_rate": 1.9615975422427035e-05, | |
| "loss": 0.0005, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09983680522223289, | |
| "grad_norm": 0.01722960420346088, | |
| "learning_rate": 1.960061443932412e-05, | |
| "loss": 0.0006, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10367668234616492, | |
| "grad_norm": 0.010831279896438627, | |
| "learning_rate": 1.95852534562212e-05, | |
| "loss": 0.0005, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10751655947009696, | |
| "grad_norm": 0.04396421107808547, | |
| "learning_rate": 1.956989247311828e-05, | |
| "loss": 0.0007, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.111356436594029, | |
| "grad_norm": 0.009538347218684752, | |
| "learning_rate": 1.9554531490015362e-05, | |
| "loss": 0.0007, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11519631371796102, | |
| "grad_norm": 0.019732833272054093, | |
| "learning_rate": 1.9539170506912443e-05, | |
| "loss": 0.0006, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11903619084189306, | |
| "grad_norm": 0.01102864765215555, | |
| "learning_rate": 1.9523809523809524e-05, | |
| "loss": 0.0006, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12287606796582509, | |
| "grad_norm": 0.022789866556699984, | |
| "learning_rate": 1.9508448540706605e-05, | |
| "loss": 0.0008, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12671594508975711, | |
| "grad_norm": 0.026179745030787457, | |
| "learning_rate": 1.949308755760369e-05, | |
| "loss": 0.0007, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13055582221368917, | |
| "grad_norm": 0.006770076864326156, | |
| "learning_rate": 1.947772657450077e-05, | |
| "loss": 0.0006, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1343956993376212, | |
| "grad_norm": 0.012631828755799612, | |
| "learning_rate": 1.946236559139785e-05, | |
| "loss": 0.0006, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.13823557646155324, | |
| "grad_norm": 0.012963546321523804, | |
| "learning_rate": 1.9447004608294932e-05, | |
| "loss": 0.0005, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14207545358548526, | |
| "grad_norm": 0.024135972419695974, | |
| "learning_rate": 1.9431643625192013e-05, | |
| "loss": 0.0007, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1459153307094173, | |
| "grad_norm": 0.023687976253774837, | |
| "learning_rate": 1.9416282642089094e-05, | |
| "loss": 0.0007, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14975520783334934, | |
| "grad_norm": 0.027951604107350918, | |
| "learning_rate": 1.9400921658986175e-05, | |
| "loss": 0.0007, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15359508495728136, | |
| "grad_norm": 0.010865274419369686, | |
| "learning_rate": 1.9385560675883256e-05, | |
| "loss": 0.0006, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1574349620812134, | |
| "grad_norm": 0.010649058165805126, | |
| "learning_rate": 1.937019969278034e-05, | |
| "loss": 0.0006, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16127483920514543, | |
| "grad_norm": 0.017247417697752888, | |
| "learning_rate": 1.935483870967742e-05, | |
| "loss": 0.0005, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16511471632907748, | |
| "grad_norm": 0.009598794250166713, | |
| "learning_rate": 1.9339477726574502e-05, | |
| "loss": 0.0005, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1689545934530095, | |
| "grad_norm": 0.013190891135776709, | |
| "learning_rate": 1.9324116743471583e-05, | |
| "loss": 0.0005, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17279447057694153, | |
| "grad_norm": 0.015198390723045437, | |
| "learning_rate": 1.9308755760368664e-05, | |
| "loss": 0.0005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17663434770087358, | |
| "grad_norm": 0.00888530246811667, | |
| "learning_rate": 1.9293394777265745e-05, | |
| "loss": 0.0005, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1804742248248056, | |
| "grad_norm": 0.0072193681049761505, | |
| "learning_rate": 1.9278033794162825e-05, | |
| "loss": 0.0005, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.18431410194873765, | |
| "grad_norm": 0.015423575939404187, | |
| "learning_rate": 1.926267281105991e-05, | |
| "loss": 0.0004, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.18815397907266967, | |
| "grad_norm": 0.005888900378170728, | |
| "learning_rate": 1.924731182795699e-05, | |
| "loss": 0.0004, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.1919938561966017, | |
| "grad_norm": 0.009370771214962732, | |
| "learning_rate": 1.923195084485407e-05, | |
| "loss": 0.0005, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19583373332053375, | |
| "grad_norm": 0.008880476477762786, | |
| "learning_rate": 1.9216589861751153e-05, | |
| "loss": 0.0004, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19967361044446577, | |
| "grad_norm": 0.014710261381765207, | |
| "learning_rate": 1.9201228878648233e-05, | |
| "loss": 0.0004, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.20351348756839782, | |
| "grad_norm": 0.010564538973197759, | |
| "learning_rate": 1.9185867895545314e-05, | |
| "loss": 0.0005, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.20735336469232984, | |
| "grad_norm": 0.011022024037436684, | |
| "learning_rate": 1.91705069124424e-05, | |
| "loss": 0.0005, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.21119324181626187, | |
| "grad_norm": 0.010224510974710398, | |
| "learning_rate": 1.915514592933948e-05, | |
| "loss": 0.0005, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.21503311894019392, | |
| "grad_norm": 0.018187207031321574, | |
| "learning_rate": 1.913978494623656e-05, | |
| "loss": 0.0004, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21887299606412594, | |
| "grad_norm": 0.01408592901908285, | |
| "learning_rate": 1.912442396313364e-05, | |
| "loss": 0.0004, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.222712873188058, | |
| "grad_norm": 0.00579354434664508, | |
| "learning_rate": 1.9109062980030722e-05, | |
| "loss": 0.0004, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.22655275031199001, | |
| "grad_norm": 0.007179585527455935, | |
| "learning_rate": 1.9093701996927803e-05, | |
| "loss": 0.0004, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.23039262743592204, | |
| "grad_norm": 0.01087115590194149, | |
| "learning_rate": 1.9078341013824884e-05, | |
| "loss": 0.0004, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2342325045598541, | |
| "grad_norm": 0.013488801384563282, | |
| "learning_rate": 1.906298003072197e-05, | |
| "loss": 0.0004, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2380723816837861, | |
| "grad_norm": 0.014270453003895688, | |
| "learning_rate": 1.904761904761905e-05, | |
| "loss": 0.0004, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.24191225880771816, | |
| "grad_norm": 0.005608293770659859, | |
| "learning_rate": 1.903225806451613e-05, | |
| "loss": 0.0004, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.24575213593165018, | |
| "grad_norm": 0.01942922314775013, | |
| "learning_rate": 1.901689708141321e-05, | |
| "loss": 0.0004, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.24959201305558223, | |
| "grad_norm": 0.008807942106723612, | |
| "learning_rate": 1.9001536098310292e-05, | |
| "loss": 0.0004, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.25343189017951423, | |
| "grad_norm": 0.009642123808480296, | |
| "learning_rate": 1.8986175115207373e-05, | |
| "loss": 0.0004, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2572717673034463, | |
| "grad_norm": 0.010839972501351821, | |
| "learning_rate": 1.8970814132104458e-05, | |
| "loss": 0.0004, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.26111164442737833, | |
| "grad_norm": 0.007016830642797472, | |
| "learning_rate": 1.895545314900154e-05, | |
| "loss": 0.0004, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.26495152155131035, | |
| "grad_norm": 0.01036596596633793, | |
| "learning_rate": 1.894009216589862e-05, | |
| "loss": 0.0004, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2687913986752424, | |
| "grad_norm": 0.00699112176439698, | |
| "learning_rate": 1.89247311827957e-05, | |
| "loss": 0.0004, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2726312757991744, | |
| "grad_norm": 0.023143846781726766, | |
| "learning_rate": 1.890937019969278e-05, | |
| "loss": 0.0004, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2764711529231065, | |
| "grad_norm": 0.014371836855906761, | |
| "learning_rate": 1.8894009216589862e-05, | |
| "loss": 0.0004, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2803110300470385, | |
| "grad_norm": 0.013134481657118259, | |
| "learning_rate": 1.8878648233486943e-05, | |
| "loss": 0.0004, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2841509071709705, | |
| "grad_norm": 0.00936736410572265, | |
| "learning_rate": 1.8863287250384027e-05, | |
| "loss": 0.0004, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.28799078429490255, | |
| "grad_norm": 0.0177202255192513, | |
| "learning_rate": 1.8847926267281108e-05, | |
| "loss": 0.0004, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2918306614188346, | |
| "grad_norm": 0.006114501253180761, | |
| "learning_rate": 1.883256528417819e-05, | |
| "loss": 0.0004, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.29567053854276665, | |
| "grad_norm": 0.008265452601599499, | |
| "learning_rate": 1.881720430107527e-05, | |
| "loss": 0.0004, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.29951041566669867, | |
| "grad_norm": 0.014907543135145678, | |
| "learning_rate": 1.880184331797235e-05, | |
| "loss": 0.0004, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3033502927906307, | |
| "grad_norm": 0.014797049223610434, | |
| "learning_rate": 1.8786482334869432e-05, | |
| "loss": 0.0004, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3071901699145627, | |
| "grad_norm": 0.004652330628741432, | |
| "learning_rate": 1.8771121351766516e-05, | |
| "loss": 0.0003, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3110300470384948, | |
| "grad_norm": 0.005493451170013414, | |
| "learning_rate": 1.8755760368663597e-05, | |
| "loss": 0.0004, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3148699241624268, | |
| "grad_norm": 0.007045732848967435, | |
| "learning_rate": 1.8740399385560678e-05, | |
| "loss": 0.0003, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.31870980128635884, | |
| "grad_norm": 0.018179892197985704, | |
| "learning_rate": 1.872503840245776e-05, | |
| "loss": 0.0004, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.32254967841029086, | |
| "grad_norm": 0.005668747866614938, | |
| "learning_rate": 1.870967741935484e-05, | |
| "loss": 0.0003, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3263895555342229, | |
| "grad_norm": 0.005624631016307953, | |
| "learning_rate": 1.869431643625192e-05, | |
| "loss": 0.0003, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.33022943265815496, | |
| "grad_norm": 0.012701139148209117, | |
| "learning_rate": 1.8678955453149005e-05, | |
| "loss": 0.0004, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.334069309782087, | |
| "grad_norm": 0.020568594933285278, | |
| "learning_rate": 1.8663594470046086e-05, | |
| "loss": 0.0004, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.337909186906019, | |
| "grad_norm": 0.016752438047633097, | |
| "learning_rate": 1.8648233486943167e-05, | |
| "loss": 0.0003, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.34174906402995103, | |
| "grad_norm": 0.022040592531457844, | |
| "learning_rate": 1.8632872503840248e-05, | |
| "loss": 0.0003, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.34558894115388306, | |
| "grad_norm": 0.005501761305071796, | |
| "learning_rate": 1.861751152073733e-05, | |
| "loss": 0.0003, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34942881827781513, | |
| "grad_norm": 0.00913565126513445, | |
| "learning_rate": 1.860215053763441e-05, | |
| "loss": 0.0003, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.35326869540174716, | |
| "grad_norm": 0.00643352891432492, | |
| "learning_rate": 1.858678955453149e-05, | |
| "loss": 0.0004, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3571085725256792, | |
| "grad_norm": 0.005932277310972733, | |
| "learning_rate": 1.8571428571428575e-05, | |
| "loss": 0.0003, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3609484496496112, | |
| "grad_norm": 0.00854566416057147, | |
| "learning_rate": 1.8556067588325656e-05, | |
| "loss": 0.0004, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3647883267735432, | |
| "grad_norm": 0.004198303186077754, | |
| "learning_rate": 1.8540706605222737e-05, | |
| "loss": 0.0003, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3686282038974753, | |
| "grad_norm": 0.006013969871660999, | |
| "learning_rate": 1.8525345622119818e-05, | |
| "loss": 0.0003, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3724680810214073, | |
| "grad_norm": 0.00821940173101188, | |
| "learning_rate": 1.85099846390169e-05, | |
| "loss": 0.0003, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.37630795814533935, | |
| "grad_norm": 0.01706199510535657, | |
| "learning_rate": 1.849462365591398e-05, | |
| "loss": 0.0003, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3801478352692714, | |
| "grad_norm": 0.012195179945468527, | |
| "learning_rate": 1.8479262672811064e-05, | |
| "loss": 0.0003, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3839877123932034, | |
| "grad_norm": 0.00807499700581235, | |
| "learning_rate": 1.8463901689708145e-05, | |
| "loss": 0.0003, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3878275895171355, | |
| "grad_norm": 0.010945346397012758, | |
| "learning_rate": 1.8448540706605226e-05, | |
| "loss": 0.0003, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3916674666410675, | |
| "grad_norm": 0.00880396626822876, | |
| "learning_rate": 1.8433179723502307e-05, | |
| "loss": 0.0003, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3955073437649995, | |
| "grad_norm": 0.014399672052520717, | |
| "learning_rate": 1.8417818740399388e-05, | |
| "loss": 0.0003, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.39934722088893154, | |
| "grad_norm": 0.006831959770996685, | |
| "learning_rate": 1.840245775729647e-05, | |
| "loss": 0.0003, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.40318709801286357, | |
| "grad_norm": 18.516333985308375, | |
| "learning_rate": 1.838709677419355e-05, | |
| "loss": 0.0478, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.40702697513679564, | |
| "grad_norm": 0.6477183946866085, | |
| "learning_rate": 1.837173579109063e-05, | |
| "loss": 0.1816, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.41086685226072767, | |
| "grad_norm": 2.6861506025108475, | |
| "learning_rate": 1.8356374807987715e-05, | |
| "loss": 0.0536, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4147067293846597, | |
| "grad_norm": 0.4416236285189527, | |
| "learning_rate": 1.8341013824884796e-05, | |
| "loss": 0.0249, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4185466065085917, | |
| "grad_norm": 0.9786805141459802, | |
| "learning_rate": 1.8325652841781877e-05, | |
| "loss": 0.0101, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.42238648363252373, | |
| "grad_norm": 1.627598109506058, | |
| "learning_rate": 1.8310291858678958e-05, | |
| "loss": 0.0108, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4262263607564558, | |
| "grad_norm": 2.2472146744387635, | |
| "learning_rate": 1.829493087557604e-05, | |
| "loss": 0.0101, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.43006623788038784, | |
| "grad_norm": 0.5695485826329719, | |
| "learning_rate": 1.827956989247312e-05, | |
| "loss": 0.0043, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.43390611500431986, | |
| "grad_norm": 1.530827959431516, | |
| "learning_rate": 1.82642089093702e-05, | |
| "loss": 0.013, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.4377459921282519, | |
| "grad_norm": 0.029509683112095193, | |
| "learning_rate": 1.8248847926267285e-05, | |
| "loss": 0.0113, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4415858692521839, | |
| "grad_norm": 0.01781992132649757, | |
| "learning_rate": 1.8233486943164366e-05, | |
| "loss": 0.0007, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.445425746376116, | |
| "grad_norm": 0.010151888479514436, | |
| "learning_rate": 1.8218125960061447e-05, | |
| "loss": 0.0005, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.449265623500048, | |
| "grad_norm": 0.014420471837514583, | |
| "learning_rate": 1.8202764976958527e-05, | |
| "loss": 0.0005, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.45310550062398003, | |
| "grad_norm": 0.010072124184727966, | |
| "learning_rate": 1.818740399385561e-05, | |
| "loss": 0.0004, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.45694537774791205, | |
| "grad_norm": 0.007661769308843087, | |
| "learning_rate": 1.817204301075269e-05, | |
| "loss": 0.0004, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4607852548718441, | |
| "grad_norm": 0.010038812848366137, | |
| "learning_rate": 1.815668202764977e-05, | |
| "loss": 0.0004, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.46462513199577615, | |
| "grad_norm": 0.011258850363850582, | |
| "learning_rate": 1.8141321044546855e-05, | |
| "loss": 0.0004, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4684650091197082, | |
| "grad_norm": 0.016055405689836853, | |
| "learning_rate": 1.8125960061443936e-05, | |
| "loss": 0.0004, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4723048862436402, | |
| "grad_norm": 0.007967416713376401, | |
| "learning_rate": 1.8110599078341016e-05, | |
| "loss": 0.0004, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4761447633675722, | |
| "grad_norm": 0.008025613224993348, | |
| "learning_rate": 1.8095238095238097e-05, | |
| "loss": 0.0003, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.47998464049150424, | |
| "grad_norm": 0.004966250706848606, | |
| "learning_rate": 1.8079877112135178e-05, | |
| "loss": 0.0004, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4838245176154363, | |
| "grad_norm": 0.007419454029578721, | |
| "learning_rate": 1.806451612903226e-05, | |
| "loss": 0.0004, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.48766439473936835, | |
| "grad_norm": 0.007372896744459003, | |
| "learning_rate": 1.804915514592934e-05, | |
| "loss": 0.0004, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.49150427186330037, | |
| "grad_norm": 0.007260032706837447, | |
| "learning_rate": 1.803379416282642e-05, | |
| "loss": 0.0004, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4953441489872324, | |
| "grad_norm": 0.006797112689312068, | |
| "learning_rate": 1.8018433179723505e-05, | |
| "loss": 0.0003, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.49918402611116447, | |
| "grad_norm": 0.006596862668254978, | |
| "learning_rate": 1.8003072196620586e-05, | |
| "loss": 0.0003, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5030239032350965, | |
| "grad_norm": 0.007442569811616661, | |
| "learning_rate": 1.7987711213517667e-05, | |
| "loss": 0.0004, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5068637803590285, | |
| "grad_norm": 0.011659097052332864, | |
| "learning_rate": 1.7972350230414748e-05, | |
| "loss": 0.0003, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5107036574829605, | |
| "grad_norm": 0.003189461384393768, | |
| "learning_rate": 1.795698924731183e-05, | |
| "loss": 0.0003, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5145435346068926, | |
| "grad_norm": 0.006218121023821658, | |
| "learning_rate": 1.794162826420891e-05, | |
| "loss": 0.0003, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5183834117308246, | |
| "grad_norm": 0.004661385155418944, | |
| "learning_rate": 1.792626728110599e-05, | |
| "loss": 0.0004, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5222232888547567, | |
| "grad_norm": 0.007451036130599556, | |
| "learning_rate": 1.7910906298003075e-05, | |
| "loss": 0.0003, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5260631659786886, | |
| "grad_norm": 0.005739057587058598, | |
| "learning_rate": 1.7895545314900156e-05, | |
| "loss": 0.0003, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5299030431026207, | |
| "grad_norm": 0.008293201974170215, | |
| "learning_rate": 1.7880184331797237e-05, | |
| "loss": 0.0004, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5337429202265528, | |
| "grad_norm": 0.011616342167072335, | |
| "learning_rate": 1.7864823348694318e-05, | |
| "loss": 0.0003, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5375827973504848, | |
| "grad_norm": 0.011567680895725766, | |
| "learning_rate": 1.78494623655914e-05, | |
| "loss": 0.0004, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5414226744744168, | |
| "grad_norm": 0.0057168290679564795, | |
| "learning_rate": 1.783410138248848e-05, | |
| "loss": 0.0003, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5452625515983488, | |
| "grad_norm": 0.010165783676708838, | |
| "learning_rate": 1.781874039938556e-05, | |
| "loss": 0.0003, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5491024287222809, | |
| "grad_norm": 0.0045309573507459015, | |
| "learning_rate": 1.7803379416282645e-05, | |
| "loss": 0.0003, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.552942305846213, | |
| "grad_norm": 0.00811076581038844, | |
| "learning_rate": 1.7788018433179726e-05, | |
| "loss": 0.0003, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5567821829701449, | |
| "grad_norm": 0.009995480779616097, | |
| "learning_rate": 1.7772657450076807e-05, | |
| "loss": 0.0003, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.560622060094077, | |
| "grad_norm": 0.006925240596184182, | |
| "learning_rate": 1.7757296466973888e-05, | |
| "loss": 0.0003, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.564461937218009, | |
| "grad_norm": 0.013412407169843198, | |
| "learning_rate": 1.774193548387097e-05, | |
| "loss": 0.0003, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.568301814341941, | |
| "grad_norm": 0.012787736722349891, | |
| "learning_rate": 1.772657450076805e-05, | |
| "loss": 0.0003, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5721416914658731, | |
| "grad_norm": 0.007058357804663414, | |
| "learning_rate": 1.771121351766513e-05, | |
| "loss": 0.0003, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5759815685898051, | |
| "grad_norm": 0.007736272349706681, | |
| "learning_rate": 1.7695852534562215e-05, | |
| "loss": 0.0003, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5798214457137372, | |
| "grad_norm": 0.0033192017596056908, | |
| "learning_rate": 1.7680491551459296e-05, | |
| "loss": 0.0003, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5836613228376692, | |
| "grad_norm": 0.014268997426681756, | |
| "learning_rate": 1.7665130568356377e-05, | |
| "loss": 0.0003, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5875011999616012, | |
| "grad_norm": 0.007258373149860229, | |
| "learning_rate": 1.7649769585253458e-05, | |
| "loss": 0.0003, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5913410770855333, | |
| "grad_norm": 0.005355993128279297, | |
| "learning_rate": 1.763440860215054e-05, | |
| "loss": 0.0003, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5951809542094653, | |
| "grad_norm": 0.006539831015011762, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 0.0003, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5990208313333973, | |
| "grad_norm": 0.0029413603917756745, | |
| "learning_rate": 1.76036866359447e-05, | |
| "loss": 0.0003, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6028607084573294, | |
| "grad_norm": 0.005311044247403118, | |
| "learning_rate": 1.758832565284178e-05, | |
| "loss": 0.0003, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6067005855812614, | |
| "grad_norm": 0.008810927875552908, | |
| "learning_rate": 1.7572964669738866e-05, | |
| "loss": 0.0003, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6105404627051935, | |
| "grad_norm": 0.019081216576469953, | |
| "learning_rate": 1.7557603686635947e-05, | |
| "loss": 0.0003, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6143803398291254, | |
| "grad_norm": 0.018890578715268194, | |
| "learning_rate": 1.7542242703533028e-05, | |
| "loss": 0.0003, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6182202169530575, | |
| "grad_norm": 0.006788409840537928, | |
| "learning_rate": 1.752688172043011e-05, | |
| "loss": 0.0003, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6220600940769896, | |
| "grad_norm": 0.007667765007600492, | |
| "learning_rate": 1.751152073732719e-05, | |
| "loss": 0.0003, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6258999712009216, | |
| "grad_norm": 0.0042895580282391686, | |
| "learning_rate": 1.749615975422427e-05, | |
| "loss": 0.0003, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6297398483248536, | |
| "grad_norm": 0.0046351980587696125, | |
| "learning_rate": 1.748079877112135e-05, | |
| "loss": 0.0003, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6335797254487856, | |
| "grad_norm": 0.0033174467847263173, | |
| "learning_rate": 1.7465437788018436e-05, | |
| "loss": 0.0003, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6374196025727177, | |
| "grad_norm": 0.005803214350891364, | |
| "learning_rate": 1.7450076804915517e-05, | |
| "loss": 0.0003, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6412594796966498, | |
| "grad_norm": 0.010355179051111019, | |
| "learning_rate": 1.7434715821812597e-05, | |
| "loss": 0.0003, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6450993568205817, | |
| "grad_norm": 0.00695229076668098, | |
| "learning_rate": 1.741935483870968e-05, | |
| "loss": 0.0003, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6489392339445138, | |
| "grad_norm": 0.0026433167192979326, | |
| "learning_rate": 1.740399385560676e-05, | |
| "loss": 0.0003, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6527791110684458, | |
| "grad_norm": 0.004001528867502428, | |
| "learning_rate": 1.738863287250384e-05, | |
| "loss": 0.0003, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6566189881923779, | |
| "grad_norm": 0.005032030468208995, | |
| "learning_rate": 1.737327188940092e-05, | |
| "loss": 0.0003, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6604588653163099, | |
| "grad_norm": 0.012342312681179527, | |
| "learning_rate": 1.7357910906298005e-05, | |
| "loss": 0.0003, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6642987424402419, | |
| "grad_norm": 0.006477437534892976, | |
| "learning_rate": 1.7342549923195086e-05, | |
| "loss": 0.0003, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.668138619564174, | |
| "grad_norm": 0.003880319040550072, | |
| "learning_rate": 1.7327188940092167e-05, | |
| "loss": 0.0003, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6719784966881059, | |
| "grad_norm": 0.007668035468060641, | |
| "learning_rate": 1.7311827956989248e-05, | |
| "loss": 0.0003, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.675818373812038, | |
| "grad_norm": 0.011038361878502203, | |
| "learning_rate": 1.729646697388633e-05, | |
| "loss": 0.0003, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6796582509359701, | |
| "grad_norm": 0.004974769727903427, | |
| "learning_rate": 1.728110599078341e-05, | |
| "loss": 0.0003, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6834981280599021, | |
| "grad_norm": 0.004325284236550939, | |
| "learning_rate": 1.726574500768049e-05, | |
| "loss": 0.0003, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6873380051838341, | |
| "grad_norm": 0.0042080867657549705, | |
| "learning_rate": 1.7250384024577572e-05, | |
| "loss": 0.0003, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6911778823077661, | |
| "grad_norm": 0.0039328487679914535, | |
| "learning_rate": 1.7235023041474656e-05, | |
| "loss": 0.0003, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6950177594316982, | |
| "grad_norm": 0.004207050570838415, | |
| "learning_rate": 1.7219662058371737e-05, | |
| "loss": 0.0003, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6988576365556303, | |
| "grad_norm": 0.0036413526215648487, | |
| "learning_rate": 1.7204301075268818e-05, | |
| "loss": 0.0003, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7026975136795622, | |
| "grad_norm": 0.004178601636330483, | |
| "learning_rate": 1.71889400921659e-05, | |
| "loss": 0.0003, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7065373908034943, | |
| "grad_norm": 0.00973331298779335, | |
| "learning_rate": 1.717357910906298e-05, | |
| "loss": 0.0003, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7103772679274263, | |
| "grad_norm": 0.004286254481889245, | |
| "learning_rate": 1.715821812596006e-05, | |
| "loss": 0.0003, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7142171450513584, | |
| "grad_norm": 0.005281447209048475, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.0003, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7180570221752904, | |
| "grad_norm": 0.011520628884629904, | |
| "learning_rate": 1.7127496159754226e-05, | |
| "loss": 0.0003, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7218968992992224, | |
| "grad_norm": 0.015096661910423118, | |
| "learning_rate": 1.7112135176651307e-05, | |
| "loss": 0.0003, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7257367764231545, | |
| "grad_norm": 0.004269384954031992, | |
| "learning_rate": 1.7096774193548388e-05, | |
| "loss": 0.0003, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7295766535470865, | |
| "grad_norm": 0.007284302523487442, | |
| "learning_rate": 1.708141321044547e-05, | |
| "loss": 0.0003, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7334165306710185, | |
| "grad_norm": 0.013816212104358527, | |
| "learning_rate": 1.706605222734255e-05, | |
| "loss": 0.0003, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7372564077949506, | |
| "grad_norm": 0.006389002588565134, | |
| "learning_rate": 1.705069124423963e-05, | |
| "loss": 0.0003, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7410962849188826, | |
| "grad_norm": 0.009485308445395068, | |
| "learning_rate": 1.7035330261136712e-05, | |
| "loss": 0.0003, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7449361620428147, | |
| "grad_norm": 0.004423329749614452, | |
| "learning_rate": 1.7019969278033796e-05, | |
| "loss": 0.0003, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7487760391667466, | |
| "grad_norm": 0.007527583045286338, | |
| "learning_rate": 1.7004608294930877e-05, | |
| "loss": 0.0003, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7526159162906787, | |
| "grad_norm": 0.014586231809369528, | |
| "learning_rate": 1.6989247311827958e-05, | |
| "loss": 0.0003, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7564557934146108, | |
| "grad_norm": 0.006651075913511302, | |
| "learning_rate": 1.697388632872504e-05, | |
| "loss": 0.0003, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7602956705385427, | |
| "grad_norm": 0.009325021217663211, | |
| "learning_rate": 1.695852534562212e-05, | |
| "loss": 0.0003, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7641355476624748, | |
| "grad_norm": 0.00448309467347562, | |
| "learning_rate": 1.69431643625192e-05, | |
| "loss": 0.0003, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7679754247864068, | |
| "grad_norm": 0.00862440090599278, | |
| "learning_rate": 1.6927803379416285e-05, | |
| "loss": 0.0003, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7718153019103389, | |
| "grad_norm": 0.003564225910536377, | |
| "learning_rate": 1.6912442396313366e-05, | |
| "loss": 0.0003, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.775655179034271, | |
| "grad_norm": 0.009884322290648858, | |
| "learning_rate": 1.6897081413210447e-05, | |
| "loss": 0.0003, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7794950561582029, | |
| "grad_norm": 0.0052686365175910795, | |
| "learning_rate": 1.6881720430107528e-05, | |
| "loss": 0.0003, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.783334933282135, | |
| "grad_norm": 0.006169173971857438, | |
| "learning_rate": 1.686635944700461e-05, | |
| "loss": 0.0003, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.787174810406067, | |
| "grad_norm": 0.006194757569521478, | |
| "learning_rate": 1.685099846390169e-05, | |
| "loss": 0.0003, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.791014687529999, | |
| "grad_norm": 0.00415409003374665, | |
| "learning_rate": 1.683563748079877e-05, | |
| "loss": 0.0003, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7948545646539311, | |
| "grad_norm": 0.01310714973576285, | |
| "learning_rate": 1.6820276497695855e-05, | |
| "loss": 0.0003, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7986944417778631, | |
| "grad_norm": 0.0046992213585696965, | |
| "learning_rate": 1.6804915514592936e-05, | |
| "loss": 0.0003, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8025343189017952, | |
| "grad_norm": 0.006291272173141966, | |
| "learning_rate": 1.6789554531490017e-05, | |
| "loss": 0.0003, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8063741960257271, | |
| "grad_norm": 0.007546577147358044, | |
| "learning_rate": 1.6774193548387098e-05, | |
| "loss": 0.0003, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8102140731496592, | |
| "grad_norm": 0.005443900744121676, | |
| "learning_rate": 1.675883256528418e-05, | |
| "loss": 0.0003, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8140539502735913, | |
| "grad_norm": 0.0019010839135315712, | |
| "learning_rate": 1.674347158218126e-05, | |
| "loss": 0.0003, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8178938273975233, | |
| "grad_norm": 0.009951343848549921, | |
| "learning_rate": 1.6728110599078344e-05, | |
| "loss": 0.0003, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8217337045214553, | |
| "grad_norm": 0.003896513453508667, | |
| "learning_rate": 1.6712749615975425e-05, | |
| "loss": 0.0003, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8255735816453873, | |
| "grad_norm": 0.014173651446143855, | |
| "learning_rate": 1.6697388632872506e-05, | |
| "loss": 0.0003, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8294134587693194, | |
| "grad_norm": 0.003677873891818207, | |
| "learning_rate": 1.6682027649769587e-05, | |
| "loss": 0.0003, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8332533358932515, | |
| "grad_norm": 0.007358001701120111, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0003, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8370932130171834, | |
| "grad_norm": 0.014586891892807584, | |
| "learning_rate": 1.665130568356375e-05, | |
| "loss": 0.0003, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8409330901411155, | |
| "grad_norm": 0.011685080400631487, | |
| "learning_rate": 1.663594470046083e-05, | |
| "loss": 0.0003, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8447729672650475, | |
| "grad_norm": 0.006355858658307203, | |
| "learning_rate": 1.6620583717357914e-05, | |
| "loss": 0.0003, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8486128443889795, | |
| "grad_norm": 0.004711497431600735, | |
| "learning_rate": 1.6605222734254995e-05, | |
| "loss": 0.0003, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8524527215129116, | |
| "grad_norm": 0.00975542945138729, | |
| "learning_rate": 1.6589861751152075e-05, | |
| "loss": 0.0003, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8562925986368436, | |
| "grad_norm": 0.009147508665138223, | |
| "learning_rate": 1.6574500768049156e-05, | |
| "loss": 0.0003, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8601324757607757, | |
| "grad_norm": 0.004608721023888835, | |
| "learning_rate": 1.6559139784946237e-05, | |
| "loss": 0.0003, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8639723528847076, | |
| "grad_norm": 0.007377051739331657, | |
| "learning_rate": 1.6543778801843318e-05, | |
| "loss": 0.0003, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8678122300086397, | |
| "grad_norm": 0.012946767220354212, | |
| "learning_rate": 1.6528417818740403e-05, | |
| "loss": 0.0003, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8716521071325718, | |
| "grad_norm": 0.014416613075616386, | |
| "learning_rate": 1.6513056835637483e-05, | |
| "loss": 0.0003, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8754919842565038, | |
| "grad_norm": 0.006287700917447439, | |
| "learning_rate": 1.6497695852534564e-05, | |
| "loss": 0.0003, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8793318613804358, | |
| "grad_norm": 0.0137632558355742, | |
| "learning_rate": 1.6482334869431645e-05, | |
| "loss": 0.0003, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8831717385043678, | |
| "grad_norm": 0.004387114997812537, | |
| "learning_rate": 1.6466973886328726e-05, | |
| "loss": 0.0003, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8870116156282999, | |
| "grad_norm": 0.006112696865963161, | |
| "learning_rate": 1.6451612903225807e-05, | |
| "loss": 0.0003, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.890851492752232, | |
| "grad_norm": 0.00335985624587701, | |
| "learning_rate": 1.643625192012289e-05, | |
| "loss": 0.0003, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8946913698761639, | |
| "grad_norm": 0.012311531291747248, | |
| "learning_rate": 1.6420890937019972e-05, | |
| "loss": 0.0003, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.898531247000096, | |
| "grad_norm": 0.0056121939234220625, | |
| "learning_rate": 1.6405529953917053e-05, | |
| "loss": 0.0003, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.902371124124028, | |
| "grad_norm": 0.008461725308188539, | |
| "learning_rate": 1.6390168970814134e-05, | |
| "loss": 0.0003, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9062110012479601, | |
| "grad_norm": 0.007674455481245043, | |
| "learning_rate": 1.6374807987711215e-05, | |
| "loss": 0.0003, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9100508783718921, | |
| "grad_norm": 0.015158413045124617, | |
| "learning_rate": 1.6359447004608296e-05, | |
| "loss": 0.0003, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9138907554958241, | |
| "grad_norm": 0.012286030614540876, | |
| "learning_rate": 1.6344086021505377e-05, | |
| "loss": 0.0003, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9177306326197562, | |
| "grad_norm": 0.0030696065567895983, | |
| "learning_rate": 1.632872503840246e-05, | |
| "loss": 0.0003, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9215705097436881, | |
| "grad_norm": 0.004565605709323647, | |
| "learning_rate": 1.6313364055299542e-05, | |
| "loss": 0.0003, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9254103868676202, | |
| "grad_norm": 0.006838334269203355, | |
| "learning_rate": 1.6298003072196623e-05, | |
| "loss": 0.0003, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9292502639915523, | |
| "grad_norm": 0.00990773655397776, | |
| "learning_rate": 1.6282642089093704e-05, | |
| "loss": 0.0003, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9330901411154843, | |
| "grad_norm": 0.013734696643942659, | |
| "learning_rate": 1.6267281105990785e-05, | |
| "loss": 0.0003, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9369300182394164, | |
| "grad_norm": 0.004412383577588846, | |
| "learning_rate": 1.6251920122887866e-05, | |
| "loss": 0.0003, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9407698953633483, | |
| "grad_norm": 0.00711020501768463, | |
| "learning_rate": 1.6236559139784947e-05, | |
| "loss": 0.0003, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9446097724872804, | |
| "grad_norm": 0.004814816584197371, | |
| "learning_rate": 1.622119815668203e-05, | |
| "loss": 0.0003, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9484496496112125, | |
| "grad_norm": 0.007067507479770793, | |
| "learning_rate": 1.6205837173579112e-05, | |
| "loss": 0.0003, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9522895267351444, | |
| "grad_norm": 0.0019971454498982043, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 0.0003, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9561294038590765, | |
| "grad_norm": 0.003950118900464544, | |
| "learning_rate": 1.6175115207373274e-05, | |
| "loss": 0.0003, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.9599692809830085, | |
| "grad_norm": 0.007758837909723049, | |
| "learning_rate": 1.6159754224270355e-05, | |
| "loss": 0.0003, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9638091581069406, | |
| "grad_norm": 0.0033188489546766026, | |
| "learning_rate": 1.6144393241167436e-05, | |
| "loss": 0.0003, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9676490352308726, | |
| "grad_norm": 0.005559097491387871, | |
| "learning_rate": 1.6129032258064517e-05, | |
| "loss": 0.0003, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9714889123548046, | |
| "grad_norm": 0.0038490165506594267, | |
| "learning_rate": 1.61136712749616e-05, | |
| "loss": 0.0003, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9753287894787367, | |
| "grad_norm": 0.00511457830944101, | |
| "learning_rate": 1.6098310291858682e-05, | |
| "loss": 0.0003, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9791686666026688, | |
| "grad_norm": 0.0030355393802215156, | |
| "learning_rate": 1.6082949308755763e-05, | |
| "loss": 0.0003, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9830085437266007, | |
| "grad_norm": 0.004994307766841962, | |
| "learning_rate": 1.6067588325652844e-05, | |
| "loss": 0.0003, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9868484208505328, | |
| "grad_norm": 0.00586917516793509, | |
| "learning_rate": 1.6052227342549925e-05, | |
| "loss": 0.0003, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9906882979744648, | |
| "grad_norm": 0.003295655448503593, | |
| "learning_rate": 1.6036866359447006e-05, | |
| "loss": 0.0003, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9945281750983969, | |
| "grad_norm": 0.0017248350640545309, | |
| "learning_rate": 1.6021505376344087e-05, | |
| "loss": 0.0003, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9983680522223289, | |
| "grad_norm": 0.008360356694852496, | |
| "learning_rate": 1.600614439324117e-05, | |
| "loss": 0.0003, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.001919938561966, | |
| "grad_norm": 0.00941746675646077, | |
| "learning_rate": 1.5990783410138252e-05, | |
| "loss": 0.0003, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.005759815685898, | |
| "grad_norm": 0.007001645607480019, | |
| "learning_rate": 1.5975422427035333e-05, | |
| "loss": 0.0003, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.00959969280983, | |
| "grad_norm": 0.00872263934893287, | |
| "learning_rate": 1.5960061443932414e-05, | |
| "loss": 0.0003, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.0134395699337622, | |
| "grad_norm": 0.006998252185455207, | |
| "learning_rate": 1.5944700460829495e-05, | |
| "loss": 0.0003, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.0172794470576942, | |
| "grad_norm": 0.008293218599741276, | |
| "learning_rate": 1.5929339477726576e-05, | |
| "loss": 0.0003, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.021119324181626, | |
| "grad_norm": 0.006889505177967215, | |
| "learning_rate": 1.5913978494623657e-05, | |
| "loss": 0.0003, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.0249592013055582, | |
| "grad_norm": 0.010069071017105902, | |
| "learning_rate": 1.589861751152074e-05, | |
| "loss": 0.0003, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.0287990784294903, | |
| "grad_norm": 0.024634152994068834, | |
| "learning_rate": 1.5883256528417822e-05, | |
| "loss": 0.0003, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.0326389555534223, | |
| "grad_norm": 0.010662609432049419, | |
| "learning_rate": 1.5867895545314903e-05, | |
| "loss": 0.0003, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.0364788326773544, | |
| "grad_norm": 0.013283658088640498, | |
| "learning_rate": 1.5852534562211984e-05, | |
| "loss": 0.0003, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0403187098012863, | |
| "grad_norm": 0.004408580576716252, | |
| "learning_rate": 1.5837173579109065e-05, | |
| "loss": 0.0003, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.0441585869252183, | |
| "grad_norm": 0.005069354965439019, | |
| "learning_rate": 1.5821812596006145e-05, | |
| "loss": 0.0003, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.0479984640491504, | |
| "grad_norm": 0.005316965623453909, | |
| "learning_rate": 1.5806451612903226e-05, | |
| "loss": 0.0003, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.0518383411730825, | |
| "grad_norm": 0.004614759009371165, | |
| "learning_rate": 1.5791090629800307e-05, | |
| "loss": 0.0003, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.0556782182970146, | |
| "grad_norm": 0.006520159764607735, | |
| "learning_rate": 1.577572964669739e-05, | |
| "loss": 0.0003, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.0595180954209464, | |
| "grad_norm": 0.007212891832854262, | |
| "learning_rate": 1.5760368663594473e-05, | |
| "loss": 0.0003, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.0633579725448785, | |
| "grad_norm": 0.007329478767553912, | |
| "learning_rate": 1.5745007680491553e-05, | |
| "loss": 0.0003, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.0671978496688106, | |
| "grad_norm": 0.012902138519829355, | |
| "learning_rate": 1.5729646697388634e-05, | |
| "loss": 0.0003, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.0710377267927427, | |
| "grad_norm": 0.008115156348023293, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.0003, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.0748776039166748, | |
| "grad_norm": 0.0031222148539581814, | |
| "learning_rate": 1.5698924731182796e-05, | |
| "loss": 0.0003, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0787174810406066, | |
| "grad_norm": 0.00358892329366379, | |
| "learning_rate": 1.5683563748079877e-05, | |
| "loss": 0.0003, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.0825573581645387, | |
| "grad_norm": 0.003731764641158579, | |
| "learning_rate": 1.566820276497696e-05, | |
| "loss": 0.0003, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.0863972352884708, | |
| "grad_norm": 0.009651710392543447, | |
| "learning_rate": 1.5652841781874042e-05, | |
| "loss": 0.0003, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.0902371124124028, | |
| "grad_norm": 0.007272437262278606, | |
| "learning_rate": 1.5637480798771123e-05, | |
| "loss": 0.0003, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.094076989536335, | |
| "grad_norm": 0.0038138336241257302, | |
| "learning_rate": 1.5622119815668204e-05, | |
| "loss": 0.0003, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.0979168666602668, | |
| "grad_norm": 0.007645435686600415, | |
| "learning_rate": 1.5606758832565285e-05, | |
| "loss": 0.0003, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.1017567437841989, | |
| "grad_norm": 0.004228910730827151, | |
| "learning_rate": 1.5591397849462366e-05, | |
| "loss": 0.0003, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.105596620908131, | |
| "grad_norm": 0.007428720623939754, | |
| "learning_rate": 1.5576036866359447e-05, | |
| "loss": 0.0003, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.109436498032063, | |
| "grad_norm": 0.0109342599815278, | |
| "learning_rate": 1.556067588325653e-05, | |
| "loss": 0.0003, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.113276375155995, | |
| "grad_norm": 0.004880866544074755, | |
| "learning_rate": 1.5545314900153612e-05, | |
| "loss": 0.0003, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.117116252279927, | |
| "grad_norm": 0.013738617802960406, | |
| "learning_rate": 1.5529953917050693e-05, | |
| "loss": 0.0003, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.120956129403859, | |
| "grad_norm": 0.00498835427587417, | |
| "learning_rate": 1.5514592933947774e-05, | |
| "loss": 0.0003, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.124796006527791, | |
| "grad_norm": 0.0030319982820064474, | |
| "learning_rate": 1.5499231950844855e-05, | |
| "loss": 0.0003, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.1286358836517232, | |
| "grad_norm": 0.01067113935847921, | |
| "learning_rate": 1.5483870967741936e-05, | |
| "loss": 0.0003, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.1324757607756553, | |
| "grad_norm": 0.003620520899235841, | |
| "learning_rate": 1.5468509984639017e-05, | |
| "loss": 0.0003, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1363156378995871, | |
| "grad_norm": 0.004795888699506633, | |
| "learning_rate": 1.5453149001536098e-05, | |
| "loss": 0.0003, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.1401555150235192, | |
| "grad_norm": 0.006942279632235508, | |
| "learning_rate": 1.5437788018433182e-05, | |
| "loss": 0.0003, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.1439953921474513, | |
| "grad_norm": 0.002130066736956189, | |
| "learning_rate": 1.5422427035330263e-05, | |
| "loss": 0.0003, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.1478352692713834, | |
| "grad_norm": 0.005306111233629307, | |
| "learning_rate": 1.5407066052227344e-05, | |
| "loss": 0.0003, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.1516751463953154, | |
| "grad_norm": 0.007715185339433146, | |
| "learning_rate": 1.5391705069124425e-05, | |
| "loss": 0.0003, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.1555150235192473, | |
| "grad_norm": 0.005655458788113601, | |
| "learning_rate": 1.5376344086021506e-05, | |
| "loss": 0.0003, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.1593549006431794, | |
| "grad_norm": 0.008468884041379838, | |
| "learning_rate": 1.5360983102918587e-05, | |
| "loss": 0.0003, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.1631947777671114, | |
| "grad_norm": 0.015885667831357166, | |
| "learning_rate": 1.5345622119815668e-05, | |
| "loss": 0.0003, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.1670346548910435, | |
| "grad_norm": 0.003501130092621392, | |
| "learning_rate": 1.5330261136712752e-05, | |
| "loss": 0.0003, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.1708745320149756, | |
| "grad_norm": 0.007702536520407232, | |
| "learning_rate": 1.5314900153609833e-05, | |
| "loss": 0.0003, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.1747144091389075, | |
| "grad_norm": 0.00397165276711294, | |
| "learning_rate": 1.5299539170506914e-05, | |
| "loss": 0.0003, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.1785542862628395, | |
| "grad_norm": 0.005806224235699376, | |
| "learning_rate": 1.5284178187403995e-05, | |
| "loss": 0.0003, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.1823941633867716, | |
| "grad_norm": 0.014031967789109468, | |
| "learning_rate": 1.5268817204301076e-05, | |
| "loss": 0.0003, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.1862340405107037, | |
| "grad_norm": 0.011597083317184202, | |
| "learning_rate": 1.5253456221198157e-05, | |
| "loss": 0.0003, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1900739176346358, | |
| "grad_norm": 0.003137805117200966, | |
| "learning_rate": 1.523809523809524e-05, | |
| "loss": 0.0003, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1939137947585676, | |
| "grad_norm": 0.008571751181818212, | |
| "learning_rate": 1.5222734254992322e-05, | |
| "loss": 0.0003, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1977536718824997, | |
| "grad_norm": 0.010354799239461948, | |
| "learning_rate": 1.5207373271889403e-05, | |
| "loss": 0.0003, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.2015935490064318, | |
| "grad_norm": 0.006888806938098952, | |
| "learning_rate": 1.5192012288786484e-05, | |
| "loss": 0.0003, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.2054334261303639, | |
| "grad_norm": 0.00560528529757068, | |
| "learning_rate": 1.5176651305683565e-05, | |
| "loss": 0.0003, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.209273303254296, | |
| "grad_norm": 0.004803989045845827, | |
| "learning_rate": 1.5161290322580646e-05, | |
| "loss": 0.0003, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.2131131803782278, | |
| "grad_norm": 0.010138889246585629, | |
| "learning_rate": 1.5145929339477728e-05, | |
| "loss": 0.0003, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.2169530575021599, | |
| "grad_norm": 0.010337939960763999, | |
| "learning_rate": 1.5130568356374809e-05, | |
| "loss": 0.0003, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.220792934626092, | |
| "grad_norm": 0.0034014308570009524, | |
| "learning_rate": 1.511520737327189e-05, | |
| "loss": 0.0003, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.224632811750024, | |
| "grad_norm": 0.0029402516168536455, | |
| "learning_rate": 1.5099846390168973e-05, | |
| "loss": 0.0003, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.228472688873956, | |
| "grad_norm": 0.0077860538241216895, | |
| "learning_rate": 1.5084485407066054e-05, | |
| "loss": 0.0003, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.232312565997888, | |
| "grad_norm": 0.005558974463946693, | |
| "learning_rate": 1.5069124423963135e-05, | |
| "loss": 0.0003, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.23615244312182, | |
| "grad_norm": 0.010803718895790473, | |
| "learning_rate": 1.5053763440860215e-05, | |
| "loss": 0.0003, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.2399923202457521, | |
| "grad_norm": 0.007530607391625148, | |
| "learning_rate": 1.5038402457757298e-05, | |
| "loss": 0.0003, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.2438321973696842, | |
| "grad_norm": 0.0024515971935964494, | |
| "learning_rate": 1.5023041474654379e-05, | |
| "loss": 0.0003, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.2476720744936163, | |
| "grad_norm": 0.004793434169621187, | |
| "learning_rate": 1.500768049155146e-05, | |
| "loss": 0.0003, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.2515119516175481, | |
| "grad_norm": 0.005198731469624748, | |
| "learning_rate": 1.4992319508448543e-05, | |
| "loss": 0.0003, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.2553518287414802, | |
| "grad_norm": 0.002181117809025224, | |
| "learning_rate": 1.4976958525345623e-05, | |
| "loss": 0.0003, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.2591917058654123, | |
| "grad_norm": 0.0037625493242586724, | |
| "learning_rate": 1.4961597542242704e-05, | |
| "loss": 0.0003, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.2630315829893444, | |
| "grad_norm": 0.005316485072585287, | |
| "learning_rate": 1.4946236559139787e-05, | |
| "loss": 0.0003, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.2668714601132764, | |
| "grad_norm": 0.004784634700102955, | |
| "learning_rate": 1.4930875576036868e-05, | |
| "loss": 0.0003, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.2707113372372083, | |
| "grad_norm": 0.012696100545567314, | |
| "learning_rate": 1.4915514592933949e-05, | |
| "loss": 0.0003, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.2745512143611404, | |
| "grad_norm": 0.005311522663075818, | |
| "learning_rate": 1.490015360983103e-05, | |
| "loss": 0.0003, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.2783910914850725, | |
| "grad_norm": 0.0028746469015276917, | |
| "learning_rate": 1.4884792626728112e-05, | |
| "loss": 0.0003, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.2822309686090045, | |
| "grad_norm": 0.010467141532211954, | |
| "learning_rate": 1.4869431643625193e-05, | |
| "loss": 0.0003, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.2860708457329366, | |
| "grad_norm": 0.004927112231184059, | |
| "learning_rate": 1.4854070660522274e-05, | |
| "loss": 0.0003, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.2899107228568685, | |
| "grad_norm": 0.004240757320008939, | |
| "learning_rate": 1.4838709677419357e-05, | |
| "loss": 0.0003, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.2937505999808006, | |
| "grad_norm": 0.004593149961249671, | |
| "learning_rate": 1.4823348694316438e-05, | |
| "loss": 0.0003, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2975904771047326, | |
| "grad_norm": 0.012379522683153942, | |
| "learning_rate": 1.4807987711213519e-05, | |
| "loss": 0.0003, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.3014303542286647, | |
| "grad_norm": 0.0030617662082256646, | |
| "learning_rate": 1.47926267281106e-05, | |
| "loss": 0.0003, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.3052702313525968, | |
| "grad_norm": 0.012477621815569138, | |
| "learning_rate": 1.477726574500768e-05, | |
| "loss": 0.0003, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.3091101084765286, | |
| "grad_norm": 0.008973089780140532, | |
| "learning_rate": 1.4761904761904763e-05, | |
| "loss": 0.0003, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.3129499856004607, | |
| "grad_norm": 0.006012620992408176, | |
| "learning_rate": 1.4746543778801846e-05, | |
| "loss": 0.0003, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.3167898627243928, | |
| "grad_norm": 0.002978034783135485, | |
| "learning_rate": 1.4731182795698927e-05, | |
| "loss": 0.0003, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.3206297398483249, | |
| "grad_norm": 0.0076361528432857696, | |
| "learning_rate": 1.4715821812596008e-05, | |
| "loss": 0.0003, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.324469616972257, | |
| "grad_norm": 0.01162698278703534, | |
| "learning_rate": 1.4700460829493089e-05, | |
| "loss": 0.0003, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.3283094940961888, | |
| "grad_norm": 0.0023240753476107792, | |
| "learning_rate": 1.468509984639017e-05, | |
| "loss": 0.0003, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.332149371220121, | |
| "grad_norm": 0.005329365238908933, | |
| "learning_rate": 1.466973886328725e-05, | |
| "loss": 0.0003, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.335989248344053, | |
| "grad_norm": 0.007711907461069916, | |
| "learning_rate": 1.4654377880184335e-05, | |
| "loss": 0.0003, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.339829125467985, | |
| "grad_norm": 0.0051703315905598365, | |
| "learning_rate": 1.4639016897081416e-05, | |
| "loss": 0.0003, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.3436690025919171, | |
| "grad_norm": 0.00656849551678766, | |
| "learning_rate": 1.4623655913978497e-05, | |
| "loss": 0.0003, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.347508879715849, | |
| "grad_norm": 0.004684960157051687, | |
| "learning_rate": 1.4608294930875578e-05, | |
| "loss": 0.0003, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.351348756839781, | |
| "grad_norm": 0.003653453570474896, | |
| "learning_rate": 1.4592933947772658e-05, | |
| "loss": 0.0003, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.3551886339637131, | |
| "grad_norm": 0.0036918985360549997, | |
| "learning_rate": 1.457757296466974e-05, | |
| "loss": 0.0003, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.3590285110876452, | |
| "grad_norm": 0.005265072866815289, | |
| "learning_rate": 1.456221198156682e-05, | |
| "loss": 0.0003, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.3628683882115773, | |
| "grad_norm": 0.002918853603638478, | |
| "learning_rate": 1.4546850998463905e-05, | |
| "loss": 0.0003, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.3667082653355092, | |
| "grad_norm": 0.00935186960220143, | |
| "learning_rate": 1.4531490015360986e-05, | |
| "loss": 0.0003, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.3705481424594412, | |
| "grad_norm": 0.0047564377664160475, | |
| "learning_rate": 1.4516129032258066e-05, | |
| "loss": 0.0003, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.3743880195833733, | |
| "grad_norm": 0.0027827569792821744, | |
| "learning_rate": 1.4500768049155147e-05, | |
| "loss": 0.0003, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.3782278967073054, | |
| "grad_norm": 0.007874618569613728, | |
| "learning_rate": 1.4485407066052228e-05, | |
| "loss": 0.0003, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.3820677738312375, | |
| "grad_norm": 0.0021217629192740463, | |
| "learning_rate": 1.447004608294931e-05, | |
| "loss": 0.0003, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.3859076509551693, | |
| "grad_norm": 0.0015950623575258483, | |
| "learning_rate": 1.445468509984639e-05, | |
| "loss": 0.0003, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.3897475280791014, | |
| "grad_norm": 0.014526354269413554, | |
| "learning_rate": 1.4439324116743471e-05, | |
| "loss": 0.0003, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.3935874052030335, | |
| "grad_norm": 0.004710032072376596, | |
| "learning_rate": 1.4423963133640555e-05, | |
| "loss": 0.0003, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.3974272823269656, | |
| "grad_norm": 0.008945201526158983, | |
| "learning_rate": 1.4408602150537636e-05, | |
| "loss": 0.0003, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.4012671594508976, | |
| "grad_norm": 0.004277811812491989, | |
| "learning_rate": 1.4393241167434717e-05, | |
| "loss": 0.0003, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.4051070365748295, | |
| "grad_norm": 0.0039422467987323675, | |
| "learning_rate": 1.4377880184331798e-05, | |
| "loss": 0.0003, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.4089469136987616, | |
| "grad_norm": 0.00676608035228549, | |
| "learning_rate": 1.4362519201228879e-05, | |
| "loss": 0.0003, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.4127867908226936, | |
| "grad_norm": 0.010133852268400016, | |
| "learning_rate": 1.434715821812596e-05, | |
| "loss": 0.0003, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.4166266679466257, | |
| "grad_norm": 0.009268588463915765, | |
| "learning_rate": 1.4331797235023041e-05, | |
| "loss": 0.0003, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.4204665450705578, | |
| "grad_norm": 0.01313301243339411, | |
| "learning_rate": 1.4316436251920125e-05, | |
| "loss": 0.0003, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.4243064221944897, | |
| "grad_norm": 0.0037448179676893684, | |
| "learning_rate": 1.4301075268817206e-05, | |
| "loss": 0.0003, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.4281462993184217, | |
| "grad_norm": 0.007552592646915242, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.0003, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.4319861764423538, | |
| "grad_norm": 0.0020607608386333867, | |
| "learning_rate": 1.4270353302611368e-05, | |
| "loss": 0.0003, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.435826053566286, | |
| "grad_norm": 0.0017099532229802538, | |
| "learning_rate": 1.4254992319508449e-05, | |
| "loss": 0.0003, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.439665930690218, | |
| "grad_norm": 0.009438446281080615, | |
| "learning_rate": 1.423963133640553e-05, | |
| "loss": 0.0003, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.4435058078141498, | |
| "grad_norm": 0.0031712205983693118, | |
| "learning_rate": 1.422427035330261e-05, | |
| "loss": 0.0003, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.447345684938082, | |
| "grad_norm": 0.004189847354145225, | |
| "learning_rate": 1.4208909370199695e-05, | |
| "loss": 0.0003, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.451185562062014, | |
| "grad_norm": 0.01235381752195261, | |
| "learning_rate": 1.4193548387096776e-05, | |
| "loss": 0.0003, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.455025439185946, | |
| "grad_norm": 0.008228828470936082, | |
| "learning_rate": 1.4178187403993857e-05, | |
| "loss": 0.0003, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.4588653163098781, | |
| "grad_norm": 0.00803929836492854, | |
| "learning_rate": 1.4162826420890938e-05, | |
| "loss": 0.0003, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.46270519343381, | |
| "grad_norm": 0.007786110195975117, | |
| "learning_rate": 1.4147465437788019e-05, | |
| "loss": 0.0003, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.466545070557742, | |
| "grad_norm": 0.001228617041920703, | |
| "learning_rate": 1.41321044546851e-05, | |
| "loss": 0.0003, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.4703849476816742, | |
| "grad_norm": 0.0038876544517334236, | |
| "learning_rate": 1.4116743471582182e-05, | |
| "loss": 0.0003, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.4742248248056062, | |
| "grad_norm": 0.00866982390635113, | |
| "learning_rate": 1.4101382488479263e-05, | |
| "loss": 0.0003, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.4780647019295383, | |
| "grad_norm": 0.011102877135429783, | |
| "learning_rate": 1.4086021505376346e-05, | |
| "loss": 0.0003, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.4819045790534702, | |
| "grad_norm": 0.01047300225237876, | |
| "learning_rate": 1.4070660522273427e-05, | |
| "loss": 0.0003, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.4857444561774023, | |
| "grad_norm": 0.006851930231571451, | |
| "learning_rate": 1.4055299539170508e-05, | |
| "loss": 0.0003, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.4895843333013343, | |
| "grad_norm": 0.004987839668683684, | |
| "learning_rate": 1.4039938556067589e-05, | |
| "loss": 0.0003, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.4934242104252664, | |
| "grad_norm": 0.0034966043588402418, | |
| "learning_rate": 1.4024577572964671e-05, | |
| "loss": 0.0003, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.4972640875491985, | |
| "grad_norm": 0.002801267441148025, | |
| "learning_rate": 1.4009216589861752e-05, | |
| "loss": 0.0003, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.5011039646731303, | |
| "grad_norm": 0.0018703310178060316, | |
| "learning_rate": 1.3993855606758833e-05, | |
| "loss": 0.0003, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.5049438417970626, | |
| "grad_norm": 0.0015330340455295792, | |
| "learning_rate": 1.3978494623655916e-05, | |
| "loss": 0.0003, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.5087837189209945, | |
| "grad_norm": 0.006896242096430408, | |
| "learning_rate": 1.3963133640552997e-05, | |
| "loss": 0.0003, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.5126235960449266, | |
| "grad_norm": 0.0030363392744381756, | |
| "learning_rate": 1.3947772657450078e-05, | |
| "loss": 0.0003, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.5164634731688587, | |
| "grad_norm": 0.0036790867879865252, | |
| "learning_rate": 1.3932411674347159e-05, | |
| "loss": 0.0003, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.5203033502927905, | |
| "grad_norm": 0.003933748182425131, | |
| "learning_rate": 1.3917050691244241e-05, | |
| "loss": 0.0003, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.5241432274167228, | |
| "grad_norm": 0.004219499765943358, | |
| "learning_rate": 1.3901689708141322e-05, | |
| "loss": 0.0003, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.5279831045406547, | |
| "grad_norm": 0.007300405744499423, | |
| "learning_rate": 1.3886328725038403e-05, | |
| "loss": 0.0003, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.5318229816645867, | |
| "grad_norm": 0.004617157024817587, | |
| "learning_rate": 1.3870967741935486e-05, | |
| "loss": 0.0003, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.5356628587885188, | |
| "grad_norm": 0.010017965659017577, | |
| "learning_rate": 1.3855606758832567e-05, | |
| "loss": 0.0003, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.5395027359124507, | |
| "grad_norm": 0.006735678653952309, | |
| "learning_rate": 1.3840245775729648e-05, | |
| "loss": 0.0003, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.543342613036383, | |
| "grad_norm": 0.0050467679764191345, | |
| "learning_rate": 1.382488479262673e-05, | |
| "loss": 0.0003, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.5471824901603148, | |
| "grad_norm": 0.0068324972480196195, | |
| "learning_rate": 1.3809523809523811e-05, | |
| "loss": 0.0003, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.551022367284247, | |
| "grad_norm": 0.0114684792397768, | |
| "learning_rate": 1.3794162826420892e-05, | |
| "loss": 0.0003, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.554862244408179, | |
| "grad_norm": 0.005883994452757438, | |
| "learning_rate": 1.3778801843317973e-05, | |
| "loss": 0.0003, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.5587021215321109, | |
| "grad_norm": 0.01105835794681893, | |
| "learning_rate": 1.3763440860215056e-05, | |
| "loss": 0.0003, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.5625419986560432, | |
| "grad_norm": 0.005406277635716297, | |
| "learning_rate": 1.3748079877112136e-05, | |
| "loss": 0.0003, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.566381875779975, | |
| "grad_norm": 0.003846164185768484, | |
| "learning_rate": 1.3732718894009217e-05, | |
| "loss": 0.0003, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.570221752903907, | |
| "grad_norm": 0.017545096795138392, | |
| "learning_rate": 1.37173579109063e-05, | |
| "loss": 0.0003, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.5740616300278392, | |
| "grad_norm": 0.001157703963834084, | |
| "learning_rate": 1.3701996927803381e-05, | |
| "loss": 0.0003, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.577901507151771, | |
| "grad_norm": 0.0047739921811557765, | |
| "learning_rate": 1.3686635944700462e-05, | |
| "loss": 0.0003, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.5817413842757033, | |
| "grad_norm": 0.005347890941721552, | |
| "learning_rate": 1.3671274961597543e-05, | |
| "loss": 0.0003, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.5855812613996352, | |
| "grad_norm": 0.0057929878311273305, | |
| "learning_rate": 1.3655913978494624e-05, | |
| "loss": 0.0003, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.5894211385235673, | |
| "grad_norm": 0.0025138054493328834, | |
| "learning_rate": 1.3640552995391706e-05, | |
| "loss": 0.0003, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.5932610156474993, | |
| "grad_norm": 0.005021480665595978, | |
| "learning_rate": 1.3625192012288789e-05, | |
| "loss": 0.0003, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.5971008927714312, | |
| "grad_norm": 0.00485782939696147, | |
| "learning_rate": 1.360983102918587e-05, | |
| "loss": 0.0003, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.6009407698953635, | |
| "grad_norm": 0.005107522993390195, | |
| "learning_rate": 1.359447004608295e-05, | |
| "loss": 0.0003, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.6047806470192953, | |
| "grad_norm": 0.004581168496164048, | |
| "learning_rate": 1.3579109062980032e-05, | |
| "loss": 0.0003, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.6086205241432274, | |
| "grad_norm": 0.0034042153583185666, | |
| "learning_rate": 1.3563748079877113e-05, | |
| "loss": 0.0003, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.6124604012671595, | |
| "grad_norm": 0.004002545971611289, | |
| "learning_rate": 1.3548387096774194e-05, | |
| "loss": 0.0003, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6163002783910914, | |
| "grad_norm": 0.01005471207244953, | |
| "learning_rate": 1.3533026113671278e-05, | |
| "loss": 0.0003, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.6201401555150237, | |
| "grad_norm": 0.007276858107586832, | |
| "learning_rate": 1.3517665130568359e-05, | |
| "loss": 0.0003, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.6239800326389555, | |
| "grad_norm": 0.015261156983378876, | |
| "learning_rate": 1.350230414746544e-05, | |
| "loss": 0.0003, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.6278199097628876, | |
| "grad_norm": 0.001529811428658314, | |
| "learning_rate": 1.348694316436252e-05, | |
| "loss": 0.0003, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.6316597868868197, | |
| "grad_norm": 0.01176486699064342, | |
| "learning_rate": 1.3471582181259602e-05, | |
| "loss": 0.0003, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.6354996640107515, | |
| "grad_norm": 0.005155818155633164, | |
| "learning_rate": 1.3456221198156683e-05, | |
| "loss": 0.0003, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.6393395411346838, | |
| "grad_norm": 0.003646302493009192, | |
| "learning_rate": 1.3440860215053763e-05, | |
| "loss": 0.0003, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.6431794182586157, | |
| "grad_norm": 0.013145592318521696, | |
| "learning_rate": 1.3425499231950848e-05, | |
| "loss": 0.0003, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.6470192953825478, | |
| "grad_norm": 0.00808033295372671, | |
| "learning_rate": 1.3410138248847929e-05, | |
| "loss": 0.0003, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.6508591725064798, | |
| "grad_norm": 0.004814145910232119, | |
| "learning_rate": 1.339477726574501e-05, | |
| "loss": 0.0003, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.6546990496304117, | |
| "grad_norm": 0.0036057449146750355, | |
| "learning_rate": 1.337941628264209e-05, | |
| "loss": 0.0003, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.658538926754344, | |
| "grad_norm": 0.0047424187952164075, | |
| "learning_rate": 1.3364055299539171e-05, | |
| "loss": 0.0003, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.6623788038782759, | |
| "grad_norm": 0.006050475433915184, | |
| "learning_rate": 1.3348694316436252e-05, | |
| "loss": 0.0003, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.666218681002208, | |
| "grad_norm": 0.0036218140340270734, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0003, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.67005855812614, | |
| "grad_norm": 0.006466212744722768, | |
| "learning_rate": 1.3317972350230414e-05, | |
| "loss": 0.0003, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.6738984352500719, | |
| "grad_norm": 0.00888961973510433, | |
| "learning_rate": 1.3302611367127499e-05, | |
| "loss": 0.0003, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.6777383123740042, | |
| "grad_norm": 0.003812962996861351, | |
| "learning_rate": 1.328725038402458e-05, | |
| "loss": 0.0003, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.681578189497936, | |
| "grad_norm": 0.0022713408875346015, | |
| "learning_rate": 1.327188940092166e-05, | |
| "loss": 0.0003, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.685418066621868, | |
| "grad_norm": 0.006949027274056569, | |
| "learning_rate": 1.3256528417818741e-05, | |
| "loss": 0.0003, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.6892579437458002, | |
| "grad_norm": 0.002962901611754243, | |
| "learning_rate": 1.3241167434715822e-05, | |
| "loss": 0.0003, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.693097820869732, | |
| "grad_norm": 0.003682820073862977, | |
| "learning_rate": 1.3225806451612903e-05, | |
| "loss": 0.0003, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.6969376979936643, | |
| "grad_norm": 0.006163319026317314, | |
| "learning_rate": 1.3210445468509984e-05, | |
| "loss": 0.0003, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.7007775751175962, | |
| "grad_norm": 0.012379479151877055, | |
| "learning_rate": 1.3195084485407068e-05, | |
| "loss": 0.0003, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.7046174522415283, | |
| "grad_norm": 0.010094598031404283, | |
| "learning_rate": 1.317972350230415e-05, | |
| "loss": 0.0003, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.7084573293654604, | |
| "grad_norm": 0.006009523374733768, | |
| "learning_rate": 1.316436251920123e-05, | |
| "loss": 0.0003, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.7122972064893922, | |
| "grad_norm": 0.006067418145337392, | |
| "learning_rate": 1.3149001536098311e-05, | |
| "loss": 0.0003, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.7161370836133245, | |
| "grad_norm": 0.0027024918930963412, | |
| "learning_rate": 1.3133640552995392e-05, | |
| "loss": 0.0003, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.7199769607372564, | |
| "grad_norm": 0.0021877878803625056, | |
| "learning_rate": 1.3118279569892473e-05, | |
| "loss": 0.0003, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.7238168378611884, | |
| "grad_norm": 0.007268918222066073, | |
| "learning_rate": 1.3102918586789554e-05, | |
| "loss": 0.0003, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.7276567149851205, | |
| "grad_norm": 0.006001244259273792, | |
| "learning_rate": 1.3087557603686638e-05, | |
| "loss": 0.0003, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.7314965921090524, | |
| "grad_norm": 0.00978511362310727, | |
| "learning_rate": 1.307219662058372e-05, | |
| "loss": 0.0003, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.7353364692329847, | |
| "grad_norm": 0.0061450285375501765, | |
| "learning_rate": 1.30568356374808e-05, | |
| "loss": 0.0003, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.7391763463569165, | |
| "grad_norm": 0.009230442874108066, | |
| "learning_rate": 1.3041474654377881e-05, | |
| "loss": 0.0003, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.7430162234808486, | |
| "grad_norm": 0.00791075409422416, | |
| "learning_rate": 1.3026113671274962e-05, | |
| "loss": 0.0003, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.7468561006047807, | |
| "grad_norm": 0.011115169586698243, | |
| "learning_rate": 1.3010752688172043e-05, | |
| "loss": 0.0003, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.7506959777287125, | |
| "grad_norm": 0.009573241931257201, | |
| "learning_rate": 1.2995391705069126e-05, | |
| "loss": 0.0003, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.7545358548526448, | |
| "grad_norm": 0.004473006067490304, | |
| "learning_rate": 1.2980030721966206e-05, | |
| "loss": 0.0003, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.7583757319765767, | |
| "grad_norm": 0.012766183527931843, | |
| "learning_rate": 1.2964669738863289e-05, | |
| "loss": 0.0003, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.7622156091005088, | |
| "grad_norm": 0.0009086621286683109, | |
| "learning_rate": 1.294930875576037e-05, | |
| "loss": 0.0003, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.7660554862244409, | |
| "grad_norm": 0.002772310397620897, | |
| "learning_rate": 1.2933947772657451e-05, | |
| "loss": 0.0003, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.7698953633483727, | |
| "grad_norm": 0.00251248063627462, | |
| "learning_rate": 1.2918586789554532e-05, | |
| "loss": 0.0003, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.773735240472305, | |
| "grad_norm": 0.005139480616844398, | |
| "learning_rate": 1.2903225806451613e-05, | |
| "loss": 0.0003, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.7775751175962369, | |
| "grad_norm": 0.005373139082891636, | |
| "learning_rate": 1.2887864823348695e-05, | |
| "loss": 0.0003, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.781414994720169, | |
| "grad_norm": 0.0061935858766664015, | |
| "learning_rate": 1.2872503840245776e-05, | |
| "loss": 0.0003, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.785254871844101, | |
| "grad_norm": 0.0021300277356281886, | |
| "learning_rate": 1.2857142857142859e-05, | |
| "loss": 0.0003, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.7890947489680329, | |
| "grad_norm": 0.002853522173265363, | |
| "learning_rate": 1.284178187403994e-05, | |
| "loss": 0.0003, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.7929346260919652, | |
| "grad_norm": 0.005011976697348055, | |
| "learning_rate": 1.282642089093702e-05, | |
| "loss": 0.0003, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.796774503215897, | |
| "grad_norm": 0.009681647398622931, | |
| "learning_rate": 1.2811059907834102e-05, | |
| "loss": 0.0003, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.8006143803398291, | |
| "grad_norm": 0.004303781195289312, | |
| "learning_rate": 1.2795698924731184e-05, | |
| "loss": 0.0003, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.8044542574637612, | |
| "grad_norm": 0.007494764921984889, | |
| "learning_rate": 1.2780337941628265e-05, | |
| "loss": 0.0003, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.808294134587693, | |
| "grad_norm": 0.007339386856967861, | |
| "learning_rate": 1.2764976958525346e-05, | |
| "loss": 0.0003, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.8121340117116254, | |
| "grad_norm": 0.00291836055735903, | |
| "learning_rate": 1.2749615975422429e-05, | |
| "loss": 0.0003, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.8159738888355572, | |
| "grad_norm": 0.0029509058375776632, | |
| "learning_rate": 1.273425499231951e-05, | |
| "loss": 0.0003, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.8198137659594893, | |
| "grad_norm": 0.0032136174047263694, | |
| "learning_rate": 1.271889400921659e-05, | |
| "loss": 0.0003, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.8236536430834214, | |
| "grad_norm": 0.006459170016331068, | |
| "learning_rate": 1.2703533026113673e-05, | |
| "loss": 0.0003, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.8274935202073532, | |
| "grad_norm": 0.004128532101702144, | |
| "learning_rate": 1.2688172043010754e-05, | |
| "loss": 0.0003, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.8313333973312855, | |
| "grad_norm": 0.007831239303432262, | |
| "learning_rate": 1.2672811059907835e-05, | |
| "loss": 0.0003, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.8351732744552174, | |
| "grad_norm": 0.006446091317683293, | |
| "learning_rate": 1.2657450076804916e-05, | |
| "loss": 0.0003, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.8390131515791495, | |
| "grad_norm": 0.005149611980043962, | |
| "learning_rate": 1.2642089093701997e-05, | |
| "loss": 0.0003, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.8428530287030815, | |
| "grad_norm": 0.00870763552581347, | |
| "learning_rate": 1.262672811059908e-05, | |
| "loss": 0.0003, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.8466929058270134, | |
| "grad_norm": 0.00174497439177369, | |
| "learning_rate": 1.261136712749616e-05, | |
| "loss": 0.0003, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.8505327829509457, | |
| "grad_norm": 0.004544066116015543, | |
| "learning_rate": 1.2596006144393243e-05, | |
| "loss": 0.0003, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.8543726600748776, | |
| "grad_norm": 0.009197541337159318, | |
| "learning_rate": 1.2580645161290324e-05, | |
| "loss": 0.0003, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.8582125371988096, | |
| "grad_norm": 0.005155440888093791, | |
| "learning_rate": 1.2565284178187405e-05, | |
| "loss": 0.0003, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.8620524143227417, | |
| "grad_norm": 0.0033030466348720976, | |
| "learning_rate": 1.2549923195084486e-05, | |
| "loss": 0.0003, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.8658922914466736, | |
| "grad_norm": 0.002903647020793528, | |
| "learning_rate": 1.2534562211981567e-05, | |
| "loss": 0.0003, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.8697321685706059, | |
| "grad_norm": 0.0037452166656142243, | |
| "learning_rate": 1.251920122887865e-05, | |
| "loss": 0.0003, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.8735720456945377, | |
| "grad_norm": 0.005423455715151995, | |
| "learning_rate": 1.2503840245775732e-05, | |
| "loss": 0.0003, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.8774119228184698, | |
| "grad_norm": 0.004529789228413102, | |
| "learning_rate": 1.2488479262672813e-05, | |
| "loss": 0.0003, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.8812517999424019, | |
| "grad_norm": 0.005454742890452505, | |
| "learning_rate": 1.2473118279569894e-05, | |
| "loss": 0.0003, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.8850916770663337, | |
| "grad_norm": 0.003634537048057344, | |
| "learning_rate": 1.2457757296466975e-05, | |
| "loss": 0.0003, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.888931554190266, | |
| "grad_norm": 0.005577515598529365, | |
| "learning_rate": 1.2442396313364056e-05, | |
| "loss": 0.0002, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.892771431314198, | |
| "grad_norm": 0.003272095460140622, | |
| "learning_rate": 1.2427035330261137e-05, | |
| "loss": 0.0003, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.89661130843813, | |
| "grad_norm": 0.002842547869737635, | |
| "learning_rate": 1.2411674347158221e-05, | |
| "loss": 0.0003, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.900451185562062, | |
| "grad_norm": 0.0030756210619306856, | |
| "learning_rate": 1.2396313364055302e-05, | |
| "loss": 0.0003, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.904291062685994, | |
| "grad_norm": 0.003876374581736294, | |
| "learning_rate": 1.2380952380952383e-05, | |
| "loss": 0.0003, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.9081309398099262, | |
| "grad_norm": 0.0036495611616124695, | |
| "learning_rate": 1.2365591397849464e-05, | |
| "loss": 0.0003, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.911970816933858, | |
| "grad_norm": 0.0022680105975665986, | |
| "learning_rate": 1.2350230414746545e-05, | |
| "loss": 0.0003, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.9158106940577901, | |
| "grad_norm": 0.007439434020570674, | |
| "learning_rate": 1.2334869431643626e-05, | |
| "loss": 0.0003, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.9196505711817222, | |
| "grad_norm": 0.004474265624120062, | |
| "learning_rate": 1.2319508448540707e-05, | |
| "loss": 0.0003, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.923490448305654, | |
| "grad_norm": 0.009088460064078376, | |
| "learning_rate": 1.2304147465437787e-05, | |
| "loss": 0.0003, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.9273303254295864, | |
| "grad_norm": 0.003261287645656727, | |
| "learning_rate": 1.2288786482334872e-05, | |
| "loss": 0.0003, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.9311702025535182, | |
| "grad_norm": 0.003381354646964584, | |
| "learning_rate": 1.2273425499231953e-05, | |
| "loss": 0.0003, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.9350100796774503, | |
| "grad_norm": 0.011785456037052021, | |
| "learning_rate": 1.2258064516129034e-05, | |
| "loss": 0.0003, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.9388499568013824, | |
| "grad_norm": 0.003962236601696711, | |
| "learning_rate": 1.2242703533026115e-05, | |
| "loss": 0.0003, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.9426898339253142, | |
| "grad_norm": 0.009479934166440634, | |
| "learning_rate": 1.2227342549923195e-05, | |
| "loss": 0.0003, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.9465297110492465, | |
| "grad_norm": 0.0017963941477855995, | |
| "learning_rate": 1.2211981566820276e-05, | |
| "loss": 0.0003, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.9503695881731784, | |
| "grad_norm": 0.003122530310710232, | |
| "learning_rate": 1.2196620583717357e-05, | |
| "loss": 0.0003, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.9542094652971105, | |
| "grad_norm": 0.007807269576409189, | |
| "learning_rate": 1.2181259600614442e-05, | |
| "loss": 0.0003, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.9580493424210426, | |
| "grad_norm": 0.010712992154630164, | |
| "learning_rate": 1.2165898617511523e-05, | |
| "loss": 0.0003, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.9618892195449744, | |
| "grad_norm": 0.007059579575096798, | |
| "learning_rate": 1.2150537634408604e-05, | |
| "loss": 0.0003, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.9657290966689067, | |
| "grad_norm": 0.006965891986586176, | |
| "learning_rate": 1.2135176651305684e-05, | |
| "loss": 0.0003, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.9695689737928386, | |
| "grad_norm": 0.0032147842412904005, | |
| "learning_rate": 1.2119815668202765e-05, | |
| "loss": 0.0003, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.9734088509167707, | |
| "grad_norm": 0.002816251401671389, | |
| "learning_rate": 1.2104454685099846e-05, | |
| "loss": 0.0003, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.9772487280407027, | |
| "grad_norm": 0.00406117662486731, | |
| "learning_rate": 1.2089093701996927e-05, | |
| "loss": 0.0003, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.9810886051646346, | |
| "grad_norm": 0.005187669887429079, | |
| "learning_rate": 1.2073732718894012e-05, | |
| "loss": 0.0003, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.9849284822885669, | |
| "grad_norm": 0.006087920999209694, | |
| "learning_rate": 1.2058371735791092e-05, | |
| "loss": 0.0003, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.9887683594124987, | |
| "grad_norm": 0.004575814464986424, | |
| "learning_rate": 1.2043010752688173e-05, | |
| "loss": 0.0003, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.9926082365364308, | |
| "grad_norm": 0.014370083560726126, | |
| "learning_rate": 1.2027649769585254e-05, | |
| "loss": 0.0003, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.996448113660363, | |
| "grad_norm": 0.005014106197288017, | |
| "learning_rate": 1.2012288786482335e-05, | |
| "loss": 0.0003, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.007128867284239335, | |
| "learning_rate": 1.1996927803379416e-05, | |
| "loss": 0.0002, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.003839877123932, | |
| "grad_norm": 0.0035767749443771112, | |
| "learning_rate": 1.1981566820276497e-05, | |
| "loss": 0.0003, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.007679754247864, | |
| "grad_norm": 0.010409746160305624, | |
| "learning_rate": 1.1966205837173581e-05, | |
| "loss": 0.0003, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.011519631371796, | |
| "grad_norm": 0.006663355286188172, | |
| "learning_rate": 1.1950844854070662e-05, | |
| "loss": 0.0003, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.0153595084957283, | |
| "grad_norm": 0.007056337672257391, | |
| "learning_rate": 1.1935483870967743e-05, | |
| "loss": 0.0003, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.01919938561966, | |
| "grad_norm": 0.007042233672731786, | |
| "learning_rate": 1.1920122887864824e-05, | |
| "loss": 0.0002, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.023039262743592, | |
| "grad_norm": 0.0034724836039492263, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 0.0002, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.0268791398675243, | |
| "grad_norm": 0.00582311493886213, | |
| "learning_rate": 1.1889400921658986e-05, | |
| "loss": 0.0003, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.030719016991456, | |
| "grad_norm": 0.008442699615605335, | |
| "learning_rate": 1.1874039938556069e-05, | |
| "loss": 0.0003, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.0345588941153885, | |
| "grad_norm": 0.007067754727575771, | |
| "learning_rate": 1.185867895545315e-05, | |
| "loss": 0.0003, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.0383987712393203, | |
| "grad_norm": 0.011473488398813741, | |
| "learning_rate": 1.1843317972350232e-05, | |
| "loss": 0.0003, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.042238648363252, | |
| "grad_norm": 0.0059529034647220706, | |
| "learning_rate": 1.1827956989247313e-05, | |
| "loss": 0.0003, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.0460785254871845, | |
| "grad_norm": 0.005899117884748007, | |
| "learning_rate": 1.1812596006144394e-05, | |
| "loss": 0.0003, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.0499184026111164, | |
| "grad_norm": 0.011298726784506993, | |
| "learning_rate": 1.1797235023041475e-05, | |
| "loss": 0.0003, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.0537582797350487, | |
| "grad_norm": 0.005795088286120419, | |
| "learning_rate": 1.1781874039938556e-05, | |
| "loss": 0.0003, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.0575981568589805, | |
| "grad_norm": 0.0036837813367058586, | |
| "learning_rate": 1.1766513056835639e-05, | |
| "loss": 0.0003, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.0614380339829124, | |
| "grad_norm": 0.006214527860410358, | |
| "learning_rate": 1.175115207373272e-05, | |
| "loss": 0.0003, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.0652779111068447, | |
| "grad_norm": 0.005963634039163199, | |
| "learning_rate": 1.1735791090629802e-05, | |
| "loss": 0.0003, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.0691177882307765, | |
| "grad_norm": 0.009711371016533323, | |
| "learning_rate": 1.1720430107526883e-05, | |
| "loss": 0.0003, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.072957665354709, | |
| "grad_norm": 0.004479690428922436, | |
| "learning_rate": 1.1705069124423964e-05, | |
| "loss": 0.0003, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.0767975424786407, | |
| "grad_norm": 0.004339498596198677, | |
| "learning_rate": 1.1689708141321045e-05, | |
| "loss": 0.0003, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.0806374196025725, | |
| "grad_norm": 0.005524750165344554, | |
| "learning_rate": 1.1674347158218127e-05, | |
| "loss": 0.0003, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.084477296726505, | |
| "grad_norm": 0.0068414029393528645, | |
| "learning_rate": 1.1658986175115208e-05, | |
| "loss": 0.0003, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.0883171738504367, | |
| "grad_norm": 0.0029993939116700393, | |
| "learning_rate": 1.164362519201229e-05, | |
| "loss": 0.0003, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.092157050974369, | |
| "grad_norm": 0.0065445333490569925, | |
| "learning_rate": 1.1628264208909372e-05, | |
| "loss": 0.0003, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.095996928098301, | |
| "grad_norm": 0.004157809191374774, | |
| "learning_rate": 1.1612903225806453e-05, | |
| "loss": 0.0003, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.0998368052222327, | |
| "grad_norm": 0.01036397645054627, | |
| "learning_rate": 1.1597542242703534e-05, | |
| "loss": 0.0003, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.103676682346165, | |
| "grad_norm": 0.004619789710041621, | |
| "learning_rate": 1.1582181259600616e-05, | |
| "loss": 0.0003, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.107516559470097, | |
| "grad_norm": 0.0031701670908054584, | |
| "learning_rate": 1.1566820276497697e-05, | |
| "loss": 0.0003, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.111356436594029, | |
| "grad_norm": 0.0049090365155047685, | |
| "learning_rate": 1.1551459293394778e-05, | |
| "loss": 0.0003, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.115196313717961, | |
| "grad_norm": 0.006228538445781431, | |
| "learning_rate": 1.153609831029186e-05, | |
| "loss": 0.0003, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.119036190841893, | |
| "grad_norm": 0.003439458661613514, | |
| "learning_rate": 1.152073732718894e-05, | |
| "loss": 0.0003, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.122876067965825, | |
| "grad_norm": 0.0019012366528865447, | |
| "learning_rate": 1.1505376344086023e-05, | |
| "loss": 0.0003, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.126715945089757, | |
| "grad_norm": 0.003757506993975558, | |
| "learning_rate": 1.1490015360983104e-05, | |
| "loss": 0.0003, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.1305558222136893, | |
| "grad_norm": 0.007477619429838125, | |
| "learning_rate": 1.1474654377880186e-05, | |
| "loss": 0.0003, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.134395699337621, | |
| "grad_norm": 0.0031273099272674763, | |
| "learning_rate": 1.1459293394777267e-05, | |
| "loss": 0.0003, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.138235576461553, | |
| "grad_norm": 0.00735341646000325, | |
| "learning_rate": 1.1443932411674348e-05, | |
| "loss": 0.0003, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.1420754535854853, | |
| "grad_norm": 0.00804142143071962, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.0003, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.145915330709417, | |
| "grad_norm": 0.004355210176544316, | |
| "learning_rate": 1.141321044546851e-05, | |
| "loss": 0.0003, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.1497552078333495, | |
| "grad_norm": 0.004213055601660093, | |
| "learning_rate": 1.1397849462365593e-05, | |
| "loss": 0.0003, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.1535950849572814, | |
| "grad_norm": 0.0052307406743254785, | |
| "learning_rate": 1.1382488479262675e-05, | |
| "loss": 0.0003, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.157434962081213, | |
| "grad_norm": 0.004583788106873781, | |
| "learning_rate": 1.1367127496159756e-05, | |
| "loss": 0.0003, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.1612748392051455, | |
| "grad_norm": 0.0019315254969018546, | |
| "learning_rate": 1.1351766513056837e-05, | |
| "loss": 0.0003, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.1651147163290774, | |
| "grad_norm": 0.0037899992818163615, | |
| "learning_rate": 1.1336405529953918e-05, | |
| "loss": 0.0003, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.1689545934530097, | |
| "grad_norm": 0.006764631940574645, | |
| "learning_rate": 1.1321044546850999e-05, | |
| "loss": 0.0003, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.1727944705769415, | |
| "grad_norm": 0.009761685741017033, | |
| "learning_rate": 1.130568356374808e-05, | |
| "loss": 0.0003, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.1766343477008734, | |
| "grad_norm": 0.0020006686679916847, | |
| "learning_rate": 1.1290322580645164e-05, | |
| "loss": 0.0003, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.1804742248248057, | |
| "grad_norm": 0.0037488861983966605, | |
| "learning_rate": 1.1274961597542245e-05, | |
| "loss": 0.0003, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.1843141019487375, | |
| "grad_norm": 0.00537849863699846, | |
| "learning_rate": 1.1259600614439326e-05, | |
| "loss": 0.0003, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.18815397907267, | |
| "grad_norm": 0.010248964745953612, | |
| "learning_rate": 1.1244239631336407e-05, | |
| "loss": 0.0003, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.1919938561966017, | |
| "grad_norm": 0.005824548989521622, | |
| "learning_rate": 1.1228878648233488e-05, | |
| "loss": 0.0003, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.1958337333205336, | |
| "grad_norm": 0.004325296871628092, | |
| "learning_rate": 1.1213517665130569e-05, | |
| "loss": 0.0003, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.199673610444466, | |
| "grad_norm": 0.0035972982963026214, | |
| "learning_rate": 1.119815668202765e-05, | |
| "loss": 0.0003, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.2035134875683977, | |
| "grad_norm": 0.004846244232237887, | |
| "learning_rate": 1.118279569892473e-05, | |
| "loss": 0.0003, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.20735336469233, | |
| "grad_norm": 0.008745922011732979, | |
| "learning_rate": 1.1167434715821815e-05, | |
| "loss": 0.0003, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.211193241816262, | |
| "grad_norm": 0.00778907419188414, | |
| "learning_rate": 1.1152073732718896e-05, | |
| "loss": 0.0003, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.2150331189401937, | |
| "grad_norm": 0.0043654759196176075, | |
| "learning_rate": 1.1136712749615977e-05, | |
| "loss": 0.0003, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.218872996064126, | |
| "grad_norm": 0.005005097526262015, | |
| "learning_rate": 1.1121351766513058e-05, | |
| "loss": 0.0003, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.222712873188058, | |
| "grad_norm": 0.005100545621721535, | |
| "learning_rate": 1.1105990783410139e-05, | |
| "loss": 0.0003, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.22655275031199, | |
| "grad_norm": 0.005711204873227365, | |
| "learning_rate": 1.109062980030722e-05, | |
| "loss": 0.0003, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.230392627435922, | |
| "grad_norm": 0.005985518729144408, | |
| "learning_rate": 1.10752688172043e-05, | |
| "loss": 0.0003, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.234232504559854, | |
| "grad_norm": 0.009325255194251696, | |
| "learning_rate": 1.1059907834101385e-05, | |
| "loss": 0.0003, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.238072381683786, | |
| "grad_norm": 0.0009671272882854296, | |
| "learning_rate": 1.1044546850998466e-05, | |
| "loss": 0.0003, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.241912258807718, | |
| "grad_norm": 0.007433837280776717, | |
| "learning_rate": 1.1029185867895547e-05, | |
| "loss": 0.0003, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.2457521359316504, | |
| "grad_norm": 0.0069928744941826155, | |
| "learning_rate": 1.1013824884792628e-05, | |
| "loss": 0.0003, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.249592013055582, | |
| "grad_norm": 0.004247699109310477, | |
| "learning_rate": 1.0998463901689708e-05, | |
| "loss": 0.0003, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.253431890179514, | |
| "grad_norm": 0.004112472440297516, | |
| "learning_rate": 1.098310291858679e-05, | |
| "loss": 0.0003, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.2572717673034464, | |
| "grad_norm": 0.003591482285585461, | |
| "learning_rate": 1.096774193548387e-05, | |
| "loss": 0.0003, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.261111644427378, | |
| "grad_norm": 0.0021551424115954687, | |
| "learning_rate": 1.0952380952380955e-05, | |
| "loss": 0.0003, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.2649515215513105, | |
| "grad_norm": 0.0037350002650392232, | |
| "learning_rate": 1.0937019969278036e-05, | |
| "loss": 0.0003, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.2687913986752424, | |
| "grad_norm": 0.004979865757746785, | |
| "learning_rate": 1.0921658986175116e-05, | |
| "loss": 0.0003, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.2726312757991742, | |
| "grad_norm": 0.005203006741838927, | |
| "learning_rate": 1.0906298003072197e-05, | |
| "loss": 0.0003, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.2764711529231065, | |
| "grad_norm": 0.006728064653510132, | |
| "learning_rate": 1.0890937019969278e-05, | |
| "loss": 0.0003, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.2803110300470384, | |
| "grad_norm": 0.009359939665881964, | |
| "learning_rate": 1.087557603686636e-05, | |
| "loss": 0.0003, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.2841509071709707, | |
| "grad_norm": 0.0035717146182484686, | |
| "learning_rate": 1.086021505376344e-05, | |
| "loss": 0.0003, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.2879907842949025, | |
| "grad_norm": 0.00761174029980443, | |
| "learning_rate": 1.0844854070660523e-05, | |
| "loss": 0.0003, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.2918306614188344, | |
| "grad_norm": 0.005512789873618017, | |
| "learning_rate": 1.0829493087557605e-05, | |
| "loss": 0.0003, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.2956705385427667, | |
| "grad_norm": 0.003572666545585101, | |
| "learning_rate": 1.0814132104454686e-05, | |
| "loss": 0.0003, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.2995104156666986, | |
| "grad_norm": 0.0047703107744972405, | |
| "learning_rate": 1.0798771121351767e-05, | |
| "loss": 0.0003, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.303350292790631, | |
| "grad_norm": 0.0034190136655308364, | |
| "learning_rate": 1.0783410138248848e-05, | |
| "loss": 0.0003, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.3071901699145627, | |
| "grad_norm": 0.0036676135087216543, | |
| "learning_rate": 1.0768049155145929e-05, | |
| "loss": 0.0003, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.3110300470384946, | |
| "grad_norm": 0.005769656112064071, | |
| "learning_rate": 1.0752688172043012e-05, | |
| "loss": 0.0003, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.314869924162427, | |
| "grad_norm": 0.004110269843427993, | |
| "learning_rate": 1.0737327188940093e-05, | |
| "loss": 0.0003, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.3187098012863587, | |
| "grad_norm": 0.0023331666885418414, | |
| "learning_rate": 1.0721966205837175e-05, | |
| "loss": 0.0003, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.322549678410291, | |
| "grad_norm": 0.0027550349293288595, | |
| "learning_rate": 1.0706605222734256e-05, | |
| "loss": 0.0003, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.326389555534223, | |
| "grad_norm": 0.00739949517331183, | |
| "learning_rate": 1.0691244239631337e-05, | |
| "loss": 0.0003, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.3302294326581547, | |
| "grad_norm": 0.007948349024988999, | |
| "learning_rate": 1.0675883256528418e-05, | |
| "loss": 0.0003, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.334069309782087, | |
| "grad_norm": 0.0068282243290130416, | |
| "learning_rate": 1.0660522273425499e-05, | |
| "loss": 0.0003, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.337909186906019, | |
| "grad_norm": 0.0027681416973556576, | |
| "learning_rate": 1.0645161290322582e-05, | |
| "loss": 0.0003, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.341749064029951, | |
| "grad_norm": 0.0048135041265458395, | |
| "learning_rate": 1.0629800307219663e-05, | |
| "loss": 0.0003, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.345588941153883, | |
| "grad_norm": 0.003988841136950386, | |
| "learning_rate": 1.0614439324116745e-05, | |
| "loss": 0.0003, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.349428818277815, | |
| "grad_norm": 0.001427852368200693, | |
| "learning_rate": 1.0599078341013826e-05, | |
| "loss": 0.0003, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.353268695401747, | |
| "grad_norm": 0.0028685965319891656, | |
| "learning_rate": 1.0583717357910907e-05, | |
| "loss": 0.0003, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.357108572525679, | |
| "grad_norm": 0.004228704639871503, | |
| "learning_rate": 1.0568356374807988e-05, | |
| "loss": 0.0003, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.3609484496496114, | |
| "grad_norm": 0.004544577992861476, | |
| "learning_rate": 1.055299539170507e-05, | |
| "loss": 0.0003, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.3647883267735432, | |
| "grad_norm": 0.0014514795044217516, | |
| "learning_rate": 1.0537634408602151e-05, | |
| "loss": 0.0003, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.368628203897475, | |
| "grad_norm": 0.005123018146892371, | |
| "learning_rate": 1.0522273425499232e-05, | |
| "loss": 0.0003, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.3724680810214074, | |
| "grad_norm": 0.007113409507414172, | |
| "learning_rate": 1.0506912442396313e-05, | |
| "loss": 0.0003, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.3763079581453392, | |
| "grad_norm": 0.0051352903847912985, | |
| "learning_rate": 1.0491551459293396e-05, | |
| "loss": 0.0003, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.3801478352692715, | |
| "grad_norm": 0.002104107790233039, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 0.0003, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.3839877123932034, | |
| "grad_norm": 0.0025435299650544215, | |
| "learning_rate": 1.046082949308756e-05, | |
| "loss": 0.0003, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.3878275895171353, | |
| "grad_norm": 0.0012255752995362287, | |
| "learning_rate": 1.044546850998464e-05, | |
| "loss": 0.0003, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.3916674666410676, | |
| "grad_norm": 0.011675318963073902, | |
| "learning_rate": 1.0430107526881721e-05, | |
| "loss": 0.0003, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.3955073437649994, | |
| "grad_norm": 0.002461069919600081, | |
| "learning_rate": 1.0414746543778802e-05, | |
| "loss": 0.0003, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.3993472208889317, | |
| "grad_norm": 0.0034531583429282644, | |
| "learning_rate": 1.0399385560675883e-05, | |
| "loss": 0.0003, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.4031870980128636, | |
| "grad_norm": 0.004785216112107441, | |
| "learning_rate": 1.0384024577572966e-05, | |
| "loss": 0.0003, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.4070269751367954, | |
| "grad_norm": 0.005178536980072039, | |
| "learning_rate": 1.0368663594470047e-05, | |
| "loss": 0.0003, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.4108668522607277, | |
| "grad_norm": 0.007894702946313798, | |
| "learning_rate": 1.035330261136713e-05, | |
| "loss": 0.0003, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.4147067293846596, | |
| "grad_norm": 0.006680693675035738, | |
| "learning_rate": 1.033794162826421e-05, | |
| "loss": 0.0003, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.418546606508592, | |
| "grad_norm": 0.0012495646198747838, | |
| "learning_rate": 1.0322580645161291e-05, | |
| "loss": 0.0003, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.4223864836325237, | |
| "grad_norm": 0.0019525091355373828, | |
| "learning_rate": 1.0307219662058372e-05, | |
| "loss": 0.0003, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.4262263607564556, | |
| "grad_norm": 0.010231795338312226, | |
| "learning_rate": 1.0291858678955453e-05, | |
| "loss": 0.0003, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.430066237880388, | |
| "grad_norm": 0.0030330611729114856, | |
| "learning_rate": 1.0276497695852536e-05, | |
| "loss": 0.0002, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.4339061150043197, | |
| "grad_norm": 0.004059549924408756, | |
| "learning_rate": 1.0261136712749618e-05, | |
| "loss": 0.0003, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.437745992128252, | |
| "grad_norm": 0.0019612251556256, | |
| "learning_rate": 1.02457757296467e-05, | |
| "loss": 0.0003, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.441585869252184, | |
| "grad_norm": 0.015437204765968675, | |
| "learning_rate": 1.023041474654378e-05, | |
| "loss": 0.0003, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.4454257463761158, | |
| "grad_norm": 0.012071394006097472, | |
| "learning_rate": 1.0215053763440861e-05, | |
| "loss": 0.0003, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.449265623500048, | |
| "grad_norm": 0.0036098717816734674, | |
| "learning_rate": 1.0199692780337942e-05, | |
| "loss": 0.0003, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.45310550062398, | |
| "grad_norm": 0.0019974234019019756, | |
| "learning_rate": 1.0184331797235023e-05, | |
| "loss": 0.0003, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.456945377747912, | |
| "grad_norm": 0.0020862007756023856, | |
| "learning_rate": 1.0168970814132104e-05, | |
| "loss": 0.0003, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.460785254871844, | |
| "grad_norm": 0.003547053858291849, | |
| "learning_rate": 1.0153609831029188e-05, | |
| "loss": 0.0003, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.464625131995776, | |
| "grad_norm": 0.007416597783167198, | |
| "learning_rate": 1.0138248847926269e-05, | |
| "loss": 0.0003, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.4684650091197082, | |
| "grad_norm": 0.013003680388147372, | |
| "learning_rate": 1.012288786482335e-05, | |
| "loss": 0.0003, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.47230488624364, | |
| "grad_norm": 0.008074660660103197, | |
| "learning_rate": 1.0107526881720431e-05, | |
| "loss": 0.0003, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.4761447633675724, | |
| "grad_norm": 0.009777230368946704, | |
| "learning_rate": 1.0092165898617512e-05, | |
| "loss": 0.0003, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.4799846404915042, | |
| "grad_norm": 0.004447494802395034, | |
| "learning_rate": 1.0076804915514593e-05, | |
| "loss": 0.0003, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.483824517615436, | |
| "grad_norm": 0.007205042771433959, | |
| "learning_rate": 1.0061443932411674e-05, | |
| "loss": 0.0003, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.4876643947393684, | |
| "grad_norm": 0.004129432165666555, | |
| "learning_rate": 1.0046082949308758e-05, | |
| "loss": 0.0003, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.4915042718633003, | |
| "grad_norm": 0.003293907536270406, | |
| "learning_rate": 1.0030721966205839e-05, | |
| "loss": 0.0003, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.4953441489872326, | |
| "grad_norm": 0.003480440018564576, | |
| "learning_rate": 1.001536098310292e-05, | |
| "loss": 0.0003, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.4991840261111644, | |
| "grad_norm": 0.00941404682853377, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0003, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.5030239032350963, | |
| "grad_norm": 0.0027230276406464524, | |
| "learning_rate": 9.984639016897082e-06, | |
| "loss": 0.0003, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.5068637803590286, | |
| "grad_norm": 0.005538952293239004, | |
| "learning_rate": 9.969278033794164e-06, | |
| "loss": 0.0003, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.5107036574829604, | |
| "grad_norm": 0.0050333328452282, | |
| "learning_rate": 9.953917050691245e-06, | |
| "loss": 0.0003, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.5145435346068927, | |
| "grad_norm": 0.00522155205199295, | |
| "learning_rate": 9.938556067588326e-06, | |
| "loss": 0.0002, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.5183834117308246, | |
| "grad_norm": 0.007188536096954456, | |
| "learning_rate": 9.923195084485407e-06, | |
| "loss": 0.0003, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.5222232888547564, | |
| "grad_norm": 0.003184906537752492, | |
| "learning_rate": 9.90783410138249e-06, | |
| "loss": 0.0002, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.5260631659786887, | |
| "grad_norm": 0.007133938771763716, | |
| "learning_rate": 9.89247311827957e-06, | |
| "loss": 0.0002, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.5299030431026206, | |
| "grad_norm": 0.002331216788213924, | |
| "learning_rate": 9.877112135176652e-06, | |
| "loss": 0.0003, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.533742920226553, | |
| "grad_norm": 0.00479250433136117, | |
| "learning_rate": 9.861751152073733e-06, | |
| "loss": 0.0003, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.5375827973504848, | |
| "grad_norm": 0.005137631489738605, | |
| "learning_rate": 9.846390168970815e-06, | |
| "loss": 0.0002, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.5414226744744166, | |
| "grad_norm": 0.0018644753603249751, | |
| "learning_rate": 9.831029185867896e-06, | |
| "loss": 0.0002, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.545262551598349, | |
| "grad_norm": 0.00606148048013519, | |
| "learning_rate": 9.815668202764977e-06, | |
| "loss": 0.0003, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.5491024287222808, | |
| "grad_norm": 0.0046942947495266455, | |
| "learning_rate": 9.80030721966206e-06, | |
| "loss": 0.0003, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.552942305846213, | |
| "grad_norm": 0.0033581298827081688, | |
| "learning_rate": 9.78494623655914e-06, | |
| "loss": 0.0003, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.556782182970145, | |
| "grad_norm": 0.0050898256061523055, | |
| "learning_rate": 9.769585253456221e-06, | |
| "loss": 0.0003, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.560622060094077, | |
| "grad_norm": 0.007276030994283694, | |
| "learning_rate": 9.754224270353302e-06, | |
| "loss": 0.0003, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.564461937218009, | |
| "grad_norm": 0.008102589186392525, | |
| "learning_rate": 9.738863287250385e-06, | |
| "loss": 0.0003, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.568301814341941, | |
| "grad_norm": 0.007025327596243143, | |
| "learning_rate": 9.723502304147466e-06, | |
| "loss": 0.0003, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.5721416914658732, | |
| "grad_norm": 0.005845460452608034, | |
| "learning_rate": 9.708141321044547e-06, | |
| "loss": 0.0003, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.575981568589805, | |
| "grad_norm": 0.012489139495375251, | |
| "learning_rate": 9.692780337941628e-06, | |
| "loss": 0.0003, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.579821445713737, | |
| "grad_norm": 0.012227770449223498, | |
| "learning_rate": 9.67741935483871e-06, | |
| "loss": 0.0003, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.5836613228376692, | |
| "grad_norm": 0.005069955164515195, | |
| "learning_rate": 9.662058371735791e-06, | |
| "loss": 0.0003, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.587501199961601, | |
| "grad_norm": 0.0061167037017676555, | |
| "learning_rate": 9.646697388632872e-06, | |
| "loss": 0.0002, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.5913410770855334, | |
| "grad_norm": 0.08374847767752854, | |
| "learning_rate": 9.631336405529955e-06, | |
| "loss": 0.0004, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.5951809542094653, | |
| "grad_norm": 0.8655212802945428, | |
| "learning_rate": 9.615975422427036e-06, | |
| "loss": 0.014, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.599020831333397, | |
| "grad_norm": 0.5910483392483017, | |
| "learning_rate": 9.600614439324117e-06, | |
| "loss": 0.0096, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.6028607084573294, | |
| "grad_norm": 0.013833647799367984, | |
| "learning_rate": 9.5852534562212e-06, | |
| "loss": 0.0017, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.6067005855812613, | |
| "grad_norm": 0.006258343437812146, | |
| "learning_rate": 9.56989247311828e-06, | |
| "loss": 0.0003, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.6105404627051936, | |
| "grad_norm": 0.007017002859118159, | |
| "learning_rate": 9.554531490015361e-06, | |
| "loss": 0.0003, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.6143803398291254, | |
| "grad_norm": 0.003369751095391674, | |
| "learning_rate": 9.539170506912442e-06, | |
| "loss": 0.0003, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.6182202169530573, | |
| "grad_norm": 0.003526571263237409, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 0.0003, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.6220600940769896, | |
| "grad_norm": 0.014811925020528588, | |
| "learning_rate": 9.508448540706606e-06, | |
| "loss": 0.0003, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.6258999712009214, | |
| "grad_norm": 0.0031378309452129815, | |
| "learning_rate": 9.493087557603687e-06, | |
| "loss": 0.0003, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.6297398483248537, | |
| "grad_norm": 0.003306012394640935, | |
| "learning_rate": 9.47772657450077e-06, | |
| "loss": 0.0003, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.6335797254487856, | |
| "grad_norm": 0.00579074066003059, | |
| "learning_rate": 9.46236559139785e-06, | |
| "loss": 0.0003, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.6374196025727175, | |
| "grad_norm": 0.006190406938703898, | |
| "learning_rate": 9.447004608294931e-06, | |
| "loss": 0.0003, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.6412594796966498, | |
| "grad_norm": 0.0019373298398581817, | |
| "learning_rate": 9.431643625192014e-06, | |
| "loss": 0.0003, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.6450993568205816, | |
| "grad_norm": 0.003767593626643142, | |
| "learning_rate": 9.416282642089095e-06, | |
| "loss": 0.0003, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.648939233944514, | |
| "grad_norm": 0.0021313401606799036, | |
| "learning_rate": 9.400921658986176e-06, | |
| "loss": 0.0003, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.6527791110684458, | |
| "grad_norm": 0.0073770172122847415, | |
| "learning_rate": 9.385560675883258e-06, | |
| "loss": 0.0003, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.6566189881923776, | |
| "grad_norm": 0.0067510318233110125, | |
| "learning_rate": 9.370199692780339e-06, | |
| "loss": 0.0003, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.66045886531631, | |
| "grad_norm": 0.005588267928353358, | |
| "learning_rate": 9.35483870967742e-06, | |
| "loss": 0.0003, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.664298742440242, | |
| "grad_norm": 0.006170795151990667, | |
| "learning_rate": 9.339477726574503e-06, | |
| "loss": 0.0003, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.668138619564174, | |
| "grad_norm": 0.0012556712204226065, | |
| "learning_rate": 9.324116743471584e-06, | |
| "loss": 0.0003, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.671978496688106, | |
| "grad_norm": 0.006374796263066127, | |
| "learning_rate": 9.308755760368664e-06, | |
| "loss": 0.0003, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.675818373812038, | |
| "grad_norm": 0.003408128999224669, | |
| "learning_rate": 9.293394777265745e-06, | |
| "loss": 0.0003, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.67965825093597, | |
| "grad_norm": 0.004088069338470728, | |
| "learning_rate": 9.278033794162828e-06, | |
| "loss": 0.0003, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.683498128059902, | |
| "grad_norm": 0.0039123405460172464, | |
| "learning_rate": 9.262672811059909e-06, | |
| "loss": 0.0003, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.6873380051838343, | |
| "grad_norm": 0.0039015005051876795, | |
| "learning_rate": 9.24731182795699e-06, | |
| "loss": 0.0003, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.691177882307766, | |
| "grad_norm": 0.00822944757312854, | |
| "learning_rate": 9.231950844854072e-06, | |
| "loss": 0.0003, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.695017759431698, | |
| "grad_norm": 0.009264902761011475, | |
| "learning_rate": 9.216589861751153e-06, | |
| "loss": 0.0003, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.6988576365556303, | |
| "grad_norm": 0.0027568421905536863, | |
| "learning_rate": 9.201228878648234e-06, | |
| "loss": 0.0003, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.702697513679562, | |
| "grad_norm": 0.007150407401258832, | |
| "learning_rate": 9.185867895545315e-06, | |
| "loss": 0.0003, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.7065373908034944, | |
| "grad_norm": 0.0037931552321473815, | |
| "learning_rate": 9.170506912442398e-06, | |
| "loss": 0.0003, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.7103772679274263, | |
| "grad_norm": 0.0035618752848147433, | |
| "learning_rate": 9.155145929339479e-06, | |
| "loss": 0.0003, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.714217145051358, | |
| "grad_norm": 0.005406421463186957, | |
| "learning_rate": 9.13978494623656e-06, | |
| "loss": 0.0003, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.7180570221752904, | |
| "grad_norm": 0.006311758799823545, | |
| "learning_rate": 9.124423963133642e-06, | |
| "loss": 0.0002, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.7218968992992223, | |
| "grad_norm": 0.00455306940271519, | |
| "learning_rate": 9.109062980030723e-06, | |
| "loss": 0.0003, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.7257367764231546, | |
| "grad_norm": 0.009213759005302373, | |
| "learning_rate": 9.093701996927804e-06, | |
| "loss": 0.0003, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.7295766535470865, | |
| "grad_norm": 0.001955875427936466, | |
| "learning_rate": 9.078341013824885e-06, | |
| "loss": 0.0003, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.7334165306710183, | |
| "grad_norm": 0.002645510266753951, | |
| "learning_rate": 9.062980030721968e-06, | |
| "loss": 0.0003, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.7372564077949506, | |
| "grad_norm": 0.0015492690289872618, | |
| "learning_rate": 9.047619047619049e-06, | |
| "loss": 0.0003, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.7410962849188825, | |
| "grad_norm": 0.004407452957932437, | |
| "learning_rate": 9.03225806451613e-06, | |
| "loss": 0.0003, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.7449361620428148, | |
| "grad_norm": 0.003584874788768233, | |
| "learning_rate": 9.01689708141321e-06, | |
| "loss": 0.0003, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.7487760391667466, | |
| "grad_norm": 0.003426299476431039, | |
| "learning_rate": 9.001536098310293e-06, | |
| "loss": 0.0002, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.7526159162906785, | |
| "grad_norm": 0.003706527902281191, | |
| "learning_rate": 8.986175115207374e-06, | |
| "loss": 0.0003, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.7564557934146108, | |
| "grad_norm": 0.005686639850889155, | |
| "learning_rate": 8.970814132104455e-06, | |
| "loss": 0.0003, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.7602956705385426, | |
| "grad_norm": 0.0052320038014579216, | |
| "learning_rate": 8.955453149001538e-06, | |
| "loss": 0.0002, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.764135547662475, | |
| "grad_norm": 0.0035293581965504047, | |
| "learning_rate": 8.940092165898619e-06, | |
| "loss": 0.0003, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.767975424786407, | |
| "grad_norm": 0.0030584138772079433, | |
| "learning_rate": 8.9247311827957e-06, | |
| "loss": 0.0003, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.7718153019103386, | |
| "grad_norm": 0.001802338947140362, | |
| "learning_rate": 8.90937019969278e-06, | |
| "loss": 0.0003, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.775655179034271, | |
| "grad_norm": 0.003941656074171531, | |
| "learning_rate": 8.894009216589863e-06, | |
| "loss": 0.0003, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.779495056158203, | |
| "grad_norm": 0.007971016203195725, | |
| "learning_rate": 8.878648233486944e-06, | |
| "loss": 0.0003, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.783334933282135, | |
| "grad_norm": 0.0030065159813745896, | |
| "learning_rate": 8.863287250384025e-06, | |
| "loss": 0.0003, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.787174810406067, | |
| "grad_norm": 0.012438848822631438, | |
| "learning_rate": 8.847926267281107e-06, | |
| "loss": 0.0003, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.791014687529999, | |
| "grad_norm": 0.004555632702125586, | |
| "learning_rate": 8.832565284178188e-06, | |
| "loss": 0.0003, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.794854564653931, | |
| "grad_norm": 0.009624526970252073, | |
| "learning_rate": 8.81720430107527e-06, | |
| "loss": 0.0003, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.798694441777863, | |
| "grad_norm": 0.006646400675058001, | |
| "learning_rate": 8.80184331797235e-06, | |
| "loss": 0.0003, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.8025343189017953, | |
| "grad_norm": 0.00547555841349304, | |
| "learning_rate": 8.786482334869433e-06, | |
| "loss": 0.0003, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.806374196025727, | |
| "grad_norm": 0.006365409097436495, | |
| "learning_rate": 8.771121351766514e-06, | |
| "loss": 0.0003, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.810214073149659, | |
| "grad_norm": 0.004289823731985147, | |
| "learning_rate": 8.755760368663595e-06, | |
| "loss": 0.0003, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.8140539502735913, | |
| "grad_norm": 0.0009114031462774198, | |
| "learning_rate": 8.740399385560676e-06, | |
| "loss": 0.0003, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.817893827397523, | |
| "grad_norm": 0.0040925566094191235, | |
| "learning_rate": 8.725038402457758e-06, | |
| "loss": 0.0003, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.8217337045214554, | |
| "grad_norm": 0.004769056547562217, | |
| "learning_rate": 8.70967741935484e-06, | |
| "loss": 0.0003, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.8255735816453873, | |
| "grad_norm": 0.00403037096763482, | |
| "learning_rate": 8.69431643625192e-06, | |
| "loss": 0.0003, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.829413458769319, | |
| "grad_norm": 0.0018637193862899913, | |
| "learning_rate": 8.678955453149003e-06, | |
| "loss": 0.0003, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.8332533358932515, | |
| "grad_norm": 0.003596057132391645, | |
| "learning_rate": 8.663594470046084e-06, | |
| "loss": 0.0002, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.8370932130171833, | |
| "grad_norm": 0.003687354810004547, | |
| "learning_rate": 8.648233486943165e-06, | |
| "loss": 0.0003, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.8409330901411156, | |
| "grad_norm": 0.005906169853780166, | |
| "learning_rate": 8.632872503840246e-06, | |
| "loss": 0.0003, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.8447729672650475, | |
| "grad_norm": 0.008785446912324073, | |
| "learning_rate": 8.617511520737328e-06, | |
| "loss": 0.0003, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.8486128443889793, | |
| "grad_norm": 0.006765639595362392, | |
| "learning_rate": 8.602150537634409e-06, | |
| "loss": 0.0003, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.8524527215129116, | |
| "grad_norm": 0.0034460169241499507, | |
| "learning_rate": 8.58678955453149e-06, | |
| "loss": 0.0003, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.8562925986368435, | |
| "grad_norm": 0.0029012348201393513, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0003, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.860132475760776, | |
| "grad_norm": 0.009176699580615625, | |
| "learning_rate": 8.556067588325654e-06, | |
| "loss": 0.0003, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.8639723528847076, | |
| "grad_norm": 0.007162151782757863, | |
| "learning_rate": 8.540706605222734e-06, | |
| "loss": 0.0002, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.8678122300086395, | |
| "grad_norm": 0.011405641448979225, | |
| "learning_rate": 8.525345622119815e-06, | |
| "loss": 0.0002, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.871652107132572, | |
| "grad_norm": 0.002892641787357458, | |
| "learning_rate": 8.509984639016898e-06, | |
| "loss": 0.0003, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.8754919842565037, | |
| "grad_norm": 0.0045100170495708195, | |
| "learning_rate": 8.494623655913979e-06, | |
| "loss": 0.0003, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.879331861380436, | |
| "grad_norm": 0.0042421348190372936, | |
| "learning_rate": 8.47926267281106e-06, | |
| "loss": 0.0002, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.883171738504368, | |
| "grad_norm": 0.007795804545941071, | |
| "learning_rate": 8.463901689708142e-06, | |
| "loss": 0.0002, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.8870116156282997, | |
| "grad_norm": 0.008298948469422989, | |
| "learning_rate": 8.448540706605223e-06, | |
| "loss": 0.0003, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.890851492752232, | |
| "grad_norm": 0.0031254716702214095, | |
| "learning_rate": 8.433179723502304e-06, | |
| "loss": 0.0003, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.894691369876164, | |
| "grad_norm": 0.005192654900334731, | |
| "learning_rate": 8.417818740399385e-06, | |
| "loss": 0.0003, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.898531247000096, | |
| "grad_norm": 0.01235662584667344, | |
| "learning_rate": 8.402457757296468e-06, | |
| "loss": 0.0003, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.902371124124028, | |
| "grad_norm": 0.00496363081609215, | |
| "learning_rate": 8.387096774193549e-06, | |
| "loss": 0.0003, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.90621100124796, | |
| "grad_norm": 0.0036361713152800973, | |
| "learning_rate": 8.37173579109063e-06, | |
| "loss": 0.0003, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.910050878371892, | |
| "grad_norm": 0.001736163807789765, | |
| "learning_rate": 8.356374807987712e-06, | |
| "loss": 0.0003, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.913890755495824, | |
| "grad_norm": 0.0037050159683279602, | |
| "learning_rate": 8.341013824884793e-06, | |
| "loss": 0.0003, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.9177306326197563, | |
| "grad_norm": 0.0039102919819924975, | |
| "learning_rate": 8.325652841781874e-06, | |
| "loss": 0.0002, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.921570509743688, | |
| "grad_norm": 0.004742691161045984, | |
| "learning_rate": 8.310291858678957e-06, | |
| "loss": 0.0003, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.92541038686762, | |
| "grad_norm": 0.005839965213671288, | |
| "learning_rate": 8.294930875576038e-06, | |
| "loss": 0.0003, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.9292502639915523, | |
| "grad_norm": 0.005078116114432955, | |
| "learning_rate": 8.279569892473119e-06, | |
| "loss": 0.0003, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.933090141115484, | |
| "grad_norm": 0.0047744269458432275, | |
| "learning_rate": 8.264208909370201e-06, | |
| "loss": 0.0003, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.9369300182394165, | |
| "grad_norm": 0.003503766564796104, | |
| "learning_rate": 8.248847926267282e-06, | |
| "loss": 0.0003, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.9407698953633483, | |
| "grad_norm": 0.002919305506009367, | |
| "learning_rate": 8.233486943164363e-06, | |
| "loss": 0.0003, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.94460977248728, | |
| "grad_norm": 0.005234438137723877, | |
| "learning_rate": 8.218125960061446e-06, | |
| "loss": 0.0002, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.9484496496112125, | |
| "grad_norm": 0.0058797401671725905, | |
| "learning_rate": 8.202764976958527e-06, | |
| "loss": 0.0003, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.9522895267351443, | |
| "grad_norm": 0.003378937083777895, | |
| "learning_rate": 8.187403993855608e-06, | |
| "loss": 0.0002, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.9561294038590766, | |
| "grad_norm": 0.0027598025310823916, | |
| "learning_rate": 8.172043010752689e-06, | |
| "loss": 0.0003, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.9599692809830085, | |
| "grad_norm": 0.006302224478544003, | |
| "learning_rate": 8.156682027649771e-06, | |
| "loss": 0.0003, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.9638091581069403, | |
| "grad_norm": 0.005721010444274242, | |
| "learning_rate": 8.141321044546852e-06, | |
| "loss": 0.0003, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.9676490352308726, | |
| "grad_norm": 0.00895258523648075, | |
| "learning_rate": 8.125960061443933e-06, | |
| "loss": 0.0003, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.9714889123548045, | |
| "grad_norm": 0.006250897732901797, | |
| "learning_rate": 8.110599078341016e-06, | |
| "loss": 0.0003, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.975328789478737, | |
| "grad_norm": 0.004710767186934721, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 0.0003, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.9791686666026687, | |
| "grad_norm": 0.004749110615282267, | |
| "learning_rate": 8.079877112135177e-06, | |
| "loss": 0.0003, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.9830085437266005, | |
| "grad_norm": 0.0024146312389459854, | |
| "learning_rate": 8.064516129032258e-06, | |
| "loss": 0.0003, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.986848420850533, | |
| "grad_norm": 0.0017491812102154464, | |
| "learning_rate": 8.049155145929341e-06, | |
| "loss": 0.0002, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.9906882979744647, | |
| "grad_norm": 0.008039358375107466, | |
| "learning_rate": 8.033794162826422e-06, | |
| "loss": 0.0003, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.994528175098397, | |
| "grad_norm": 0.003445401481616659, | |
| "learning_rate": 8.018433179723503e-06, | |
| "loss": 0.0003, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.998368052222329, | |
| "grad_norm": 0.004124288268774834, | |
| "learning_rate": 8.003072196620585e-06, | |
| "loss": 0.0003, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 3.001919938561966, | |
| "grad_norm": 0.002674977868985105, | |
| "learning_rate": 7.987711213517666e-06, | |
| "loss": 0.0002, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 3.0057598156858982, | |
| "grad_norm": 0.0050648363913657094, | |
| "learning_rate": 7.972350230414747e-06, | |
| "loss": 0.0003, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 3.00959969280983, | |
| "grad_norm": 0.0007924830296134951, | |
| "learning_rate": 7.956989247311828e-06, | |
| "loss": 0.0003, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 3.013439569933762, | |
| "grad_norm": 0.001913560970676822, | |
| "learning_rate": 7.941628264208911e-06, | |
| "loss": 0.0003, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 3.0172794470576942, | |
| "grad_norm": 0.0031764216786887415, | |
| "learning_rate": 7.926267281105992e-06, | |
| "loss": 0.0002, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 3.021119324181626, | |
| "grad_norm": 0.00509301739025399, | |
| "learning_rate": 7.910906298003073e-06, | |
| "loss": 0.0003, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 3.0249592013055584, | |
| "grad_norm": 0.004796585886393999, | |
| "learning_rate": 7.895545314900154e-06, | |
| "loss": 0.0003, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 3.0287990784294903, | |
| "grad_norm": 0.006878984874609061, | |
| "learning_rate": 7.880184331797236e-06, | |
| "loss": 0.0003, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 3.032638955553422, | |
| "grad_norm": 0.0028825313800368113, | |
| "learning_rate": 7.864823348694317e-06, | |
| "loss": 0.0003, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.0364788326773544, | |
| "grad_norm": 0.008888757116371175, | |
| "learning_rate": 7.849462365591398e-06, | |
| "loss": 0.0002, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 3.0403187098012863, | |
| "grad_norm": 0.011189685195352432, | |
| "learning_rate": 7.83410138248848e-06, | |
| "loss": 0.0003, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 3.0441585869252186, | |
| "grad_norm": 0.004445925224066104, | |
| "learning_rate": 7.818740399385562e-06, | |
| "loss": 0.0003, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 3.0479984640491504, | |
| "grad_norm": 0.005046704426390252, | |
| "learning_rate": 7.803379416282643e-06, | |
| "loss": 0.0002, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 3.0518383411730823, | |
| "grad_norm": 0.00325039934600695, | |
| "learning_rate": 7.788018433179724e-06, | |
| "loss": 0.0002, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 3.0556782182970146, | |
| "grad_norm": 0.009122195080615519, | |
| "learning_rate": 7.772657450076806e-06, | |
| "loss": 0.0002, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 3.0595180954209464, | |
| "grad_norm": 0.0035357373556863343, | |
| "learning_rate": 7.757296466973887e-06, | |
| "loss": 0.0002, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 3.0633579725448787, | |
| "grad_norm": 0.0030570656872309174, | |
| "learning_rate": 7.741935483870968e-06, | |
| "loss": 0.0003, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 3.0671978496688106, | |
| "grad_norm": 0.0021996646452019718, | |
| "learning_rate": 7.726574500768049e-06, | |
| "loss": 0.0003, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 3.0710377267927425, | |
| "grad_norm": 0.0018882278920401504, | |
| "learning_rate": 7.711213517665132e-06, | |
| "loss": 0.0003, | |
| "step": 8000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 13020, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.7534171666710528e+16, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |