| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 10190, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009813542688910697, | |
| "grad_norm": 1.582729697227478, | |
| "learning_rate": 4.9955839057899906e-05, | |
| "loss": 0.6655, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.019627085377821395, | |
| "grad_norm": 2.231220006942749, | |
| "learning_rate": 4.990677134445535e-05, | |
| "loss": 0.6564, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.029440628066732092, | |
| "grad_norm": 2.0203068256378174, | |
| "learning_rate": 4.98577036310108e-05, | |
| "loss": 0.5558, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03925417075564279, | |
| "grad_norm": 1.8266687393188477, | |
| "learning_rate": 4.980863591756624e-05, | |
| "loss": 0.3794, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04906771344455348, | |
| "grad_norm": 0.9987813830375671, | |
| "learning_rate": 4.9759568204121696e-05, | |
| "loss": 0.3129, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.058881256133464184, | |
| "grad_norm": 6.515292644500732, | |
| "learning_rate": 4.971050049067714e-05, | |
| "loss": 0.2824, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06869479882237488, | |
| "grad_norm": 12.534858703613281, | |
| "learning_rate": 4.966143277723259e-05, | |
| "loss": 0.2215, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07850834151128558, | |
| "grad_norm": 1.54276704788208, | |
| "learning_rate": 4.961236506378803e-05, | |
| "loss": 0.0981, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08832188420019627, | |
| "grad_norm": 1.2632302045822144, | |
| "learning_rate": 4.956329735034347e-05, | |
| "loss": 0.0815, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09813542688910697, | |
| "grad_norm": 8.447662353515625, | |
| "learning_rate": 4.951422963689892e-05, | |
| "loss": 0.076, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10794896957801767, | |
| "grad_norm": 0.18015748262405396, | |
| "learning_rate": 4.946516192345437e-05, | |
| "loss": 0.0877, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11776251226692837, | |
| "grad_norm": 0.57831871509552, | |
| "learning_rate": 4.941609421000982e-05, | |
| "loss": 0.2572, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12757605495583907, | |
| "grad_norm": 0.19590969383716583, | |
| "learning_rate": 4.936702649656526e-05, | |
| "loss": 0.1026, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13738959764474976, | |
| "grad_norm": 0.1655094027519226, | |
| "learning_rate": 4.931795878312071e-05, | |
| "loss": 0.0515, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14720314033366044, | |
| "grad_norm": 0.09949669986963272, | |
| "learning_rate": 4.926889106967615e-05, | |
| "loss": 0.1725, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.15701668302257116, | |
| "grad_norm": 0.10112312436103821, | |
| "learning_rate": 4.92198233562316e-05, | |
| "loss": 0.279, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.16683022571148184, | |
| "grad_norm": 0.08742330223321915, | |
| "learning_rate": 4.917075564278705e-05, | |
| "loss": 0.1379, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.17664376840039253, | |
| "grad_norm": 0.08159990608692169, | |
| "learning_rate": 4.91216879293425e-05, | |
| "loss": 0.0306, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18645731108930325, | |
| "grad_norm": 0.06950151175260544, | |
| "learning_rate": 4.907262021589794e-05, | |
| "loss": 0.0121, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.19627085377821393, | |
| "grad_norm": 10.549564361572266, | |
| "learning_rate": 4.902355250245339e-05, | |
| "loss": 0.1062, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20608439646712462, | |
| "grad_norm": 0.06549222767353058, | |
| "learning_rate": 4.897448478900883e-05, | |
| "loss": 0.0633, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.21589793915603533, | |
| "grad_norm": 15.829712867736816, | |
| "learning_rate": 4.892541707556428e-05, | |
| "loss": 0.0802, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.22571148184494602, | |
| "grad_norm": 1.9238972663879395, | |
| "learning_rate": 4.887634936211973e-05, | |
| "loss": 0.0688, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.23552502453385674, | |
| "grad_norm": 0.07546406239271164, | |
| "learning_rate": 4.882728164867517e-05, | |
| "loss": 0.0553, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.24533856722276742, | |
| "grad_norm": 0.3736225366592407, | |
| "learning_rate": 4.877821393523062e-05, | |
| "loss": 0.0661, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.25515210991167814, | |
| "grad_norm": 0.2823021709918976, | |
| "learning_rate": 4.872914622178606e-05, | |
| "loss": 0.0617, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2649656526005888, | |
| "grad_norm": 4.213985919952393, | |
| "learning_rate": 4.868007850834151e-05, | |
| "loss": 0.0628, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2747791952894995, | |
| "grad_norm": 0.09498825669288635, | |
| "learning_rate": 4.863101079489696e-05, | |
| "loss": 0.2317, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2845927379784102, | |
| "grad_norm": 0.05033993721008301, | |
| "learning_rate": 4.858194308145241e-05, | |
| "loss": 0.008, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2944062806673209, | |
| "grad_norm": 0.05174524709582329, | |
| "learning_rate": 4.853287536800785e-05, | |
| "loss": 0.0062, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3042198233562316, | |
| "grad_norm": 0.05778587609529495, | |
| "learning_rate": 4.84838076545633e-05, | |
| "loss": 0.0728, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3140333660451423, | |
| "grad_norm": 0.04121818020939827, | |
| "learning_rate": 4.8434739941118744e-05, | |
| "loss": 0.1023, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.323846908734053, | |
| "grad_norm": 0.049691397696733475, | |
| "learning_rate": 4.838567222767419e-05, | |
| "loss": 0.1547, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3336604514229637, | |
| "grad_norm": 18.06907081604004, | |
| "learning_rate": 4.833660451422964e-05, | |
| "loss": 0.1047, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3434739941118744, | |
| "grad_norm": 4.178352355957031, | |
| "learning_rate": 4.828753680078509e-05, | |
| "loss": 0.0453, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.35328753680078506, | |
| "grad_norm": 0.04574347659945488, | |
| "learning_rate": 4.823846908734053e-05, | |
| "loss": 0.0051, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3631010794896958, | |
| "grad_norm": 0.0428372398018837, | |
| "learning_rate": 4.818940137389598e-05, | |
| "loss": 0.0606, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3729146221786065, | |
| "grad_norm": 1.1302443742752075, | |
| "learning_rate": 4.8140333660451424e-05, | |
| "loss": 0.1306, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.38272816486751715, | |
| "grad_norm": 0.05571115016937256, | |
| "learning_rate": 4.8091265947006866e-05, | |
| "loss": 0.1686, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.39254170755642787, | |
| "grad_norm": 7.147359371185303, | |
| "learning_rate": 4.804219823356232e-05, | |
| "loss": 0.2399, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4023552502453386, | |
| "grad_norm": 15.144064903259277, | |
| "learning_rate": 4.7993130520117764e-05, | |
| "loss": 0.1326, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.41216879293424924, | |
| "grad_norm": 36.820003509521484, | |
| "learning_rate": 4.794406280667321e-05, | |
| "loss": 0.0448, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.42198233562315995, | |
| "grad_norm": 0.14495636522769928, | |
| "learning_rate": 4.7894995093228655e-05, | |
| "loss": 0.1117, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.43179587831207067, | |
| "grad_norm": 0.046849966049194336, | |
| "learning_rate": 4.7845927379784104e-05, | |
| "loss": 0.0092, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.44160942100098133, | |
| "grad_norm": 11.208320617675781, | |
| "learning_rate": 4.7796859666339546e-05, | |
| "loss": 0.0612, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.45142296368989204, | |
| "grad_norm": 0.039328742772340775, | |
| "learning_rate": 4.7747791952895e-05, | |
| "loss": 0.0114, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.46123650637880276, | |
| "grad_norm": 0.06963516771793365, | |
| "learning_rate": 4.7698724239450444e-05, | |
| "loss": 0.0859, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.47105004906771347, | |
| "grad_norm": 0.0437813363969326, | |
| "learning_rate": 4.764965652600589e-05, | |
| "loss": 0.0734, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.48086359175662413, | |
| "grad_norm": 0.039823539555072784, | |
| "learning_rate": 4.7600588812561336e-05, | |
| "loss": 0.0085, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.49067713444553485, | |
| "grad_norm": 0.041767820715904236, | |
| "learning_rate": 4.7551521099116785e-05, | |
| "loss": 0.192, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5004906771344455, | |
| "grad_norm": 0.06061721593141556, | |
| "learning_rate": 4.750245338567223e-05, | |
| "loss": 0.0775, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5103042198233563, | |
| "grad_norm": 0.06972894817590714, | |
| "learning_rate": 4.7453385672227676e-05, | |
| "loss": 0.0186, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5201177625122669, | |
| "grad_norm": 0.07340658456087112, | |
| "learning_rate": 4.7404317958783125e-05, | |
| "loss": 0.1116, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5299313052011776, | |
| "grad_norm": 0.03600945696234703, | |
| "learning_rate": 4.735525024533857e-05, | |
| "loss": 0.0052, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5397448478900884, | |
| "grad_norm": 0.029176561161875725, | |
| "learning_rate": 4.7306182531894016e-05, | |
| "loss": 0.0042, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.549558390578999, | |
| "grad_norm": 0.029673976823687553, | |
| "learning_rate": 4.725711481844946e-05, | |
| "loss": 0.0649, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5593719332679097, | |
| "grad_norm": 0.028555743396282196, | |
| "learning_rate": 4.720804710500491e-05, | |
| "loss": 0.0639, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5691854759568205, | |
| "grad_norm": 5.0781378746032715, | |
| "learning_rate": 4.7158979391560356e-05, | |
| "loss": 0.1382, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5789990186457311, | |
| "grad_norm": 0.05270172283053398, | |
| "learning_rate": 4.7109911678115805e-05, | |
| "loss": 0.042, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5888125613346418, | |
| "grad_norm": 0.03829975053668022, | |
| "learning_rate": 4.706084396467125e-05, | |
| "loss": 0.1485, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5986261040235525, | |
| "grad_norm": 0.03773980960249901, | |
| "learning_rate": 4.7011776251226696e-05, | |
| "loss": 0.0435, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6084396467124632, | |
| "grad_norm": 0.06550557911396027, | |
| "learning_rate": 4.696270853778214e-05, | |
| "loss": 0.1656, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6182531894013739, | |
| "grad_norm": 0.033752694725990295, | |
| "learning_rate": 4.691364082433759e-05, | |
| "loss": 0.0613, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6280667320902846, | |
| "grad_norm": 0.10115873068571091, | |
| "learning_rate": 4.6864573110893036e-05, | |
| "loss": 0.0622, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6378802747791953, | |
| "grad_norm": 0.10119258612394333, | |
| "learning_rate": 4.6815505397448485e-05, | |
| "loss": 0.0049, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.647693817468106, | |
| "grad_norm": 0.029145225882530212, | |
| "learning_rate": 4.676643768400393e-05, | |
| "loss": 0.0446, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6575073601570167, | |
| "grad_norm": 0.026971256360411644, | |
| "learning_rate": 4.671736997055937e-05, | |
| "loss": 0.0035, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6673209028459274, | |
| "grad_norm": 0.05692035332322121, | |
| "learning_rate": 4.666830225711482e-05, | |
| "loss": 0.0051, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.677134445534838, | |
| "grad_norm": 0.023497162386775017, | |
| "learning_rate": 4.661923454367027e-05, | |
| "loss": 0.0031, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6869479882237488, | |
| "grad_norm": 44.54225540161133, | |
| "learning_rate": 4.657016683022572e-05, | |
| "loss": 0.0392, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6967615309126595, | |
| "grad_norm": 79.16250610351562, | |
| "learning_rate": 4.652109911678116e-05, | |
| "loss": 0.034, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7065750736015701, | |
| "grad_norm": 7.570616722106934, | |
| "learning_rate": 4.647203140333661e-05, | |
| "loss": 0.5111, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7163886162904809, | |
| "grad_norm": 0.05000855773687363, | |
| "learning_rate": 4.642296368989205e-05, | |
| "loss": 0.0211, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7262021589793916, | |
| "grad_norm": 0.04305935651063919, | |
| "learning_rate": 4.63738959764475e-05, | |
| "loss": 0.1129, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7360157016683022, | |
| "grad_norm": 0.03774946555495262, | |
| "learning_rate": 4.632482826300295e-05, | |
| "loss": 0.0963, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.745829244357213, | |
| "grad_norm": 0.04534028843045235, | |
| "learning_rate": 4.62757605495584e-05, | |
| "loss": 0.027, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7556427870461236, | |
| "grad_norm": 0.03019464947283268, | |
| "learning_rate": 4.622669283611384e-05, | |
| "loss": 0.0148, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7654563297350343, | |
| "grad_norm": 0.028817512094974518, | |
| "learning_rate": 4.617762512266929e-05, | |
| "loss": 0.1937, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7752698724239451, | |
| "grad_norm": 13.481318473815918, | |
| "learning_rate": 4.612855740922473e-05, | |
| "loss": 0.0143, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7850834151128557, | |
| "grad_norm": 0.029670100659132004, | |
| "learning_rate": 4.607948969578018e-05, | |
| "loss": 0.132, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7948969578017664, | |
| "grad_norm": 0.02657695673406124, | |
| "learning_rate": 4.603042198233563e-05, | |
| "loss": 0.0469, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8047105004906772, | |
| "grad_norm": 17.418657302856445, | |
| "learning_rate": 4.598135426889107e-05, | |
| "loss": 0.2565, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8145240431795878, | |
| "grad_norm": 32.446815490722656, | |
| "learning_rate": 4.593228655544652e-05, | |
| "loss": 0.1315, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8243375858684985, | |
| "grad_norm": 0.07365565747022629, | |
| "learning_rate": 4.588321884200196e-05, | |
| "loss": 0.0099, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8341511285574092, | |
| "grad_norm": 0.027613624930381775, | |
| "learning_rate": 4.583415112855741e-05, | |
| "loss": 0.1194, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8439646712463199, | |
| "grad_norm": 0.0382981114089489, | |
| "learning_rate": 4.578508341511285e-05, | |
| "loss": 0.0584, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8537782139352306, | |
| "grad_norm": 0.03215600922703743, | |
| "learning_rate": 4.573601570166831e-05, | |
| "loss": 0.0699, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8635917566241413, | |
| "grad_norm": 0.09949254989624023, | |
| "learning_rate": 4.568694798822375e-05, | |
| "loss": 0.0052, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.873405299313052, | |
| "grad_norm": 0.024233952164649963, | |
| "learning_rate": 4.56378802747792e-05, | |
| "loss": 0.0739, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8832188420019627, | |
| "grad_norm": 0.04114871844649315, | |
| "learning_rate": 4.558881256133464e-05, | |
| "loss": 0.0629, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8930323846908734, | |
| "grad_norm": 0.02779584936797619, | |
| "learning_rate": 4.553974484789009e-05, | |
| "loss": 0.0137, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9028459273797841, | |
| "grad_norm": 0.02605428174138069, | |
| "learning_rate": 4.549067713444553e-05, | |
| "loss": 0.0669, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9126594700686947, | |
| "grad_norm": 0.02386913076043129, | |
| "learning_rate": 4.544160942100099e-05, | |
| "loss": 0.1172, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9224730127576055, | |
| "grad_norm": 7.601992130279541, | |
| "learning_rate": 4.539254170755643e-05, | |
| "loss": 0.0611, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9322865554465162, | |
| "grad_norm": 37.712459564208984, | |
| "learning_rate": 4.534347399411188e-05, | |
| "loss": 0.0191, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9421000981354269, | |
| "grad_norm": 0.02080320380628109, | |
| "learning_rate": 4.529440628066732e-05, | |
| "loss": 0.0026, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9519136408243376, | |
| "grad_norm": 56.210121154785156, | |
| "learning_rate": 4.5245338567222765e-05, | |
| "loss": 0.0201, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9617271835132483, | |
| "grad_norm": 0.020656289532780647, | |
| "learning_rate": 4.5196270853778214e-05, | |
| "loss": 0.0601, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.971540726202159, | |
| "grad_norm": 2.9532973766326904, | |
| "learning_rate": 4.514720314033366e-05, | |
| "loss": 0.2589, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9813542688910697, | |
| "grad_norm": 0.6973829865455627, | |
| "learning_rate": 4.509813542688911e-05, | |
| "loss": 0.1333, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9911678115799804, | |
| "grad_norm": 0.07507435232400894, | |
| "learning_rate": 4.5049067713444554e-05, | |
| "loss": 0.033, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.000981354268891, | |
| "grad_norm": 0.025814848020672798, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.0752, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.0107948969578018, | |
| "grad_norm": 0.02597714029252529, | |
| "learning_rate": 4.4950932286555445e-05, | |
| "loss": 0.073, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.0206084396467126, | |
| "grad_norm": 0.045735057443380356, | |
| "learning_rate": 4.4901864573110894e-05, | |
| "loss": 0.0649, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.030421982335623, | |
| "grad_norm": 0.022413084283471107, | |
| "learning_rate": 4.485279685966634e-05, | |
| "loss": 0.1111, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0402355250245339, | |
| "grad_norm": 0.07954522222280502, | |
| "learning_rate": 4.480372914622179e-05, | |
| "loss": 0.1623, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.0500490677134446, | |
| "grad_norm": 0.042877595871686935, | |
| "learning_rate": 4.4754661432777234e-05, | |
| "loss": 0.0954, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.0598626104023552, | |
| "grad_norm": 0.3953896462917328, | |
| "learning_rate": 4.470559371933268e-05, | |
| "loss": 0.007, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.069676153091266, | |
| "grad_norm": 0.023617839440703392, | |
| "learning_rate": 4.4656526005888125e-05, | |
| "loss": 0.0037, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.0794896957801767, | |
| "grad_norm": 0.018734032288193703, | |
| "learning_rate": 4.4607458292443574e-05, | |
| "loss": 0.0046, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0893032384690873, | |
| "grad_norm": 0.0192360058426857, | |
| "learning_rate": 4.455839057899902e-05, | |
| "loss": 0.1574, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.099116781157998, | |
| "grad_norm": 0.028046630322933197, | |
| "learning_rate": 4.4509322865554466e-05, | |
| "loss": 0.0033, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.1089303238469088, | |
| "grad_norm": 0.01861531473696232, | |
| "learning_rate": 4.4460255152109915e-05, | |
| "loss": 0.0025, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1187438665358194, | |
| "grad_norm": 0.018167071044445038, | |
| "learning_rate": 4.441118743866536e-05, | |
| "loss": 0.0023, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.1285574092247301, | |
| "grad_norm": 0.07722876965999603, | |
| "learning_rate": 4.4362119725220806e-05, | |
| "loss": 0.058, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.138370951913641, | |
| "grad_norm": 0.018998106941580772, | |
| "learning_rate": 4.4313052011776255e-05, | |
| "loss": 0.0022, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.1481844946025515, | |
| "grad_norm": 0.01866711676120758, | |
| "learning_rate": 4.4263984298331704e-05, | |
| "loss": 0.0025, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.1579980372914622, | |
| "grad_norm": 0.017769252881407738, | |
| "learning_rate": 4.4214916584887146e-05, | |
| "loss": 0.0022, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.167811579980373, | |
| "grad_norm": 0.019811883568763733, | |
| "learning_rate": 4.4165848871442595e-05, | |
| "loss": 0.0019, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.1776251226692835, | |
| "grad_norm": 0.02496548742055893, | |
| "learning_rate": 4.411678115799804e-05, | |
| "loss": 0.0021, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1874386653581943, | |
| "grad_norm": 0.01511597540229559, | |
| "learning_rate": 4.4067713444553486e-05, | |
| "loss": 0.0019, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.197252208047105, | |
| "grad_norm": 0.01455361396074295, | |
| "learning_rate": 4.4018645731108935e-05, | |
| "loss": 0.0019, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.2070657507360156, | |
| "grad_norm": 0.0400017648935318, | |
| "learning_rate": 4.3969578017664384e-05, | |
| "loss": 0.0018, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.2168792934249264, | |
| "grad_norm": 0.016889173537492752, | |
| "learning_rate": 4.3920510304219826e-05, | |
| "loss": 0.1328, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.2266928361138372, | |
| "grad_norm": 0.07678048312664032, | |
| "learning_rate": 4.3871442590775275e-05, | |
| "loss": 0.002, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2365063788027477, | |
| "grad_norm": 0.022459661588072777, | |
| "learning_rate": 4.382237487733072e-05, | |
| "loss": 0.0419, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.2463199214916585, | |
| "grad_norm": 0.015639062970876694, | |
| "learning_rate": 4.377330716388616e-05, | |
| "loss": 0.0021, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.2561334641805693, | |
| "grad_norm": 0.014097293838858604, | |
| "learning_rate": 4.372423945044161e-05, | |
| "loss": 0.013, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.2659470068694798, | |
| "grad_norm": 0.014198847115039825, | |
| "learning_rate": 4.367517173699706e-05, | |
| "loss": 0.0018, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.2757605495583906, | |
| "grad_norm": 0.020636072382330894, | |
| "learning_rate": 4.3626104023552507e-05, | |
| "loss": 0.002, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2855740922473013, | |
| "grad_norm": 0.013957252725958824, | |
| "learning_rate": 4.357703631010795e-05, | |
| "loss": 0.0016, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.295387634936212, | |
| "grad_norm": 0.8039536476135254, | |
| "learning_rate": 4.35279685966634e-05, | |
| "loss": 0.0174, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.3052011776251227, | |
| "grad_norm": 0.034514885395765305, | |
| "learning_rate": 4.347890088321884e-05, | |
| "loss": 0.0018, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3150147203140334, | |
| "grad_norm": 0.0127074820920825, | |
| "learning_rate": 4.342983316977429e-05, | |
| "loss": 0.2055, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.324828263002944, | |
| "grad_norm": 0.09654640406370163, | |
| "learning_rate": 4.338076545632974e-05, | |
| "loss": 0.0653, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3346418056918548, | |
| "grad_norm": 0.018491486087441444, | |
| "learning_rate": 4.333169774288519e-05, | |
| "loss": 0.0019, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.3444553483807655, | |
| "grad_norm": 0.014405413530766964, | |
| "learning_rate": 4.328263002944063e-05, | |
| "loss": 0.0752, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.354268891069676, | |
| "grad_norm": 0.3947644531726837, | |
| "learning_rate": 4.323356231599608e-05, | |
| "loss": 0.0651, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.3640824337585868, | |
| "grad_norm": 0.027137773111462593, | |
| "learning_rate": 4.318449460255152e-05, | |
| "loss": 0.0833, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.3738959764474976, | |
| "grad_norm": 0.03568737953901291, | |
| "learning_rate": 4.313542688910697e-05, | |
| "loss": 0.002, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.3837095191364082, | |
| "grad_norm": 0.14877857267856598, | |
| "learning_rate": 4.308635917566242e-05, | |
| "loss": 0.0691, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.393523061825319, | |
| "grad_norm": 0.018405767157673836, | |
| "learning_rate": 4.303729146221786e-05, | |
| "loss": 0.0029, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.4033366045142297, | |
| "grad_norm": 0.5927426815032959, | |
| "learning_rate": 4.298822374877331e-05, | |
| "loss": 0.0024, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.4131501472031402, | |
| "grad_norm": 0.018540162593126297, | |
| "learning_rate": 4.293915603532875e-05, | |
| "loss": 0.0125, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.422963689892051, | |
| "grad_norm": 27.07039451599121, | |
| "learning_rate": 4.28900883218842e-05, | |
| "loss": 0.125, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.4327772325809618, | |
| "grad_norm": 0.020999347791075706, | |
| "learning_rate": 4.284102060843965e-05, | |
| "loss": 0.0787, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.4425907752698723, | |
| "grad_norm": 0.09069288522005081, | |
| "learning_rate": 4.27919528949951e-05, | |
| "loss": 0.0228, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.452404317958783, | |
| "grad_norm": 0.014280487783253193, | |
| "learning_rate": 4.274288518155054e-05, | |
| "loss": 0.0406, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.4622178606476939, | |
| "grad_norm": 0.014194531366229057, | |
| "learning_rate": 4.269381746810599e-05, | |
| "loss": 0.0024, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.4720314033366044, | |
| "grad_norm": 0.019226528704166412, | |
| "learning_rate": 4.264474975466143e-05, | |
| "loss": 0.0019, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4818449460255152, | |
| "grad_norm": 0.015254977159202099, | |
| "learning_rate": 4.259568204121688e-05, | |
| "loss": 0.0022, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.491658488714426, | |
| "grad_norm": 5.0018768310546875, | |
| "learning_rate": 4.254661432777233e-05, | |
| "loss": 0.1376, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.5014720314033365, | |
| "grad_norm": 0.032981228083372116, | |
| "learning_rate": 4.249754661432778e-05, | |
| "loss": 0.0174, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.5112855740922473, | |
| "grad_norm": 0.011964638717472553, | |
| "learning_rate": 4.244847890088322e-05, | |
| "loss": 0.0028, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.521099116781158, | |
| "grad_norm": 0.011394723318517208, | |
| "learning_rate": 4.239941118743867e-05, | |
| "loss": 0.0514, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.5309126594700686, | |
| "grad_norm": 0.01083845179527998, | |
| "learning_rate": 4.235034347399411e-05, | |
| "loss": 0.0428, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.5407262021589794, | |
| "grad_norm": 0.017966322600841522, | |
| "learning_rate": 4.230127576054956e-05, | |
| "loss": 0.0015, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.5505397448478901, | |
| "grad_norm": 0.029729802161455154, | |
| "learning_rate": 4.225220804710501e-05, | |
| "loss": 0.0021, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.5603532875368007, | |
| "grad_norm": 0.01271316409111023, | |
| "learning_rate": 4.220314033366045e-05, | |
| "loss": 0.1416, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.5701668302257115, | |
| "grad_norm": 0.01406879723072052, | |
| "learning_rate": 4.21540726202159e-05, | |
| "loss": 0.0022, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5799803729146222, | |
| "grad_norm": 0.01311685424298048, | |
| "learning_rate": 4.2105004906771344e-05, | |
| "loss": 0.0505, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.5897939156035328, | |
| "grad_norm": 0.015997188165783882, | |
| "learning_rate": 4.205593719332679e-05, | |
| "loss": 0.1032, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.5996074582924436, | |
| "grad_norm": 0.021411443129181862, | |
| "learning_rate": 4.2006869479882235e-05, | |
| "loss": 0.0024, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.6094210009813543, | |
| "grad_norm": 0.011407027952373028, | |
| "learning_rate": 4.195780176643769e-05, | |
| "loss": 0.0021, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.6192345436702649, | |
| "grad_norm": 0.03794229030609131, | |
| "learning_rate": 4.190873405299313e-05, | |
| "loss": 0.0521, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.6290480863591756, | |
| "grad_norm": 0.012096689082682133, | |
| "learning_rate": 4.185966633954858e-05, | |
| "loss": 0.0017, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.6388616290480864, | |
| "grad_norm": 0.013807197101414204, | |
| "learning_rate": 4.1810598626104024e-05, | |
| "loss": 0.0014, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.648675171736997, | |
| "grad_norm": 0.010036585852503777, | |
| "learning_rate": 4.176153091265947e-05, | |
| "loss": 0.0016, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.6584887144259077, | |
| "grad_norm": 0.009630713611841202, | |
| "learning_rate": 4.1712463199214915e-05, | |
| "loss": 0.0017, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.6683022571148185, | |
| "grad_norm": 0.009277078323066235, | |
| "learning_rate": 4.1663395485770364e-05, | |
| "loss": 0.0421, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.678115799803729, | |
| "grad_norm": 0.01374200638383627, | |
| "learning_rate": 4.161432777232581e-05, | |
| "loss": 0.0755, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.6879293424926398, | |
| "grad_norm": 0.010488603264093399, | |
| "learning_rate": 4.1565260058881255e-05, | |
| "loss": 0.1944, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.6977428851815506, | |
| "grad_norm": 1.660660982131958, | |
| "learning_rate": 4.1516192345436704e-05, | |
| "loss": 0.0907, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.7075564278704611, | |
| "grad_norm": 0.1264234334230423, | |
| "learning_rate": 4.1467124631992147e-05, | |
| "loss": 0.0785, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.717369970559372, | |
| "grad_norm": 0.1920449286699295, | |
| "learning_rate": 4.1418056918547595e-05, | |
| "loss": 0.1021, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.7271835132482827, | |
| "grad_norm": 0.012796717695891857, | |
| "learning_rate": 4.1368989205103044e-05, | |
| "loss": 0.0559, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.7369970559371932, | |
| "grad_norm": 27.7369384765625, | |
| "learning_rate": 4.1319921491658493e-05, | |
| "loss": 0.0922, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.746810598626104, | |
| "grad_norm": 6.674612522125244, | |
| "learning_rate": 4.1270853778213936e-05, | |
| "loss": 0.0059, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.7566241413150148, | |
| "grad_norm": 0.016583973541855812, | |
| "learning_rate": 4.1221786064769385e-05, | |
| "loss": 0.0692, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.7664376840039253, | |
| "grad_norm": 9.963221549987793, | |
| "learning_rate": 4.117271835132483e-05, | |
| "loss": 0.1275, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.776251226692836, | |
| "grad_norm": 0.011029438115656376, | |
| "learning_rate": 4.1123650637880276e-05, | |
| "loss": 0.0016, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.7860647693817469, | |
| "grad_norm": 0.019155096262693405, | |
| "learning_rate": 4.1074582924435725e-05, | |
| "loss": 0.0013, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.7958783120706574, | |
| "grad_norm": 0.011651580221951008, | |
| "learning_rate": 4.1025515210991174e-05, | |
| "loss": 0.0022, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.8056918547595682, | |
| "grad_norm": 0.08313179016113281, | |
| "learning_rate": 4.0976447497546616e-05, | |
| "loss": 0.0016, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.815505397448479, | |
| "grad_norm": 0.0103986244648695, | |
| "learning_rate": 4.092737978410206e-05, | |
| "loss": 0.0791, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.8253189401373895, | |
| "grad_norm": 0.009013223461806774, | |
| "learning_rate": 4.087831207065751e-05, | |
| "loss": 0.0582, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.8351324828263003, | |
| "grad_norm": 0.010367879644036293, | |
| "learning_rate": 4.0829244357212956e-05, | |
| "loss": 0.0111, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.844946025515211, | |
| "grad_norm": 0.03960138186812401, | |
| "learning_rate": 4.0780176643768405e-05, | |
| "loss": 0.0027, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.8547595682041216, | |
| "grad_norm": 8.385934829711914, | |
| "learning_rate": 4.073110893032385e-05, | |
| "loss": 0.0666, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.8645731108930323, | |
| "grad_norm": 0.008534184657037258, | |
| "learning_rate": 4.0682041216879296e-05, | |
| "loss": 0.0012, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.8743866535819431, | |
| "grad_norm": 0.009065428748726845, | |
| "learning_rate": 4.063297350343474e-05, | |
| "loss": 0.0017, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.8842001962708537, | |
| "grad_norm": 0.6313449144363403, | |
| "learning_rate": 4.058390578999019e-05, | |
| "loss": 0.13, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.8940137389597644, | |
| "grad_norm": 7.004214286804199, | |
| "learning_rate": 4.0534838076545636e-05, | |
| "loss": 0.1427, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.9038272816486752, | |
| "grad_norm": 0.045783668756484985, | |
| "learning_rate": 4.0485770363101085e-05, | |
| "loss": 0.1027, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.9136408243375858, | |
| "grad_norm": 0.28262466192245483, | |
| "learning_rate": 4.043670264965653e-05, | |
| "loss": 0.0067, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.9234543670264965, | |
| "grad_norm": 0.010344293899834156, | |
| "learning_rate": 4.038763493621198e-05, | |
| "loss": 0.0525, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.9332679097154073, | |
| "grad_norm": 0.06860088557004929, | |
| "learning_rate": 4.033856722276742e-05, | |
| "loss": 0.1299, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.9430814524043178, | |
| "grad_norm": 1.1787910461425781, | |
| "learning_rate": 4.028949950932287e-05, | |
| "loss": 0.123, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.9528949950932286, | |
| "grad_norm": 0.008300206623971462, | |
| "learning_rate": 4.024043179587832e-05, | |
| "loss": 0.0547, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.9627085377821394, | |
| "grad_norm": 19.000707626342773, | |
| "learning_rate": 4.019136408243376e-05, | |
| "loss": 0.1152, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.97252208047105, | |
| "grad_norm": 0.17480367422103882, | |
| "learning_rate": 4.014229636898921e-05, | |
| "loss": 0.0875, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.9823356231599607, | |
| "grad_norm": 0.014936638996005058, | |
| "learning_rate": 4.009322865554465e-05, | |
| "loss": 0.0463, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.9921491658488715, | |
| "grad_norm": 0.04136461392045021, | |
| "learning_rate": 4.00441609421001e-05, | |
| "loss": 0.0205, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.001962708537782, | |
| "grad_norm": 0.008648752234876156, | |
| "learning_rate": 3.999509322865554e-05, | |
| "loss": 0.0015, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.011776251226693, | |
| "grad_norm": 0.03223758190870285, | |
| "learning_rate": 3.9946025515211e-05, | |
| "loss": 0.0015, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.0215897939156036, | |
| "grad_norm": 0.07674010843038559, | |
| "learning_rate": 3.989695780176644e-05, | |
| "loss": 0.0437, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.031403336604514, | |
| "grad_norm": 0.00833066739141941, | |
| "learning_rate": 3.984789008832189e-05, | |
| "loss": 0.0073, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.041216879293425, | |
| "grad_norm": 0.04498624801635742, | |
| "learning_rate": 3.979882237487733e-05, | |
| "loss": 0.001, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.0510304219823356, | |
| "grad_norm": 0.00966518186032772, | |
| "learning_rate": 3.974975466143278e-05, | |
| "loss": 0.0009, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.060843964671246, | |
| "grad_norm": 0.00706259673461318, | |
| "learning_rate": 3.970068694798822e-05, | |
| "loss": 0.0399, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.070657507360157, | |
| "grad_norm": 0.8026844263076782, | |
| "learning_rate": 3.965161923454368e-05, | |
| "loss": 0.0022, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.0804710500490677, | |
| "grad_norm": 0.006883909460157156, | |
| "learning_rate": 3.960255152109912e-05, | |
| "loss": 0.0477, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.0902845927379783, | |
| "grad_norm": 0.007603227626532316, | |
| "learning_rate": 3.955348380765457e-05, | |
| "loss": 0.001, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.1000981354268893, | |
| "grad_norm": 0.007116556167602539, | |
| "learning_rate": 3.950441609421001e-05, | |
| "loss": 0.1037, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.1099116781158, | |
| "grad_norm": 0.01041839923709631, | |
| "learning_rate": 3.945534838076545e-05, | |
| "loss": 0.002, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.1197252208047104, | |
| "grad_norm": 0.007161868270486593, | |
| "learning_rate": 3.94062806673209e-05, | |
| "loss": 0.0019, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.1295387634936214, | |
| "grad_norm": 5.720839977264404, | |
| "learning_rate": 3.935721295387635e-05, | |
| "loss": 0.0342, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.139352306182532, | |
| "grad_norm": 0.006590616423636675, | |
| "learning_rate": 3.93081452404318e-05, | |
| "loss": 0.0009, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.1491658488714425, | |
| "grad_norm": 0.006484217941761017, | |
| "learning_rate": 3.925907752698724e-05, | |
| "loss": 0.0009, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.1589793915603535, | |
| "grad_norm": 0.006214112509042025, | |
| "learning_rate": 3.921000981354269e-05, | |
| "loss": 0.0007, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.168792934249264, | |
| "grad_norm": 0.006126193795353174, | |
| "learning_rate": 3.9160942100098133e-05, | |
| "loss": 0.0007, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.1786064769381746, | |
| "grad_norm": 0.006230359897017479, | |
| "learning_rate": 3.911187438665358e-05, | |
| "loss": 0.0008, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.1884200196270855, | |
| "grad_norm": 0.0061959754675626755, | |
| "learning_rate": 3.906280667320903e-05, | |
| "loss": 0.0008, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.198233562315996, | |
| "grad_norm": 0.006188957951962948, | |
| "learning_rate": 3.901373895976448e-05, | |
| "loss": 0.0007, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.2080471050049066, | |
| "grad_norm": 0.005858860444277525, | |
| "learning_rate": 3.896467124631992e-05, | |
| "loss": 0.0705, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.2178606476938176, | |
| "grad_norm": 0.006062773987650871, | |
| "learning_rate": 3.891560353287537e-05, | |
| "loss": 0.0526, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.227674190382728, | |
| "grad_norm": 0.005856741685420275, | |
| "learning_rate": 3.8866535819430814e-05, | |
| "loss": 0.0007, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.2374877330716387, | |
| "grad_norm": 0.009888865053653717, | |
| "learning_rate": 3.881746810598626e-05, | |
| "loss": 0.0467, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.2473012757605497, | |
| "grad_norm": 31.81001853942871, | |
| "learning_rate": 3.876840039254171e-05, | |
| "loss": 0.008, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.2571148184494603, | |
| "grad_norm": 0.006301484536379576, | |
| "learning_rate": 3.8719332679097154e-05, | |
| "loss": 0.0007, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.266928361138371, | |
| "grad_norm": 0.006280009169131517, | |
| "learning_rate": 3.86702649656526e-05, | |
| "loss": 0.001, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.276741903827282, | |
| "grad_norm": 1.7603585720062256, | |
| "learning_rate": 3.8621197252208045e-05, | |
| "loss": 0.1061, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.2865554465161924, | |
| "grad_norm": 0.006645340472459793, | |
| "learning_rate": 3.8572129538763494e-05, | |
| "loss": 0.0007, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.296368989205103, | |
| "grad_norm": 0.008996882475912571, | |
| "learning_rate": 3.852306182531894e-05, | |
| "loss": 0.0857, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.306182531894014, | |
| "grad_norm": 16.020790100097656, | |
| "learning_rate": 3.847399411187439e-05, | |
| "loss": 0.0797, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.3159960745829244, | |
| "grad_norm": 30.399795532226562, | |
| "learning_rate": 3.8424926398429834e-05, | |
| "loss": 0.0316, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.325809617271835, | |
| "grad_norm": 0.019369609653949738, | |
| "learning_rate": 3.837585868498528e-05, | |
| "loss": 0.0244, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.335623159960746, | |
| "grad_norm": 0.009330210275948048, | |
| "learning_rate": 3.8326790971540725e-05, | |
| "loss": 0.0308, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.3454367026496565, | |
| "grad_norm": 0.016002874821424484, | |
| "learning_rate": 3.8277723258096174e-05, | |
| "loss": 0.0056, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.355250245338567, | |
| "grad_norm": 0.008927385322749615, | |
| "learning_rate": 3.8228655544651623e-05, | |
| "loss": 0.001, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.365063788027478, | |
| "grad_norm": 0.010494639165699482, | |
| "learning_rate": 3.817958783120707e-05, | |
| "loss": 0.0014, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.3748773307163886, | |
| "grad_norm": 0.007917719893157482, | |
| "learning_rate": 3.8130520117762515e-05, | |
| "loss": 0.0009, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.384690873405299, | |
| "grad_norm": 0.005997834727168083, | |
| "learning_rate": 3.8081452404317964e-05, | |
| "loss": 0.001, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.39450441609421, | |
| "grad_norm": 0.006399065256118774, | |
| "learning_rate": 3.8032384690873406e-05, | |
| "loss": 0.0008, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.4043179587831207, | |
| "grad_norm": 0.010832864791154861, | |
| "learning_rate": 3.798331697742885e-05, | |
| "loss": 0.0016, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.4141315014720313, | |
| "grad_norm": 0.007472364231944084, | |
| "learning_rate": 3.7934249263984304e-05, | |
| "loss": 0.0007, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.4239450441609423, | |
| "grad_norm": 0.005750945303589106, | |
| "learning_rate": 3.7885181550539746e-05, | |
| "loss": 0.0007, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.433758586849853, | |
| "grad_norm": 0.00842629000544548, | |
| "learning_rate": 3.7836113837095195e-05, | |
| "loss": 0.0009, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.4435721295387633, | |
| "grad_norm": 1.4052761793136597, | |
| "learning_rate": 3.778704612365064e-05, | |
| "loss": 0.1304, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.4533856722276743, | |
| "grad_norm": 0.007391956634819508, | |
| "learning_rate": 3.7737978410206086e-05, | |
| "loss": 0.0007, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.463199214916585, | |
| "grad_norm": 20.43938446044922, | |
| "learning_rate": 3.768891069676153e-05, | |
| "loss": 0.1268, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.4730127576054954, | |
| "grad_norm": 0.05450147017836571, | |
| "learning_rate": 3.7639842983316984e-05, | |
| "loss": 0.0139, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.4828263002944064, | |
| "grad_norm": 0.03355271369218826, | |
| "learning_rate": 3.7590775269872426e-05, | |
| "loss": 0.0147, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.492639842983317, | |
| "grad_norm": 0.007103215903043747, | |
| "learning_rate": 3.7541707556427875e-05, | |
| "loss": 0.1271, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.5024533856722275, | |
| "grad_norm": 0.007379031740128994, | |
| "learning_rate": 3.749263984298332e-05, | |
| "loss": 0.0009, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.5122669283611385, | |
| "grad_norm": 0.2919241786003113, | |
| "learning_rate": 3.7443572129538766e-05, | |
| "loss": 0.0025, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.522080471050049, | |
| "grad_norm": 0.01704682782292366, | |
| "learning_rate": 3.739450441609421e-05, | |
| "loss": 0.0007, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.5318940137389596, | |
| "grad_norm": 0.005663587246090174, | |
| "learning_rate": 3.734543670264966e-05, | |
| "loss": 0.001, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.5417075564278706, | |
| "grad_norm": 0.006800634786486626, | |
| "learning_rate": 3.7296368989205107e-05, | |
| "loss": 0.0006, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.551521099116781, | |
| "grad_norm": 0.0049642156809568405, | |
| "learning_rate": 3.724730127576055e-05, | |
| "loss": 0.0007, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.5613346418056917, | |
| "grad_norm": 0.0051542771980166435, | |
| "learning_rate": 3.7198233562316e-05, | |
| "loss": 0.0006, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.5711481844946027, | |
| "grad_norm": 0.005128064192831516, | |
| "learning_rate": 3.714916584887144e-05, | |
| "loss": 0.0006, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.5809617271835132, | |
| "grad_norm": 0.005018030758947134, | |
| "learning_rate": 3.710009813542689e-05, | |
| "loss": 0.0007, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.590775269872424, | |
| "grad_norm": 0.004934113007038832, | |
| "learning_rate": 3.705103042198234e-05, | |
| "loss": 0.0006, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.600588812561335, | |
| "grad_norm": 0.004958492703735828, | |
| "learning_rate": 3.700196270853779e-05, | |
| "loss": 0.0006, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.6104023552502453, | |
| "grad_norm": 0.0050879898481070995, | |
| "learning_rate": 3.695289499509323e-05, | |
| "loss": 0.0006, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.620215897939156, | |
| "grad_norm": 0.004783379379659891, | |
| "learning_rate": 3.690382728164868e-05, | |
| "loss": 0.0006, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.630029440628067, | |
| "grad_norm": 0.004975931718945503, | |
| "learning_rate": 3.685475956820412e-05, | |
| "loss": 0.0006, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.6398429833169774, | |
| "grad_norm": 0.006240040063858032, | |
| "learning_rate": 3.680569185475957e-05, | |
| "loss": 0.0006, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.649656526005888, | |
| "grad_norm": 0.0050759222358465195, | |
| "learning_rate": 3.675662414131502e-05, | |
| "loss": 0.0006, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.659470068694799, | |
| "grad_norm": 0.004622638691216707, | |
| "learning_rate": 3.670755642787047e-05, | |
| "loss": 0.0005, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.6692836113837095, | |
| "grad_norm": 0.005237213335931301, | |
| "learning_rate": 3.665848871442591e-05, | |
| "loss": 0.0526, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.67909715407262, | |
| "grad_norm": 0.15502117574214935, | |
| "learning_rate": 3.660942100098136e-05, | |
| "loss": 0.0014, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.688910696761531, | |
| "grad_norm": 0.004649411886930466, | |
| "learning_rate": 3.65603532875368e-05, | |
| "loss": 0.0327, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.6987242394504416, | |
| "grad_norm": 0.004374220035970211, | |
| "learning_rate": 3.651128557409225e-05, | |
| "loss": 0.0019, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.708537782139352, | |
| "grad_norm": 7.425215244293213, | |
| "learning_rate": 3.64622178606477e-05, | |
| "loss": 0.0813, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.718351324828263, | |
| "grad_norm": 0.004420330747961998, | |
| "learning_rate": 3.641315014720314e-05, | |
| "loss": 0.0006, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.7281648675171737, | |
| "grad_norm": 0.004426442552357912, | |
| "learning_rate": 3.636408243375859e-05, | |
| "loss": 0.0008, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.7379784102060842, | |
| "grad_norm": 0.005173469893634319, | |
| "learning_rate": 3.631501472031403e-05, | |
| "loss": 0.0006, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.7477919528949952, | |
| "grad_norm": 0.0050672367215156555, | |
| "learning_rate": 3.626594700686948e-05, | |
| "loss": 0.0853, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.7576054955839058, | |
| "grad_norm": 0.005417036823928356, | |
| "learning_rate": 3.621687929342493e-05, | |
| "loss": 0.001, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.7674190382728163, | |
| "grad_norm": 0.005575211253017187, | |
| "learning_rate": 3.616781157998038e-05, | |
| "loss": 0.0007, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.7772325809617273, | |
| "grad_norm": 0.0057277195155620575, | |
| "learning_rate": 3.611874386653582e-05, | |
| "loss": 0.0006, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.787046123650638, | |
| "grad_norm": 0.005416057072579861, | |
| "learning_rate": 3.606967615309127e-05, | |
| "loss": 0.0006, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.7968596663395484, | |
| "grad_norm": 0.004573365673422813, | |
| "learning_rate": 3.602060843964671e-05, | |
| "loss": 0.0011, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.8066732090284594, | |
| "grad_norm": 0.0056626503355801105, | |
| "learning_rate": 3.597154072620216e-05, | |
| "loss": 0.001, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.81648675171737, | |
| "grad_norm": 0.006272735074162483, | |
| "learning_rate": 3.592247301275761e-05, | |
| "loss": 0.0005, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.8263002944062805, | |
| "grad_norm": 0.004290241748094559, | |
| "learning_rate": 3.587340529931306e-05, | |
| "loss": 0.0006, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.8361138370951915, | |
| "grad_norm": 0.0073272231966257095, | |
| "learning_rate": 3.58243375858685e-05, | |
| "loss": 0.059, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.845927379784102, | |
| "grad_norm": 0.0045128497295081615, | |
| "learning_rate": 3.5775269872423944e-05, | |
| "loss": 0.0773, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.8557409224730126, | |
| "grad_norm": 0.005028576590120792, | |
| "learning_rate": 3.572620215897939e-05, | |
| "loss": 0.0008, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.8655544651619236, | |
| "grad_norm": 0.004786403849720955, | |
| "learning_rate": 3.5677134445534835e-05, | |
| "loss": 0.0855, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.875368007850834, | |
| "grad_norm": 0.02878345362842083, | |
| "learning_rate": 3.5628066732090284e-05, | |
| "loss": 0.0006, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.8851815505397447, | |
| "grad_norm": 32.582359313964844, | |
| "learning_rate": 3.557899901864573e-05, | |
| "loss": 0.0652, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.8949950932286557, | |
| "grad_norm": 0.06951310485601425, | |
| "learning_rate": 3.552993130520118e-05, | |
| "loss": 0.033, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.904808635917566, | |
| "grad_norm": 0.00533737288787961, | |
| "learning_rate": 3.5480863591756624e-05, | |
| "loss": 0.0057, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.9146221786064768, | |
| "grad_norm": 0.005290019791573286, | |
| "learning_rate": 3.543179587831207e-05, | |
| "loss": 0.095, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.9244357212953878, | |
| "grad_norm": 0.0044818902388215065, | |
| "learning_rate": 3.5382728164867515e-05, | |
| "loss": 0.0009, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.9342492639842983, | |
| "grad_norm": 0.005349620245397091, | |
| "learning_rate": 3.5333660451422964e-05, | |
| "loss": 0.0829, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.944062806673209, | |
| "grad_norm": 0.011460080742835999, | |
| "learning_rate": 3.528459273797841e-05, | |
| "loss": 0.0529, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.95387634936212, | |
| "grad_norm": 0.0047313557006418705, | |
| "learning_rate": 3.523552502453386e-05, | |
| "loss": 0.0037, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.9636898920510304, | |
| "grad_norm": 0.01534937135875225, | |
| "learning_rate": 3.5186457311089304e-05, | |
| "loss": 0.1236, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.973503434739941, | |
| "grad_norm": 0.007522872183471918, | |
| "learning_rate": 3.5137389597644747e-05, | |
| "loss": 0.0115, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.983316977428852, | |
| "grad_norm": 0.024374373257160187, | |
| "learning_rate": 3.5088321884200196e-05, | |
| "loss": 0.0008, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.9931305201177625, | |
| "grad_norm": 0.08516921103000641, | |
| "learning_rate": 3.5039254170755645e-05, | |
| "loss": 0.0956, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 3.002944062806673, | |
| "grad_norm": 0.005535255651921034, | |
| "learning_rate": 3.4990186457311094e-05, | |
| "loss": 0.0519, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.012757605495584, | |
| "grad_norm": 0.015444884076714516, | |
| "learning_rate": 3.4941118743866536e-05, | |
| "loss": 0.0013, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 3.0225711481844946, | |
| "grad_norm": 0.00661628320813179, | |
| "learning_rate": 3.4892051030421985e-05, | |
| "loss": 0.0008, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 3.032384690873405, | |
| "grad_norm": 0.01968499645590782, | |
| "learning_rate": 3.484298331697743e-05, | |
| "loss": 0.0041, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 3.042198233562316, | |
| "grad_norm": 0.004277428146451712, | |
| "learning_rate": 3.4793915603532876e-05, | |
| "loss": 0.0104, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.0520117762512267, | |
| "grad_norm": 0.007642016280442476, | |
| "learning_rate": 3.4744847890088325e-05, | |
| "loss": 0.0006, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 3.061825318940137, | |
| "grad_norm": 0.004083346109837294, | |
| "learning_rate": 3.4695780176643774e-05, | |
| "loss": 0.128, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 3.071638861629048, | |
| "grad_norm": 0.01271857414394617, | |
| "learning_rate": 3.4646712463199216e-05, | |
| "loss": 0.0659, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 3.0814524043179587, | |
| "grad_norm": 0.009639259427785873, | |
| "learning_rate": 3.4597644749754665e-05, | |
| "loss": 0.0424, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 3.0912659470068693, | |
| "grad_norm": 0.023669827729463577, | |
| "learning_rate": 3.454857703631011e-05, | |
| "loss": 0.0309, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 3.1010794896957803, | |
| "grad_norm": 0.004919820465147495, | |
| "learning_rate": 3.4499509322865556e-05, | |
| "loss": 0.0146, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 3.110893032384691, | |
| "grad_norm": 0.003851011861115694, | |
| "learning_rate": 3.4450441609421005e-05, | |
| "loss": 0.0006, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 3.1207065750736014, | |
| "grad_norm": 0.005380318965762854, | |
| "learning_rate": 3.440137389597645e-05, | |
| "loss": 0.0177, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 3.1305201177625124, | |
| "grad_norm": 0.00603041285648942, | |
| "learning_rate": 3.4352306182531896e-05, | |
| "loss": 0.0006, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 3.140333660451423, | |
| "grad_norm": 0.003694745246320963, | |
| "learning_rate": 3.430323846908734e-05, | |
| "loss": 0.0007, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.1501472031403335, | |
| "grad_norm": 0.009091987274587154, | |
| "learning_rate": 3.425417075564279e-05, | |
| "loss": 0.0046, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.1599607458292445, | |
| "grad_norm": 44.486915588378906, | |
| "learning_rate": 3.4205103042198237e-05, | |
| "loss": 0.0156, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.169774288518155, | |
| "grad_norm": 0.003716795239597559, | |
| "learning_rate": 3.4156035328753686e-05, | |
| "loss": 0.0006, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.1795878312070656, | |
| "grad_norm": 0.010979007929563522, | |
| "learning_rate": 3.410696761530913e-05, | |
| "loss": 0.0006, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.1894013738959766, | |
| "grad_norm": 0.0035946909338235855, | |
| "learning_rate": 3.405789990186458e-05, | |
| "loss": 0.0526, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.199214916584887, | |
| "grad_norm": 0.0067933835089206696, | |
| "learning_rate": 3.400883218842002e-05, | |
| "loss": 0.0004, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.2090284592737977, | |
| "grad_norm": 0.0035232524387538433, | |
| "learning_rate": 3.395976447497547e-05, | |
| "loss": 0.0009, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.2188420019627086, | |
| "grad_norm": 0.02211836725473404, | |
| "learning_rate": 3.391069676153091e-05, | |
| "loss": 0.0004, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 3.228655544651619, | |
| "grad_norm": 0.0037303888238966465, | |
| "learning_rate": 3.3861629048086366e-05, | |
| "loss": 0.0006, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 3.2384690873405297, | |
| "grad_norm": 0.007376148831099272, | |
| "learning_rate": 3.381256133464181e-05, | |
| "loss": 0.0004, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.2482826300294407, | |
| "grad_norm": 0.003410831792280078, | |
| "learning_rate": 3.376349362119726e-05, | |
| "loss": 0.0004, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 3.2580961727183513, | |
| "grad_norm": 0.0033686254173517227, | |
| "learning_rate": 3.37144259077527e-05, | |
| "loss": 0.0004, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 3.267909715407262, | |
| "grad_norm": 0.0036628427915275097, | |
| "learning_rate": 3.366535819430814e-05, | |
| "loss": 0.0004, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 3.277723258096173, | |
| "grad_norm": 0.0034903271589428186, | |
| "learning_rate": 3.361629048086359e-05, | |
| "loss": 0.0676, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 3.2875368007850834, | |
| "grad_norm": 0.007418110966682434, | |
| "learning_rate": 3.356722276741904e-05, | |
| "loss": 0.0998, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 3.297350343473994, | |
| "grad_norm": 0.003807367756962776, | |
| "learning_rate": 3.351815505397449e-05, | |
| "loss": 0.0004, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 3.307163886162905, | |
| "grad_norm": 0.006307406350970268, | |
| "learning_rate": 3.346908734052993e-05, | |
| "loss": 0.0546, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 3.3169774288518155, | |
| "grad_norm": 0.004091127309948206, | |
| "learning_rate": 3.342001962708538e-05, | |
| "loss": 0.0688, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 3.326790971540726, | |
| "grad_norm": 0.008122970350086689, | |
| "learning_rate": 3.337095191364082e-05, | |
| "loss": 0.0535, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 3.336604514229637, | |
| "grad_norm": 0.2856459617614746, | |
| "learning_rate": 3.332188420019627e-05, | |
| "loss": 0.0479, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.3464180569185475, | |
| "grad_norm": 0.011355056427419186, | |
| "learning_rate": 3.327281648675172e-05, | |
| "loss": 0.062, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 3.356231599607458, | |
| "grad_norm": 0.010982933454215527, | |
| "learning_rate": 3.322374877330717e-05, | |
| "loss": 0.119, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 3.366045142296369, | |
| "grad_norm": 0.14039351046085358, | |
| "learning_rate": 3.317468105986261e-05, | |
| "loss": 0.0129, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 3.3758586849852796, | |
| "grad_norm": 0.005223874468356371, | |
| "learning_rate": 3.312561334641806e-05, | |
| "loss": 0.0417, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 3.38567222767419, | |
| "grad_norm": 0.0041849189437925816, | |
| "learning_rate": 3.30765456329735e-05, | |
| "loss": 0.036, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 3.395485770363101, | |
| "grad_norm": 0.004221642389893532, | |
| "learning_rate": 3.302747791952895e-05, | |
| "loss": 0.0021, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 3.4052993130520117, | |
| "grad_norm": 57.141910552978516, | |
| "learning_rate": 3.29784102060844e-05, | |
| "loss": 0.0982, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 3.4151128557409223, | |
| "grad_norm": 0.009060889482498169, | |
| "learning_rate": 3.292934249263984e-05, | |
| "loss": 0.0006, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 3.4249263984298333, | |
| "grad_norm": 0.003756599733605981, | |
| "learning_rate": 3.288027477919529e-05, | |
| "loss": 0.0033, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 3.434739941118744, | |
| "grad_norm": 0.0041136653162539005, | |
| "learning_rate": 3.2831207065750733e-05, | |
| "loss": 0.0015, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.4445534838076544, | |
| "grad_norm": 0.003665735013782978, | |
| "learning_rate": 3.278213935230618e-05, | |
| "loss": 0.0614, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 3.4543670264965654, | |
| "grad_norm": 0.003554809372872114, | |
| "learning_rate": 3.273307163886163e-05, | |
| "loss": 0.001, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 3.464180569185476, | |
| "grad_norm": 0.0034583976957947016, | |
| "learning_rate": 3.268400392541708e-05, | |
| "loss": 0.0008, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 3.4739941118743864, | |
| "grad_norm": 0.003728943644091487, | |
| "learning_rate": 3.263493621197252e-05, | |
| "loss": 0.0004, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 3.4838076545632974, | |
| "grad_norm": 0.003582128556445241, | |
| "learning_rate": 3.258586849852797e-05, | |
| "loss": 0.0004, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 3.493621197252208, | |
| "grad_norm": 0.0033694806043058634, | |
| "learning_rate": 3.2536800785083414e-05, | |
| "loss": 0.0004, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 3.5034347399411185, | |
| "grad_norm": 0.10974390059709549, | |
| "learning_rate": 3.248773307163886e-05, | |
| "loss": 0.0004, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 3.5132482826300295, | |
| "grad_norm": 0.003454001620411873, | |
| "learning_rate": 3.243866535819431e-05, | |
| "loss": 0.0004, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 3.52306182531894, | |
| "grad_norm": 0.0036948120687156916, | |
| "learning_rate": 3.238959764474976e-05, | |
| "loss": 0.0657, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 3.5328753680078506, | |
| "grad_norm": 0.0033204422798007727, | |
| "learning_rate": 3.23405299313052e-05, | |
| "loss": 0.0604, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.5426889106967616, | |
| "grad_norm": 0.00640474446117878, | |
| "learning_rate": 3.229146221786065e-05, | |
| "loss": 0.0013, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 3.552502453385672, | |
| "grad_norm": 0.0037864702753722668, | |
| "learning_rate": 3.2242394504416094e-05, | |
| "loss": 0.0388, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 3.5623159960745827, | |
| "grad_norm": 0.0036447476595640182, | |
| "learning_rate": 3.2193326790971536e-05, | |
| "loss": 0.0015, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 3.5721295387634937, | |
| "grad_norm": 0.004586994647979736, | |
| "learning_rate": 3.214425907752699e-05, | |
| "loss": 0.0008, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 3.5819430814524043, | |
| "grad_norm": 0.006420999765396118, | |
| "learning_rate": 3.2095191364082434e-05, | |
| "loss": 0.081, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 3.591756624141315, | |
| "grad_norm": 0.037869326770305634, | |
| "learning_rate": 3.204612365063788e-05, | |
| "loss": 0.0027, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 3.601570166830226, | |
| "grad_norm": 0.0033209428656846285, | |
| "learning_rate": 3.1997055937193325e-05, | |
| "loss": 0.0004, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 3.6113837095191363, | |
| "grad_norm": 0.0032525446731597185, | |
| "learning_rate": 3.1947988223748774e-05, | |
| "loss": 0.0004, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 3.621197252208047, | |
| "grad_norm": 0.0034604640677571297, | |
| "learning_rate": 3.189892051030422e-05, | |
| "loss": 0.0004, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 3.631010794896958, | |
| "grad_norm": 0.0048661488108336926, | |
| "learning_rate": 3.184985279685967e-05, | |
| "loss": 0.0006, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.6408243375858684, | |
| "grad_norm": 0.003736069891601801, | |
| "learning_rate": 3.1800785083415115e-05, | |
| "loss": 0.0004, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 3.650637880274779, | |
| "grad_norm": 0.0031651423778384924, | |
| "learning_rate": 3.1751717369970564e-05, | |
| "loss": 0.0004, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 3.66045142296369, | |
| "grad_norm": 0.0032348737586289644, | |
| "learning_rate": 3.1702649656526006e-05, | |
| "loss": 0.0004, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 3.6702649656526005, | |
| "grad_norm": 0.003265490522608161, | |
| "learning_rate": 3.1653581943081455e-05, | |
| "loss": 0.0004, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 3.680078508341511, | |
| "grad_norm": 0.18621397018432617, | |
| "learning_rate": 3.16045142296369e-05, | |
| "loss": 0.0432, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.689892051030422, | |
| "grad_norm": 49.72319793701172, | |
| "learning_rate": 3.155544651619235e-05, | |
| "loss": 0.0419, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 3.6997055937193326, | |
| "grad_norm": 0.003202399704605341, | |
| "learning_rate": 3.1506378802747795e-05, | |
| "loss": 0.0005, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 3.709519136408243, | |
| "grad_norm": 0.003484070301055908, | |
| "learning_rate": 3.145731108930324e-05, | |
| "loss": 0.0004, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 3.719332679097154, | |
| "grad_norm": 0.003013091627508402, | |
| "learning_rate": 3.1408243375858686e-05, | |
| "loss": 0.0004, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 3.7291462217860647, | |
| "grad_norm": 0.0030194155406206846, | |
| "learning_rate": 3.135917566241413e-05, | |
| "loss": 0.0008, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.7389597644749752, | |
| "grad_norm": 0.0030365772545337677, | |
| "learning_rate": 3.131010794896958e-05, | |
| "loss": 0.0004, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 3.7487733071638862, | |
| "grad_norm": 0.002989945001900196, | |
| "learning_rate": 3.1261040235525026e-05, | |
| "loss": 0.0006, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 3.758586849852797, | |
| "grad_norm": 0.0031468605156987906, | |
| "learning_rate": 3.1211972522080475e-05, | |
| "loss": 0.0005, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 3.7684003925417073, | |
| "grad_norm": 0.004800264723598957, | |
| "learning_rate": 3.116290480863592e-05, | |
| "loss": 0.0108, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 3.7782139352306183, | |
| "grad_norm": 0.0044929636642336845, | |
| "learning_rate": 3.1113837095191366e-05, | |
| "loss": 0.0009, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 3.788027477919529, | |
| "grad_norm": 0.0028576962649822235, | |
| "learning_rate": 3.106476938174681e-05, | |
| "loss": 0.0003, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 3.7978410206084394, | |
| "grad_norm": 0.0031541618518531322, | |
| "learning_rate": 3.101570166830226e-05, | |
| "loss": 0.0003, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 3.8076545632973504, | |
| "grad_norm": 0.0027680331841111183, | |
| "learning_rate": 3.096663395485771e-05, | |
| "loss": 0.0004, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 3.817468105986261, | |
| "grad_norm": 0.0027752595487982035, | |
| "learning_rate": 3.0917566241413156e-05, | |
| "loss": 0.0003, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 3.8272816486751715, | |
| "grad_norm": 0.0027524100150913, | |
| "learning_rate": 3.08684985279686e-05, | |
| "loss": 0.0003, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.8370951913640825, | |
| "grad_norm": 0.0028699261602014303, | |
| "learning_rate": 3.081943081452405e-05, | |
| "loss": 0.0005, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 3.846908734052993, | |
| "grad_norm": 0.002797529799863696, | |
| "learning_rate": 3.077036310107949e-05, | |
| "loss": 0.0003, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 3.8567222767419036, | |
| "grad_norm": 0.002745892619714141, | |
| "learning_rate": 3.072129538763494e-05, | |
| "loss": 0.0003, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 3.8665358194308146, | |
| "grad_norm": 0.0030794497579336166, | |
| "learning_rate": 3.067222767419039e-05, | |
| "loss": 0.1156, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 3.876349362119725, | |
| "grad_norm": 0.007240855600684881, | |
| "learning_rate": 3.062315996074583e-05, | |
| "loss": 0.0018, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 3.8861629048086357, | |
| "grad_norm": 0.0031701885163784027, | |
| "learning_rate": 3.057409224730128e-05, | |
| "loss": 0.0008, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 3.8959764474975467, | |
| "grad_norm": 0.007171397563070059, | |
| "learning_rate": 3.052502453385672e-05, | |
| "loss": 0.0545, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 3.9057899901864572, | |
| "grad_norm": 0.0026376626919955015, | |
| "learning_rate": 3.0475956820412173e-05, | |
| "loss": 0.1456, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 3.9156035328753678, | |
| "grad_norm": 0.004865538328886032, | |
| "learning_rate": 3.0426889106967615e-05, | |
| "loss": 0.054, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 3.9254170755642788, | |
| "grad_norm": 0.06169740855693817, | |
| "learning_rate": 3.0377821393523064e-05, | |
| "loss": 0.1127, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.9352306182531893, | |
| "grad_norm": 0.01491067185997963, | |
| "learning_rate": 3.032875368007851e-05, | |
| "loss": 0.0013, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 3.9450441609421, | |
| "grad_norm": 0.003037821501493454, | |
| "learning_rate": 3.027968596663396e-05, | |
| "loss": 0.1189, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 3.954857703631011, | |
| "grad_norm": 0.05605999380350113, | |
| "learning_rate": 3.02306182531894e-05, | |
| "loss": 0.0008, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 3.9646712463199214, | |
| "grad_norm": 0.0034519529435783625, | |
| "learning_rate": 3.0181550539744853e-05, | |
| "loss": 0.0003, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 3.974484789008832, | |
| "grad_norm": 0.0033396417275071144, | |
| "learning_rate": 3.0132482826300295e-05, | |
| "loss": 0.0431, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.984298331697743, | |
| "grad_norm": 0.002848528092727065, | |
| "learning_rate": 3.0083415112855744e-05, | |
| "loss": 0.0003, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 3.9941118743866535, | |
| "grad_norm": 0.09806457161903381, | |
| "learning_rate": 3.003434739941119e-05, | |
| "loss": 0.0576, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 4.003925417075564, | |
| "grad_norm": 0.009162220172584057, | |
| "learning_rate": 2.9985279685966632e-05, | |
| "loss": 0.0003, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 4.013738959764475, | |
| "grad_norm": 0.039267465472221375, | |
| "learning_rate": 2.993621197252208e-05, | |
| "loss": 0.0004, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 4.023552502453386, | |
| "grad_norm": 0.002605535788461566, | |
| "learning_rate": 2.9887144259077527e-05, | |
| "loss": 0.0004, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 4.033366045142296, | |
| "grad_norm": 0.003241546219214797, | |
| "learning_rate": 2.9838076545632976e-05, | |
| "loss": 0.0004, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 4.043179587831207, | |
| "grad_norm": 11.987616539001465, | |
| "learning_rate": 2.978900883218842e-05, | |
| "loss": 0.0393, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 4.052993130520118, | |
| "grad_norm": 0.002549890661612153, | |
| "learning_rate": 2.973994111874387e-05, | |
| "loss": 0.0011, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 4.062806673209028, | |
| "grad_norm": 0.002623894950374961, | |
| "learning_rate": 2.9690873405299312e-05, | |
| "loss": 0.0008, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 4.072620215897939, | |
| "grad_norm": 0.0023546249140053988, | |
| "learning_rate": 2.964180569185476e-05, | |
| "loss": 0.0026, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 4.08243375858685, | |
| "grad_norm": 0.0023659905418753624, | |
| "learning_rate": 2.9592737978410207e-05, | |
| "loss": 0.078, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 4.09224730127576, | |
| "grad_norm": 0.002533614169806242, | |
| "learning_rate": 2.9543670264965656e-05, | |
| "loss": 0.0745, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 4.102060843964671, | |
| "grad_norm": 0.012038661167025566, | |
| "learning_rate": 2.94946025515211e-05, | |
| "loss": 0.0551, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 4.111874386653582, | |
| "grad_norm": 26.8253173828125, | |
| "learning_rate": 2.944553483807655e-05, | |
| "loss": 0.0928, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 4.121687929342492, | |
| "grad_norm": 0.03977564349770546, | |
| "learning_rate": 2.9396467124631993e-05, | |
| "loss": 0.0528, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.131501472031403, | |
| "grad_norm": 0.0031746893655508757, | |
| "learning_rate": 2.934739941118744e-05, | |
| "loss": 0.0106, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 4.141315014720314, | |
| "grad_norm": 0.0031474102288484573, | |
| "learning_rate": 2.9298331697742887e-05, | |
| "loss": 0.0008, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 4.1511285574092245, | |
| "grad_norm": 0.04280337691307068, | |
| "learning_rate": 2.924926398429833e-05, | |
| "loss": 0.0004, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 4.1609421000981355, | |
| "grad_norm": 0.002831744961440563, | |
| "learning_rate": 2.920019627085378e-05, | |
| "loss": 0.0004, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 4.1707556427870465, | |
| "grad_norm": 0.002495395252481103, | |
| "learning_rate": 2.9151128557409224e-05, | |
| "loss": 0.0009, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 4.180569185475957, | |
| "grad_norm": 0.0024046385660767555, | |
| "learning_rate": 2.9102060843964673e-05, | |
| "loss": 0.0007, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 4.190382728164868, | |
| "grad_norm": 0.0030680035706609488, | |
| "learning_rate": 2.905299313052012e-05, | |
| "loss": 0.0005, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 4.200196270853779, | |
| "grad_norm": 0.0061622122302651405, | |
| "learning_rate": 2.9003925417075568e-05, | |
| "loss": 0.0018, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 4.210009813542689, | |
| "grad_norm": 0.0022845251951366663, | |
| "learning_rate": 2.895485770363101e-05, | |
| "loss": 0.0366, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 4.2198233562316, | |
| "grad_norm": 0.011359172873198986, | |
| "learning_rate": 2.890578999018646e-05, | |
| "loss": 0.0005, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.229636898920511, | |
| "grad_norm": 0.002846726682037115, | |
| "learning_rate": 2.8856722276741904e-05, | |
| "loss": 0.0004, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 4.239450441609421, | |
| "grad_norm": 0.002284892601892352, | |
| "learning_rate": 2.8807654563297353e-05, | |
| "loss": 0.0984, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 4.249263984298332, | |
| "grad_norm": 0.002528236713260412, | |
| "learning_rate": 2.87585868498528e-05, | |
| "loss": 0.0007, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 4.259077526987243, | |
| "grad_norm": 0.003352473024278879, | |
| "learning_rate": 2.8709519136408248e-05, | |
| "loss": 0.0004, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 4.268891069676153, | |
| "grad_norm": 0.004708737134933472, | |
| "learning_rate": 2.866045142296369e-05, | |
| "loss": 0.0013, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 4.278704612365064, | |
| "grad_norm": 0.358195036649704, | |
| "learning_rate": 2.8611383709519136e-05, | |
| "loss": 0.0282, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 4.288518155053975, | |
| "grad_norm": 0.002740907482802868, | |
| "learning_rate": 2.8562315996074585e-05, | |
| "loss": 0.1356, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 4.298331697742885, | |
| "grad_norm": 0.002787757897749543, | |
| "learning_rate": 2.8513248282630027e-05, | |
| "loss": 0.0028, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 4.308145240431796, | |
| "grad_norm": 8.950927734375, | |
| "learning_rate": 2.846418056918548e-05, | |
| "loss": 0.0024, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 4.317958783120707, | |
| "grad_norm": 0.0048212092369794846, | |
| "learning_rate": 2.841511285574092e-05, | |
| "loss": 0.0006, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.327772325809617, | |
| "grad_norm": 0.0025500452611595392, | |
| "learning_rate": 2.836604514229637e-05, | |
| "loss": 0.0006, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 4.337585868498528, | |
| "grad_norm": 0.0027642964851111174, | |
| "learning_rate": 2.8316977428851816e-05, | |
| "loss": 0.0436, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 4.347399411187439, | |
| "grad_norm": 0.0026419861242175102, | |
| "learning_rate": 2.8267909715407265e-05, | |
| "loss": 0.0004, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 4.357212953876349, | |
| "grad_norm": 0.004611722193658352, | |
| "learning_rate": 2.8218842001962707e-05, | |
| "loss": 0.0823, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 4.36702649656526, | |
| "grad_norm": 0.0055962237529456615, | |
| "learning_rate": 2.816977428851816e-05, | |
| "loss": 0.0005, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 4.376840039254171, | |
| "grad_norm": 0.004676250275224447, | |
| "learning_rate": 2.8120706575073602e-05, | |
| "loss": 0.0004, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 4.386653581943081, | |
| "grad_norm": 0.0034532281570136547, | |
| "learning_rate": 2.807163886162905e-05, | |
| "loss": 0.0006, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 4.396467124631992, | |
| "grad_norm": 0.016467634588479996, | |
| "learning_rate": 2.8022571148184496e-05, | |
| "loss": 0.0005, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 4.406280667320903, | |
| "grad_norm": 0.011575533077120781, | |
| "learning_rate": 2.7973503434739945e-05, | |
| "loss": 0.0004, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 4.416094210009813, | |
| "grad_norm": 0.002654923591762781, | |
| "learning_rate": 2.7924435721295388e-05, | |
| "loss": 0.0003, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.425907752698724, | |
| "grad_norm": 0.00244724890217185, | |
| "learning_rate": 2.7875368007850833e-05, | |
| "loss": 0.0003, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 4.435721295387635, | |
| "grad_norm": 0.00237859645858407, | |
| "learning_rate": 2.7826300294406282e-05, | |
| "loss": 0.0004, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 4.445534838076545, | |
| "grad_norm": 0.002822286682203412, | |
| "learning_rate": 2.7777232580961728e-05, | |
| "loss": 0.0003, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 4.455348380765456, | |
| "grad_norm": 0.0033684764057397842, | |
| "learning_rate": 2.7728164867517177e-05, | |
| "loss": 0.0568, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 4.465161923454367, | |
| "grad_norm": 0.003250374225899577, | |
| "learning_rate": 2.767909715407262e-05, | |
| "loss": 0.0004, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 4.4749754661432775, | |
| "grad_norm": 0.0025315198581665754, | |
| "learning_rate": 2.7630029440628068e-05, | |
| "loss": 0.0008, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 4.4847890088321885, | |
| "grad_norm": 0.010016725398600101, | |
| "learning_rate": 2.7580961727183514e-05, | |
| "loss": 0.0572, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 4.494602551521099, | |
| "grad_norm": 0.0029501202516257763, | |
| "learning_rate": 2.7531894013738963e-05, | |
| "loss": 0.0003, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 4.5044160942100095, | |
| "grad_norm": 0.002199607901275158, | |
| "learning_rate": 2.7482826300294405e-05, | |
| "loss": 0.0003, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 4.5142296368989205, | |
| "grad_norm": 0.002721391385421157, | |
| "learning_rate": 2.7433758586849857e-05, | |
| "loss": 0.0319, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.5240431795878315, | |
| "grad_norm": 0.0022027925588190556, | |
| "learning_rate": 2.73846908734053e-05, | |
| "loss": 0.001, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 4.533856722276742, | |
| "grad_norm": 0.002053765347227454, | |
| "learning_rate": 2.7335623159960748e-05, | |
| "loss": 0.0539, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 4.543670264965653, | |
| "grad_norm": 0.019470343366265297, | |
| "learning_rate": 2.7286555446516194e-05, | |
| "loss": 0.07, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 4.553483807654564, | |
| "grad_norm": 0.0020952706690877676, | |
| "learning_rate": 2.7237487733071643e-05, | |
| "loss": 0.0896, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 4.563297350343474, | |
| "grad_norm": 0.0032566250301897526, | |
| "learning_rate": 2.7188420019627085e-05, | |
| "loss": 0.0004, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 4.573110893032385, | |
| "grad_norm": 0.01735255867242813, | |
| "learning_rate": 2.713935230618253e-05, | |
| "loss": 0.0763, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 4.582924435721296, | |
| "grad_norm": 0.005380355753004551, | |
| "learning_rate": 2.709028459273798e-05, | |
| "loss": 0.0004, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 4.592737978410206, | |
| "grad_norm": 0.021537847816944122, | |
| "learning_rate": 2.7041216879293425e-05, | |
| "loss": 0.008, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 4.602551521099117, | |
| "grad_norm": 0.004185757599771023, | |
| "learning_rate": 2.6992149165848874e-05, | |
| "loss": 0.0004, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 4.612365063788028, | |
| "grad_norm": 0.012351655401289463, | |
| "learning_rate": 2.6943081452404316e-05, | |
| "loss": 0.0016, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.622178606476938, | |
| "grad_norm": 0.002798211993649602, | |
| "learning_rate": 2.6894013738959765e-05, | |
| "loss": 0.0403, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 4.631992149165849, | |
| "grad_norm": 0.002490241779014468, | |
| "learning_rate": 2.684494602551521e-05, | |
| "loss": 0.0003, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 4.64180569185476, | |
| "grad_norm": 0.020930418744683266, | |
| "learning_rate": 2.679587831207066e-05, | |
| "loss": 0.0029, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 4.65161923454367, | |
| "grad_norm": 37.52565383911133, | |
| "learning_rate": 2.6746810598626106e-05, | |
| "loss": 0.0143, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 4.661432777232581, | |
| "grad_norm": 0.002027578419074416, | |
| "learning_rate": 2.6697742885181555e-05, | |
| "loss": 0.0002, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 4.671246319921492, | |
| "grad_norm": 0.0019966133404523134, | |
| "learning_rate": 2.6648675171736997e-05, | |
| "loss": 0.0002, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 4.681059862610402, | |
| "grad_norm": 0.001950482139363885, | |
| "learning_rate": 2.6599607458292446e-05, | |
| "loss": 0.0002, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 4.690873405299313, | |
| "grad_norm": 0.0020267153158783913, | |
| "learning_rate": 2.655053974484789e-05, | |
| "loss": 0.0826, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 4.700686947988224, | |
| "grad_norm": 17.92084503173828, | |
| "learning_rate": 2.650147203140334e-05, | |
| "loss": 0.0432, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 4.710500490677134, | |
| "grad_norm": 0.002029955852776766, | |
| "learning_rate": 2.6452404317958786e-05, | |
| "loss": 0.0004, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 4.720314033366045, | |
| "grad_norm": 0.002043253742158413, | |
| "learning_rate": 2.6403336604514228e-05, | |
| "loss": 0.0004, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 4.730127576054956, | |
| "grad_norm": 8.624307632446289, | |
| "learning_rate": 2.6354268891069677e-05, | |
| "loss": 0.0568, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 4.739941118743866, | |
| "grad_norm": 0.0022134315222501755, | |
| "learning_rate": 2.6305201177625123e-05, | |
| "loss": 0.0003, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 4.749754661432777, | |
| "grad_norm": 0.003354401560500264, | |
| "learning_rate": 2.625613346418057e-05, | |
| "loss": 0.0004, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 4.759568204121688, | |
| "grad_norm": 0.025983460247516632, | |
| "learning_rate": 2.6207065750736014e-05, | |
| "loss": 0.0021, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 4.769381746810598, | |
| "grad_norm": 0.0028674264904111624, | |
| "learning_rate": 2.6157998037291466e-05, | |
| "loss": 0.0003, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 4.779195289499509, | |
| "grad_norm": 0.0024552124086767435, | |
| "learning_rate": 2.610893032384691e-05, | |
| "loss": 0.0161, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 4.78900883218842, | |
| "grad_norm": 0.01599975675344467, | |
| "learning_rate": 2.6059862610402357e-05, | |
| "loss": 0.0003, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 4.79882237487733, | |
| "grad_norm": 0.05640334263443947, | |
| "learning_rate": 2.6010794896957803e-05, | |
| "loss": 0.0003, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 4.808635917566241, | |
| "grad_norm": 0.1503908485174179, | |
| "learning_rate": 2.5961727183513252e-05, | |
| "loss": 0.0426, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.818449460255152, | |
| "grad_norm": 0.0021854902151972055, | |
| "learning_rate": 2.5912659470068694e-05, | |
| "loss": 0.0003, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 4.8282630029440625, | |
| "grad_norm": 0.0022083704825490713, | |
| "learning_rate": 2.5863591756624143e-05, | |
| "loss": 0.0002, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 4.8380765456329735, | |
| "grad_norm": 0.0018174449214711785, | |
| "learning_rate": 2.581452404317959e-05, | |
| "loss": 0.0441, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 4.8478900883218845, | |
| "grad_norm": 0.0019975032191723585, | |
| "learning_rate": 2.5765456329735038e-05, | |
| "loss": 0.0008, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 4.857703631010795, | |
| "grad_norm": 0.0022116098552942276, | |
| "learning_rate": 2.5716388616290483e-05, | |
| "loss": 0.0002, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 4.867517173699706, | |
| "grad_norm": 0.0019927374087274075, | |
| "learning_rate": 2.5667320902845926e-05, | |
| "loss": 0.0005, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 4.877330716388617, | |
| "grad_norm": 0.003586186794564128, | |
| "learning_rate": 2.5618253189401375e-05, | |
| "loss": 0.0357, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 4.887144259077527, | |
| "grad_norm": 0.006166779901832342, | |
| "learning_rate": 2.556918547595682e-05, | |
| "loss": 0.0003, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 4.896957801766438, | |
| "grad_norm": 0.03852635622024536, | |
| "learning_rate": 2.552011776251227e-05, | |
| "loss": 0.0005, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 4.906771344455349, | |
| "grad_norm": 0.001826342660933733, | |
| "learning_rate": 2.547105004906771e-05, | |
| "loss": 0.0007, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.916584887144259, | |
| "grad_norm": 0.0018040341092273593, | |
| "learning_rate": 2.5421982335623164e-05, | |
| "loss": 0.0002, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 4.92639842983317, | |
| "grad_norm": 0.0018869714112952352, | |
| "learning_rate": 2.5372914622178606e-05, | |
| "loss": 0.0002, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 4.936211972522081, | |
| "grad_norm": 0.0017143889563158154, | |
| "learning_rate": 2.5323846908734055e-05, | |
| "loss": 0.0002, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 4.946025515210991, | |
| "grad_norm": 0.0018076589331030846, | |
| "learning_rate": 2.52747791952895e-05, | |
| "loss": 0.0002, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 4.955839057899902, | |
| "grad_norm": 0.002003490924835205, | |
| "learning_rate": 2.522571148184495e-05, | |
| "loss": 0.0002, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 4.965652600588813, | |
| "grad_norm": 0.001990032149478793, | |
| "learning_rate": 2.517664376840039e-05, | |
| "loss": 0.0002, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 4.975466143277723, | |
| "grad_norm": 0.0017091418849304318, | |
| "learning_rate": 2.5127576054955844e-05, | |
| "loss": 0.0002, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 4.985279685966634, | |
| "grad_norm": 0.0019396455027163029, | |
| "learning_rate": 2.5078508341511286e-05, | |
| "loss": 0.0002, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 4.995093228655545, | |
| "grad_norm": 0.0016776375705376267, | |
| "learning_rate": 2.5029440628066735e-05, | |
| "loss": 0.0002, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 5.004906771344455, | |
| "grad_norm": 0.0017573200166225433, | |
| "learning_rate": 2.498037291462218e-05, | |
| "loss": 0.0442, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 5.014720314033366, | |
| "grad_norm": 0.0017642441671341658, | |
| "learning_rate": 2.4931305201177626e-05, | |
| "loss": 0.0002, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 5.024533856722277, | |
| "grad_norm": 0.0016604288248345256, | |
| "learning_rate": 2.4882237487733072e-05, | |
| "loss": 0.0016, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 5.034347399411187, | |
| "grad_norm": 0.001731898752041161, | |
| "learning_rate": 2.483316977428852e-05, | |
| "loss": 0.0002, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 5.044160942100098, | |
| "grad_norm": 0.0016634787898510695, | |
| "learning_rate": 2.4784102060843967e-05, | |
| "loss": 0.0002, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 5.053974484789009, | |
| "grad_norm": 0.0016294183442369103, | |
| "learning_rate": 2.4735034347399412e-05, | |
| "loss": 0.0002, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 5.063788027477919, | |
| "grad_norm": 0.0017350780544802547, | |
| "learning_rate": 2.468596663395486e-05, | |
| "loss": 0.0006, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 5.07360157016683, | |
| "grad_norm": 0.0015964311314746737, | |
| "learning_rate": 2.4636898920510303e-05, | |
| "loss": 0.0033, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 5.083415112855741, | |
| "grad_norm": 0.0018725765403360128, | |
| "learning_rate": 2.4587831207065752e-05, | |
| "loss": 0.0002, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 5.093228655544651, | |
| "grad_norm": 0.001561222830787301, | |
| "learning_rate": 2.4538763493621198e-05, | |
| "loss": 0.0002, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 5.103042198233562, | |
| "grad_norm": 0.0913846343755722, | |
| "learning_rate": 2.4489695780176643e-05, | |
| "loss": 0.0003, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 5.112855740922473, | |
| "grad_norm": 0.001611059415154159, | |
| "learning_rate": 2.4440628066732092e-05, | |
| "loss": 0.0002, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 5.122669283611383, | |
| "grad_norm": 0.0015166820958256721, | |
| "learning_rate": 2.4391560353287538e-05, | |
| "loss": 0.0002, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 5.132482826300294, | |
| "grad_norm": 0.0015743138501420617, | |
| "learning_rate": 2.4342492639842984e-05, | |
| "loss": 0.0002, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 5.142296368989205, | |
| "grad_norm": 0.0015384262660518289, | |
| "learning_rate": 2.429342492639843e-05, | |
| "loss": 0.0002, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 5.1521099116781155, | |
| "grad_norm": 0.004025735892355442, | |
| "learning_rate": 2.4244357212953878e-05, | |
| "loss": 0.1963, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 5.1619234543670265, | |
| "grad_norm": 0.052797187119722366, | |
| "learning_rate": 2.4195289499509324e-05, | |
| "loss": 0.001, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 5.1717369970559375, | |
| "grad_norm": 0.0028745972085744143, | |
| "learning_rate": 2.414622178606477e-05, | |
| "loss": 0.0236, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 5.181550539744848, | |
| "grad_norm": 0.005021668970584869, | |
| "learning_rate": 2.409715407262022e-05, | |
| "loss": 0.0002, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 5.191364082433759, | |
| "grad_norm": 0.08027222752571106, | |
| "learning_rate": 2.4048086359175664e-05, | |
| "loss": 0.0006, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 5.20117762512267, | |
| "grad_norm": 0.001573985326103866, | |
| "learning_rate": 2.399901864573111e-05, | |
| "loss": 0.0002, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 5.21099116781158, | |
| "grad_norm": 0.0015114896232262254, | |
| "learning_rate": 2.394995093228656e-05, | |
| "loss": 0.0002, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 5.220804710500491, | |
| "grad_norm": 0.0018572395201772451, | |
| "learning_rate": 2.3900883218842e-05, | |
| "loss": 0.0003, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 5.230618253189402, | |
| "grad_norm": 0.0015180202899500728, | |
| "learning_rate": 2.385181550539745e-05, | |
| "loss": 0.0003, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 5.240431795878312, | |
| "grad_norm": 0.0016390462405979633, | |
| "learning_rate": 2.3802747791952895e-05, | |
| "loss": 0.0002, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 5.250245338567223, | |
| "grad_norm": 0.0015287548303604126, | |
| "learning_rate": 2.375368007850834e-05, | |
| "loss": 0.0002, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 5.260058881256134, | |
| "grad_norm": 0.0014907275326550007, | |
| "learning_rate": 2.370461236506379e-05, | |
| "loss": 0.0002, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 5.269872423945044, | |
| "grad_norm": 0.001557844690978527, | |
| "learning_rate": 2.3655544651619236e-05, | |
| "loss": 0.0002, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 5.279685966633955, | |
| "grad_norm": 0.0018678128253668547, | |
| "learning_rate": 2.360647693817468e-05, | |
| "loss": 0.0002, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 5.289499509322866, | |
| "grad_norm": 0.0015175668522715569, | |
| "learning_rate": 2.355740922473013e-05, | |
| "loss": 0.0002, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 5.299313052011776, | |
| "grad_norm": 0.0014625848270952702, | |
| "learning_rate": 2.3508341511285576e-05, | |
| "loss": 0.0002, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.309126594700687, | |
| "grad_norm": 0.00429932726547122, | |
| "learning_rate": 2.345927379784102e-05, | |
| "loss": 0.0607, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 5.318940137389598, | |
| "grad_norm": 0.0014821887016296387, | |
| "learning_rate": 2.341020608439647e-05, | |
| "loss": 0.0033, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 5.328753680078508, | |
| "grad_norm": 0.001476548844948411, | |
| "learning_rate": 2.3361138370951916e-05, | |
| "loss": 0.0002, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 5.338567222767419, | |
| "grad_norm": 0.0014416587073355913, | |
| "learning_rate": 2.331207065750736e-05, | |
| "loss": 0.0002, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 5.34838076545633, | |
| "grad_norm": 0.001489553484134376, | |
| "learning_rate": 2.326300294406281e-05, | |
| "loss": 0.0698, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 5.35819430814524, | |
| "grad_norm": 0.004528726451098919, | |
| "learning_rate": 2.3213935230618256e-05, | |
| "loss": 0.0628, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 5.368007850834151, | |
| "grad_norm": 0.0017940645338967443, | |
| "learning_rate": 2.3164867517173698e-05, | |
| "loss": 0.0002, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 5.377821393523062, | |
| "grad_norm": 0.0015537918079644442, | |
| "learning_rate": 2.3115799803729147e-05, | |
| "loss": 0.0002, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 5.387634936211972, | |
| "grad_norm": 0.0015587827656418085, | |
| "learning_rate": 2.3066732090284593e-05, | |
| "loss": 0.0002, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 5.397448478900883, | |
| "grad_norm": 0.0015445965109393, | |
| "learning_rate": 2.301766437684004e-05, | |
| "loss": 0.0002, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.407262021589794, | |
| "grad_norm": 0.00230443780310452, | |
| "learning_rate": 2.2968596663395487e-05, | |
| "loss": 0.0002, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 5.417075564278704, | |
| "grad_norm": 0.001530683832243085, | |
| "learning_rate": 2.2919528949950933e-05, | |
| "loss": 0.0002, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 5.426889106967615, | |
| "grad_norm": 0.006643714848905802, | |
| "learning_rate": 2.287046123650638e-05, | |
| "loss": 0.0003, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 5.436702649656526, | |
| "grad_norm": 0.0021695613395422697, | |
| "learning_rate": 2.2821393523061828e-05, | |
| "loss": 0.0002, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 5.446516192345436, | |
| "grad_norm": 0.0014126679161563516, | |
| "learning_rate": 2.2772325809617273e-05, | |
| "loss": 0.0655, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 5.456329735034347, | |
| "grad_norm": 0.01729333959519863, | |
| "learning_rate": 2.272325809617272e-05, | |
| "loss": 0.0002, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 5.466143277723258, | |
| "grad_norm": 0.0014916701475158334, | |
| "learning_rate": 2.2674190382728168e-05, | |
| "loss": 0.0002, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 5.4759568204121685, | |
| "grad_norm": 0.001467019901610911, | |
| "learning_rate": 2.2625122669283613e-05, | |
| "loss": 0.0002, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 5.4857703631010795, | |
| "grad_norm": 0.0014575383393093944, | |
| "learning_rate": 2.257605495583906e-05, | |
| "loss": 0.0002, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 5.4955839057899905, | |
| "grad_norm": 0.0014117214595898986, | |
| "learning_rate": 2.2526987242394508e-05, | |
| "loss": 0.0002, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 5.505397448478901, | |
| "grad_norm": 0.0014430248411372304, | |
| "learning_rate": 2.2477919528949953e-05, | |
| "loss": 0.0002, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 5.5152109911678115, | |
| "grad_norm": 0.001443715300410986, | |
| "learning_rate": 2.2428851815505396e-05, | |
| "loss": 0.0002, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 5.5250245338567225, | |
| "grad_norm": 0.0013962725643068552, | |
| "learning_rate": 2.2379784102060845e-05, | |
| "loss": 0.0767, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 5.534838076545633, | |
| "grad_norm": 0.0016859096940606833, | |
| "learning_rate": 2.233071638861629e-05, | |
| "loss": 0.0314, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 5.544651619234544, | |
| "grad_norm": 0.0021301463712006807, | |
| "learning_rate": 2.2281648675171736e-05, | |
| "loss": 0.0968, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 5.554465161923455, | |
| "grad_norm": 0.003654947504401207, | |
| "learning_rate": 2.2232580961727185e-05, | |
| "loss": 0.0051, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 5.564278704612365, | |
| "grad_norm": 0.003530005691573024, | |
| "learning_rate": 2.218351324828263e-05, | |
| "loss": 0.0053, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 5.574092247301276, | |
| "grad_norm": 0.004440093878656626, | |
| "learning_rate": 2.2134445534838076e-05, | |
| "loss": 0.0012, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 5.583905789990187, | |
| "grad_norm": 0.0015052916714921594, | |
| "learning_rate": 2.2085377821393525e-05, | |
| "loss": 0.0013, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 5.593719332679097, | |
| "grad_norm": 0.0014009432634338737, | |
| "learning_rate": 2.203631010794897e-05, | |
| "loss": 0.0492, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 5.603532875368008, | |
| "grad_norm": 0.0015393829671666026, | |
| "learning_rate": 2.1987242394504416e-05, | |
| "loss": 0.0005, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 5.613346418056919, | |
| "grad_norm": 0.0039021980483084917, | |
| "learning_rate": 2.1938174681059865e-05, | |
| "loss": 0.0002, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 5.623159960745829, | |
| "grad_norm": 0.0014669959200546145, | |
| "learning_rate": 2.188910696761531e-05, | |
| "loss": 0.0004, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 5.63297350343474, | |
| "grad_norm": 0.0015139420283958316, | |
| "learning_rate": 2.1840039254170756e-05, | |
| "loss": 0.0303, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 5.642787046123651, | |
| "grad_norm": 0.001543746329843998, | |
| "learning_rate": 2.1790971540726205e-05, | |
| "loss": 0.0009, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 5.652600588812561, | |
| "grad_norm": 0.2851181924343109, | |
| "learning_rate": 2.174190382728165e-05, | |
| "loss": 0.0021, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 5.662414131501472, | |
| "grad_norm": 0.001427607610821724, | |
| "learning_rate": 2.1692836113837096e-05, | |
| "loss": 0.0002, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 5.672227674190383, | |
| "grad_norm": 0.0017000263324007392, | |
| "learning_rate": 2.1643768400392542e-05, | |
| "loss": 0.0101, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 5.682041216879293, | |
| "grad_norm": 0.02387947216629982, | |
| "learning_rate": 2.1594700686947988e-05, | |
| "loss": 0.0002, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 5.691854759568204, | |
| "grad_norm": 0.0013223286950960755, | |
| "learning_rate": 2.1545632973503437e-05, | |
| "loss": 0.0433, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 5.701668302257115, | |
| "grad_norm": 0.0013629156164824963, | |
| "learning_rate": 2.1496565260058882e-05, | |
| "loss": 0.0002, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 5.711481844946025, | |
| "grad_norm": 0.0015034314710646868, | |
| "learning_rate": 2.1447497546614328e-05, | |
| "loss": 0.0009, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 5.721295387634936, | |
| "grad_norm": 0.001305502257309854, | |
| "learning_rate": 2.1398429833169777e-05, | |
| "loss": 0.0002, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 5.731108930323847, | |
| "grad_norm": 0.0013675469672307372, | |
| "learning_rate": 2.1349362119725222e-05, | |
| "loss": 0.0002, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 5.740922473012757, | |
| "grad_norm": 0.0012498252326622605, | |
| "learning_rate": 2.1300294406280668e-05, | |
| "loss": 0.0285, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 5.750736015701668, | |
| "grad_norm": 0.001314906869083643, | |
| "learning_rate": 2.1251226692836117e-05, | |
| "loss": 0.0002, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 5.760549558390579, | |
| "grad_norm": 0.004441590514034033, | |
| "learning_rate": 2.1202158979391563e-05, | |
| "loss": 0.1261, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 5.770363101079489, | |
| "grad_norm": 0.018667880445718765, | |
| "learning_rate": 2.1153091265947008e-05, | |
| "loss": 0.0004, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 5.7801766437684, | |
| "grad_norm": 21.129253387451172, | |
| "learning_rate": 2.1104023552502454e-05, | |
| "loss": 0.0915, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 5.789990186457311, | |
| "grad_norm": 0.0012923305621370673, | |
| "learning_rate": 2.1054955839057903e-05, | |
| "loss": 0.0008, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 5.799803729146221, | |
| "grad_norm": 0.1054319515824318, | |
| "learning_rate": 2.1005888125613345e-05, | |
| "loss": 0.0003, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 5.809617271835132, | |
| "grad_norm": 0.002641110448166728, | |
| "learning_rate": 2.0956820412168794e-05, | |
| "loss": 0.0021, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 5.819430814524043, | |
| "grad_norm": 0.0012492777314037085, | |
| "learning_rate": 2.090775269872424e-05, | |
| "loss": 0.0002, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 5.8292443572129535, | |
| "grad_norm": 0.0012710640439763665, | |
| "learning_rate": 2.0858684985279685e-05, | |
| "loss": 0.0005, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 5.8390578999018645, | |
| "grad_norm": 0.0014566375175490975, | |
| "learning_rate": 2.0809617271835134e-05, | |
| "loss": 0.0004, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 5.8488714425907755, | |
| "grad_norm": 0.0022309215273708105, | |
| "learning_rate": 2.076054955839058e-05, | |
| "loss": 0.0755, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 5.858684985279686, | |
| "grad_norm": 0.00341408746317029, | |
| "learning_rate": 2.0711481844946025e-05, | |
| "loss": 0.014, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 5.868498527968597, | |
| "grad_norm": 0.001304444158449769, | |
| "learning_rate": 2.0662414131501474e-05, | |
| "loss": 0.0015, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 5.878312070657508, | |
| "grad_norm": 0.0012671782169491053, | |
| "learning_rate": 2.061334641805692e-05, | |
| "loss": 0.0002, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 5.888125613346418, | |
| "grad_norm": 0.0035885085817426443, | |
| "learning_rate": 2.0564278704612365e-05, | |
| "loss": 0.0002, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.897939156035329, | |
| "grad_norm": 0.0014621804002672434, | |
| "learning_rate": 2.0515210991167814e-05, | |
| "loss": 0.0003, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 5.90775269872424, | |
| "grad_norm": 0.001226249267347157, | |
| "learning_rate": 2.046614327772326e-05, | |
| "loss": 0.0002, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 5.91756624141315, | |
| "grad_norm": 0.0012753872433677316, | |
| "learning_rate": 2.0417075564278706e-05, | |
| "loss": 0.0002, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 5.927379784102061, | |
| "grad_norm": 0.0011854572221636772, | |
| "learning_rate": 2.0368007850834155e-05, | |
| "loss": 0.0002, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 5.937193326790972, | |
| "grad_norm": 0.0012309462763369083, | |
| "learning_rate": 2.03189401373896e-05, | |
| "loss": 0.0001, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 5.947006869479882, | |
| "grad_norm": 0.001222968683578074, | |
| "learning_rate": 2.0269872423945042e-05, | |
| "loss": 0.0001, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 5.956820412168793, | |
| "grad_norm": 0.004279905930161476, | |
| "learning_rate": 2.022080471050049e-05, | |
| "loss": 0.0002, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 5.966633954857704, | |
| "grad_norm": 0.0012088885996490717, | |
| "learning_rate": 2.0171736997055937e-05, | |
| "loss": 0.0001, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 5.976447497546614, | |
| "grad_norm": 0.0017938670935109258, | |
| "learning_rate": 2.0122669283611383e-05, | |
| "loss": 0.0001, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 5.986261040235525, | |
| "grad_norm": 0.033533725887537, | |
| "learning_rate": 2.007360157016683e-05, | |
| "loss": 0.0002, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 5.996074582924436, | |
| "grad_norm": 0.0012293298495933414, | |
| "learning_rate": 2.0024533856722277e-05, | |
| "loss": 0.0001, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 6.005888125613346, | |
| "grad_norm": 0.001815044553950429, | |
| "learning_rate": 1.9975466143277723e-05, | |
| "loss": 0.0002, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 6.015701668302257, | |
| "grad_norm": 0.001358096138574183, | |
| "learning_rate": 1.9926398429833172e-05, | |
| "loss": 0.0002, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 6.025515210991168, | |
| "grad_norm": 0.015642492100596428, | |
| "learning_rate": 1.9877330716388617e-05, | |
| "loss": 0.0001, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 6.035328753680078, | |
| "grad_norm": 0.001149074058048427, | |
| "learning_rate": 1.9828263002944063e-05, | |
| "loss": 0.0001, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 6.045142296368989, | |
| "grad_norm": 0.0011097900569438934, | |
| "learning_rate": 1.9779195289499512e-05, | |
| "loss": 0.0001, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 6.0549558390579, | |
| "grad_norm": 0.0014940439723432064, | |
| "learning_rate": 1.9730127576054957e-05, | |
| "loss": 0.0001, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 6.06476938174681, | |
| "grad_norm": 0.03362993523478508, | |
| "learning_rate": 1.9681059862610403e-05, | |
| "loss": 0.0004, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 6.074582924435721, | |
| "grad_norm": 0.16991779208183289, | |
| "learning_rate": 1.9631992149165852e-05, | |
| "loss": 0.0003, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 6.084396467124632, | |
| "grad_norm": 0.0011343214428052306, | |
| "learning_rate": 1.9582924435721298e-05, | |
| "loss": 0.0021, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 6.094210009813542, | |
| "grad_norm": 0.0011410163715481758, | |
| "learning_rate": 1.9533856722276743e-05, | |
| "loss": 0.0001, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 6.104023552502453, | |
| "grad_norm": 0.0011109898332506418, | |
| "learning_rate": 1.948478900883219e-05, | |
| "loss": 0.0001, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 6.113837095191364, | |
| "grad_norm": 0.001067674602381885, | |
| "learning_rate": 1.9435721295387634e-05, | |
| "loss": 0.0001, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 6.123650637880274, | |
| "grad_norm": 0.0010826255893334746, | |
| "learning_rate": 1.938665358194308e-05, | |
| "loss": 0.0001, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 6.133464180569185, | |
| "grad_norm": 0.0010834899730980396, | |
| "learning_rate": 1.933758586849853e-05, | |
| "loss": 0.0541, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 6.143277723258096, | |
| "grad_norm": 0.007662464864552021, | |
| "learning_rate": 1.9288518155053975e-05, | |
| "loss": 0.0002, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 6.1530912659470065, | |
| "grad_norm": 0.0014158189296722412, | |
| "learning_rate": 1.923945044160942e-05, | |
| "loss": 0.1787, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 6.1629048086359175, | |
| "grad_norm": 0.0036792519968003035, | |
| "learning_rate": 1.919038272816487e-05, | |
| "loss": 0.0462, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 6.1727183513248285, | |
| "grad_norm": 0.1517615020275116, | |
| "learning_rate": 1.9141315014720315e-05, | |
| "loss": 0.0004, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 6.182531894013739, | |
| "grad_norm": 0.002872257027775049, | |
| "learning_rate": 1.909224730127576e-05, | |
| "loss": 0.0003, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 6.19234543670265, | |
| "grad_norm": 0.0014831377193331718, | |
| "learning_rate": 1.904317958783121e-05, | |
| "loss": 0.0002, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 6.202158979391561, | |
| "grad_norm": 0.0015966458013281226, | |
| "learning_rate": 1.8994111874386655e-05, | |
| "loss": 0.0006, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 6.211972522080471, | |
| "grad_norm": 0.001315574860200286, | |
| "learning_rate": 1.89450441609421e-05, | |
| "loss": 0.0002, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 6.221786064769382, | |
| "grad_norm": 0.003673387225717306, | |
| "learning_rate": 1.889597644749755e-05, | |
| "loss": 0.0002, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 6.231599607458293, | |
| "grad_norm": 0.0022277962416410446, | |
| "learning_rate": 1.8846908734052995e-05, | |
| "loss": 0.0002, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 6.241413150147203, | |
| "grad_norm": 0.0013255071826279163, | |
| "learning_rate": 1.879784102060844e-05, | |
| "loss": 0.0002, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 6.251226692836114, | |
| "grad_norm": 0.0024367074947804213, | |
| "learning_rate": 1.8748773307163886e-05, | |
| "loss": 0.0002, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 6.261040235525025, | |
| "grad_norm": 0.0018190988339483738, | |
| "learning_rate": 1.8699705593719332e-05, | |
| "loss": 0.0002, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 6.270853778213935, | |
| "grad_norm": 0.002138520823791623, | |
| "learning_rate": 1.865063788027478e-05, | |
| "loss": 0.0002, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 6.280667320902846, | |
| "grad_norm": 0.0013069864362478256, | |
| "learning_rate": 1.8601570166830226e-05, | |
| "loss": 0.0002, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 6.290480863591757, | |
| "grad_norm": 0.0013102535158395767, | |
| "learning_rate": 1.8552502453385672e-05, | |
| "loss": 0.0003, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 6.300294406280667, | |
| "grad_norm": 0.004578573163598776, | |
| "learning_rate": 1.850343473994112e-05, | |
| "loss": 0.0002, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 6.310107948969578, | |
| "grad_norm": 0.00831854809075594, | |
| "learning_rate": 1.8454367026496567e-05, | |
| "loss": 0.0002, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 6.319921491658489, | |
| "grad_norm": 0.0014605351025238633, | |
| "learning_rate": 1.8405299313052012e-05, | |
| "loss": 0.0002, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 6.329735034347399, | |
| "grad_norm": 0.0013795517152175307, | |
| "learning_rate": 1.835623159960746e-05, | |
| "loss": 0.0002, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 6.33954857703631, | |
| "grad_norm": 0.0015935307601466775, | |
| "learning_rate": 1.8307163886162907e-05, | |
| "loss": 0.0002, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 6.349362119725221, | |
| "grad_norm": 0.0013198903761804104, | |
| "learning_rate": 1.8258096172718352e-05, | |
| "loss": 0.0003, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 6.359175662414131, | |
| "grad_norm": 0.002860839944332838, | |
| "learning_rate": 1.82090284592738e-05, | |
| "loss": 0.0002, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 6.368989205103042, | |
| "grad_norm": 0.0013555525802075863, | |
| "learning_rate": 1.8159960745829247e-05, | |
| "loss": 0.0002, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 6.378802747791953, | |
| "grad_norm": 0.0020145312882959843, | |
| "learning_rate": 1.811089303238469e-05, | |
| "loss": 0.0002, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.388616290480863, | |
| "grad_norm": 0.00473778136074543, | |
| "learning_rate": 1.8061825318940138e-05, | |
| "loss": 0.0001, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 6.398429833169774, | |
| "grad_norm": 0.0017492013284936547, | |
| "learning_rate": 1.8012757605495584e-05, | |
| "loss": 0.0002, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 6.408243375858685, | |
| "grad_norm": 0.0012156120501458645, | |
| "learning_rate": 1.796368989205103e-05, | |
| "loss": 0.0001, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 6.418056918547595, | |
| "grad_norm": 0.001362017123028636, | |
| "learning_rate": 1.7914622178606478e-05, | |
| "loss": 0.0005, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 6.427870461236506, | |
| "grad_norm": 0.0011874830815941095, | |
| "learning_rate": 1.7865554465161924e-05, | |
| "loss": 0.0002, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 6.437684003925417, | |
| "grad_norm": 0.0020989649929106236, | |
| "learning_rate": 1.781648675171737e-05, | |
| "loss": 0.0002, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 6.447497546614327, | |
| "grad_norm": 0.001271673827432096, | |
| "learning_rate": 1.776741903827282e-05, | |
| "loss": 0.0858, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 6.457311089303238, | |
| "grad_norm": 0.001192873460240662, | |
| "learning_rate": 1.7718351324828264e-05, | |
| "loss": 0.0001, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 6.467124631992149, | |
| "grad_norm": 0.011513526551425457, | |
| "learning_rate": 1.766928361138371e-05, | |
| "loss": 0.1562, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 6.4769381746810595, | |
| "grad_norm": 0.001225059386342764, | |
| "learning_rate": 1.762021589793916e-05, | |
| "loss": 0.0002, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 6.4867517173699705, | |
| "grad_norm": 0.0013161891838535666, | |
| "learning_rate": 1.7571148184494604e-05, | |
| "loss": 0.0074, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 6.4965652600588815, | |
| "grad_norm": 0.001231314497999847, | |
| "learning_rate": 1.752208047105005e-05, | |
| "loss": 0.0001, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 6.506378802747792, | |
| "grad_norm": 0.0012088754447177052, | |
| "learning_rate": 1.74730127576055e-05, | |
| "loss": 0.0285, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 6.516192345436703, | |
| "grad_norm": 0.0013558064820244908, | |
| "learning_rate": 1.7423945044160944e-05, | |
| "loss": 0.0004, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 6.5260058881256136, | |
| "grad_norm": 0.0016369909280911088, | |
| "learning_rate": 1.7374877330716387e-05, | |
| "loss": 0.0016, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 6.535819430814524, | |
| "grad_norm": 0.035988856106996536, | |
| "learning_rate": 1.7325809617271836e-05, | |
| "loss": 0.0002, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 6.545632973503435, | |
| "grad_norm": 0.0011288542300462723, | |
| "learning_rate": 1.727674190382728e-05, | |
| "loss": 0.0213, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 6.555446516192346, | |
| "grad_norm": 0.0014625934418290854, | |
| "learning_rate": 1.7227674190382727e-05, | |
| "loss": 0.0002, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 6.565260058881256, | |
| "grad_norm": 0.0011535960948094726, | |
| "learning_rate": 1.7178606476938176e-05, | |
| "loss": 0.0001, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 6.575073601570167, | |
| "grad_norm": 0.0011100315023213625, | |
| "learning_rate": 1.712953876349362e-05, | |
| "loss": 0.0002, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 6.584887144259078, | |
| "grad_norm": 0.0011173097882419825, | |
| "learning_rate": 1.7080471050049067e-05, | |
| "loss": 0.0001, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 6.594700686947988, | |
| "grad_norm": 0.0011760563356801867, | |
| "learning_rate": 1.7031403336604516e-05, | |
| "loss": 0.0002, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 6.604514229636899, | |
| "grad_norm": 0.0012068103533238173, | |
| "learning_rate": 1.698233562315996e-05, | |
| "loss": 0.0001, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 6.61432777232581, | |
| "grad_norm": 0.0010894141159951687, | |
| "learning_rate": 1.6933267909715407e-05, | |
| "loss": 0.0001, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 6.62414131501472, | |
| "grad_norm": 0.0014370041899383068, | |
| "learning_rate": 1.6884200196270856e-05, | |
| "loss": 0.0001, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 6.633954857703631, | |
| "grad_norm": 0.002420579083263874, | |
| "learning_rate": 1.68351324828263e-05, | |
| "loss": 0.0001, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 6.643768400392542, | |
| "grad_norm": 0.001223103143274784, | |
| "learning_rate": 1.6786064769381747e-05, | |
| "loss": 0.0001, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 6.653581943081452, | |
| "grad_norm": 0.0010998743819072843, | |
| "learning_rate": 1.6736997055937196e-05, | |
| "loss": 0.0314, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 6.663395485770363, | |
| "grad_norm": 0.00108517415355891, | |
| "learning_rate": 1.6687929342492642e-05, | |
| "loss": 0.0001, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 6.673209028459274, | |
| "grad_norm": 0.0011395640904083848, | |
| "learning_rate": 1.6638861629048087e-05, | |
| "loss": 0.0001, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 6.683022571148184, | |
| "grad_norm": 0.001564236357808113, | |
| "learning_rate": 1.6589793915603533e-05, | |
| "loss": 0.0962, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 6.692836113837095, | |
| "grad_norm": 0.0016074421582743526, | |
| "learning_rate": 1.654072620215898e-05, | |
| "loss": 0.0002, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 6.702649656526006, | |
| "grad_norm": 0.0012334993807598948, | |
| "learning_rate": 1.6491658488714428e-05, | |
| "loss": 0.0023, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 6.712463199214916, | |
| "grad_norm": 0.0011435603955760598, | |
| "learning_rate": 1.6442590775269873e-05, | |
| "loss": 0.0001, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 6.722276741903827, | |
| "grad_norm": 0.0016410372918471694, | |
| "learning_rate": 1.639352306182532e-05, | |
| "loss": 0.0298, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 6.732090284592738, | |
| "grad_norm": 0.0012846958125010133, | |
| "learning_rate": 1.6344455348380768e-05, | |
| "loss": 0.0001, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 6.741903827281648, | |
| "grad_norm": 0.0011800202773883939, | |
| "learning_rate": 1.6295387634936213e-05, | |
| "loss": 0.0002, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 6.751717369970559, | |
| "grad_norm": 0.0015586729859933257, | |
| "learning_rate": 1.624631992149166e-05, | |
| "loss": 0.0003, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 6.76153091265947, | |
| "grad_norm": 0.001090590376406908, | |
| "learning_rate": 1.6197252208047105e-05, | |
| "loss": 0.0002, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 6.77134445534838, | |
| "grad_norm": 0.0011874845949932933, | |
| "learning_rate": 1.6148184494602554e-05, | |
| "loss": 0.0001, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 6.781157998037291, | |
| "grad_norm": 0.0011030100286006927, | |
| "learning_rate": 1.6099116781158e-05, | |
| "loss": 0.0001, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 6.790971540726202, | |
| "grad_norm": 0.0012315625790506601, | |
| "learning_rate": 1.6050049067713445e-05, | |
| "loss": 0.0001, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 6.8007850834151125, | |
| "grad_norm": 0.0011062839766964316, | |
| "learning_rate": 1.6000981354268894e-05, | |
| "loss": 0.0001, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 6.8105986261040234, | |
| "grad_norm": 0.0011281865881755948, | |
| "learning_rate": 1.595191364082434e-05, | |
| "loss": 0.0001, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 6.820412168792934, | |
| "grad_norm": 0.001074342057108879, | |
| "learning_rate": 1.5902845927379785e-05, | |
| "loss": 0.0001, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 6.8302257114818445, | |
| "grad_norm": 0.0011061643017455935, | |
| "learning_rate": 1.585377821393523e-05, | |
| "loss": 0.0001, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 6.8400392541707555, | |
| "grad_norm": 0.002780759707093239, | |
| "learning_rate": 1.5804710500490676e-05, | |
| "loss": 0.0001, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 6.8498527968596665, | |
| "grad_norm": 0.0010947277769446373, | |
| "learning_rate": 1.5755642787046125e-05, | |
| "loss": 0.0001, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 6.859666339548577, | |
| "grad_norm": 0.001039006281644106, | |
| "learning_rate": 1.570657507360157e-05, | |
| "loss": 0.0001, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 6.869479882237488, | |
| "grad_norm": 0.0011975034140050411, | |
| "learning_rate": 1.5657507360157016e-05, | |
| "loss": 0.0001, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.879293424926399, | |
| "grad_norm": 0.0010505706304684281, | |
| "learning_rate": 1.5608439646712465e-05, | |
| "loss": 0.0001, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 6.889106967615309, | |
| "grad_norm": 0.001015416462905705, | |
| "learning_rate": 1.555937193326791e-05, | |
| "loss": 0.0001, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 6.89892051030422, | |
| "grad_norm": 0.001166634145192802, | |
| "learning_rate": 1.5510304219823356e-05, | |
| "loss": 0.0001, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 6.908734052993131, | |
| "grad_norm": 0.005132897291332483, | |
| "learning_rate": 1.5461236506378805e-05, | |
| "loss": 0.0001, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 6.918547595682041, | |
| "grad_norm": 0.001034508110024035, | |
| "learning_rate": 1.541216879293425e-05, | |
| "loss": 0.0001, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 6.928361138370952, | |
| "grad_norm": 0.0013660124968737364, | |
| "learning_rate": 1.5363101079489697e-05, | |
| "loss": 0.0001, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 6.938174681059863, | |
| "grad_norm": 0.001023141318000853, | |
| "learning_rate": 1.5314033366045146e-05, | |
| "loss": 0.0001, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 6.947988223748773, | |
| "grad_norm": 0.0009852561634033918, | |
| "learning_rate": 1.526496565260059e-05, | |
| "loss": 0.0006, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 6.957801766437684, | |
| "grad_norm": 0.0028536063618957996, | |
| "learning_rate": 1.5215897939156035e-05, | |
| "loss": 0.0001, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 6.967615309126595, | |
| "grad_norm": 0.0010083414381369948, | |
| "learning_rate": 1.516683022571148e-05, | |
| "loss": 0.0001, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 6.977428851815505, | |
| "grad_norm": 0.0009895939147099853, | |
| "learning_rate": 1.5117762512266928e-05, | |
| "loss": 0.0003, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.987242394504416, | |
| "grad_norm": 0.0009826653404161334, | |
| "learning_rate": 1.5068694798822375e-05, | |
| "loss": 0.0002, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 6.997055937193327, | |
| "grad_norm": 0.0010616799117997289, | |
| "learning_rate": 1.501962708537782e-05, | |
| "loss": 0.0001, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 7.006869479882237, | |
| "grad_norm": 0.000987286097370088, | |
| "learning_rate": 1.4970559371933268e-05, | |
| "loss": 0.0001, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 7.016683022571148, | |
| "grad_norm": 0.0009880246361717582, | |
| "learning_rate": 1.4921491658488715e-05, | |
| "loss": 0.0001, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 7.026496565260059, | |
| "grad_norm": 0.0010042705107480288, | |
| "learning_rate": 1.4872423945044161e-05, | |
| "loss": 0.0001, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 7.036310107948969, | |
| "grad_norm": 0.0010432158596813679, | |
| "learning_rate": 1.4823356231599608e-05, | |
| "loss": 0.0001, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 7.04612365063788, | |
| "grad_norm": 0.0010717209661379457, | |
| "learning_rate": 1.4774288518155056e-05, | |
| "loss": 0.0047, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 7.055937193326791, | |
| "grad_norm": 0.001039078924804926, | |
| "learning_rate": 1.4725220804710501e-05, | |
| "loss": 0.0001, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 7.065750736015701, | |
| "grad_norm": 0.0010033833095803857, | |
| "learning_rate": 1.4676153091265948e-05, | |
| "loss": 0.0002, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 7.075564278704612, | |
| "grad_norm": 0.003389047458767891, | |
| "learning_rate": 1.4627085377821396e-05, | |
| "loss": 0.0001, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 7.085377821393523, | |
| "grad_norm": 0.0009605743689462543, | |
| "learning_rate": 1.4578017664376841e-05, | |
| "loss": 0.0002, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 7.095191364082433, | |
| "grad_norm": 0.0016811139648780227, | |
| "learning_rate": 1.4528949950932289e-05, | |
| "loss": 0.0001, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 7.105004906771344, | |
| "grad_norm": 0.0009887183550745249, | |
| "learning_rate": 1.4479882237487732e-05, | |
| "loss": 0.0146, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 7.114818449460255, | |
| "grad_norm": 0.0009474638500250876, | |
| "learning_rate": 1.443081452404318e-05, | |
| "loss": 0.0001, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 7.124631992149165, | |
| "grad_norm": 0.0017863448010757565, | |
| "learning_rate": 1.4381746810598625e-05, | |
| "loss": 0.0001, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 7.134445534838076, | |
| "grad_norm": 0.0009406275930814445, | |
| "learning_rate": 1.4332679097154073e-05, | |
| "loss": 0.0069, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 7.144259077526987, | |
| "grad_norm": 0.0010239857947453856, | |
| "learning_rate": 1.428361138370952e-05, | |
| "loss": 0.1679, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 7.1540726202158975, | |
| "grad_norm": 0.0018188258400186896, | |
| "learning_rate": 1.4234543670264966e-05, | |
| "loss": 0.0003, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 7.1638861629048085, | |
| "grad_norm": 0.0012613933067768812, | |
| "learning_rate": 1.4185475956820413e-05, | |
| "loss": 0.0001, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 7.1736997055937195, | |
| "grad_norm": 0.019094325602054596, | |
| "learning_rate": 1.413640824337586e-05, | |
| "loss": 0.1222, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 7.18351324828263, | |
| "grad_norm": 0.013140466995537281, | |
| "learning_rate": 1.4087340529931306e-05, | |
| "loss": 0.0002, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 7.193326790971541, | |
| "grad_norm": 0.001887351623736322, | |
| "learning_rate": 1.4038272816486753e-05, | |
| "loss": 0.0028, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 7.203140333660452, | |
| "grad_norm": 0.008172539062798023, | |
| "learning_rate": 1.39892051030422e-05, | |
| "loss": 0.0002, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 7.212953876349362, | |
| "grad_norm": 0.017021648585796356, | |
| "learning_rate": 1.3940137389597646e-05, | |
| "loss": 0.0002, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 7.222767419038273, | |
| "grad_norm": 0.0010052472352981567, | |
| "learning_rate": 1.3891069676153093e-05, | |
| "loss": 0.0015, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 7.232580961727184, | |
| "grad_norm": 0.001076782587915659, | |
| "learning_rate": 1.3842001962708539e-05, | |
| "loss": 0.0048, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 7.242394504416094, | |
| "grad_norm": 0.05915454775094986, | |
| "learning_rate": 1.3792934249263986e-05, | |
| "loss": 0.0002, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 7.252208047105005, | |
| "grad_norm": 0.0009720100206322968, | |
| "learning_rate": 1.374386653581943e-05, | |
| "loss": 0.0002, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 7.262021589793916, | |
| "grad_norm": 0.019856898114085197, | |
| "learning_rate": 1.3694798822374877e-05, | |
| "loss": 0.0002, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 7.271835132482826, | |
| "grad_norm": 0.0022591969463974237, | |
| "learning_rate": 1.3645731108930323e-05, | |
| "loss": 0.0004, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 7.281648675171737, | |
| "grad_norm": 0.0010053004371002316, | |
| "learning_rate": 1.359666339548577e-05, | |
| "loss": 0.0001, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 7.291462217860648, | |
| "grad_norm": 0.0015725187258794904, | |
| "learning_rate": 1.3547595682041217e-05, | |
| "loss": 0.0002, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 7.301275760549558, | |
| "grad_norm": 0.0009938733419403434, | |
| "learning_rate": 1.3498527968596663e-05, | |
| "loss": 0.0001, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 7.311089303238469, | |
| "grad_norm": 0.0009750658646225929, | |
| "learning_rate": 1.344946025515211e-05, | |
| "loss": 0.0001, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 7.32090284592738, | |
| "grad_norm": 0.0026528111193329096, | |
| "learning_rate": 1.3400392541707558e-05, | |
| "loss": 0.0001, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 7.33071638861629, | |
| "grad_norm": 0.0010182139230892062, | |
| "learning_rate": 1.3351324828263003e-05, | |
| "loss": 0.0001, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 7.340529931305201, | |
| "grad_norm": 0.0009615565068088472, | |
| "learning_rate": 1.330225711481845e-05, | |
| "loss": 0.0001, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 7.350343473994112, | |
| "grad_norm": 0.000971368863247335, | |
| "learning_rate": 1.3253189401373898e-05, | |
| "loss": 0.0004, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 7.360157016683022, | |
| "grad_norm": 0.027576476335525513, | |
| "learning_rate": 1.3204121687929343e-05, | |
| "loss": 0.0002, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.369970559371933, | |
| "grad_norm": 0.0009151269332505763, | |
| "learning_rate": 1.315505397448479e-05, | |
| "loss": 0.0003, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 7.379784102060844, | |
| "grad_norm": 0.0013021818595007062, | |
| "learning_rate": 1.3105986261040238e-05, | |
| "loss": 0.0001, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 7.389597644749754, | |
| "grad_norm": 0.001062211929820478, | |
| "learning_rate": 1.3056918547595683e-05, | |
| "loss": 0.0001, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 7.399411187438665, | |
| "grad_norm": 114.82591247558594, | |
| "learning_rate": 1.3007850834151127e-05, | |
| "loss": 0.029, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 7.409224730127576, | |
| "grad_norm": 0.0009047266212292016, | |
| "learning_rate": 1.2958783120706575e-05, | |
| "loss": 0.0001, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 7.419038272816486, | |
| "grad_norm": 0.0017496274085715413, | |
| "learning_rate": 1.2909715407262022e-05, | |
| "loss": 0.0001, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 7.428851815505397, | |
| "grad_norm": 0.0009102231124415994, | |
| "learning_rate": 1.2860647693817468e-05, | |
| "loss": 0.0001, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 7.438665358194308, | |
| "grad_norm": 0.0017243401380255818, | |
| "learning_rate": 1.2811579980372915e-05, | |
| "loss": 0.0003, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 7.448478900883218, | |
| "grad_norm": 0.05692388117313385, | |
| "learning_rate": 1.2762512266928362e-05, | |
| "loss": 0.0001, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 7.458292443572129, | |
| "grad_norm": 0.0009312523761764169, | |
| "learning_rate": 1.2713444553483808e-05, | |
| "loss": 0.0915, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 7.46810598626104, | |
| "grad_norm": 0.0009480075677856803, | |
| "learning_rate": 1.2664376840039255e-05, | |
| "loss": 0.0001, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 7.4779195289499505, | |
| "grad_norm": 0.0009222645312547684, | |
| "learning_rate": 1.2615309126594702e-05, | |
| "loss": 0.0002, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 7.4877330716388615, | |
| "grad_norm": 0.0009319439996033907, | |
| "learning_rate": 1.2566241413150148e-05, | |
| "loss": 0.0001, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 7.4975466143277725, | |
| "grad_norm": 0.0008977550896815956, | |
| "learning_rate": 1.2517173699705595e-05, | |
| "loss": 0.0001, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 7.507360157016683, | |
| "grad_norm": 0.0010047757532447577, | |
| "learning_rate": 1.246810598626104e-05, | |
| "loss": 0.0001, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 7.517173699705594, | |
| "grad_norm": 0.0038417112082242966, | |
| "learning_rate": 1.2419038272816486e-05, | |
| "loss": 0.0001, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 7.526987242394505, | |
| "grad_norm": 0.0010750400833785534, | |
| "learning_rate": 1.2369970559371934e-05, | |
| "loss": 0.0001, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 7.536800785083415, | |
| "grad_norm": 0.0008918473613448441, | |
| "learning_rate": 1.2320902845927381e-05, | |
| "loss": 0.0001, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 7.546614327772326, | |
| "grad_norm": 0.0010516536422073841, | |
| "learning_rate": 1.2271835132482827e-05, | |
| "loss": 0.0001, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 7.556427870461237, | |
| "grad_norm": 0.0009324781713075936, | |
| "learning_rate": 1.2222767419038274e-05, | |
| "loss": 0.0001, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 7.566241413150147, | |
| "grad_norm": 0.0009400816052220762, | |
| "learning_rate": 1.2173699705593721e-05, | |
| "loss": 0.0001, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 7.576054955839058, | |
| "grad_norm": 0.0008609534706920385, | |
| "learning_rate": 1.2124631992149165e-05, | |
| "loss": 0.0001, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 7.585868498527969, | |
| "grad_norm": 0.0009011939982883632, | |
| "learning_rate": 1.2075564278704612e-05, | |
| "loss": 0.0001, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 7.595682041216879, | |
| "grad_norm": 0.0008951441268436611, | |
| "learning_rate": 1.202649656526006e-05, | |
| "loss": 0.0001, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 7.60549558390579, | |
| "grad_norm": 0.0008742365753278136, | |
| "learning_rate": 1.1977428851815505e-05, | |
| "loss": 0.0001, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 7.615309126594701, | |
| "grad_norm": 0.17169933021068573, | |
| "learning_rate": 1.1928361138370952e-05, | |
| "loss": 0.0002, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 7.625122669283611, | |
| "grad_norm": 0.0008738868637010455, | |
| "learning_rate": 1.18792934249264e-05, | |
| "loss": 0.0001, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 7.634936211972522, | |
| "grad_norm": 0.0011609562207013369, | |
| "learning_rate": 1.1830225711481845e-05, | |
| "loss": 0.0001, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 7.644749754661433, | |
| "grad_norm": 0.0009239889914169908, | |
| "learning_rate": 1.1781157998037293e-05, | |
| "loss": 0.0001, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 7.654563297350343, | |
| "grad_norm": 0.0009010569774545729, | |
| "learning_rate": 1.1732090284592738e-05, | |
| "loss": 0.0001, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 7.664376840039254, | |
| "grad_norm": 0.000879693659953773, | |
| "learning_rate": 1.1683022571148185e-05, | |
| "loss": 0.0001, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 7.674190382728165, | |
| "grad_norm": 0.0008639395819045603, | |
| "learning_rate": 1.1633954857703631e-05, | |
| "loss": 0.0001, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 7.684003925417075, | |
| "grad_norm": 0.0008466942235827446, | |
| "learning_rate": 1.1584887144259078e-05, | |
| "loss": 0.0001, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 7.693817468105986, | |
| "grad_norm": 0.0008819219656288624, | |
| "learning_rate": 1.1535819430814526e-05, | |
| "loss": 0.0001, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 7.703631010794897, | |
| "grad_norm": 0.009510258212685585, | |
| "learning_rate": 1.1486751717369971e-05, | |
| "loss": 0.0001, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 7.713444553483807, | |
| "grad_norm": 0.0008892007754184306, | |
| "learning_rate": 1.1437684003925419e-05, | |
| "loss": 0.0001, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 7.723258096172718, | |
| "grad_norm": 0.0009460031287744641, | |
| "learning_rate": 1.1388616290480864e-05, | |
| "loss": 0.0001, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 7.733071638861629, | |
| "grad_norm": 0.0008965510060079396, | |
| "learning_rate": 1.133954857703631e-05, | |
| "loss": 0.0001, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 7.742885181550539, | |
| "grad_norm": 0.05526250973343849, | |
| "learning_rate": 1.1290480863591757e-05, | |
| "loss": 0.0001, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 7.75269872423945, | |
| "grad_norm": 0.000836291816085577, | |
| "learning_rate": 1.1241413150147204e-05, | |
| "loss": 0.0001, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 7.762512266928361, | |
| "grad_norm": 0.0008228803635574877, | |
| "learning_rate": 1.119234543670265e-05, | |
| "loss": 0.0001, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 7.772325809617271, | |
| "grad_norm": 0.0009072457323782146, | |
| "learning_rate": 1.1143277723258097e-05, | |
| "loss": 0.0001, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 7.782139352306182, | |
| "grad_norm": 0.0010595405474305153, | |
| "learning_rate": 1.1094210009813544e-05, | |
| "loss": 0.0001, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 7.791952894995093, | |
| "grad_norm": 0.0008154577808454633, | |
| "learning_rate": 1.1045142296368988e-05, | |
| "loss": 0.0001, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 7.8017664376840035, | |
| "grad_norm": 0.0009557644953019917, | |
| "learning_rate": 1.0996074582924436e-05, | |
| "loss": 0.0001, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 7.8115799803729145, | |
| "grad_norm": 0.0008630304364487529, | |
| "learning_rate": 1.0947006869479883e-05, | |
| "loss": 0.0001, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 7.8213935230618254, | |
| "grad_norm": 0.004290347453206778, | |
| "learning_rate": 1.0897939156035329e-05, | |
| "loss": 0.0001, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 7.8312070657507356, | |
| "grad_norm": 0.0008026896975934505, | |
| "learning_rate": 1.0848871442590776e-05, | |
| "loss": 0.0001, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 7.8410206084396465, | |
| "grad_norm": 0.0008485147845931351, | |
| "learning_rate": 1.0799803729146223e-05, | |
| "loss": 0.0001, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 7.8508341511285575, | |
| "grad_norm": 0.0009684371179901063, | |
| "learning_rate": 1.0750736015701669e-05, | |
| "loss": 0.0001, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.860647693817468, | |
| "grad_norm": 0.00081270607188344, | |
| "learning_rate": 1.0701668302257116e-05, | |
| "loss": 0.0001, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 7.870461236506379, | |
| "grad_norm": 0.0008527148747816682, | |
| "learning_rate": 1.0652600588812562e-05, | |
| "loss": 0.0001, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 7.88027477919529, | |
| "grad_norm": 0.0011228329967707396, | |
| "learning_rate": 1.0603532875368007e-05, | |
| "loss": 0.0001, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 7.8900883218842, | |
| "grad_norm": 0.0011605530744418502, | |
| "learning_rate": 1.0554465161923454e-05, | |
| "loss": 0.0001, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 7.899901864573111, | |
| "grad_norm": 0.0008033498888835311, | |
| "learning_rate": 1.0505397448478902e-05, | |
| "loss": 0.0001, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 7.909715407262022, | |
| "grad_norm": 0.0008764792000874877, | |
| "learning_rate": 1.0456329735034347e-05, | |
| "loss": 0.0955, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 7.919528949950932, | |
| "grad_norm": 0.04982365667819977, | |
| "learning_rate": 1.0407262021589795e-05, | |
| "loss": 0.0002, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 7.929342492639843, | |
| "grad_norm": 0.0008406474371440709, | |
| "learning_rate": 1.0358194308145242e-05, | |
| "loss": 0.0002, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 7.939156035328754, | |
| "grad_norm": 0.000985965714789927, | |
| "learning_rate": 1.0309126594700687e-05, | |
| "loss": 0.0002, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 7.948969578017664, | |
| "grad_norm": 0.0008393987664021552, | |
| "learning_rate": 1.0260058881256133e-05, | |
| "loss": 0.0002, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 7.958783120706575, | |
| "grad_norm": 0.0008538268739357591, | |
| "learning_rate": 1.021099116781158e-05, | |
| "loss": 0.0001, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 7.968596663395486, | |
| "grad_norm": 0.0054728141985833645, | |
| "learning_rate": 1.0161923454367028e-05, | |
| "loss": 0.0001, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 7.978410206084396, | |
| "grad_norm": 0.0009096296853385866, | |
| "learning_rate": 1.0112855740922473e-05, | |
| "loss": 0.0001, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 7.988223748773307, | |
| "grad_norm": 0.0008633875986561179, | |
| "learning_rate": 1.006378802747792e-05, | |
| "loss": 0.0001, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 7.998037291462218, | |
| "grad_norm": 0.0009331282926723361, | |
| "learning_rate": 1.0014720314033368e-05, | |
| "loss": 0.0001, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 8.007850834151128, | |
| "grad_norm": 0.0007973794708959758, | |
| "learning_rate": 9.965652600588813e-06, | |
| "loss": 0.0001, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 8.01766437684004, | |
| "grad_norm": 0.0007764511392451823, | |
| "learning_rate": 9.916584887144259e-06, | |
| "loss": 0.0001, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 8.02747791952895, | |
| "grad_norm": 0.0008435621275566518, | |
| "learning_rate": 9.867517173699706e-06, | |
| "loss": 0.0001, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 8.03729146221786, | |
| "grad_norm": 0.0008471392211504281, | |
| "learning_rate": 9.818449460255152e-06, | |
| "loss": 0.0001, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 8.047105004906772, | |
| "grad_norm": 0.0015691117150709033, | |
| "learning_rate": 9.7693817468106e-06, | |
| "loss": 0.0001, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 8.056918547595682, | |
| "grad_norm": 0.0007785743218846619, | |
| "learning_rate": 9.720314033366046e-06, | |
| "loss": 0.0002, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 8.066732090284592, | |
| "grad_norm": 0.001200017984956503, | |
| "learning_rate": 9.671246319921492e-06, | |
| "loss": 0.0001, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 8.076545632973504, | |
| "grad_norm": 0.0007911358843557537, | |
| "learning_rate": 9.62217860647694e-06, | |
| "loss": 0.0001, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 8.086359175662414, | |
| "grad_norm": 0.0007746540359221399, | |
| "learning_rate": 9.573110893032385e-06, | |
| "loss": 0.0002, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 8.096172718351324, | |
| "grad_norm": 0.0007667599711567163, | |
| "learning_rate": 9.52404317958783e-06, | |
| "loss": 0.0001, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 8.105986261040236, | |
| "grad_norm": 0.0008504064753651619, | |
| "learning_rate": 9.474975466143278e-06, | |
| "loss": 0.0001, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 8.115799803729146, | |
| "grad_norm": 0.0007812583935447037, | |
| "learning_rate": 9.425907752698725e-06, | |
| "loss": 0.0001, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 8.125613346418056, | |
| "grad_norm": 0.0013848438393324614, | |
| "learning_rate": 9.37684003925417e-06, | |
| "loss": 0.0001, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 8.135426889106968, | |
| "grad_norm": 0.0008914385107345879, | |
| "learning_rate": 9.327772325809618e-06, | |
| "loss": 0.0001, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 8.145240431795878, | |
| "grad_norm": 0.0007613406050950289, | |
| "learning_rate": 9.278704612365065e-06, | |
| "loss": 0.0004, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 8.155053974484789, | |
| "grad_norm": 0.0007612567278556526, | |
| "learning_rate": 9.229636898920511e-06, | |
| "loss": 0.0786, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 8.1648675171737, | |
| "grad_norm": 0.0008015862549655139, | |
| "learning_rate": 9.180569185475956e-06, | |
| "loss": 0.0001, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 8.17468105986261, | |
| "grad_norm": 0.0007999239605851471, | |
| "learning_rate": 9.131501472031404e-06, | |
| "loss": 0.0001, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 8.18449460255152, | |
| "grad_norm": 0.0008124898886308074, | |
| "learning_rate": 9.082433758586851e-06, | |
| "loss": 0.0001, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 8.194308145240432, | |
| "grad_norm": 0.0007925584213808179, | |
| "learning_rate": 9.033366045142297e-06, | |
| "loss": 0.0001, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 8.204121687929343, | |
| "grad_norm": 0.0007474345620721579, | |
| "learning_rate": 8.984298331697744e-06, | |
| "loss": 0.0001, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 8.213935230618253, | |
| "grad_norm": 0.00099629582837224, | |
| "learning_rate": 8.93523061825319e-06, | |
| "loss": 0.0001, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 8.223748773307165, | |
| "grad_norm": 0.0007709822966717184, | |
| "learning_rate": 8.886162904808637e-06, | |
| "loss": 0.0001, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 8.233562315996075, | |
| "grad_norm": 0.0008605083567090333, | |
| "learning_rate": 8.837095191364082e-06, | |
| "loss": 0.0001, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 8.243375858684985, | |
| "grad_norm": 0.0007616875227540731, | |
| "learning_rate": 8.78802747791953e-06, | |
| "loss": 0.0001, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 8.253189401373897, | |
| "grad_norm": 0.0008648928487673402, | |
| "learning_rate": 8.738959764474975e-06, | |
| "loss": 0.0001, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 8.263002944062807, | |
| "grad_norm": 0.0007865010411478579, | |
| "learning_rate": 8.689892051030423e-06, | |
| "loss": 0.0001, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 8.272816486751717, | |
| "grad_norm": 0.0007759992149658501, | |
| "learning_rate": 8.64082433758587e-06, | |
| "loss": 0.0001, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 8.282630029440629, | |
| "grad_norm": 0.0007434001890942454, | |
| "learning_rate": 8.591756624141315e-06, | |
| "loss": 0.0001, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 8.292443572129539, | |
| "grad_norm": 0.0007561213569715619, | |
| "learning_rate": 8.542688910696763e-06, | |
| "loss": 0.0001, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 8.302257114818449, | |
| "grad_norm": 0.0008792446460574865, | |
| "learning_rate": 8.493621197252208e-06, | |
| "loss": 0.0001, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 8.31207065750736, | |
| "grad_norm": 0.0008201678283512592, | |
| "learning_rate": 8.444553483807654e-06, | |
| "loss": 0.0001, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 8.321884200196271, | |
| "grad_norm": 0.0007656855159439147, | |
| "learning_rate": 8.395485770363101e-06, | |
| "loss": 0.0001, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 8.331697742885181, | |
| "grad_norm": 0.0009525881614536047, | |
| "learning_rate": 8.346418056918548e-06, | |
| "loss": 0.0001, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 8.341511285574093, | |
| "grad_norm": 0.0007570137386210263, | |
| "learning_rate": 8.297350343473994e-06, | |
| "loss": 0.0001, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.351324828263003, | |
| "grad_norm": 0.002343561267480254, | |
| "learning_rate": 8.248282630029441e-06, | |
| "loss": 0.0001, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 8.361138370951913, | |
| "grad_norm": 0.000724265119060874, | |
| "learning_rate": 8.199214916584889e-06, | |
| "loss": 0.0001, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 8.370951913640825, | |
| "grad_norm": 0.0007559550576843321, | |
| "learning_rate": 8.150147203140333e-06, | |
| "loss": 0.0001, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 8.380765456329735, | |
| "grad_norm": 0.0007767178467474878, | |
| "learning_rate": 8.10107948969578e-06, | |
| "loss": 0.0001, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 8.390578999018645, | |
| "grad_norm": 0.0014819581992924213, | |
| "learning_rate": 8.052011776251227e-06, | |
| "loss": 0.095, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 8.400392541707557, | |
| "grad_norm": 0.0008053283672779799, | |
| "learning_rate": 8.002944062806673e-06, | |
| "loss": 0.0001, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 8.410206084396467, | |
| "grad_norm": 0.000741046154871583, | |
| "learning_rate": 7.95387634936212e-06, | |
| "loss": 0.0001, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 8.420019627085377, | |
| "grad_norm": 0.0009256862103939056, | |
| "learning_rate": 7.904808635917567e-06, | |
| "loss": 0.0001, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 8.42983316977429, | |
| "grad_norm": 0.0007935376488603652, | |
| "learning_rate": 7.855740922473013e-06, | |
| "loss": 0.0001, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 8.4396467124632, | |
| "grad_norm": 0.010961124673485756, | |
| "learning_rate": 7.80667320902846e-06, | |
| "loss": 0.0002, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 8.44946025515211, | |
| "grad_norm": 0.006000032182782888, | |
| "learning_rate": 7.757605495583906e-06, | |
| "loss": 0.0001, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 8.459273797841021, | |
| "grad_norm": 0.008318673819303513, | |
| "learning_rate": 7.708537782139353e-06, | |
| "loss": 0.0001, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 8.469087340529931, | |
| "grad_norm": 0.0007991963066160679, | |
| "learning_rate": 7.659470068694799e-06, | |
| "loss": 0.0616, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 8.478900883218841, | |
| "grad_norm": 0.0011054244823753834, | |
| "learning_rate": 7.610402355250246e-06, | |
| "loss": 0.0002, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 8.488714425907753, | |
| "grad_norm": 0.0007553680334240198, | |
| "learning_rate": 7.561334641805692e-06, | |
| "loss": 0.0001, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 8.498527968596663, | |
| "grad_norm": 0.0007292833179235458, | |
| "learning_rate": 7.512266928361139e-06, | |
| "loss": 0.0001, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 8.508341511285574, | |
| "grad_norm": 0.0007289135828614235, | |
| "learning_rate": 7.463199214916586e-06, | |
| "loss": 0.0001, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 8.518155053974485, | |
| "grad_norm": 0.0007795288693159819, | |
| "learning_rate": 7.414131501472031e-06, | |
| "loss": 0.0001, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 8.527968596663396, | |
| "grad_norm": 0.0007076899637468159, | |
| "learning_rate": 7.365063788027478e-06, | |
| "loss": 0.0001, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 8.537782139352306, | |
| "grad_norm": 0.0007375687710009515, | |
| "learning_rate": 7.3159960745829246e-06, | |
| "loss": 0.0001, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 8.547595682041218, | |
| "grad_norm": 0.0007277546101249754, | |
| "learning_rate": 7.266928361138371e-06, | |
| "loss": 0.0001, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 8.557409224730128, | |
| "grad_norm": 0.0007561793318018317, | |
| "learning_rate": 7.217860647693818e-06, | |
| "loss": 0.0001, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 8.567222767419038, | |
| "grad_norm": 0.000729912135284394, | |
| "learning_rate": 7.168792934249265e-06, | |
| "loss": 0.0001, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 8.57703631010795, | |
| "grad_norm": 0.0007444035727530718, | |
| "learning_rate": 7.119725220804711e-06, | |
| "loss": 0.0004, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 8.58684985279686, | |
| "grad_norm": 0.002724673831835389, | |
| "learning_rate": 7.0706575073601584e-06, | |
| "loss": 0.0001, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 8.59666339548577, | |
| "grad_norm": 0.0009111511171795428, | |
| "learning_rate": 7.021589793915603e-06, | |
| "loss": 0.0001, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 8.606476938174682, | |
| "grad_norm": 0.0007101638475432992, | |
| "learning_rate": 6.97252208047105e-06, | |
| "loss": 0.0638, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 8.616290480863592, | |
| "grad_norm": 0.0007315074326470494, | |
| "learning_rate": 6.923454367026497e-06, | |
| "loss": 0.0001, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 8.626104023552502, | |
| "grad_norm": 0.0007471499848179519, | |
| "learning_rate": 6.874386653581943e-06, | |
| "loss": 0.0001, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 8.635917566241414, | |
| "grad_norm": 0.001743357628583908, | |
| "learning_rate": 6.82531894013739e-06, | |
| "loss": 0.0001, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 8.645731108930324, | |
| "grad_norm": 0.0007213126518763602, | |
| "learning_rate": 6.776251226692837e-06, | |
| "loss": 0.0001, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 8.655544651619234, | |
| "grad_norm": 0.006596927065402269, | |
| "learning_rate": 6.7271835132482835e-06, | |
| "loss": 0.0001, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 8.665358194308146, | |
| "grad_norm": 0.0007276834803633392, | |
| "learning_rate": 6.678115799803729e-06, | |
| "loss": 0.0001, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 8.675171736997056, | |
| "grad_norm": 0.0007477464969269931, | |
| "learning_rate": 6.6290480863591756e-06, | |
| "loss": 0.0001, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 8.684985279685966, | |
| "grad_norm": 0.0008145422907546163, | |
| "learning_rate": 6.579980372914622e-06, | |
| "loss": 0.0758, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 8.694798822374878, | |
| "grad_norm": 0.0007996530621312559, | |
| "learning_rate": 6.530912659470069e-06, | |
| "loss": 0.0001, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 8.704612365063788, | |
| "grad_norm": 0.0007228550384752452, | |
| "learning_rate": 6.481844946025516e-06, | |
| "loss": 0.0003, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 8.714425907752698, | |
| "grad_norm": 0.0007497305050492287, | |
| "learning_rate": 6.432777232580962e-06, | |
| "loss": 0.0001, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 8.72423945044161, | |
| "grad_norm": 0.0018314624903723598, | |
| "learning_rate": 6.3837095191364094e-06, | |
| "loss": 0.0001, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 8.73405299313052, | |
| "grad_norm": 0.005474488250911236, | |
| "learning_rate": 6.334641805691854e-06, | |
| "loss": 0.0001, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 8.74386653581943, | |
| "grad_norm": 0.0007096781046129763, | |
| "learning_rate": 6.2855740922473015e-06, | |
| "loss": 0.0004, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 8.753680078508342, | |
| "grad_norm": 0.0007362039759755135, | |
| "learning_rate": 6.236506378802748e-06, | |
| "loss": 0.0001, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 8.763493621197252, | |
| "grad_norm": 0.0007442686473950744, | |
| "learning_rate": 6.187438665358194e-06, | |
| "loss": 0.0698, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 8.773307163886162, | |
| "grad_norm": 0.002618088386952877, | |
| "learning_rate": 6.138370951913641e-06, | |
| "loss": 0.0001, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 8.783120706575074, | |
| "grad_norm": 0.0007100084330886602, | |
| "learning_rate": 6.089303238469088e-06, | |
| "loss": 0.0001, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 8.792934249263984, | |
| "grad_norm": 0.0009615476010367274, | |
| "learning_rate": 6.040235525024534e-06, | |
| "loss": 0.0001, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 8.802747791952894, | |
| "grad_norm": 0.0011131309438496828, | |
| "learning_rate": 5.991167811579981e-06, | |
| "loss": 0.0001, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 8.812561334641806, | |
| "grad_norm": 0.000781961134634912, | |
| "learning_rate": 5.942100098135427e-06, | |
| "loss": 0.0001, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 8.822374877330716, | |
| "grad_norm": 0.0007456222083419561, | |
| "learning_rate": 5.893032384690874e-06, | |
| "loss": 0.0001, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 8.832188420019627, | |
| "grad_norm": 0.0007512273732572794, | |
| "learning_rate": 5.84396467124632e-06, | |
| "loss": 0.0001, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.842001962708538, | |
| "grad_norm": 0.0007723625167272985, | |
| "learning_rate": 5.794896957801767e-06, | |
| "loss": 0.0001, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 8.851815505397449, | |
| "grad_norm": 0.0006950558163225651, | |
| "learning_rate": 5.745829244357213e-06, | |
| "loss": 0.0001, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 8.861629048086359, | |
| "grad_norm": 0.0006956023280508816, | |
| "learning_rate": 5.69676153091266e-06, | |
| "loss": 0.0001, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 8.87144259077527, | |
| "grad_norm": 0.0006997225573286414, | |
| "learning_rate": 5.647693817468106e-06, | |
| "loss": 0.0001, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 8.88125613346418, | |
| "grad_norm": 0.0007857059827074409, | |
| "learning_rate": 5.5986261040235525e-06, | |
| "loss": 0.0001, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 8.89106967615309, | |
| "grad_norm": 0.0020336457528173923, | |
| "learning_rate": 5.549558390579e-06, | |
| "loss": 0.0001, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 8.900883218842003, | |
| "grad_norm": 0.0007115107146091759, | |
| "learning_rate": 5.500490677134445e-06, | |
| "loss": 0.0001, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 8.910696761530913, | |
| "grad_norm": 0.0007492152508348227, | |
| "learning_rate": 5.451422963689893e-06, | |
| "loss": 0.0001, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 8.920510304219823, | |
| "grad_norm": 0.0029001296497881413, | |
| "learning_rate": 5.402355250245339e-06, | |
| "loss": 0.0001, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 8.930323846908735, | |
| "grad_norm": 0.0006878664717078209, | |
| "learning_rate": 5.3532875368007855e-06, | |
| "loss": 0.0002, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 8.940137389597645, | |
| "grad_norm": 0.0007307238993234932, | |
| "learning_rate": 5.304219823356232e-06, | |
| "loss": 0.0001, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 8.949950932286555, | |
| "grad_norm": 0.0007018332253210247, | |
| "learning_rate": 5.255152109911678e-06, | |
| "loss": 0.0001, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 8.959764474975467, | |
| "grad_norm": 0.01293295156210661, | |
| "learning_rate": 5.206084396467125e-06, | |
| "loss": 0.0001, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 8.969578017664377, | |
| "grad_norm": 0.0006952588446438313, | |
| "learning_rate": 5.157016683022571e-06, | |
| "loss": 0.0001, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 8.979391560353287, | |
| "grad_norm": 0.0006813241052441299, | |
| "learning_rate": 5.107948969578018e-06, | |
| "loss": 0.0002, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 8.989205103042199, | |
| "grad_norm": 0.0007726841140538454, | |
| "learning_rate": 5.058881256133464e-06, | |
| "loss": 0.0001, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 8.999018645731109, | |
| "grad_norm": 0.0008112427312880754, | |
| "learning_rate": 5.0098135426889115e-06, | |
| "loss": 0.0001, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 9.008832188420019, | |
| "grad_norm": 0.0007123980321921408, | |
| "learning_rate": 4.960745829244357e-06, | |
| "loss": 0.0001, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 9.018645731108931, | |
| "grad_norm": 0.0006635936442762613, | |
| "learning_rate": 4.9116781157998035e-06, | |
| "loss": 0.0001, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 9.028459273797841, | |
| "grad_norm": 0.0006985082291066647, | |
| "learning_rate": 4.862610402355251e-06, | |
| "loss": 0.0001, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 9.038272816486751, | |
| "grad_norm": 0.0009679241920821369, | |
| "learning_rate": 4.813542688910697e-06, | |
| "loss": 0.0001, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 9.048086359175663, | |
| "grad_norm": 0.0007227755268104374, | |
| "learning_rate": 4.764474975466144e-06, | |
| "loss": 0.0001, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 9.057899901864573, | |
| "grad_norm": 0.0006677210330963135, | |
| "learning_rate": 4.71540726202159e-06, | |
| "loss": 0.0001, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 9.067713444553483, | |
| "grad_norm": 0.0007603775011375546, | |
| "learning_rate": 4.6663395485770365e-06, | |
| "loss": 0.0001, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 9.077526987242395, | |
| "grad_norm": 0.0007459863554686308, | |
| "learning_rate": 4.617271835132483e-06, | |
| "loss": 0.0001, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 9.087340529931305, | |
| "grad_norm": 0.001274227281101048, | |
| "learning_rate": 4.568204121687929e-06, | |
| "loss": 0.0001, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 9.097154072620215, | |
| "grad_norm": 0.0008047525770962238, | |
| "learning_rate": 4.519136408243376e-06, | |
| "loss": 0.0001, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 9.106967615309127, | |
| "grad_norm": 0.0009299516095779836, | |
| "learning_rate": 4.470068694798823e-06, | |
| "loss": 0.0001, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 9.116781157998037, | |
| "grad_norm": 0.0007023366051726043, | |
| "learning_rate": 4.421000981354269e-06, | |
| "loss": 0.0001, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 9.126594700686947, | |
| "grad_norm": 0.0008517011883668602, | |
| "learning_rate": 4.371933267909715e-06, | |
| "loss": 0.0001, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 9.13640824337586, | |
| "grad_norm": 0.0007383475895039737, | |
| "learning_rate": 4.3228655544651625e-06, | |
| "loss": 0.0001, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 9.14622178606477, | |
| "grad_norm": 0.0006836687098257244, | |
| "learning_rate": 4.273797841020609e-06, | |
| "loss": 0.0011, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 9.15603532875368, | |
| "grad_norm": 0.0007958838832564652, | |
| "learning_rate": 4.224730127576055e-06, | |
| "loss": 0.0001, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 9.165848871442591, | |
| "grad_norm": 0.0051173255778849125, | |
| "learning_rate": 4.175662414131502e-06, | |
| "loss": 0.0736, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 9.175662414131502, | |
| "grad_norm": 0.0006751357577741146, | |
| "learning_rate": 4.126594700686948e-06, | |
| "loss": 0.0001, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 9.185475956820412, | |
| "grad_norm": 0.0008489376050420105, | |
| "learning_rate": 4.077526987242395e-06, | |
| "loss": 0.0001, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 9.195289499509324, | |
| "grad_norm": 0.0006518946029245853, | |
| "learning_rate": 4.028459273797841e-06, | |
| "loss": 0.0001, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 9.205103042198234, | |
| "grad_norm": 0.0006742589175701141, | |
| "learning_rate": 3.9793915603532875e-06, | |
| "loss": 0.0002, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 9.214916584887144, | |
| "grad_norm": 0.0006998268072493374, | |
| "learning_rate": 3.930323846908735e-06, | |
| "loss": 0.0001, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 9.224730127576056, | |
| "grad_norm": 0.0006446267361752689, | |
| "learning_rate": 3.8812561334641804e-06, | |
| "loss": 0.0001, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 9.234543670264966, | |
| "grad_norm": 0.0006532249972224236, | |
| "learning_rate": 3.832188420019627e-06, | |
| "loss": 0.0001, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 9.244357212953876, | |
| "grad_norm": 0.0023807811085134745, | |
| "learning_rate": 3.7831207065750737e-06, | |
| "loss": 0.0279, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 9.254170755642788, | |
| "grad_norm": 39.921600341796875, | |
| "learning_rate": 3.7340529931305206e-06, | |
| "loss": 0.0554, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 9.263984298331698, | |
| "grad_norm": 0.08718841522932053, | |
| "learning_rate": 3.6849852796859666e-06, | |
| "loss": 0.0003, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 9.273797841020608, | |
| "grad_norm": 0.0006705551641061902, | |
| "learning_rate": 3.6359175662414135e-06, | |
| "loss": 0.0001, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 9.28361138370952, | |
| "grad_norm": 0.0007212602067738771, | |
| "learning_rate": 3.58684985279686e-06, | |
| "loss": 0.0001, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 9.29342492639843, | |
| "grad_norm": 0.0006956434808671474, | |
| "learning_rate": 3.5377821393523068e-06, | |
| "loss": 0.0001, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 9.30323846908734, | |
| "grad_norm": 0.0007320587756112218, | |
| "learning_rate": 3.488714425907753e-06, | |
| "loss": 0.0001, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 9.313052011776252, | |
| "grad_norm": 0.0006620934000238776, | |
| "learning_rate": 3.4396467124631992e-06, | |
| "loss": 0.0001, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 9.322865554465162, | |
| "grad_norm": 0.0008825812255963683, | |
| "learning_rate": 3.390578999018646e-06, | |
| "loss": 0.0001, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.332679097154072, | |
| "grad_norm": 0.000674651877488941, | |
| "learning_rate": 3.341511285574092e-06, | |
| "loss": 0.0001, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 9.342492639842984, | |
| "grad_norm": 0.0006929274532012641, | |
| "learning_rate": 3.292443572129539e-06, | |
| "loss": 0.0001, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 9.352306182531894, | |
| "grad_norm": 0.0007789958617649972, | |
| "learning_rate": 3.2433758586849854e-06, | |
| "loss": 0.0001, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 9.362119725220804, | |
| "grad_norm": 0.0006808873731642962, | |
| "learning_rate": 3.1943081452404323e-06, | |
| "loss": 0.0001, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 9.371933267909716, | |
| "grad_norm": 0.0006374814547598362, | |
| "learning_rate": 3.1452404317958783e-06, | |
| "loss": 0.0001, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 9.381746810598626, | |
| "grad_norm": 0.0006496473215520382, | |
| "learning_rate": 3.0961727183513247e-06, | |
| "loss": 0.0921, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 9.391560353287536, | |
| "grad_norm": 0.0006751060136593878, | |
| "learning_rate": 3.0471050049067716e-06, | |
| "loss": 0.0001, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 9.401373895976448, | |
| "grad_norm": 0.0006818071124143898, | |
| "learning_rate": 2.998037291462218e-06, | |
| "loss": 0.0001, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 9.411187438665358, | |
| "grad_norm": 0.008655051700770855, | |
| "learning_rate": 2.9489695780176645e-06, | |
| "loss": 0.0182, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 9.421000981354268, | |
| "grad_norm": 0.0007353053661063313, | |
| "learning_rate": 2.899901864573111e-06, | |
| "loss": 0.0001, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 9.43081452404318, | |
| "grad_norm": 0.0007057326729409397, | |
| "learning_rate": 2.8508341511285574e-06, | |
| "loss": 0.0001, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 9.44062806673209, | |
| "grad_norm": 0.0007384234922938049, | |
| "learning_rate": 2.8017664376840042e-06, | |
| "loss": 0.0004, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 9.450441609421, | |
| "grad_norm": 0.0007162457914091647, | |
| "learning_rate": 2.7526987242394502e-06, | |
| "loss": 0.0001, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 9.460255152109912, | |
| "grad_norm": 0.002138860058039427, | |
| "learning_rate": 2.703631010794897e-06, | |
| "loss": 0.0001, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 9.470068694798822, | |
| "grad_norm": 0.0006910859956406057, | |
| "learning_rate": 2.6545632973503435e-06, | |
| "loss": 0.0704, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 9.479882237487733, | |
| "grad_norm": 0.0006951667019166052, | |
| "learning_rate": 2.6054955839057904e-06, | |
| "loss": 0.0001, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 9.489695780176644, | |
| "grad_norm": 0.0007034169393591583, | |
| "learning_rate": 2.5564278704612364e-06, | |
| "loss": 0.0001, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 9.499509322865554, | |
| "grad_norm": 0.0009360564290545881, | |
| "learning_rate": 2.5073601570166833e-06, | |
| "loss": 0.0001, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 9.509322865554465, | |
| "grad_norm": 0.0009853884112089872, | |
| "learning_rate": 2.4582924435721297e-06, | |
| "loss": 0.0001, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 9.519136408243376, | |
| "grad_norm": 0.0009145635995082557, | |
| "learning_rate": 2.409224730127576e-06, | |
| "loss": 0.0001, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 9.528949950932287, | |
| "grad_norm": 0.000657937373034656, | |
| "learning_rate": 2.3601570166830226e-06, | |
| "loss": 0.0001, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 9.538763493621197, | |
| "grad_norm": 0.0009716741042211652, | |
| "learning_rate": 2.3110893032384695e-06, | |
| "loss": 0.0001, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 9.548577036310109, | |
| "grad_norm": 0.0008224455523304641, | |
| "learning_rate": 2.262021589793916e-06, | |
| "loss": 0.0001, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 9.558390578999019, | |
| "grad_norm": 0.0007166486466303468, | |
| "learning_rate": 2.212953876349362e-06, | |
| "loss": 0.0001, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 9.568204121687929, | |
| "grad_norm": 0.0006854226812720299, | |
| "learning_rate": 2.1638861629048088e-06, | |
| "loss": 0.0001, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 9.57801766437684, | |
| "grad_norm": 0.0013223073910921812, | |
| "learning_rate": 2.1148184494602552e-06, | |
| "loss": 0.0001, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 9.58783120706575, | |
| "grad_norm": 0.0006512215477414429, | |
| "learning_rate": 2.0657507360157017e-06, | |
| "loss": 0.0001, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 9.59764474975466, | |
| "grad_norm": 0.0006538184825330973, | |
| "learning_rate": 2.016683022571148e-06, | |
| "loss": 0.0001, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 9.607458292443573, | |
| "grad_norm": 0.0006954250857234001, | |
| "learning_rate": 1.967615309126595e-06, | |
| "loss": 0.0001, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 9.617271835132483, | |
| "grad_norm": 0.0006559567409567535, | |
| "learning_rate": 1.9185475956820414e-06, | |
| "loss": 0.0001, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 9.627085377821393, | |
| "grad_norm": 0.0012906268239021301, | |
| "learning_rate": 1.8694798822374878e-06, | |
| "loss": 0.0001, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 9.636898920510305, | |
| "grad_norm": 0.0006794478395022452, | |
| "learning_rate": 1.8204121687929343e-06, | |
| "loss": 0.0001, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 9.646712463199215, | |
| "grad_norm": 0.0007485067471861839, | |
| "learning_rate": 1.771344455348381e-06, | |
| "loss": 0.0001, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 9.656526005888125, | |
| "grad_norm": 0.0007018350879661739, | |
| "learning_rate": 1.7222767419038274e-06, | |
| "loss": 0.0001, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 9.666339548577037, | |
| "grad_norm": 0.000663910701405257, | |
| "learning_rate": 1.6732090284592738e-06, | |
| "loss": 0.0001, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 9.676153091265947, | |
| "grad_norm": 0.000718809780664742, | |
| "learning_rate": 1.6241413150147205e-06, | |
| "loss": 0.0001, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 9.685966633954857, | |
| "grad_norm": 0.0008578874403610826, | |
| "learning_rate": 1.5750736015701667e-06, | |
| "loss": 0.0001, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 9.695780176643769, | |
| "grad_norm": 0.0007033746223896742, | |
| "learning_rate": 1.5260058881256136e-06, | |
| "loss": 0.0001, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 9.70559371933268, | |
| "grad_norm": 0.00067708152346313, | |
| "learning_rate": 1.47693817468106e-06, | |
| "loss": 0.0001, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 9.71540726202159, | |
| "grad_norm": 0.0006639899802394211, | |
| "learning_rate": 1.4278704612365064e-06, | |
| "loss": 0.0001, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 9.725220804710501, | |
| "grad_norm": 0.0006598685868084431, | |
| "learning_rate": 1.3788027477919529e-06, | |
| "loss": 0.0001, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 9.735034347399411, | |
| "grad_norm": 0.01395090576261282, | |
| "learning_rate": 1.3297350343473993e-06, | |
| "loss": 0.0001, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 9.744847890088321, | |
| "grad_norm": 0.0008143746526911855, | |
| "learning_rate": 1.280667320902846e-06, | |
| "loss": 0.0001, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 9.754661432777233, | |
| "grad_norm": 0.0010220261756330729, | |
| "learning_rate": 1.2315996074582924e-06, | |
| "loss": 0.0001, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 9.764474975466143, | |
| "grad_norm": 0.003531807102262974, | |
| "learning_rate": 1.182531894013739e-06, | |
| "loss": 0.0001, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 9.774288518155053, | |
| "grad_norm": 0.0006864424794912338, | |
| "learning_rate": 1.1334641805691855e-06, | |
| "loss": 0.0001, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 9.784102060843965, | |
| "grad_norm": 0.0008860233356244862, | |
| "learning_rate": 1.0843964671246322e-06, | |
| "loss": 0.0001, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 9.793915603532875, | |
| "grad_norm": 0.001267165644094348, | |
| "learning_rate": 1.0353287536800786e-06, | |
| "loss": 0.0001, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 9.803729146221785, | |
| "grad_norm": 0.0006668745772913098, | |
| "learning_rate": 9.86261040235525e-07, | |
| "loss": 0.0001, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 9.813542688910697, | |
| "grad_norm": 0.000662625883705914, | |
| "learning_rate": 9.371933267909717e-07, | |
| "loss": 0.0001, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.823356231599607, | |
| "grad_norm": 0.0006619680789299309, | |
| "learning_rate": 8.881256133464181e-07, | |
| "loss": 0.0001, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 9.833169774288518, | |
| "grad_norm": 0.000696695176884532, | |
| "learning_rate": 8.390578999018647e-07, | |
| "loss": 0.0001, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 9.84298331697743, | |
| "grad_norm": 0.0006725791026838124, | |
| "learning_rate": 7.89990186457311e-07, | |
| "loss": 0.0001, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 9.85279685966634, | |
| "grad_norm": 0.0006575717707164586, | |
| "learning_rate": 7.409224730127577e-07, | |
| "loss": 0.0001, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 9.86261040235525, | |
| "grad_norm": 0.0006885197362862527, | |
| "learning_rate": 6.918547595682042e-07, | |
| "loss": 0.0001, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 9.872423945044162, | |
| "grad_norm": 0.0018992492696270347, | |
| "learning_rate": 6.427870461236506e-07, | |
| "loss": 0.0185, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 9.882237487733072, | |
| "grad_norm": 0.000654397183097899, | |
| "learning_rate": 5.937193326790972e-07, | |
| "loss": 0.0001, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 9.892051030421982, | |
| "grad_norm": 0.0006650349241681397, | |
| "learning_rate": 5.446516192345437e-07, | |
| "loss": 0.0001, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 9.901864573110894, | |
| "grad_norm": 0.0006798275862820446, | |
| "learning_rate": 4.955839057899902e-07, | |
| "loss": 0.0001, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 9.911678115799804, | |
| "grad_norm": 0.0006500816671177745, | |
| "learning_rate": 4.4651619234543677e-07, | |
| "loss": 0.0001, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 9.921491658488714, | |
| "grad_norm": 0.0008393925963900983, | |
| "learning_rate": 3.9744847890088327e-07, | |
| "loss": 0.0001, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 9.931305201177626, | |
| "grad_norm": 0.0007067256956361234, | |
| "learning_rate": 3.4838076545632976e-07, | |
| "loss": 0.0001, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 9.941118743866536, | |
| "grad_norm": 0.0007005089428275824, | |
| "learning_rate": 2.9931305201177625e-07, | |
| "loss": 0.0002, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 9.950932286555446, | |
| "grad_norm": 0.0006531529943458736, | |
| "learning_rate": 2.502453385672228e-07, | |
| "loss": 0.0001, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 9.960745829244358, | |
| "grad_norm": 0.0009129344252869487, | |
| "learning_rate": 2.0117762512266932e-07, | |
| "loss": 0.0001, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 9.970559371933268, | |
| "grad_norm": 0.0006892773672007024, | |
| "learning_rate": 1.521099116781158e-07, | |
| "loss": 0.0001, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 9.980372914622178, | |
| "grad_norm": 0.0006506032077595592, | |
| "learning_rate": 1.0304219823356231e-07, | |
| "loss": 0.0001, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 9.99018645731109, | |
| "grad_norm": 0.0006497541908174753, | |
| "learning_rate": 5.3974484789008834e-08, | |
| "loss": 0.0001, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.0008801518124528229, | |
| "learning_rate": 4.906771344455348e-09, | |
| "loss": 0.0001, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 10190, | |
| "total_flos": 6.31327239390081e+18, | |
| "train_loss": 0.024946213553118556, | |
| "train_runtime": 4093.664, | |
| "train_samples_per_second": 19.901, | |
| "train_steps_per_second": 2.489 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10190, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.31327239390081e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |